|
GTPin
|
The Bounds_check is a GTPin tool for profiling out-of-bounds accesses to memory buffers in GPU kernels. The tool leverages High Level Instrumentation interface (HLIF).
To run Bounds_check tool use the following command:
Profilers/Bin/gtpin -t bounds_check [bounds_check args] [GTPin args] -- app [application args]
The output example below shows the results of profiling the following kernel
__kernel void vector_copy(__global char* outB, __global char* inB, int oob_offset) { uint gid = get_global_id(0); outB[gid] = inB[gid + oob_offset]; }
which was run with oob_offset value 45:
------------------------------------------------------------------------------------------------------------------------
0: vector_copy___CS_asm950d7dfbd2b6e54b_simd32_950d7dfbd2b6e54b_0
------------------------------------------------------------------------------------------------------------------------
Dispatch ID Instruction ID #OOB violations Execution descriptor
------------------------------------------------------------------------------------------------------------------------
0 43 16 0 3 0 0
0 44 29 0 3 0 0
The instruction #43 performed 16 and instruction #44 performed 29 out-of-bounds accesses. Overall, the kernel accessed out of buffer bounds 45 times.
(Back to the list of all GTPin Sample Tools)
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2026 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file A tool that detects out-of-bounds (OOB) memory accesses in kernels 00009 */ 00010 00011 #ifndef BOUNDS_CHECK_H_ 00012 #define BOUNDS_CHECK_H_ 00013 00014 #include "hlif_basic_defs.h" 00015 #include "block_2d.h" 00016 00017 #if defined(__cplusplus) 00018 #include "gtpin_api.h" 00019 using namespace gtpin; 00020 #endif 00021 00022 #pragma pack(push, 8) 00023 00024 /* ============================================================================================= */ 00025 // Struct MemRange 00026 /* ============================================================================================= */ 00027 /// Range in the memory address space 00028 typedef struct MemRange 00029 { 00030 uintptr_t base; ///< Base address of the range 00031 size_t size; ///< Size of the range in bytes 00032 00033 #if defined(__cplusplus) 00034 constexpr MemRange(uintptr_t b = 0, size_t s = 0) : base(b), size(s) {} ///< Constructor 00035 constexpr MemRange(const AddrRange& r) : base(r.Base()), size(r.Size()) {} ///< Constructor 00036 #endif 00037 } MemRange; 00038 00039 /* ============================================================================================= */ 00040 // Struct MemBounds 00041 /* ============================================================================================= */ 00042 /// Flexible (variable size) array of memory ranges that represent bounds of available memory resources 00043 typedef struct MemBounds 00044 { 00045 uint32_t numRanges; ///< Number of elements in the 'ranges' array 00046 MemRange ranges[1]; ///< Flexible array of memory ranges 00047 00048 #if defined(__cplusplus) 00049 /// @return Size of this structure in bytes 00050 constexpr size_t SizeOf() const 00051 { return (sizeof(MemBounds) - sizeof(MemRange) + (numRanges * sizeof(MemRange))); } 00052 00053 /// Given a number of memory ranges, return size of this structure in bytes 00054 static constexpr size_t SizeOf(uint32_t numRanges) 00055 { return (sizeof(MemBounds) - sizeof(MemRange) + (numRanges * sizeof(MemRange))); } 00056 #endif 00057 00058 } MemBounds; 00059 00060 /* ============================================================================================= */ 00061 // Struct BoundsCheckArgs 00062 /* ============================================================================================= */ 00063 /// Common arguments of HLI functions that detect OOB violations in memory access instructions 00064 typedef struct BoundsCheckArgs 00065 { 00066 struct 00067 { 00068 uint32_t numAddresses; ///< Number of elements in the address payload of the instruction 00069 uint32_t dataSize; ///< Size, in bytes, of the memory range referenced by the corresponding address 00070 uint32_t surfaceSize; ///< Size, in bytes, of the available memory space for a surface access. 00071 ///< This value is valid for stateful (BTS), bindless (SS), and SLM accesses only. 00072 ///< For other address models, it is set to UINT32_MAX. 00073 } in; 00074 struct 00075 { 00076 uint32_t oobCount; ///< Counter of detected OOB violations 00077 }out; 00078 00079 #if defined(__cplusplus) 00080 /// Constructor 00081 BoundsCheckArgs(uint32_t nAddr = 0, uint32_t dSize = 0, uint32_t sSize = 0) : 00082 in({ nAddr, dSize, sSize }), out({ 0 }) {} 00083 #endif 00084 } BoundsCheckArgs; 00085 00086 /* ============================================================================================= */ 00087 // Function CheckA64Access 00088 /* ============================================================================================= */ 00089 /*! 00090 * @brief HLI function that detects OOB violations in the FLAT A64 memory access 00091 * @param[in] allocatedBuffers Array of global global memory buffers available to the kernel 00092 * @param[in] addr Base address of the accessed memory range 00093 * @param[in] accessMask Per-channel mask of memory accesses 00094 * @param[in][out] boundsCheckArgs Information about memory access instruction and results of the bounds check 00095 */ 00096 IGC_STACK_CALL void CheckA64Access(__global const MemBounds* allocatedBuffers, 00097 uint64_t addr, 00098 uint32_t accessMask, 00099 __global BoundsCheckArgs* boundsCheckArgs); 00100 #if defined(__cplusplus) 00101 using CheckA64AccessFunc = GtHliFunction<void, const MemBounds*, uint64_t, uint32_t, BoundsCheckArgs*>; 00102 #endif 00103 00104 /* ============================================================================================= */ 00105 // Function CheckA32Access 00106 /* ============================================================================================= */ 00107 /*! 00108 * @brief HLI function that detects OOB violations in the FLAT A32 memory access 00109 * @copydetails CheckA64Access 00110 */ 00111 IGC_STACK_CALL void CheckA32Access(__global const MemBounds* allocatedBuffers, 00112 uint32_t addr, 00113 uint32_t accessMask, 00114 __global BoundsCheckArgs* boundsCheckArgs); 00115 #if defined(__cplusplus) 00116 using CheckA32AccessFunc = GtHliFunction<void, const MemBounds*, uint32_t, uint32_t, BoundsCheckArgs*>; 00117 #endif 00118 00119 /* ============================================================================================= */ 00120 // Function CheckSurfaceAccess 00121 /* ============================================================================================= */ 00122 /*! 00123 * @brief HLI function that detects OOB violations in the surface (BTS/SLM/scratch) memory access 00124 * @param[in] offset Offset of the accessed memory range within the surface space 00125 * @param[in] accessMask Per-channel mask of memory accesses 00126 * @param[in][out] boundsCheckArgs Information about memory access instruction and results of the bounds check 00127 */ 00128 IGC_STACK_CALL void CheckSurfaceAccess(uint32_t offset, uint32_t accessMask, __global BoundsCheckArgs* boundsCheckArgs); 00129 #if defined(__cplusplus) 00130 using CheckSurfaceAccessFunc = GtHliFunction<void, uint32_t, uint32_t, BoundsCheckArgs*>; 00131 #endif 00132 00133 /* ============================================================================================= */ 00134 // Function CheckA64ScatteredAccess 00135 /* ============================================================================================= */ 00136 /*! 00137 * @brief HLI function that detects OOB violations in the scattered FLAT A64 memory access 00138 * @param[in] allocatedBuffers Array of global memory buffers available to the kernel 00139 * @param[in] addrPayload Array of base addresses of accessed memory ranges 00140 * @param[in] accessMask Per-channel mask of memory accesses 00141 * @param[in][out] boundsCheckArgs Information about memory access instruction and results of the bounds check 00142 */ 00143 IGC_STACK_CALL void CheckA64ScatteredAccess(__global const MemBounds* allocatedBuffers, 00144 __global const uint64_t* addrPayload, 00145 uint32_t accessMask, 00146 __global BoundsCheckArgs* boundsCheckArgs); 00147 #if defined(__cplusplus) 00148 using CheckA64ScatteredAccessFunc = GtHliFunction<void, const MemBounds*, const uint64_t*, uint32_t, BoundsCheckArgs*>; 00149 #endif 00150 00151 /* ============================================================================================= */ 00152 // Function CheckA32ScatteredAccess 00153 /* ============================================================================================= */ 00154 /*! 00155 * @brief HLI function that detects OOB violations in the scattered FLAT A32 memory access 00156 * @copydetails CheckA64ScatteredAccess 00157 */ 00158 IGC_STACK_CALL void CheckA32ScatteredAccess(__global const MemBounds* allocatedBuffers, 00159 __global const uint32_t* addrPayload, 00160 uint32_t accessMask, 00161 __global BoundsCheckArgs* boundsCheckArgs); 00162 #if defined(__cplusplus) 00163 using CheckA32ScatteredAccessFunc = GtHliFunction<void, const MemBounds*, const uint32_t*, uint32_t, BoundsCheckArgs*>; 00164 #endif 00165 00166 /* ============================================================================================= */ 00167 // Function CheckSurfaceScatteredAccess 00168 /* ============================================================================================= */ 00169 /*! 00170 * @brief HLI function that detects OOB violations in the scattered surface (BTS/SLM/scratch) memory access 00171 * @param[in] offsets Array of offsets of accessed memory ranges within the surface space 00172 * @param[in] accessMask Per-channel mask of memory accesses 00173 * @param[in][out] boundsCheckArgs Information about memory access instruction and results of the bounds check 00174 */ 00175 IGC_STACK_CALL void CheckSurfaceScatteredAccess(__global const uint32_t* offsets, 00176 uint32_t accessMask, 00177 __global BoundsCheckArgs* boundsCheckArgs); 00178 #if defined(__cplusplus) 00179 using CheckSurfaceScatteredAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, BoundsCheckArgs*>; 00180 #endif 00181 00182 /*! 00183 * @brief HLI function that detects OOB violations in the block 2D memory access 00184 * @param[in] allocatedBuffers Array of global memory buffers available to the kernel 00185 * @param[in] block Pointer to the block 2D structure representing the accessed memory range 00186 * @param[in] accessMask Per-channel mask of memory accesses 00187 * @param[in][out] boundsCheckArgs Information about memory access instruction and results of the bounds check 00188 */ 00189 IGC_STACK_CALL void CheckBlock2DAccess(__global const MemBounds* allocatedBuffers, 00190 __global const Block2D* block, 00191 uint32_t accessMask, 00192 __global BoundsCheckArgs* boundsCheckArgs); 00193 #if defined(__cplusplus) 00194 using CheckBlock2DAccessFunc = GtHliFunction<void, const MemBounds*, const uint64_t*, uint32_t, BoundsCheckArgs*>; 00195 #endif 00196 00197 #pragma pack(pop) 00198 00199 #endif
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2025 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file A tool that detects out-of-bounds (OOB) memory accesses in kernels 00009 */ 00010 00011 #include <cstring> 00012 #include <map> 00013 #include <vector> 00014 #include <set> 00015 00016 #include "bounds_check.h" 00017 00018 #include "gtpin_api.h" 00019 #include "gtpin_tool_utils.h" 00020 #include "gen_send_decoder.h" 00021 #include "ged.h" 00022 00023 /* ============================================================================================= */ 00024 // Configuration 00025 /* ============================================================================================= */ 00026 Knob<int> KNOB_MAX_KERNEL_BUFFERS("max_kernel_buffers", 256, "Maximum number of global buffers available to the kernel"); 00027 Knob<bool> KNOB_CHECK_ACCESS_TYPE("check_access_type", false, "Check access type compatibility (readonly vs. write)"); 00028 Knob<bool> KNOB_NO_COUT("no_cout", false, "Do not send profiling results to standard output device"); 00029 00030 /* ============================================================================================= */ 00031 // Class MemAccess 00032 /* ============================================================================================= */ 00033 /// Information about memory access instruction 00034 struct MemAccess 00035 { 00036 /// Constructor. If memory access is unsupported, the reason can be queried by Error() 00037 explicit MemAccess(const IGtIns &ins, const DcSendMsg &msg); 00038 00039 bool IsValid() const { return _isValid; } ///< @return true for a supported memory access 00040 InsId Id() const { return _insId; } ///< @return Instruction ID 00041 GtAccessType AccessType() const { return _accessType; } ///< @return Access type: read, write or read-write 00042 GtRegNum FirstAddrReg() const { return _firstAddrReg; } ///< @return First register in the address payload 00043 uint32_t NumAddresses() const { return _numAddresses; } ///< @return Number of elements in the address payload 00044 uint32_t DataSize() const { return _dataSize; } ///< @return Data size, in bytes, per each address 00045 const GtMemoryAddrModel& AddrModel() const { return _addrModel; } ///< @return Address model of the memory access 00046 const std::string& Error() const { return _errMsg; } ///< @return Error message on unsupported memory access 00047 00048 /*! 00049 * @return Arguments of the HLI function that detects OOB violations in this memory access 00050 * @note This object owns the BoundsCheckArgs structure, but its content is controlled externally 00051 */ 00052 const BoundsCheckArgs& GetBoundsCheckArgs() const { return _bcArgs; } 00053 BoundsCheckArgs& GetBoundsCheckArgs() { return _bcArgs; } 00054 00055 private: 00056 bool _isValid = false; ///< True, if this structure represents supported memory access 00057 InsId _insId; ///< ID of the memory access instruction 00058 GtAccessType _accessType; ///< Access type: read-only, write-only or read-write 00059 GtMemoryAddrModel _addrModel; ///< Address model of the memory access 00060 GtRegNum _firstAddrReg; ///< First register in the address payload of the instruction 00061 uint32_t _numAddresses = 0; ///< Number of elements in the address payload of the instruction 00062 uint32_t _dataSize = 0; ///< Size, in bytes, of the memory range referenced by a single address 00063 BoundsCheckArgs _bcArgs; ///< Common arguments of bounds check functions 00064 std::string _errMsg; ///< Error message on unsupported memory access 00065 }; 00066 00067 /* ============================================================================================= */ 00068 // Struct ProfileResults 00069 /* ============================================================================================= */ 00070 /*! 00071 * Profile results per kernel dispatch / per instuction 00072 */ 00073 struct ProfileResults 00074 { 00075 ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess); 00076 00077 uint64_t dispatchId; ///< Unique ID of the kernel dispatch assigned by GTPin 00078 GtKernelExecDesc kernelExecDesc; ///< Kernel execution descriptor 00079 InsId insId; ///< ID of the memory access instruction 00080 uint32_t surfaceSize; ///< Size, in bytes, of the available memory space for a surface access 00081 uint32_t oobCount; ///< Counter of OOB violations in memory accesses performed by the instruction 00082 }; 00083 00084 /* ============================================================================================= */ 00085 // Class KernelProfile 00086 /* ============================================================================================= */ 00087 /// Static properties of the kernel, and its profile data updated on each kernel run 00088 class KernelProfile 00089 { 00090 public: 00091 using MemAccessMap = std::map<InsId, MemAccess>; ///< Information about memory accesses by kernel instructions 00092 00093 public: 00094 KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg); ///< Constructor 00095 00096 inline GtKernelId Id() const; ///< @return Unique identifier of the kernel 00097 inline GtGpuPlatform Platform() const; ///< @return Kernel's platform 00098 inline const std::string& Name() const; ///< @return Name of the kernel 00099 inline const std::string& UniqueName() const; ///< @return Unique name of the kernel 00100 inline std::string BoundsCheckResults() const; ///< @return Profiling results of kernel runs, in text format 00101 inline void DumpAsm() const; ///< Store kernel's assembly text in the file 00102 inline const MemAccessMap& GetMemAccessMap() const; ///< @return Information about memory accesses in the kernel 00103 inline MemAccessMap& GetMemAccessMap(); ///< @return Information about memory accesses in the kernel 00104 00105 void RecordBoundsCheckResults(IGtKernelDispatch& dispatcher); ///< Update profile data with the latest bounds check results 00106 00107 void RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg); ///< Record unsupported instruction 00108 private: 00109 GtKernelId _id; ///< Unique identifier of the kernel 00110 GtGpuPlatform _platform; ///< Kernel's platform 00111 std::string _name; ///< Name of the kernel 00112 std::string _uniqueName; ///< Unique name of the kernel 00113 std::string _asmText; ///< Assembly text of the kernel 00114 std::vector<std::pair<InsId, std::string>> _unsupportedInstructions; ///< Instructions that could not be instrumented 00115 00116 std::map<InsId, MemAccess> _memAccessMap; ///< Map: Instruction ID to memory access information 00117 std::list<ProfileResults> _profileResults; ///< Profile results per kernel dispatch / per instuction 00118 }; 00119 00120 /* ============================================================================================= */ 00121 // Class BoundsCheck 00122 /* ============================================================================================= */ 00123 /*! 00124 * A tool that detects out-of-bounds (OOB) memory accesses in kernels 00125 */ 00126 class BoundsCheck : public GtTool 00127 { 00128 public: 00129 // Implementation of the IGtTool interface 00130 const char* Name() const override { return "Bounds check"; } 00131 void OnKernelBuild(IGtKernelInstrument&) override; 00132 void OnKernelRun(IGtKernelDispatch&) override; 00133 void OnKernelComplete(IGtKernelDispatch&) override; 00134 00135 void LoadHliLibrary(); ///< Compile and load library of HLI functions 00136 static BoundsCheck* Instance(); ///< Return single instance of this class 00137 static void OnFini() { Instance()->Fini(); } ///< Termination handler registered with atexit() 00138 00139 private: 00140 00141 BoundsCheck(); ///< Default constructor 00142 BoundsCheck(const BoundsCheck&) = delete; ///< Disabled copy constructor 00143 BoundsCheck& operator = (const BoundsCheck&) = delete; ///< Disabled assignment operator 00144 ~BoundsCheck(); ///< Destructor 00145 void Fini(); ///< Post process and dump profiling data 00146 00147 /*! 00148 * Insert a call to HLI function that detects out-of-bounds (OOB) memory accesses in the specified instruction 00149 * @param[in] ins The memory access instruction 00150 * @param[in] memAccess Information about memory access 00151 * @param[in] instrumentor Instrumentation interface 00152 * @return true - success, false - the instruction or memory operation is not supported 00153 */ 00154 bool InsertBoundsCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor); 00155 00156 /*! 00157 * Given BTI index, return memory buffer referenced by the corresponding stateful argument. 00158 * Return empty buffer, if BTI index cannot be dereferenced 00159 */ 00160 GtMemoryBuffer GetBtiBuffer(uint32_t bti, const GtMemoryBufferConstSpan& allocatedBuffers, 00161 const IGtKernelDispatch& dispatcher); 00162 00163 /// @return Pointer to a buffer in the 'buffers' sequence that contains 'ptr', or nullptr if buffer is not found 00164 const GtMemoryBuffer* FindBuffer(const GtMemoryBufferConstSpan& buffers, uintptr_t ptr); 00165 00166 private: 00167 // Bounds check functions 00168 CheckA64AccessFunc _checkA64AccessFunc; 00169 CheckA32AccessFunc _checkA32AccessFunc; 00170 CheckSurfaceAccessFunc _checkSurfaceAccessFunc; 00171 CheckA64ScatteredAccessFunc _checkA64ScatteredAccessFunc; 00172 CheckA32ScatteredAccessFunc _checkA32ScatteredAccessFunc; 00173 CheckSurfaceScatteredAccessFunc _checkSurfaceScatteredAccessFunc; 00174 CheckBlock2DAccessFunc _checkBlock2DAccessFunc; 00175 00176 /// Global memory buffers available to the kernel; shared and passed as an argument to bounds check functions 00177 MemBounds* _allocatedBuffers = nullptr; ///< All allocated buffers 00178 MemBounds* _writableBuffers = nullptr; ///< Allocated writable buffers 00179 00180 IGtHliModuleHandle _hliModule = nullptr; ///< Module of HLI functions 00181 std::map<GtKernelId, KernelProfile> _kernels; ///< Collection of kernel profiles 00182 }; 00183 00184 /* ============================================================================================= */ 00185 // MemAccess implementation 00186 /* ============================================================================================= */ 00187 MemAccess::MemAccess(const IGtIns &ins, const DcSendMsg &msg) : _insId(ins.Id()) 00188 { 00189 00190 00191 // Get and check data port (SFID) 00192 GtSfid sfid = ins.Sfid(); 00193 bool isHdc = (sfid == GED_SFID_DP_DC0) || (sfid == GED_SFID_DP_DC1); 00194 if ((sfid != GED_SFID_UGM) && (sfid != GED_SFID_SLM) && !isHdc) 00195 { 00196 _errMsg = "Unsupported data port " + std::string(sfid.ToString()); 00197 return; 00198 } 00199 00200 // Retrieve message descriptor 00201 if (!ins.MsgDescRegFile().IsImm()) 00202 { 00203 _errMsg = "SEND message descriptor is not immediate"; 00204 return; 00205 } 00206 00207 // Check opcode of the memory operation 00208 GED_DP_OPCODE opcode = ins.DPOpCode(); 00209 bool isLoadLsc = (opcode == GED_DP_OPCODE_LOAD) || (opcode == GED_DP_OPCODE_LOAD_2D_BLOCK); 00210 bool isStoreLsc = (opcode == GED_DP_OPCODE_STORE) || (opcode == GED_DP_OPCODE_STORE_2D_BLOCK); 00211 bool isAtomic = ins.IsAtomic(); 00212 bool isLoad = isLoadLsc || (isHdc && !isAtomic && ins.HasDstOperand() && ins.DstRegFile().IsGrf()); 00213 bool isStore = isStoreLsc || (isHdc && !isAtomic && ins.DstOperand().Reg().IsNullReg()); 00214 00215 if (!isLoad && !isStore && !isAtomic) 00216 { 00217 _errMsg = "Unsupported SEND operation (not load/store/atomic)"; 00218 return; 00219 } 00220 00221 // Initialize address model 00222 _addrModel = ins.MemAddrModel(); 00223 if (!_addrModel.IsValid() || _addrModel.IsBss()) 00224 { 00225 _errMsg = "Unsupported/unknown address model"; // @fixme Support BSS model 00226 return; 00227 } 00228 00229 // Finally, initialize the rest of data members... 00230 GTPIN_ASSERT(ins.SrcRegFile(0).IsGrf()); 00231 _firstAddrReg = ins.SrcRegOperand(0).Reg().RegNum(); 00232 _numAddresses = ins.NumAccesses(); GTPIN_ASSERT(_numAddresses != 0); 00233 _dataSize = msg.ElementSize() * msg.NumElements(); GTPIN_ASSERT(_dataSize != 0); 00234 _accessType = (isLoad ? GT_ACCESS_READ : (isStore ? GT_ACCESS_WRITE : GT_ACCESS_READ_WRITE)); 00235 00236 _isValid = true; 00237 } 00238 00239 /* ============================================================================================= */ 00240 // ProfileResults implementation 00241 /* ============================================================================================= */ 00242 ProfileResults::ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess) : 00243 dispatchId(dispatcher.DispatchId()), insId(memAccess.Id()), 00244 surfaceSize(memAccess.GetBoundsCheckArgs().in.surfaceSize), 00245 oobCount(memAccess.GetBoundsCheckArgs().out.oobCount) 00246 { 00247 dispatcher.GetExecDescriptor(kernelExecDesc); 00248 } 00249 00250 /* ============================================================================================= */ 00251 // KernelProfile implementation 00252 /* ============================================================================================= */ 00253 KernelProfile::KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg) : 00254 _id(kernel.Id()), _platform(kernel.GpuPlatform()), _name(GlueString(kernel.Name())), _uniqueName(kernel.UniqueName()), 00255 _asmText(CfgAsmText(cfg)) 00256 { 00257 // Populate this object with the information about memory accesses 00258 for (auto bblPtr : cfg.Bbls()) 00259 { 00260 for (auto insPtr : bblPtr->Instructions()) 00261 { 00262 const IGtIns& ins = *insPtr; 00263 const DcSendMsg msg = DcSendMsg::Decode(ins.GetGedIns()); 00264 // Exclude EOT and fence instructions; fence is used for synchronization, not direct memory access. 00265 if (ins.IsMemAccess() && !ins.IsEot() && !msg.IsMemFence()) 00266 { 00267 MemAccess memAccess(ins, msg); 00268 if (memAccess.IsValid()) 00269 { 00270 _memAccessMap.emplace(ins.Id(), memAccess); 00271 } 00272 else 00273 { 00274 RecordUnsupportedInstruction(ins, memAccess.Error()); 00275 } 00276 } 00277 } 00278 } 00279 } 00280 00281 GtKernelId KernelProfile::Id() const { return _id; } 00282 GtGpuPlatform KernelProfile::Platform() const { return _platform; } 00283 const std::string& KernelProfile::Name() const { return _name; } 00284 const std::string& KernelProfile::UniqueName() const { return _uniqueName; } 00285 void KernelProfile::DumpAsm() const { DumpKernelAsmText(_name, _uniqueName, _asmText); } 00286 const KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() const { return _memAccessMap; } 00287 KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() { return _memAccessMap; } 00288 00289 std::string KernelProfile::BoundsCheckResults() const 00290 { 00291 std::ostringstream os; 00292 00293 os << std::string(120, '-') << std::endl; 00294 os << std::setw(4) << _id << ": " << _name << "___" << _uniqueName << std::endl; 00295 os << std::string(120, '-') << std::endl; 00296 00297 // Print suspicious instructions explanation (zero surface size), if needed 00298 bool printSuspicious = false; 00299 for (const auto& res : _profileResults) 00300 { 00301 if (res.oobCount != 0 && res.surfaceSize == 0) 00302 { 00303 os << std::endl << "Note: Z marks out-of-bounds accesses to zero-sized surfaces (stateful/BTS, bindless/SS, or SLM)." << std::endl << std::endl; 00304 printSuspicious = true; 00305 break; 00306 } 00307 } 00308 00309 uint32_t oobTotal = 0; 00310 for (const auto& res : _profileResults) 00311 { 00312 if (res.oobCount != 0) 00313 { 00314 if (oobTotal == 0) 00315 { 00316 os << std::setw(20) << "Dispatch ID" << std::setw(20) << "Instruction ID" << std::setw(20) << "#OOB violations"; 00317 os << " " << std::setw(45) << "Execution descriptor"; 00318 if (printSuspicious) 00319 { 00320 os << std::setw(20) << "0-sized Surface"; 00321 } 00322 os << std::endl; 00323 os << std::string(printSuspicious ? 140 : 120, '-') << std::endl; 00324 } 00325 os << std::setw(20) << res.dispatchId << std::setw(20) << res.insId << std::setw(20) << res.oobCount; 00326 os << " " << std::setw(45) << res.kernelExecDesc.ToString(_platform, ExecDescAlignedFormat()); 00327 if (printSuspicious && res.surfaceSize == 0) 00328 { 00329 os << std::setw(20) << "Z"; 00330 } 00331 else 00332 { 00333 os << std::setw(20) << " "; 00334 } 00335 os << std::endl; 00336 oobTotal += res.oobCount; 00337 } 00338 } 00339 if (oobTotal == 0) 00340 { 00341 os << "No OOB accesses detected" << std::endl; 00342 } 00343 00344 // Print unsupported instructions 00345 if (!_unsupportedInstructions.empty()) 00346 { 00347 os << std::endl; 00348 os << " ---------------------------" << std::endl; 00349 os << " Unsupported memory accesses:" << std::endl; 00350 os << " ---------------------------" << std::endl; 00351 for (const auto& entry : _unsupportedInstructions) 00352 { 00353 os << entry.second << ": Instruction ID: [" << std::setw(3) << entry.first << "]" << std::endl; 00354 } 00355 os << std::string(120, '-') << std::endl; 00356 } 00357 00358 return os.str(); 00359 } 00360 00361 void KernelProfile::RecordBoundsCheckResults(IGtKernelDispatch& dispatcher) 00362 { 00363 for (auto& entry: _memAccessMap) 00364 { 00365 MemAccess& memAccess = entry.second; 00366 _profileResults.emplace_back(dispatcher, memAccess); 00367 } 00368 } 00369 00370 void KernelProfile::RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg) 00371 { 00372 if (!errMsg.empty()) 00373 { 00374 _unsupportedInstructions.emplace_back(ins.Id(), errMsg); 00375 } 00376 } 00377 00378 /* ============================================================================================= */ 00379 // BoundsCheck implementation 00380 /* ============================================================================================= */ 00381 BoundsCheck::BoundsCheck() : _checkA64AccessFunc("CheckA64Access"), 00382 _checkA32AccessFunc("CheckA32Access"), 00383 _checkSurfaceAccessFunc("CheckSurfaceAccess"), 00384 _checkA64ScatteredAccessFunc("CheckA64ScatteredAccess"), 00385 _checkA32ScatteredAccessFunc("CheckA32ScatteredAccess"), 00386 _checkSurfaceScatteredAccessFunc("CheckSurfaceScatteredAccess"), 00387 _checkBlock2DAccessFunc("CheckBlock2DAccess") 00388 { 00389 size_t size = MemBounds::SizeOf(KNOB_MAX_KERNEL_BUFFERS); 00390 #if (_GTPIN_CPP_STD >= 17) 00391 constexpr std::align_val_t alignment { alignof(MemBounds) }; 00392 _allocatedBuffers = reinterpret_cast<MemBounds*>(operator new [](size, alignment)); 00393 _writableBuffers = reinterpret_cast<MemBounds*>(operator new [](size, alignment)); 00394 #else 00395 _allocatedBuffers = reinterpret_cast<MemBounds*>(operator new [](size)); 00396 _writableBuffers = reinterpret_cast<MemBounds*>(operator new [](size)); 00397 #endif 00398 } 00399 00400 BoundsCheck::~BoundsCheck() 00401 { 00402 #if (_GTPIN_CPP_STD >= 17) 00403 constexpr std::align_val_t alignment { alignof(MemBounds) }; 00404 operator delete [](_allocatedBuffers, alignment); 00405 operator delete [](_writableBuffers, alignment); 00406 #else 00407 operator delete [](_allocatedBuffers); 00408 operator delete [](_writableBuffers); 00409 #endif 00410 } 00411 00412 BoundsCheck* BoundsCheck::Instance() 00413 { 00414 static BoundsCheck instance; 00415 return &instance; 00416 } 00417 00418 void BoundsCheck::OnKernelBuild(IGtKernelInstrument& instrumentor) 00419 { 00420 const IGtKernel& kernel = instrumentor.Kernel(); 00421 const IGtCfg& cfg = instrumentor.Cfg(); 00422 IGtMemoryMapper& memMapper = instrumentor.MemoryMapper(); 00423 00424 // Create profile for this kernel 00425 auto result = _kernels.emplace(std::piecewise_construct, 00426 std::forward_as_tuple(instrumentor.Kernel().Id()), 00427 std::forward_as_tuple(kernel, cfg)); 00428 KernelProfile& kernelProfile = result.first->second; 00429 00430 // Handle common instrumentation knobs 00431 HandleCommonInstrumnetationKnobs(instrumentor); 00432 // Instrument memory accesses and share per-access arguments with HLI functions 00433 for (const auto& entry : kernelProfile.GetMemAccessMap()) 00434 { 00435 const auto& memAccess = entry.second; 00436 auto insId = entry.first; 00437 00438 if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) 00439 { 00440 continue; 00441 } 00442 const IGtIns& ins = cfg.GetInstruction(insId); 00443 InsertBoundsCheck(ins, memAccess, instrumentor); 00444 00445 // Share per-access HLI arguments. 00446 // They will be initialized at the start of the kernel, and copied back to the host memory at completion of the kernel 00447 memMapper.Map(memAccess.GetBoundsCheckArgs(), GT_MMAP_SHARE); 00448 } 00449 00450 // Map the array of global buffers available to kernel. This is an HLI argument common to all global memory accesses. 00451 // The array will be populated at the start of the kernel. 00452 uint32_t sizeOfMemBounds = (uint32_t)MemBounds::SizeOf(KNOB_MAX_KERNEL_BUFFERS); 00453 memMapper.Map(_allocatedBuffers, sizeOfMemBounds, GT_MMAP_NO_SHARE, alignof(MemBounds)); 00454 if (KNOB_CHECK_ACCESS_TYPE) 00455 { 00456 memMapper.Map(_writableBuffers, sizeOfMemBounds, GT_MMAP_NO_SHARE, alignof(MemBounds)); 00457 } 00458 00459 // Link the kernel with the library of HLI functions 00460 instrumentor.LinkHliModule(_hliModule); 00461 } 00462 00463 void BoundsCheck::OnKernelRun(IGtKernelDispatch& dispatcher) 00464 { 00465 const IGtKernel& kernel = dispatcher.Kernel(); 00466 if (_kernels.find(kernel.Id()) == _kernels.end()) 00467 { 00468 return; 00469 } 00470 00471 KernelProfile& kernelProfile = _kernels.at(kernel.Id()); 00472 00473 if (dispatcher.ExecStage().IsDispatch()) 00474 { 00475 GtKernelExecDesc execDesc; dispatcher.GetExecDescriptor(execDesc); 00476 if (kernel.IsInstrumented() && IsKernelExecProfileEnabled(execDesc, kernel.GpuPlatform(), kernel.Name().Get())) 00477 { 00478 dispatcher.SetProfilingMode(true); // Enable instrumentation 00479 00480 // This tool needs an accurate information about memory allocations, which is available on the final dispatch stage. 00481 // So, on the initial dispatch stage, we only enable instrumentation, and request GTPin to invoke BoundsCheck::OnKernelRun 00482 // one more time, on the final dispatch stage. If this request is accepted, the initialization of the profile buffer will 00483 // be done on the final dispatch stage, otherwise - on the initial dispatch stage. 00484 if (dispatcher.ReportFinalDispatchStage()) 00485 { 00486 return; 00487 } 00488 } 00489 else 00490 { 00491 dispatcher.SetProfilingMode(false); // Disable instrumentation 00492 return; 00493 } 00494 } 00495 00496 IGtMemoryMapper& memMapper = dispatcher.MemoryMapper(); 00497 00498 // Populate and share array of global buffers available to the kernel 00499 GtMemoryBufferConstSpan allocatedBuffers = dispatcher.GetAllocatedBuffers(); 00500 GTPIN_ASSERT_MSG(allocatedBuffers.size() <= uint32_t(KNOB_MAX_KERNEL_BUFFERS), 00501 "Number of global buffers " + ToString(allocatedBuffers.size()) + 00502 " exceeded " + ToString(KNOB_MAX_KERNEL_BUFFERS.GetValue()) + ". Increase max_kernel_buffers value"); 00503 00504 _allocatedBuffers->numRanges = 0; 00505 for (const auto& buffer : allocatedBuffers) 00506 { 00507 _allocatedBuffers->ranges[_allocatedBuffers->numRanges++] = buffer.Range(); 00508 } 00509 memMapper.Write(_allocatedBuffers, (uint32_t)_allocatedBuffers->SizeOf()); 00510 00511 if (KNOB_CHECK_ACCESS_TYPE) 00512 { 00513 _writableBuffers->numRanges = 0; 00514 for (const auto& buffer : allocatedBuffers) 00515 { 00516 if (buffer.Access().IsWrite()) 00517 { 00518 _writableBuffers->ranges[_writableBuffers->numRanges++] = buffer.Range(); 00519 } 00520 } 00521 memMapper.Write(_writableBuffers, (uint32_t)_writableBuffers->SizeOf()); 00522 } 00523 00524 // Initialize per-access arguments of HLI functions 00525 for (auto& entry: kernelProfile.GetMemAccessMap()) 00526 { 00527 auto insId = entry.first; 00528 00529 if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) 00530 { 00531 continue; 00532 } 00533 00534 MemAccess& memAccess = entry.second; 00535 GtMemoryAddrModel addrModel = memAccess.AddrModel(); 00536 BoundsCheckArgs& bcArgs = memAccess.GetBoundsCheckArgs(); 00537 00538 bcArgs.out.oobCount = 0; 00539 bcArgs.in.dataSize = memAccess.DataSize(); 00540 bcArgs.in.numAddresses = memAccess.NumAddresses(); 00541 00542 // Compute size of the available memory space for BTS, SLM and scratch accesses 00543 bcArgs.in.surfaceSize = UINT32_MAX; 00544 if (addrModel.IsSlm()) 00545 { 00546 bcArgs.in.surfaceSize = dispatcher.SlmSize(); 00547 } 00548 else if (addrModel.IsSurfaceState()) 00549 { 00550 bcArgs.in.surfaceSize = dispatcher.ScratchSpaceSize(); // Assuming SS address model used in scratch access only 00551 } 00552 else if (addrModel.IsBts()) 00553 { 00554 GtMemoryBuffer buffer = GetBtiBuffer(addrModel.Bti(), allocatedBuffers, dispatcher); 00555 if (!buffer.IsEmpty() && (!KNOB_CHECK_ACCESS_TYPE || buffer.Access().Includes(memAccess.AccessType()))) 00556 { 00557 bcArgs.in.surfaceSize = (uint32_t)buffer.Size(); GTPIN_ASSERT(buffer.Size() <= UINT32_MAX); 00558 } 00559 } 00560 00561 memMapper.Write(&bcArgs, sizeof(bcArgs)); 00562 } 00563 } 00564 00565 void BoundsCheck::OnKernelComplete(IGtKernelDispatch& dispatcher) 00566 { 00567 if (dispatcher.IsProfilingEnabled()) 00568 { 00569 KernelProfile& kernelProfile = _kernels.at(dispatcher.Kernel().Id()); 00570 kernelProfile.RecordBoundsCheckResults(dispatcher); 00571 } 00572 } 00573 00574 bool BoundsCheck::InsertBoundsCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor) 00575 { 00576 GTPIN_ASSERT(memAccess.IsValid() && (memAccess.Id() == ins.Id())); 00577 00578 uint32_t numAddresses = memAccess.NumAddresses(); 00579 if (numAddresses == 0) 00580 { 00581 return false; // Nothing to check 00582 } 00583 00584 const IGtKernel& kernel = instrumentor.Kernel(); 00585 const IGtGenModel& genModel = kernel.GenModel(); 00586 uint32_t regSize = genModel.GrfRegSize(); 00587 const GtMemoryAddrModel& addrModel = memAccess.AddrModel(); 00588 uint32_t addrSize = addrModel.PtrSize(); 00589 GtReg firstReg = GrfReg(memAccess.FirstAddrReg(), 0, regSize); 00590 uint32_t numRegs = RoundUp(memAccess.NumAddresses() * addrSize, regSize) / regSize; 00591 BoundsCheckArgs* checkArgs = const_cast<BoundsCheckArgs*>(&memAccess.GetBoundsCheckArgs()); 00592 bool checkWritableBuffers = (KNOB_CHECK_ACCESS_TYPE && memAccess.AccessType().IsWrite()); 00593 MemBounds* globalBuffers = (checkWritableBuffers ? _writableBuffers : _allocatedBuffers); 00594 IargConstGrfRange addrPayload(firstReg.RegNum(), numRegs); 00595 IargInsOpMask accessMask(ins); 00596 00597 if (addrModel.IsA64()) 00598 { 00599 GTPIN_ASSERT(addrSize == sizeof(uint64_t)); 00600 00601 if(ins.IsBlock2DAccess()) 00602 { 00603 _checkBlock2DAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00604 NullReg(), // Unused return value 00605 globalBuffers, // arg[0]: Available global buffers 00606 addrPayload, // arg[1]: Address payload 00607 accessMask, // arg[2]: Per-channel mask of memory accesses 00608 checkArgs // arg[3]: Bounds check arguments 00609 ); 00610 } 00611 else if (numAddresses == 1) 00612 { 00613 _checkA64AccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00614 NullReg(), // Unused return value 00615 globalBuffers, // arg[0]: Available global buffers 00616 firstReg, // arg[1]: Base address of the accessed memory range 00617 accessMask, // arg[2]: Per-channel mask of memory accesses 00618 checkArgs // arg[3]: Bounds check arguments 00619 ); 00620 } 00621 else 00622 { 00623 _checkA64ScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00624 NullReg(), // Unused return value 00625 globalBuffers, // arg[0]: Available global buffers 00626 addrPayload, // arg[1]: Base addresses of accessed memory ranges 00627 accessMask, // arg[2]: Per-channel mask of memory accesses 00628 checkArgs // arg[3]: Bounds check arguments 00629 ); 00630 } 00631 } 00632 else if (addrModel.IsA32()) 00633 { 00634 GTPIN_ASSERT(addrSize == sizeof(uint32_t)); 00635 00636 if (numAddresses == 1) 00637 { 00638 _checkA32AccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00639 NullReg(), // Unused return value 00640 globalBuffers, // arg[0]: Available global buffers 00641 firstReg, // arg[1]: Base address of the accessed memory range 00642 accessMask, // arg[2]: Per-channel mask of memory accesses 00643 checkArgs // arg[3]: Bounds check arguments 00644 ); 00645 } 00646 else 00647 { 00648 _checkA32ScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00649 NullReg(), // Unused return value 00650 globalBuffers, // arg[0]: Available global buffers 00651 addrPayload, // arg[1]: Base addresses of accessed memory ranges 00652 accessMask, // arg[2]: Per-channel mask of memory accesses 00653 checkArgs // arg[3]: Bounds check arguments 00654 ); 00655 } 00656 } 00657 else 00658 { 00659 GTPIN_ASSERT(addrSize == sizeof(uint32_t)); 00660 GTPIN_ASSERT_MSG(addrModel.IsBts() || addrModel.IsSlm() || addrModel.IsBss() || addrModel.IsSurfaceState(), 00661 "Unknown address model: " + addrModel.ToString()); 00662 00663 if (numAddresses == 1) 00664 { 00665 _checkSurfaceAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00666 NullReg(), // Unused return value 00667 firstReg, // arg[0]: Base address of the accessed memory range 00668 accessMask, // arg[1]: Per-channel mask of memory accesses 00669 checkArgs // arg[2]: Bounds check arguments 00670 ); 00671 } 00672 else 00673 { 00674 _checkSurfaceScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00675 NullReg(), // Unused return value 00676 addrPayload, // arg[0]: Base addresses of accessed memory ranges 00677 accessMask, // arg[1]: Per-channel mask of memory accesses 00678 checkArgs // arg[2]: Bounds check arguments 00679 ); 00680 } 00681 } 00682 return true; 00683 } 00684 00685 GtMemoryBuffer BoundsCheck::GetBtiBuffer(uint32_t bti, const GtMemoryBufferConstSpan& allocatedBuffers, 00686 const IGtKernelDispatch& dispatcher) 00687 { 00688 Uint32Validatable<> argIndex; 00689 UintPtrValidatable<> argPtr; 00690 00691 for (const auto& arg : dispatcher.Kernel().PayloadArguments()) 00692 { 00693 if (arg.IsMemoryPointer() && arg.IsExplicit() && arg.addrModel.IsValid() && (arg.addrModel.Bti() == bti)) 00694 { 00695 ConstByteSpan argValue = dispatcher.GetPayloadArgumentValue(arg.index); 00696 if (argValue.size() == sizeof(uintptr_t)) 00697 { 00698 argPtr = *reinterpret_cast<const uintptr_t*>(argValue.data()); 00699 argIndex = arg.index; 00700 } 00701 } 00702 } 00703 if (!argPtr.IsValid()) 00704 { 00705 return {}; 00706 } 00707 00708 const GtMemoryBuffer* buffer = FindBuffer(allocatedBuffers, argPtr); 00709 if (!buffer) 00710 { 00711 return {}; 00712 } 00713 00714 // IF buffer_offset is specified THEN 00715 // BTI_base == allocation_base_of(arg_value), AND BTI_size = allocation_size_of(arg_value) 00716 // ELSE 00717 // BTI_base == arg_value, AND BTI_size = allocation_size_of(arg_value) - (arg_value - allocation_base_of(arg_value)) 00718 00719 for (const auto& arg : dispatcher.Kernel().PayloadArguments()) 00720 { 00721 if ((arg.index == argIndex) && (std::strcmp(arg.type, "buffer_offset") == 0)) 00722 { 00723 return *buffer; 00724 } 00725 } 00726 00727 size_t offset = argPtr - buffer->Base(); GTPIN_ASSERT(offset <= buffer->Size()); 00728 return GtMemoryBuffer(argPtr, buffer->Size() - offset, buffer->Access()); 00729 } 00730 00731 const GtMemoryBuffer* BoundsCheck::FindBuffer(const GtMemoryBufferConstSpan& buffers, uintptr_t ptr) 00732 { 00733 for (const auto& buffer : buffers) 00734 { 00735 if (buffer.Range().Contains(ptr)) 00736 { 00737 return &buffer; 00738 } 00739 } 00740 return nullptr; 00741 } 00742 00743 void BoundsCheck::LoadHliLibrary() 00744 { 00745 std::string modulePath = JoinPath(GetKnobValue<std::string>("installDir"), "Examples", "bounds_check.cl"); 00746 _hliModule = GTPin_GetCore()->HliLibrary().CompileModuleFromFile(modulePath.c_str()); 00747 GTPIN_ASSERT_MSG(_hliModule != nullptr, "Could not load HLI module " + modulePath); 00748 } 00749 00750 void BoundsCheck::Fini() 00751 { 00752 std::string str; 00753 00754 // Dump profiling results and assembly code of all kernels 00755 for (const auto& entry : _kernels) 00756 { 00757 const auto& kernelProfile = entry.second; 00758 str += kernelProfile.BoundsCheckResults(); 00759 kernelProfile.DumpAsm(); 00760 } 00761 00762 std::ofstream fs(JoinPath(GTPin_GetCore()->ProfileDir(), "bounds_check.txt")); 00763 GTPIN_ASSERT(fs.is_open()); 00764 fs << str; 00765 00766 if (!KNOB_NO_COUT) 00767 { 00768 std::cout << str; 00769 } 00770 } 00771 00772 /* ============================================================================================= */ 00773 // GTPin_Entry 00774 /* ============================================================================================= */ 00775 EXPORT_C_FUNC void GTPin_Entry(int argc, const char *argv[]) 00776 { 00777 // Parse command line and configure GTPin 00778 ConfigureGTPin(argc, argv); 00779 00780 // Register the tool (callbacks) with the GTPin core 00781 BoundsCheck::Instance()->Register(); 00782 00783 // Compile and load library of HLI functions 00784 BoundsCheck::Instance()->LoadHliLibrary(); 00785 00786 // Register the termination function 00787 atexit(BoundsCheck::OnFini); 00788 }
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2026 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file Library of High-Level Instrumentation (HLI) functions used by the bounds_check tool 00009 */ 00010 00011 #include "hlif_basic_defs.h" 00012 #include "bounds_check.h" 00013 00014 /*! 00015 * @brief HLI function that detects OOB violations in the FLAT A64 memory access 00016 * @see bounds_check.h for details 00017 */ 00018 IGC_STACK_CALL void CheckA64Access(__global const MemBounds* allocatedBuffers, 00019 uint64_t addr, 00020 uint32_t accessMask, 00021 __global BoundsCheckArgs* boundsCheckArgs) 00022 { 00023 if ((accessMask & 0x1) != 0) 00024 { 00025 uint32_t numRanges = allocatedBuffers->numRanges; 00026 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00027 00028 uint32_t rIdx = 0; 00029 for (; rIdx != numRanges; ++rIdx) 00030 { 00031 __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]); 00032 uintptr_t base = range->base; 00033 uintptr_t end = base + range->size; 00034 if ((addr >= base) && (addr + dataSize <= end)) 00035 { 00036 return; 00037 } 00038 } 00039 atomic_inc(&(boundsCheckArgs->out.oobCount)); 00040 } 00041 } 00042 00043 /*! 00044 * @brief HLI function that detects OOB violations in the FLAT A32 memory access 00045 * @see bounds_check.h for details 00046 */ 00047 IGC_STACK_CALL void CheckA32Access(__global const MemBounds* allocatedBuffers, 00048 uint32_t addr, 00049 uint32_t accessMask, 00050 __global BoundsCheckArgs* boundsCheckArgs) 00051 { 00052 if ((accessMask & 0x1) != 0) 00053 { 00054 uint32_t numRanges = allocatedBuffers->numRanges; 00055 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00056 00057 uint32_t rIdx = 0; 00058 for (; rIdx != numRanges; ++rIdx) 00059 { 00060 __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]); 00061 uintptr_t base = range->base; 00062 uintptr_t end = base + range->size; 00063 if ((addr >= base) && (addr + dataSize <= end)) 00064 { 00065 return; 00066 } 00067 } 00068 atomic_inc(&(boundsCheckArgs->out.oobCount)); 00069 } 00070 } 00071 00072 /*! 00073 * @brief HLI function that detects OOB violations in the surface (BTS/SLM/scratch) memory access 00074 * @see bounds_check.h for details 00075 */ 00076 IGC_STACK_CALL void CheckSurfaceAccess(uint32_t offset, uint32_t accessMask, __global BoundsCheckArgs* boundsCheckArgs) 00077 { 00078 if ((accessMask & 0x1) != 0) 00079 { 00080 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00081 uint32_t surfaceSize = boundsCheckArgs->in.surfaceSize; 00082 if (offset + dataSize > surfaceSize) 00083 { 00084 atomic_inc(&(boundsCheckArgs->out.oobCount)); 00085 } 00086 } 00087 } 00088 00089 /*! 00090 * @brief HLI function that detects OOB violations in the scattered FLAT A64 memory access 00091 * @see bounds_check.h for details 00092 */ 00093 IGC_STACK_CALL void CheckA64ScatteredAccess(__global const MemBounds* allocatedBuffers, 00094 __global const uint64_t* addrPayload, 00095 uint32_t accessMask, 00096 __global BoundsCheckArgs* boundsCheckArgs) 00097 { 00098 if (accessMask != 0) 00099 { 00100 uint32_t numRanges = allocatedBuffers->numRanges; 00101 uint32_t numAddresses = boundsCheckArgs->in.numAddresses; 00102 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00103 00104 uint32_t oobCount = 0; 00105 00106 for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx) 00107 { 00108 if ((accessMask & (0x1 << aIdx)) != 0) 00109 { 00110 uint64_t addr = addrPayload[aIdx]; 00111 uint32_t rIdx = 0; 00112 for (; rIdx != numRanges; ++rIdx) 00113 { 00114 __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]); 00115 uintptr_t base = range->base; 00116 uintptr_t end = base + range->size; 00117 if ((addr >= base) && (addr + dataSize <= end)) 00118 { 00119 break; 00120 } 00121 } 00122 if (rIdx == numRanges) 00123 { 00124 oobCount++; 00125 } 00126 } 00127 } 00128 if (oobCount) 00129 { 00130 atomic_add(&(boundsCheckArgs->out.oobCount), oobCount); 00131 } 00132 } 00133 } 00134 00135 /*! 00136 * @brief HLI function that detects OOB violations in the scattered FLAT A32 memory access 00137 * @see bounds_check.h for details 00138 */ 00139 IGC_STACK_CALL void CheckA32ScatteredAccess(__global const MemBounds* allocatedBuffers, 00140 __global const uint32_t* addrPayload, 00141 uint32_t accessMask, 00142 __global BoundsCheckArgs* boundsCheckArgs) 00143 { 00144 if (accessMask != 0) 00145 { 00146 uint32_t numRanges = allocatedBuffers->numRanges; 00147 uint32_t numAddresses = boundsCheckArgs->in.numAddresses; 00148 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00149 00150 uint32_t oobCount = 0; 00151 00152 for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx) 00153 { 00154 if ((accessMask & (0x1 << aIdx)) != 0) 00155 { 00156 uint32_t addr = addrPayload[aIdx]; 00157 uint32_t rIdx = 0; 00158 for (; rIdx != numRanges; ++rIdx) 00159 { 00160 __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]); 00161 uintptr_t base = range->base; 00162 uintptr_t end = base + range->size; 00163 if ((addr >= base) && (addr + dataSize <= end)) 00164 { 00165 break; 00166 } 00167 } 00168 if (rIdx == numRanges) 00169 { 00170 oobCount++; 00171 } 00172 } 00173 } 00174 if (oobCount) 00175 { 00176 atomic_add(&(boundsCheckArgs->out.oobCount), oobCount); 00177 } 00178 } 00179 } 00180 00181 /*! 00182 * @brief HLI function that detects OOB violations in the scattered surface (BTS/SLM/scratch) memory access 00183 * @see bounds_check.h for details 00184 */ 00185 IGC_STACK_CALL void CheckSurfaceScatteredAccess( __global const uint32_t* offsets, 00186 uint32_t accessMask, 00187 __global BoundsCheckArgs* boundsCheckArgs) 00188 { 00189 if (accessMask != 0) 00190 { 00191 uint32_t numAddresses = boundsCheckArgs->in.numAddresses; 00192 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00193 uint32_t surfaceSize = boundsCheckArgs->in.surfaceSize; 00194 00195 for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx) 00196 { 00197 if (((accessMask & (0x1 << aIdx)) != 0) && (offsets[aIdx] + dataSize > surfaceSize)) 00198 { 00199 atomic_inc(&(boundsCheckArgs->out.oobCount)); 00200 return; 00201 } 00202 } 00203 } 00204 } 00205 00206 /*! 00207 * @brief HLI function that detects OOB violations in 2D block memory access 00208 * 00209 * @param[in] allocatedBuffers Pointer to the memory bounds structure containing allocated ranges 00210 * @param[in] block Pointer to the 2D block structure containing block dimensions and surface details 00211 * @param[in] accessMask Mask Per-channel mask of memory accesses 00212 * @param[in,out] boundsCheckArgs Pointer to the bounds check arguments structure 00213 * 00214 */ 00215 IGC_STACK_CALL void CheckBlock2DAccess(__global const MemBounds* allocatedBuffers, 00216 __global const Block2D* block, 00217 uint32_t accessMask, 00218 __global BoundsCheckArgs* boundsCheckArgs) 00219 { 00220 if ((accessMask & 0x1) == 0) 00221 { 00222 return; 00223 } 00224 00225 uint32_t numRanges = allocatedBuffers->numRanges; 00226 00227 uint64_t surfaceBaseAddress = block->surface_base_address; 00228 uint32_t surfaceHeight = (block->surface_height & 0x00FFFFFF) + 1; 00229 uint32_t surfaceWidth = (block->surface_width & 0x00FFFFFF) + 1; 00230 uint32_t surfacePitch = (block->surface_pitch & 0x00FFFFFF) + 1; 00231 uint32_t numBlocks = (block->array_length + 1); 00232 uint32_t blockHeight = (block->block_height + 1); 00233 uint32_t blockWidth = (block->block_width + 1); 00234 int32_t blockStartX = block->block_start_x; 00235 int32_t blockStartY = block->block_start_y; 00236 00237 uint32_t dataSize = boundsCheckArgs->in.dataSize; 00238 00239 uint32_t lastX = blockStartX + numBlocks * blockWidth - 1; 00240 if (lastX * dataSize >= surfaceWidth) 00241 { 00242 lastX = (surfaceWidth / dataSize) - 1; 00243 } 00244 00245 uint32_t lastY = blockStartY + blockHeight - 1; 00246 if (lastY >= surfaceHeight) 00247 { 00248 lastY = surfaceHeight - 1; 00249 } 00250 00251 // Calculate the first and last byte address of the blocks being accessed 00252 uint64_t firstByteAddr = surfaceBaseAddress + blockStartY * surfacePitch + blockStartX * dataSize; 00253 uint64_t lastByteAddr = surfaceBaseAddress + lastY * surfacePitch + lastX * dataSize -1; 00254 00255 // Check if the access is within any single range 00256 for (uint32_t rIdx = 0; rIdx < numRanges; ++rIdx) 00257 { 00258 __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]); 00259 uintptr_t rangeBase = range->base; 00260 uintptr_t rangeEnd = rangeBase + range->size; 00261 00262 if ((firstByteAddr >= rangeBase) && (lastByteAddr < rangeEnd)) 00263 { 00264 return; 00265 } 00266 } 00267 atomic_inc(&(boundsCheckArgs->out.oobCount)); 00268 }
(Back to the list of all GTPin Sample Tools)
Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT
1.7.4