|
GTPin
|
Memory_check is a GTPin tool that detects read accesses to uninitialized memory in kernels. The tool is built on the High-Level Instrumentation interface (HLIF).
To run the Memory_check tool, use the following command:
Profilers/Bin/gtpin -t memory_check [memory_check args] [GTPin args] -- app [application args]
The following output example shows the profiling results for one kernel.
------------------------------------------------------------------------------------------------------------------------
0: read___CS_asm53eca482ddd1b38a_simd32_53eca482ddd1b38a_0
------------------------------------------------------------------------------------------------------------------------
Dispatch ID Instruction ID #UMR violations Execution descriptor
------------------------------------------------------------------------------------------------------------------------
0 37 46 0 3 0 0
0 38 32 0 3 0 0
This means that, in dispatch 0, memory load instruction #37 performed 46 reads from uninitialized memory and instruction #38 performed 32.
(Back to the list of all GTPin Sample Tools)
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2026 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file A tool that detects uninitialized memory read accesses in kernels 00009 */ 00010 00011 #ifndef MEMORY_CHECK_H_ 00012 #define MEMORY_CHECK_H_ 00013 00014 #include "hlif_basic_defs.h" 00015 00016 #if defined(__cplusplus) 00017 #include "gtpin_api.h" 00018 using namespace gtpin; 00019 #endif 00020 00021 #pragma pack(push, 8) 00022 00023 #define UNINITIALIZED_MEMORY_PATTERN 0xdeadbeef 00024 00025 /* ============================================================================================= */ 00026 // Struct MemoryCheckArgs 00027 /* ============================================================================================= */ 00028 /// Common arguments of HLI functions that detects uninitialized memory read accesses 00029 typedef struct MemoryCheckArgs 00030 { 00031 struct 00032 { 00033 uint32_t numAccesses; ///< Number of elements accessed by the instruction 00034 uint32_t dataSize; ///< Size, in bytes, of the memory element 00035 } in; 00036 struct 00037 { 00038 uint32_t umrCount; ///< Counter of detected uninitialized memory read accesses 00039 } out; 00040 00041 #if defined(__cplusplus) 00042 /// Constructor 00043 MemoryCheckArgs() : in({0, 0}), out({0}) {} 00044 #endif 00045 } MemoryCheckArgs; 00046 00047 00048 /* ============================================================================================= */ 00049 // Struct SlmInitArg 00050 /* ============================================================================================= */ 00051 /// Argument of HLI function that initializes SLM memory 00052 typedef struct SlmInitArg 00053 { 00054 struct 00055 { 00056 uint32_t size; ///< SLM size per working group 00057 } in; 00058 00059 #if defined(__cplusplus) 00060 /// 
Constructor 00061 SlmInitArg(uint32_t size = 0) : in({size}) {} 00062 #endif 00063 } SlmInitArg; 00064 00065 /* ============================================================================================= */ 00066 // Function CheckReadAccess 00067 /* ============================================================================================= */ 00068 /*! 00069 * @brief HLI function that detects uninitialized memory read accesses 00070 * Only accesses that are a multiplication of dword are supported 00071 * @param[in] dstPayload Array of data elements read by the instruction 00072 * @param[in] accessMask Per-channel mask of memory accesses 00073 * @param[in][out] memoryCheckArgs Information about memory access instruction and results of the memory check 00074 */ 00075 IGC_STACK_CALL void CheckReadAccess(__global const uint32_t* dstPayload, 00076 uint32_t accessMask, 00077 __global MemoryCheckArgs* memoryCheckArgs); 00078 #if defined(__cplusplus) 00079 using CheckReadAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, MemoryCheckArgs*>; 00080 #endif 00081 00082 /*! 00083 * @brief HLI function that detects uninitialized Shared Local Memory read accesses 00084 * Only accesses that are a multiplication of dword are supported 00085 * @param[in] offsets Array of offsets into SLM memory 00086 * @param[in] accessMask Per-channel mask of memory accesses 00087 * @param[in][out] memoryCheckArgs Information about memory access instruction and results of the memory check 00088 * @param[in] slm Pointer to the local memory 00089 */ 00090 IGC_STACK_CALL void CheckSlmReadAccess(__global const uint32_t* offsets, 00091 uint32_t accessMask, 00092 __global MemoryCheckArgs* memoryCheckArgs, 00093 __local uint32_t* slm); 00094 #if defined(__cplusplus) 00095 using CheckSlmReadAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, MemoryCheckArgs*, uint32_t>; 00096 #endif 00097 00098 /*! 
00099 * @brief HLI function that detects uninitialized global memory read accesses done in A64 mode 00100 * Only accesses that are a multiplication of dword are supported 00101 * @param[in] addresses Array of 64-bit addresses 00102 * @param[in] accessMask Per-channel mask of memory accesses 00103 * @param[in][out] memoryCheckArgs Information about memory access instruction and results of the memory check 00104 */ 00105 IGC_STACK_CALL void CheckUgmA64ReadAccess(__global const uint64_t* addresses, 00106 uint32_t accessMask, 00107 __global MemoryCheckArgs* memoryCheckArgs); 00108 00109 #if defined(__cplusplus) 00110 using CheckUgmA64ReadAccessFunc = GtHliFunction<void, const uint64_t*, uint32_t, MemoryCheckArgs*>; 00111 #endif 00112 00113 /*! 00114 * @brief HLI function that initializes shared local memory 00115 * @param[in] arg Information about SLM memory 00116 * @param[in] slm Pointer to the local memory 00117 */ 00118 void InitSlm(__global const SlmInitArg* arg, __local uint32_t* slm); 00119 00120 #if defined(__cplusplus) 00121 using InitSlmFunc = GtHliFunction<void, const SlmInitArg*, uint32_t>; 00122 #endif 00123 00124 #pragma pack(pop) 00125 00126 #endif
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2025 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file A tool that detects uninitialized memory accesses in kernels 00009 * 00010 * The tool supports the following Read-Only and Read-Modify-Write accesses to global memory and SLM 00011 * 00012 * SLM: any RO and RMW access 00013 * Global memory: 00014 * - Any RO and RMW access done with A64 addressing model 00015 * - Only RO accesses done with any other addressing model 00016 */ 00017 00018 #include <cstring> 00019 #include <map> 00020 #include <algorithm> 00021 00022 #include "memory_check.h" 00023 #include "gen_send_decoder.h" 00024 00025 #include "gtpin_api.h" 00026 #include "gtpin_tool_utils.h" 00027 #include "ged.h" 00028 00029 /* ============================================================================================= */ 00030 // Configuration 00031 /* ============================================================================================= */ 00032 Knob<bool> KNOB_NO_COUT("no_cout", false, "Do not send profiling results to standard output device"); 00033 00034 /* ============================================================================================= */ 00035 // Class MemAccess 00036 /* ============================================================================================= */ 00037 /// Information about memory access instruction 00038 struct MemAccess 00039 { 00040 /// Constructor. 
If memory access is unsupported, the reason can be queried by Error() 00041 explicit MemAccess(const IGtIns &ins); 00042 00043 bool IsValid() const { return _isValid; } ///< @return true for a supported memory access 00044 InsId Id() const { return _insId; } ///< @return Instruction ID 00045 GtAccessType AccessType() const { return _accessType; } ///< @return Access type: read, write or read-write 00046 GtRegNum FirstDstReg() const { return _firstDstReg; } ///< @return First register in the destination payload 00047 GtRegNum FirstAddrReg() const { return _firstAddrReg; } ///< @return First register in the address payload 00048 uint32_t NumAccesses() const { return _numAccesses; } ///< @return Number of elements in the address payload 00049 uint32_t DataSize() const { return _dataSize; } ///< @return Data size, in bytes, per each address 00050 bool IsSlm() const { return _isSlm; } ///< @return true for SLM access 00051 const GtMemoryAddrModel& AddrModel() const { return _addrModel; } ///< @return Address model of the memory access 00052 const std::string& Error() const { return _errMsg; } ///< @return Error message on unsupported memory access 00053 00054 /*! 
00055 * @return Arguments of the HLI function that detects OOB violations in this memory access 00056 * @note This object owns the MemoryCheckArgs structure, but its content is controlled externally 00057 */ 00058 const MemoryCheckArgs& GetMemoryCheckArgs() const { return _mcArgs; } 00059 MemoryCheckArgs& GetMemoryCheckArgs() { return _mcArgs; } 00060 00061 private: 00062 bool _isValid = false; ///< True, if this structure represents supported memory access 00063 InsId _insId; ///< ID of the memory access instruction 00064 GtAccessType _accessType; ///< Access type: read-only, write-only or read-write 00065 GtMemoryAddrModel _addrModel; ///< Address model of the memory access 00066 GtRegNum _firstDstReg; ///< First register in the address payload of the instruction 00067 GtRegNum _firstAddrReg; ///< First register in the address payload of the instruction 00068 uint32_t _numAccesses = 0; ///< Number of elements in the address payload of the instruction 00069 uint32_t _dataSize = 0; ///< Size, in bytes, of the memory range referenced by a single address 00070 MemoryCheckArgs _mcArgs; ///< Common arguments of memory check functions 00071 00072 std::string _errMsg; ///< Error message on unsupported memory access 00073 bool _isSlm = false; ///< True if the memory access is to SLM 00074 }; 00075 00076 /* ============================================================================================= */ 00077 // Struct ProfileResults 00078 /* ============================================================================================= */ 00079 /*! 
00080 * Profile results per kernel dispatch / per insruction 00081 */ 00082 struct ProfileResults 00083 { 00084 ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess); 00085 00086 uint64_t dispatchId; ///< Unique ID of the kernel dispatch assigned by GTPin 00087 GtKernelExecDesc kernelExecDesc; ///< Kernel execution descriptor 00088 InsId insId; ///< ID of the memory access instruction 00089 uint32_t umrCount; ///< Counter of UMR violations in memory accesses performed by the instruction 00090 }; 00091 00092 /* ============================================================================================= */ 00093 // Class KernelProfile 00094 /* ============================================================================================= */ 00095 /// Static properties of the kernel, and its profile data updated on each kernel run 00096 class KernelProfile 00097 { 00098 public: 00099 using MemAccessMap = std::map<InsId, MemAccess>; ///< Information about memory accesses by kernel instructions 00100 00101 public: 00102 KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg); ///< Constructor 00103 00104 inline GtKernelId Id() const; ///< @return Unique identifier of the kernel 00105 inline GtGpuPlatform Platform() const; ///< @return Kernel's platform 00106 inline const std::string& Name() const; ///< @return Name of the kernel 00107 inline const std::string& UniqueName() const; ///< @return Unique name of the kernel 00108 inline std::string MemoryCheckResults() const; ///< @return Profiling results of kernel runs, in text format 00109 inline void DumpAsm() const; ///< Store kernel's assembly text in the file 00110 inline const MemAccessMap& GetMemAccessMap() const; ///< @return Information about memory accesses in the kernel 00111 inline MemAccessMap& GetMemAccessMap(); ///< @return Information about memory accesses in the kernel 00112 inline const SlmInitArg& GetSlmInitArg() const; ///< @return argument for SLM initialization function 00113 
inline SlmInitArg& GetSlmInitArg(); ///< @return argument for SLM initialization function 00114 00115 void RecordMemoryCheckResults(IGtKernelDispatch& dispatcher); ///< Update profile data with the latest memory check results 00116 00117 void RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg); ///< Record unsupported instruction 00118 void RecordUnhandledAccess(InsId insId, const std::string& errMsg); ///< Record unhandled memory access 00119 00120 private: 00121 GtKernelId _id; ///< Unique identifier of the kernel 00122 GtGpuPlatform _platform; ///< Kernel's platform 00123 std::string _name; ///< Name of the kernel 00124 std::string _uniqueName; ///< Unique name of the kernel 00125 std::string _asmText; ///< Assembly text of the kernel 00126 std::string _unhandledAccesses; ///< List of unhadled memory accesses, in text format 00127 00128 SlmInitArg _slmInitMemArg; ///< Argument for SLM initialization function 00129 std::map<InsId, MemAccess> _memAccessMap; ///< Map: Instruction ID to memory access information 00130 std::list<ProfileResults> _profileResults; ///< Profile results per kernel dispatch / per insruction 00131 }; 00132 00133 /* ============================================================================================= */ 00134 // Class MemoryCheck 00135 /* ============================================================================================= */ 00136 /*! 
00137 * A tool that detects uininitialized memory read (UMR) accesses in kernels 00138 */ 00139 class MemoryCheck : public GtTool 00140 { 00141 public: 00142 // Implementation of the IGtTool interface 00143 const char* Name() const override { return "Memory check"; } 00144 void OnKernelBuild(IGtKernelInstrument&) override; 00145 void OnKernelRun(IGtKernelDispatch&) override; 00146 void OnKernelComplete(IGtKernelDispatch&) override; 00147 00148 void LoadHliLibrary(); ///< Compile and load library of HLI functions 00149 static MemoryCheck* Instance(); ///< Return single instance of this class 00150 static void OnFini() { Instance()->Fini(); } ///< Termination handler registered with atexit() 00151 00152 private: 00153 00154 MemoryCheck(); ///< Default constructor 00155 MemoryCheck(const MemoryCheck&) = delete; ///< Disabled copy constructor 00156 MemoryCheck& operator = (const MemoryCheck&) = delete; ///< Disabled assignment operator 00157 ~MemoryCheck(); ///< Destructor 00158 void Fini(); ///< Post process and dump profiling data 00159 00160 /*! 
00161 * Insert a call to HLI function that detects uininitialized memory read (UMR) memory accesses in the specified instruction 00162 * @param[in] ins The memory access instruction 00163 * @param[in] memAccess Information about memory access 00164 * @param[in] instrumentor Instrumentation interface 00165 * @return true - success, false - the instruction or memory operation is not supported 00166 */ 00167 bool InsertMemoryCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor); 00168 bool InsertSlmInit(const IGtIns& ins, const SlmInitArg& slmInitArg, IGtKernelInstrument& instrumentor); 00169 00170 private: 00171 // Memory check functions 00172 CheckReadAccessFunc _checkReadAccessFunc; 00173 CheckSlmReadAccessFunc _checkSlmReadAccessFunc; 00174 CheckUgmA64ReadAccessFunc _checkUgmA64ReadAccessFunc; 00175 InitSlmFunc _initSlmFunc; 00176 00177 IGtHliModuleHandle _hliModule = nullptr; ///< Module of HLI functions 00178 std::map<GtKernelId, KernelProfile> _kernels; ///< Collection of kernel profiles 00179 }; 00180 00181 /* ============================================================================================= */ 00182 // MemAccess implementation 00183 /* ============================================================================================= */ 00184 MemAccess::MemAccess(const IGtIns &ins) : _insId(ins.Id()) 00185 { 00186 // Get and check data port (SFID) 00187 GtSfid sfid = ins.Sfid(); 00188 if ((sfid != GED_SFID_UGM) && (sfid != GED_SFID_SLM) && (sfid != GED_SFID_DP_DC0) && (sfid != GED_SFID_DP_DC1)) 00189 { 00190 _errMsg = "Unsupported data port " + std::string(sfid.ToString()); 00191 return; 00192 } 00193 00194 bool isHdc = (sfid == GED_SFID_DP_DC0) || (sfid == GED_SFID_DP_DC1); 00195 00196 _isSlm = (sfid == GED_SFID_SLM); 00197 00198 // Retrieve message descriptor 00199 if (!ins.MsgDescRegFile().IsImm()) 00200 { 00201 _errMsg = "SEND message descriptor is not immediate"; 00202 return; 00203 } 00204 00205 // Check opcode of 
the memory operation 00206 GED_DP_OPCODE opcode = ins.DPOpCode(); 00207 bool isAtomic = ins.IsAtomic(); 00208 bool isLoad = (isHdc && !isAtomic && ins.HasDstOperand() && ins.DstRegFile().IsGrf()) || (opcode == GED_DP_OPCODE_LOAD); 00209 bool isStore = (isHdc && !isAtomic && !ins.HasDstOperand() && ins.DstOperand().Reg().IsNullReg()) || (opcode == GED_DP_OPCODE_STORE); 00210 _accessType = (isLoad ? GT_ACCESS_READ : (isStore ? GT_ACCESS_WRITE : GT_ACCESS_READ_WRITE)); 00211 00212 if (!isLoad && !isAtomic) 00213 { 00214 _errMsg = "Unsupported SEND operation (not load/atomic)"; 00215 return; 00216 } 00217 00218 // Initialize address model 00219 _addrModel = ins.MemAddrModel(); 00220 if (!_addrModel.IsValid()) 00221 { 00222 _errMsg = "Unsupported/unknown address model"; // @fixme Support BSS model 00223 return; 00224 } 00225 00226 // Finally, initialize the rest of data members... 00227 GTPIN_ASSERT(ins.SrcRegFile(0).IsGrf()); 00228 _firstDstReg = ins.HasDstOperand() ? ins.DstOperand().Reg().RegNum() : GtRegNum(); 00229 _firstAddrReg = ins.SrcRegOperand(0).Reg().RegNum(); 00230 _numAccesses = ins.NumAccesses(); GTPIN_ASSERT(_numAccesses != 0); 00231 00232 DcSendMsg msg = DcSendMsg::Decode(ins.GetGedIns()); 00233 _dataSize = msg.ElementSize() * msg.NumElements(); 00234 00235 if (_dataSize == 0) 00236 { 00237 _errMsg = "Unsupported SEND operation"; 00238 return; 00239 } 00240 00241 if (_accessType == GT_ACCESS_READ_WRITE) 00242 { 00243 if ((_dataSize != 4 && _dataSize != 8) || (!_addrModel.IsSlm() && !_addrModel.IsA64())) 00244 { 00245 _errMsg = "Unsupported Read-Modify-Write instruction: only Dword and Qword data sizes are supported for SLM and A64 addressing modes"; 00246 return; 00247 } 00248 } 00249 00250 if (_dataSize & 0x3) 00251 { 00252 _errMsg = "Unsupported data size: " + DecStr(_dataSize) + "(should be multiplication of dword)"; 00253 return; 00254 } 00255 00256 _isValid = true; 00257 } 00258 00259 /* 
============================================================================================= */ 00260 // ProfileResults implementation 00261 /* ============================================================================================= */ 00262 ProfileResults::ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess) : 00263 dispatchId(dispatcher.DispatchId()), insId(memAccess.Id()), umrCount(memAccess.GetMemoryCheckArgs().out.umrCount) 00264 { 00265 dispatcher.GetExecDescriptor(kernelExecDesc); 00266 } 00267 00268 /* ============================================================================================= */ 00269 // KernelProfile implementation 00270 /* ============================================================================================= */ 00271 KernelProfile::KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg) : 00272 _id(kernel.Id()), _platform(kernel.GpuPlatform()), _name(GlueString(kernel.Name())), _uniqueName(kernel.UniqueName()), 00273 _asmText(CfgAsmText(cfg)) 00274 { 00275 // Populate this object with the information about memory accesses 00276 for (auto bblPtr : cfg.Bbls()) 00277 { 00278 for (auto insPtr : bblPtr->Instructions()) 00279 { 00280 const IGtIns& ins = *insPtr; 00281 if (ins.IsMemAccess() && !ins.IsEot()) 00282 { 00283 MemAccess memAccess(ins); 00284 00285 if (!memAccess.IsValid() && memAccess.AccessType().IsWriteOnly()) 00286 { 00287 continue; 00288 } 00289 00290 if (memAccess.IsValid()) 00291 { 00292 _memAccessMap.emplace(ins.Id(), memAccess); 00293 } 00294 else 00295 { 00296 RecordUnsupportedInstruction(ins, memAccess.Error()); 00297 } 00298 } 00299 } 00300 } 00301 } 00302 00303 GtKernelId KernelProfile::Id() const { return _id; } 00304 GtGpuPlatform KernelProfile::Platform() const { return _platform; } 00305 const std::string& KernelProfile::Name() const { return _name; } 00306 const std::string& KernelProfile::UniqueName() const { return _uniqueName; } 00307 void KernelProfile::DumpAsm() const { 
DumpKernelAsmText(_name, _uniqueName, _asmText); } 00308 const KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() const { return _memAccessMap; } 00309 KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() { return _memAccessMap; } 00310 const SlmInitArg& KernelProfile::GetSlmInitArg() const { return _slmInitMemArg; } 00311 SlmInitArg& KernelProfile::GetSlmInitArg() { return _slmInitMemArg; } 00312 00313 std::string KernelProfile::MemoryCheckResults() const 00314 { 00315 std::ostringstream os; 00316 00317 os << std::string(120, '-') << std::endl; 00318 os << std::setw(4) << _id << ": " << _name << "___" << _uniqueName << std::endl; 00319 os << std::string(120, '-') << std::endl; 00320 00321 if (!_unhandledAccesses.empty()) 00322 { 00323 os << " Unhandled memory accesses:" << std::endl; 00324 os << " --------------------------" << std::endl; 00325 os << _unhandledAccesses << std::endl; 00326 os << std::string(120, '-') << std::endl; 00327 } 00328 00329 uint64_t umrTotal = 0; 00330 for (const auto& res : _profileResults) 00331 { 00332 if (res.umrCount != 0) 00333 { 00334 if (umrTotal == 0) 00335 { 00336 os << std::setw(20) << "Dispatch ID" << std::setw(20) << "Instruction ID" << std::setw(20) << "#UMR violations"; 00337 os << " " << std::setw(45) << "Execution descriptor" << std::endl; 00338 os << std::string(120, '-') << std::endl; 00339 } 00340 os << std::setw(20) << res.dispatchId << std::setw(20) << res.insId << std::setw(20) << res.umrCount; 00341 os << " " << std::setw(45) << res.kernelExecDesc.ToString(_platform, ExecDescAlignedFormat()) << std::endl; 00342 00343 umrTotal += res.umrCount; 00344 } 00345 } 00346 if (umrTotal == 0) 00347 { 00348 os << "No UMR accesses detected" << std::endl; 00349 } 00350 return os.str(); 00351 } 00352 00353 void KernelProfile::RecordMemoryCheckResults(IGtKernelDispatch& dispatcher) 00354 { 00355 for (auto& entry: _memAccessMap) 00356 { 00357 MemAccess& memAccess = entry.second; 00358 
_profileResults.emplace_back(dispatcher, memAccess); 00359 } 00360 } 00361 00362 void KernelProfile::RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg) 00363 { 00364 if (!errMsg.empty()) 00365 { 00366 std::ostringstream os; 00367 os << errMsg << ": [" << std::setw(3) << ins.Id() << "] " << ins.ToString() << std::endl; 00368 _unhandledAccesses.append(os.str()); 00369 } 00370 } 00371 00372 void KernelProfile::RecordUnhandledAccess(InsId insId, const std::string& errMsg) 00373 { 00374 if (!errMsg.empty()) 00375 { 00376 std::ostringstream os; 00377 os << errMsg << ": Instruction ID: [" << std::setw(3) << insId << "] " << std::endl; 00378 _unhandledAccesses.append(os.str()); 00379 } 00380 } 00381 00382 /* ============================================================================================= */ 00383 // MemoryCheck implementation 00384 /* ============================================================================================= */ 00385 MemoryCheck::MemoryCheck() : 00386 _checkReadAccessFunc("CheckReadAccess"), 00387 _checkSlmReadAccessFunc("CheckSlmReadAccess"), 00388 _checkUgmA64ReadAccessFunc("CheckUgmA64ReadAccess"), 00389 _initSlmFunc("InitSlm") {} 00390 MemoryCheck::~MemoryCheck() {} 00391 00392 MemoryCheck* MemoryCheck::Instance() 00393 { 00394 static MemoryCheck instance; 00395 return &instance; 00396 } 00397 00398 void MemoryCheck::OnKernelBuild(IGtKernelInstrument& instrumentor) 00399 { 00400 const IGtKernel& kernel = instrumentor.Kernel(); 00401 const IGtCfg& cfg = instrumentor.Cfg(); 00402 IGtMemoryMapper& memMapper = instrumentor.MemoryMapper(); 00403 00404 // Create profile for this kernel 00405 auto result = _kernels.emplace(std::piecewise_construct, 00406 std::forward_as_tuple(instrumentor.Kernel().Id()), 00407 std::forward_as_tuple(kernel, cfg)); 00408 KernelProfile& kernelProfile = result.first->second; 00409 00410 bool hasSlm = false; 00411 00412 // Instrument memory accesses and share per-access arguments with HLI 
functions 00413 for (const auto& entry : kernelProfile.GetMemAccessMap()) 00414 { 00415 const auto& memAccess = entry.second; 00416 auto insId = entry.first; 00417 00418 if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) 00419 { 00420 continue; 00421 } 00422 const IGtIns& ins = cfg.GetInstruction(insId); 00423 00424 hasSlm |= memAccess.IsSlm(); 00425 00426 auto accessType = memAccess.AccessType(); 00427 if (accessType == GT_ACCESS_READ || memAccess.IsSlm() || memAccess.AddrModel().IsA64()) 00428 { 00429 InsertMemoryCheck(ins, memAccess, instrumentor); 00430 } 00431 else 00432 { 00433 GTPIN_ERROR(); 00434 } 00435 00436 // Share per-access HLI arguments. 00437 // They will be initialized at the start of the kernel, and copied back to the host memory at completion of the kernel 00438 memMapper.Map(memAccess.GetMemoryCheckArgs(), GT_MMAP_SHARE); 00439 } 00440 00441 if (hasSlm) 00442 { 00443 for (auto& bblPtr : cfg.EntryBbls()) 00444 { 00445 const IGtIns& ins = bblPtr->FirstIns(); 00446 InsertSlmInit(ins, kernelProfile.GetSlmInitArg(), instrumentor); 00447 memMapper.Map(kernelProfile.GetSlmInitArg(), GT_MMAP_SHARE); 00448 } 00449 } 00450 00451 // Link the kernel with the library of HLI functions 00452 instrumentor.LinkHliModule(_hliModule); 00453 } 00454 00455 void MemoryCheck::OnKernelRun(IGtKernelDispatch& dispatcher) 00456 { 00457 const IGtKernel& kernel = dispatcher.Kernel(); 00458 KernelProfile& kernelProfile = _kernels.at(kernel.Id()); 00459 00460 if (dispatcher.ExecStage().IsDispatch()) 00461 { 00462 GtKernelExecDesc execDesc; dispatcher.GetExecDescriptor(execDesc); 00463 if (kernel.IsInstrumented() && IsKernelExecProfileEnabled(execDesc, kernel.GpuPlatform(), kernel.Name().Get())) 00464 { 00465 dispatcher.SetProfilingMode(true); // Enable instrumentation 00466 00467 //// This tool needs an accurate information about memory allocations, which is available on the the final dispatch stage. 
00468 //// So, on the initial dispatch stage, we only enable instrumentation, and request GTPin to invoke MemoryCheck::OnKernelRun 00469 //// one more time, on the final dispatch stage. If this request is accepted, the initialization of the profile buffer will 00470 //// be done on the final dispatch stage, otherwise - on the intial dispatch stage. 00471 //if (dispatcher.ReportFinalDispatchStage()) 00472 //{ 00473 // return; 00474 //} 00475 } 00476 else 00477 { 00478 dispatcher.SetProfilingMode(false); // Disable instrumentation 00479 return; 00480 } 00481 } 00482 00483 IGtMemoryMapper& memMapper = dispatcher.MemoryMapper(); 00484 00485 bool hasSlm = false; 00486 00487 // Initialize per-access arguments of HLI functions 00488 for (auto& entry: kernelProfile.GetMemAccessMap()) 00489 { 00490 auto insId = entry.first; 00491 00492 if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) 00493 { 00494 continue; 00495 } 00496 00497 MemAccess& memAccess = entry.second; 00498 MemoryCheckArgs& mcArgs = memAccess.GetMemoryCheckArgs(); 00499 00500 hasSlm |= memAccess.IsSlm(); 00501 00502 mcArgs.out.umrCount = 0; 00503 mcArgs.in.dataSize = memAccess.DataSize(); 00504 mcArgs.in.numAccesses = memAccess.NumAccesses(); 00505 00506 memMapper.Write(&mcArgs, sizeof(mcArgs)); 00507 } 00508 00509 if (hasSlm) 00510 { 00511 uint32_t slmSize = dispatcher.SlmSize(); GTPIN_ASSERT(slmSize); 00512 00513 SlmInitArg& slmInitMemArgs = kernelProfile.GetSlmInitArg(); 00514 slmInitMemArgs.in.size = slmSize; 00515 00516 memMapper.Write(&slmInitMemArgs, sizeof(SlmInitArg)); 00517 } 00518 } 00519 00520 void MemoryCheck::OnKernelComplete(IGtKernelDispatch& dispatcher) 00521 { 00522 if (dispatcher.IsProfilingEnabled()) 00523 { 00524 KernelProfile& kernelProfile = _kernels.at(dispatcher.Kernel().Id()); 00525 kernelProfile.RecordMemoryCheckResults(dispatcher); 00526 } 00527 } 00528 00529 bool MemoryCheck::InsertMemoryCheck(const IGtIns &ins, const MemAccess& memAccess, 
IGtKernelInstrument& instrumentor) 00530 { 00531 GTPIN_ASSERT(memAccess.IsValid() && (memAccess.Id() == ins.Id())); 00532 00533 uint32_t numAccesses = memAccess.NumAccesses(); 00534 if (numAccesses == 0) 00535 { 00536 return false; // Nothing to check 00537 } 00538 00539 const IGtKernel& kernel = instrumentor.Kernel(); 00540 const IGtGenModel& genModel = kernel.GenModel(); 00541 uint32_t regSize = genModel.GrfRegSize(); 00542 const GtMemoryAddrModel& addrModel = memAccess.AddrModel(); 00543 uint32_t addrSize = addrModel.PtrSize(); 00544 GtReg firstAddrReg = GrfReg(memAccess.FirstAddrReg(), 0, regSize); 00545 GtReg firstDstReg = GrfReg(memAccess.FirstDstReg(), 0, regSize); 00546 uint32_t dataSize = memAccess.DataSize(); 00547 uint32_t numOfElements = memAccess.NumAccesses(); 00548 uint32_t numDstRegs = RoundUp(numOfElements * dataSize, regSize) / regSize; 00549 uint32_t numAddrRegs = RoundUp(numOfElements * addrSize, regSize) / regSize; 00550 MemoryCheckArgs* checkArgs = const_cast<MemoryCheckArgs*>(&memAccess.GetMemoryCheckArgs()); 00551 IargConstGrfRange dstPayload(firstDstReg.RegNum(), numDstRegs); 00552 IargConstGrfRange addrPayload(firstAddrReg.RegNum(), numAddrRegs); 00553 IargInsOpMask accessMask(ins); 00554 IargSlmPtr slmPtr; 00555 00556 if (memAccess.IsSlm()) 00557 { 00558 _checkSlmReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00559 NullReg(), // Unused return value 00560 addrPayload, // arg[1]: Base address of the accessed memory range 00561 accessMask, // arg[2]: Per-channel mask of memory accesses 00562 checkArgs, // arg[3]: Memory check arguments 00563 slmPtr // arg[4]: SLM pointer 00564 ); 00565 } 00566 else if (memAccess.AddrModel().IsA64()) 00567 { 00568 _checkUgmA64ReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00569 NullReg(), // Unused return value 00570 addrPayload, // arg[1]: Base address of the accessed memory range 00571 accessMask, // arg[2]: Per-channel mask of memory accesses 
00572 checkArgs // arg[3]: Memory check arguments 00573 ); 00574 } 00575 else 00576 { 00577 _checkReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::After(), 00578 NullReg(), // Unused return value 00579 dstPayload, // arg[1]: Base address of the destination 00580 accessMask, // arg[2]: Per-channel mask of memory accesses 00581 checkArgs // arg[3]: Memory check arguments 00582 ); 00583 } 00584 return true; 00585 } 00586 00587 bool MemoryCheck::InsertSlmInit(const IGtIns& ins, const SlmInitArg& slmInitArg, IGtKernelInstrument& instrumentor) 00588 { 00589 IargSlmPtr slmPtr; 00590 00591 _initSlmFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(), 00592 NullReg(), // Unused return value 00593 &slmInitArg, // arg[1]: Slm initialization argument 00594 slmPtr // arg[2]: SLM pointer 00595 ); 00596 00597 return true; 00598 } 00599 00600 void MemoryCheck::LoadHliLibrary() 00601 { 00602 std::string modulePath = JoinPath(GetKnobValue<std::string>("installDir"), "Examples", "memory_check.cl"); 00603 _hliModule = GTPin_GetCore()->HliLibrary().CompileModuleFromFile(modulePath.c_str()); 00604 GTPIN_ASSERT_MSG(_hliModule != nullptr, "Could not load HLI module " + modulePath); 00605 } 00606 00607 void MemoryCheck::Fini() 00608 { 00609 std::string str; 00610 00611 // Dump profiling results and assembly code of all kernels 00612 for (const auto& entry : _kernels) 00613 { 00614 const auto& kernelProfile = entry.second; 00615 str += kernelProfile.MemoryCheckResults(); 00616 kernelProfile.DumpAsm(); 00617 } 00618 00619 std::ofstream fs(JoinPath(GTPin_GetCore()->ProfileDir(), "memory_check.txt")); 00620 GTPIN_ASSERT(fs.is_open()); 00621 fs << str; 00622 00623 if (!KNOB_NO_COUT) 00624 { 00625 std::cout << str; 00626 } 00627 } 00628 00629 /* ============================================================================================= */ 00630 // GTPin_Entry 00631 /* ============================================================================================= 
*/ 00632 EXPORT_C_FUNC void GTPin_Entry(int argc, const char *argv[]) 00633 { 00634 SetKnobValue<bool>(true, "uninitialized_buffers_check_on"); 00635 ConfigureGTPin(argc, argv); 00636 00637 // Register the tool (callbacks) with the GTPin core 00638 MemoryCheck::Instance()->Register(); 00639 00640 // Compile and load library of HLI functions 00641 MemoryCheck::Instance()->LoadHliLibrary(); 00642 00643 // Register the termination function 00644 atexit(MemoryCheck::OnFini); 00645 }
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2024-2026 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /*! 00008 * @file Library of High-Level Instrumentation (HLI) functions used by the memory_check tool 00009 */ 00010 00011 #include "hlif_basic_defs.h" 00012 #include "memory_check.h" 00013 00014 /*! 00015 * @brief HLI function that initializes shared local memory 00016 * @see memory_check.h for details 00017 */ 00018 IGC_STACK_CALL void InitSlm(__global const SlmInitArg* arg, __local uint32_t* slm) 00019 { 00020 uint32_t numOfDwordElements = arg->in.size >> 2; 00021 00022 for (uint32_t i = 0; i != numOfDwordElements; ++i) 00023 { 00024 slm[i] = UNINITIALIZED_MEMORY_PATTERN; 00025 } 00026 00027 barrier(CLK_LOCAL_MEM_FENCE); 00028 } 00029 00030 /*! 00031 * @brief HLI function that detects uninitialized Shared Local Memory read accesses 00032 * Only accesses that are a multiplication of dword are supported 00033 * @see memory_check.h for details 00034 */ 00035 IGC_STACK_CALL void CheckSlmReadAccess(__global const uint32_t* offsets, 00036 uint32_t accessMask, 00037 __global MemoryCheckArgs* memoryCheckArgs, 00038 __local uint32_t* slm) 00039 { 00040 if (accessMask != 0) 00041 { 00042 uint32_t numOfElements = memoryCheckArgs->in.numAccesses; 00043 uint32_t dataSize = memoryCheckArgs->in.dataSize; 00044 00045 if (dataSize & 0x3) 00046 { 00047 return; 00048 } 00049 00050 for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx) 00051 { 00052 if ((accessMask & (0x1 << eIndx)) != 0) 00053 { 00054 uint32_t numOfDwordSubElements = dataSize >> 2; 00055 00056 uint32_t offset = offsets[eIndx]; 00057 uint32_t index = offset >> 2; 00058 00059 for (uint32_t i = 0; i < numOfDwordSubElements; ++i) 00060 { 00061 uint32_t data = slm[index + i]; 00062 if (data == UNINITIALIZED_MEMORY_PATTERN) 00063 { 00064 
atomic_inc(&(memoryCheckArgs->out.umrCount)); 00065 } 00066 } 00067 } 00068 } 00069 } 00070 } 00071 00072 /*! 00073 * @brief HLI function that detects uninitialized global memory read accesses done in A64 mode 00074 * Only accesses that are a multiplication of dword are supported 00075 * @see memory_check.h for details 00076 */ 00077 IGC_STACK_CALL void CheckUgmA64ReadAccess(__global const uint64_t* addresses, 00078 uint32_t accessMask, 00079 __global MemoryCheckArgs* memoryCheckArgs) 00080 { 00081 if (accessMask != 0) 00082 { 00083 uint32_t numOfElements = memoryCheckArgs->in.numAccesses; 00084 uint32_t dataSize = memoryCheckArgs->in.dataSize; 00085 00086 if (dataSize & 0x3) 00087 { 00088 return; 00089 } 00090 00091 for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx) 00092 { 00093 if ((accessMask & (0x1 << eIndx)) != 0) 00094 { 00095 uint32_t numOfDwordSubElements = dataSize >> 2; 00096 00097 uint64_t addr = addresses[eIndx]; 00098 00099 for (uint32_t i = 0; i < numOfDwordSubElements; ++i) 00100 { 00101 uint32_t data = *(uint32_t*)addr; 00102 if (data == UNINITIALIZED_MEMORY_PATTERN) 00103 { 00104 atomic_inc(&(memoryCheckArgs->out.umrCount)); 00105 } 00106 addr += 4; 00107 } 00108 } 00109 } 00110 } 00111 } 00112 00113 /*! 
00114 * @brief HLI function that detects uninitialized memory read accesses 00115 * Only accesses that are a multiplication of dword are supported 00116 * @see memory_check.h for details 00117 */ 00118 IGC_STACK_CALL void CheckReadAccess(__global const uint32_t* dstPayload, 00119 uint32_t accessMask, 00120 __global MemoryCheckArgs* memoryCheckArgs) 00121 { 00122 if (accessMask != 0) 00123 { 00124 uint32_t numOfElements = memoryCheckArgs->in.numAccesses; 00125 uint32_t dataSize = memoryCheckArgs->in.dataSize; 00126 00127 if (dataSize & 0x3) 00128 { 00129 return; 00130 } 00131 00132 for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx) 00133 { 00134 if ((accessMask & (0x1 << eIndx)) != 0) 00135 { 00136 uint32_t numOfDwordSubElements = dataSize >> 2; 00137 00138 for (uint32_t i = 0; i < numOfDwordSubElements; ++i) 00139 { 00140 uint32_t data = dstPayload[eIndx * numOfDwordSubElements + i]; 00141 if (data == UNINITIALIZED_MEMORY_PATTERN) 00142 { 00143 atomic_inc(&(memoryCheckArgs->out.umrCount)); 00144 } 00145 } 00146 } 00147 } 00148 } 00149 }
(Back to the list of all GTPin Sample Tools)
Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT
1.7.4