|
GTPin
|
The Mem tool implements a simple memory model that follows writes to global memory.
To run the Mem tool in its default configuration, use this command:
Profilers\GTReplay\intel64\gtreplay.exe -t mem -- path-to-the-directory-containing-the-trace
(Back to the list of all GTReplay Sample Tools)
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2021-2022 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /******************************************************************************************************* 00008 * MEM tool 00009 * 00010 * Simple memory model - follow writes to global memory. 00011 * 00012 * NOTE: the tool callbacks might be called from different threads. 00013 */ 00014 #include <mutex> 00015 #include <stdio.h> 00016 #include <string.h> 00017 00018 #include "gtreplay_assert.h" 00019 #include "gtreplay_client.h" 00020 #include "knob_parser.h" 00021 #include "mem_model.h" 00022 00023 // Global variables 00024 uint8_t* sendDecodeData; 00025 00026 uint32_t gMaxNumOfHwThreads = 0; 00027 uint32_t gNumOfInstructions = 0; 00028 00029 MemoryModel mem; 00030 std::mutex mem_mutex; 00031 uint32_t count = 0; 00032 uint64_t max_addr = 0; 00033 00034 /* 00035 * BeforeInsCallback - callback called before instruction execution 00036 * 00037 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00038 * @params[in] ins - a handle to the current instruction 00039 * @params[in] state - a handle to the HW Thread state corresponding to tid 00040 */ 00041 void BeforeInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00042 { 00043 // Lock is required since this callback can be called from different threads 00044 std::lock_guard<std::mutex> lock(mem_mutex); 00045 00046 GTREPLAY_ASSERT(GTReplay_IsMemoryWrite(ins)); 00047 00048 uint32_t id = GTReplay_InsId(ins); 00049 00050 // Obtain attributes of access for this instruction 00051 uint32_t numOfAccesses = GTReplay_GetNumOfMemoryAccesses(ins); // number of accesses 00052 uint32_t surfaceId = GTReplay_GetMemorySurfaceId(ins); // surface ID (BTI) 00053 uint32_t memAccessSize = 
GTReplay_GetMemoryAccessSize(ins); // memory access size 00054 00055 // Obtain the dynamic execution mask - active channels 00056 uint32_t chan = GTReplay_DynamicExecMask(ins, state); 00057 00058 // Iterate over all accesses 00059 for (uint32_t i = 0; i < numOfAccesses; i++) 00060 { 00061 uint8_t data[32] = {}; 00062 00063 // If the current channel is active 00064 if (chan & (1 << i)) 00065 { 00066 // Obtain the current access address 00067 uint64_t addr = GTReplay_GetMemoryAccessAddr(ins, state, i); 00068 // Obtain the current access value 00069 GTReplay_GetMemoryWriteValue(ins, state, i, data); 00070 00071 uint32_t* d = (uint32_t*)data; 00072 00073 // Write to memory in granularity of 4 bytes 00074 for (uint32_t e = 0; e < memAccessSize / 4; e++) 00075 { 00076 mem.Write(surfaceId, (uint32_t)addr, d[0]); 00077 00078 if (addr > max_addr) max_addr = addr; 00079 00080 count += 4; 00081 addr += 4; 00082 d++; 00083 } 00084 } 00085 } 00086 } 00087 00088 /* 00089 * AfterInsCallback - callback called after instruction execution 00090 * An illustration - this tool doesn't need to register after instruction callbacks 00091 * 00092 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00093 * @params[in] ins - a handle to the current instruction 00094 * @params[in] state - a handle to the HW Thread state corresponding to tid 00095 */ 00096 void AfterInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00097 { 00098 } 00099 00100 /* 00101 * OnKernelComplete - callback called upon kernel completion 00102 * 00103 * @params[in] kernel - a handle to the kernel 00104 */ 00105 void OnKernelComplete(GTReplayKernel kernel) 00106 { 00107 // Print the results 00108 printf("\n\n=================\n" 00109 "MEM TOOL\n" 00110 "=================\n\n"); 00111 00112 printf("written memory = 0x%08x bytes max_addr = 0x%016llx\n", count, max_addr); 00113 00114 // Print the resulting memory image 00115 mem.Print(true); 00116 } 00117 
00118 /* 00119 * OnKernelBuild - callback called before kernel execution 00120 * The purpose of this callback is to traverse the kernel binary and instrument callbacks 00121 * 00122 * @params[in] kernel - a handle to the kernel 00123 */ 00124 void OnKernelBuild(GTReplayKernel kernel) 00125 { 00126 uint32_t gModelId = GTReplay_GetModel(kernel); 00127 00128 gMaxNumOfHwThreads = GTReplay_MaxNumOfHWThreads(gModelId); 00129 00130 gNumOfInstructions = GTReplay_NumOfInstructions(kernel); 00131 00132 // Traverse all the basic blocks 00133 for (GTReplayBbl bbl = GTReplay_BblHead(kernel); GTReplay_BblValid(bbl); bbl = GTReplay_BblNext(bbl)) 00134 { 00135 // Traverse all the instruction within the basic blocks 00136 for (GTReplayIns ins = GTReplay_InsHead(bbl); GTReplay_InsValid(ins); ins = GTReplay_InsNext(ins)) 00137 { 00138 // Check whether the instruction is memory write 00139 if (!GTReplay_IsMemoryWrite(ins)) 00140 { 00141 // If not, there is nothing to do 00142 continue; 00143 } 00144 // Register callback to be called before instruction execution 00145 GTReplay_RegisterCallbackBeforeIns(kernel, ins, BeforeInsCallback, NULL); 00146 } 00147 } 00148 } 00149 00150 /* 00151 * GTReplay_Entry - tool entry point 00152 */ 00153 extern "C" 00154 DLLEXP void FASTCALL GTReplay_Entry(int argc, const char *argv[]) 00155 { 00156 // configure GTReplay 00157 ConfigureGTReplay(argc, argv); 00158 00159 // register OnKernelBuild and OnKernelComplete callbacks 00160 GTReplay_RegisterOnKernelBuildCallback(OnKernelBuild); 00161 GTReplay_RegisterOnKernelCompleteCallback(OnKernelComplete); 00162 00163 // Start GTReplay 00164 GTReplay_Start(); 00165 }
/*========================== begin_copyright_notice ============================
Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/

#ifndef _MEM_MODEL_H
#define _MEM_MODEL_H

#include <cstdint>
#include <cstring>
#include <mutex>


#define FOUR_MB (4 * 1024 * 1024)
#define FOUR_KB (4 * 1024)

/*
 * Surface - represents a memory buffer
 *
 * The buffer is addressed by a 32-bit offset and stored sparsely: a page directory
 * of 1024 page tables, each holding 1024 pointers to 4KB pages. Page tables and
 * pages are allocated on first access.
 */
class Surface {
public:
    // Constructor
    Surface();

    // Destructor
    ~Surface();

    // A surface owns its page tables and pages through raw pointers,
    // so copying it would lead to a double free
    Surface(const Surface&) = delete;
    Surface& operator=(const Surface&) = delete;

    // Writing into the surface
    //
    // addr - an offset into the surface
    // data - data to be written
    void Write(uint32_t addr, uint32_t data);

    // Printing the surface image
    //
    // asFloat - if true, every value is additionally printed as a float
    void Print(bool asFloat);


private:

    // Computes the pointer to the surface corresponding to the given address
    uint8_t * GetPtr(uint32_t addr);

    // PageTable - a table of 1024 entries where each entry points to a 4KB page
    typedef struct PageTableS {
        // Initially each page table is empty
        PageTableS() { memset(pte, 0, 1024 * sizeof(uint32_t*)); }
        uint32_t* pte[1024];
    } PageTable;

    PageTable* _pageDir[1024]; // Page directory is an array of 1024 pointers to page tables

    uint32_t _numOfWrites;     // number of writes
    uint32_t _maxAddr;         // maximal address
};

/*
 * MemoryModel - a simple memory model
 *
 * A collection of up to 256 surfaces indexed by surface ID; a surface is created
 * on the first write into it.
 */
class MemoryModel {
public:

    // Constructor
    MemoryModel();

    // Destructor
    ~MemoryModel();

    // The model owns its surfaces through raw pointers (and holds a mutex): not copyable
    MemoryModel(const MemoryModel&) = delete;
    MemoryModel& operator=(const MemoryModel&) = delete;

    // Writing into a surface
    //
    // bti  - surface ID (BTI index)
    // addr - an offset into the surface
    // data - data to be written
    void Write(uint32_t bti, uint32_t addr, uint32_t data);

    // Printing the memory image
    //
    // asFloat - if true, every value is additionally printed as a float
    void Print(bool asFloat);

private:

    Surface* _mem[256]; // an array of potential 256 surfaces

    std::mutex _mutex;  // mutex
};

#endif
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2021 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 #include "mem_model.h" 00008 #include <cstdio> 00009 00010 // MemoryModel constructor 00011 MemoryModel::MemoryModel() 00012 { 00013 // Initially all surface pointers are NULL 00014 for (uint32_t bti = 0; bti < 256; bti++) 00015 { 00016 _mem[bti] = NULL; 00017 } 00018 } 00019 00020 // MemoryModel destructor 00021 MemoryModel::~MemoryModel() 00022 { 00023 // Iterate over all potential surfaces 00024 for (uint32_t bti = 0; bti < 256; bti++) 00025 { 00026 // If exists 00027 if (_mem[bti]) 00028 { 00029 // Delete it 00030 delete _mem[bti]; 00031 } 00032 } 00033 } 00034 00035 // Writing into a surface 00036 // 00037 // bti - surface ID (BTI index) 00038 // addr - an offset into the surface 00039 // data - data to be written 00040 void MemoryModel::Write(uint32_t bti, uint32_t addr, uint32_t data) 00041 { 00042 std::lock_guard<std::mutex> lock(_mutex); 00043 00044 // If first access to the specific surface 00045 if (_mem[bti] == NULL) 00046 { 00047 // Create it 00048 _mem[bti] = new Surface; 00049 } 00050 00051 // Perform a write into the surface 00052 _mem[bti]->Write(addr, data); 00053 } 00054 00055 // Printing the memory image 00056 void MemoryModel::Print(bool asFloat) 00057 { 00058 // Iterate over all potential surfaces 00059 for (uint32_t bti = 0; bti < 256; bti++) 00060 { 00061 // If exists 00062 if (_mem[bti]) 00063 { 00064 printf("\nSurface: 0x%02x\n=============\n\n", bti); 00065 00066 // Print the surface 00067 _mem[bti]->Print(asFloat); 00068 } 00069 } 00070 } 00071 00072 00073 // Surface constructor 00074 Surface::Surface() : _numOfWrites(0), _maxAddr(0) 00075 { 00076 // Page Directory is initially empty 00077 memset(_pageDir, 0, 1024 * sizeof(PageTable*)); 00078 } 00079 00080 // Surface 
destructor 00081 Surface::~Surface() 00082 { 00083 // Iterate over all Page Directory entries 00084 for (uint32_t pdi = 0; pdi < 1024; pdi++) 00085 { 00086 PageTable* pageTable = _pageDir[pdi]; 00087 00088 // If current Page Table exists 00089 if (pageTable) 00090 { 00091 // Iterate over all Page Table entries 00092 for (uint32_t pti = 0; pti < 1024; pti++) 00093 { 00094 uint32_t* page = pageTable->pte[pti]; 00095 00096 // If page exists 00097 if (page) 00098 { 00099 // Delete it 00100 delete[] page; 00101 pageTable->pte[pti] = NULL; 00102 } 00103 } 00104 00105 // Delete Page Table 00106 delete pageTable; 00107 _pageDir[pdi] = NULL; 00108 } 00109 } 00110 } 00111 00112 // Writing into the surface 00113 // 00114 // addr - an offset into the surface 00115 // data - data to be written 00116 void Surface::Write(uint32_t addr, uint32_t data) 00117 { 00118 // Update statistics 00119 _numOfWrites++; 00120 if (_maxAddr < addr) 00121 { 00122 _maxAddr = addr; 00123 } 00124 00125 // Get a pointer to required offset 00126 uint32_t* ptr = (uint32_t*)GetPtr(addr); 00127 00128 // Write the data 00129 ptr[0] = data; 00130 } 00131 00132 00133 // Printing the surface image 00134 void Surface::Print(bool asFloat) 00135 { 00136 // Print surface statistics 00137 printf("Surface: numOfWrites = %d maxAddr = 0x%08x\n", _numOfWrites, _maxAddr); 00138 00139 // Iterate over all entries within the Page Directory 00140 for (uint32_t pdi = 0; pdi < 1024; pdi++) 00141 { 00142 PageTable* pageTable = _pageDir[pdi]; 00143 00144 // If corresponding Page Table exists 00145 if (pageTable) 00146 { 00147 // Compute the base described by the current Page Table 00148 uint32_t pageTableBase = pdi * FOUR_MB; 00149 00150 // Iterate over all Page Table entries 00151 for (uint32_t pti = 0; pti < 1024; pti++) 00152 { 00153 uint32_t* page = pageTable->pte[pti]; 00154 00155 // If the current page exists 00156 if (page) 00157 { 00158 // Compute the base of the current page 00159 uint32_t pageBase = pageTableBase + 
pti * FOUR_KB; 00160 00161 // Dump all 4bytes of the current page 00162 for (uint32_t a = 0; a < 1024; a++) 00163 { 00164 uint32_t addr = pageBase + a * 4; 00165 uint32_t val = page[a]; 00166 00167 if (asFloat) printf("0x%08x: 0x%08x (%f)\n", addr, val, *(float*)&val); 00168 else printf("0x%08x: 0x%08x\n", addr, val); 00169 } 00170 } 00171 } 00172 } 00173 } 00174 } 00175 00176 00177 // Computes the pointer to the surface corresponding to the given address 00178 uint8_t * Surface::GetPtr(uint32_t addr) 00179 { 00180 // Compute Page Directory index, Page Table index and the offset into the page from the address 00181 uint32_t pdi = addr >> 22; 00182 uint32_t pti = (addr & 0x003FF000) >> 12; 00183 uint32_t offset = addr & 0xFFF; 00184 00185 // Get corresponding Page Table 00186 PageTable* pageTable = _pageDir[pdi]; 00187 00188 // If doesn't exist 00189 if (pageTable == NULL) 00190 { 00191 // Allocate one 00192 pageTable = new PageTable; 00193 00194 // Update Page Directory 00195 _pageDir[pdi] = pageTable; 00196 } 00197 00198 // Compute the current page 00199 uint32_t* page = pageTable->pte[pti]; 00200 00201 // If doesn't exist 00202 if (page == NULL) 00203 { 00204 // Allocate one 00205 page = new uint32_t[1024]; 00206 // Initialize with zeros 00207 memset(page, 0, 0x1000); 00208 // Update Page Table 00209 pageTable->pte[pti] = page; 00210 } 00211 00212 // Compute the pointer into the current page 00213 uint8_t* ptr = (uint8_t*)page + offset; 00214 00215 return ptr; 00216 } 00217
(Back to the list of all GTReplay Sample Tools)
Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT
1.7.4