|
GTPin
|
The Icount tool counts the number of dynamic instructions executed within the trace.
To run the Icount tool in its default configuration, use this command:
Profilers\GTReplay\intel64\gtreplay.exe -t icount -- path-to-the-directory-containing-the-trace
(Back to the list of all GTReplay Sample Tools)
00001 /*========================== begin_copyright_notice ============================ 00002 Copyright (C) 2021-2024 Intel Corporation 00003 00004 SPDX-License-Identifier: MIT 00005 ============================= end_copyright_notice ===========================*/ 00006 00007 /******************************************************************************************************* 00008 * ICOUNT tool 00009 * 00010 * Count dynamic instructions within the trace. 00011 * 00012 * NOTE: the tool callbacks might be called from different threads. 00013 */ 00014 #include <stdio.h> 00015 #include <string.h> 00016 #include <vector> 00017 00018 #include "gtreplay_assert.h" 00019 #include "gtreplay_client.h" 00020 #include "knob_parser.h" 00021 00022 00023 // Global variables 00024 uint32_t gMaxNumOfHwThreads = 0; 00025 uint32_t gMaxNumOfTiles = 0; 00026 std::vector<std::vector<uint64_t>> icount; 00027 std::string kernelName; 00028 00029 /* 00030 * BeforeInsCallback - callback called before instruction execution 00031 * 00032 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00033 * @params[in] ins - a handle to the current instruction 00034 * @params[in] state - a handle to the HW Thread state corresponding to tid 00035 */ 00036 void BeforeInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00037 { 00038 GTREPLAY_ASSERT(tileId < gMaxNumOfTiles && tid < gMaxNumOfHwThreads); 00039 // Update the instruction counter corresponding to the current HW thread 00040 icount[tileId][tid]++; 00041 } 00042 00043 /* 00044 * AfterInsCallback - callback called after instruction execution 00045 * An illustration - this tool doesn't need to register after instruction callbacks 00046 * 00047 * @params[in] tid - the ID of the GPU HW thread for which the callback is called 00048 * @params[in] ins - a handle to the current instruction 00049 * @params[in] state - a handle to the HW Thread state corresponding to tid 00050 */ 00051 void 
AfterInsCallback(uint32_t tileId, uint32_t tid, GTReplayIns ins, GTReplayState state, void*) 00052 { 00053 } 00054 00055 /* 00056 * OnKernelComplete - callback called upon kernel completion 00057 * 00058 * @params[in] kernel - a handle to the kernel 00059 */ 00060 void OnKernelComplete(GTReplayKernel kernel) 00061 { 00062 uint64_t count = 0; 00063 00064 // Accumulate counters from all HW threads 00065 for (uint32_t t = 0; t < gMaxNumOfTiles; t++) 00066 { 00067 for (uint32_t i = 0; i < gMaxNumOfHwThreads; i++) 00068 { 00069 count += icount[t][i]; 00070 } 00071 } 00072 00073 // Print the results 00074 std::cout << "\n\n=================\n"; 00075 std::cout << "ICOUNT TOOL\n"; 00076 std::cout << "=================\n\n"; 00077 std::cout.imbue(std::locale("")); 00078 std::cout << "Kernel: " << kernelName << "\n\n"; 00079 std::cout << "TOTAL ICOUNT = " << count << "\n"; 00080 } 00081 00082 /* 00083 * OnKernelBuild - callback called before kernel execution 00084 * The purpose of this callback is to traverse the kernel binary and instrument callbacks 00085 * 00086 * @params[in] kernel - a handle to the kernel 00087 */ 00088 void OnKernelBuild(GTReplayKernel kernel) 00089 { 00090 uint32_t gModelId = GTReplay_GetModel(kernel); 00091 00092 gMaxNumOfHwThreads = GTReplay_MaxNumOfHWThreads(gModelId); 00093 00094 gMaxNumOfTiles = GTReplay_MaxNumOfTiles(kernel); 00095 GTREPLAY_ASSERT(gMaxNumOfTiles); 00096 00097 // Allocate a buffer of counters - one per each possible HW thread 00098 icount.resize(gMaxNumOfTiles); 00099 for (uint32_t i = 0; i < gMaxNumOfTiles; i++) 00100 { 00101 icount[i].resize(gMaxNumOfHwThreads, 0); 00102 } 00103 00104 // Traverse all the basic blocks 00105 for (GTReplayBbl bbl = GTReplay_BblHead(kernel); GTReplay_BblValid(bbl); bbl = GTReplay_BblNext(bbl)) 00106 { 00107 // Traverse all the instructions within the basic blocks 00108 for (GTReplayIns ins = GTReplay_InsHead(bbl); GTReplay_InsValid(ins); ins = GTReplay_InsNext(ins)) 00109 { 00110 // Register 
callback to be called before instruction execution 00111 GTReplay_RegisterCallbackBeforeIns(kernel, ins, BeforeInsCallback, NULL); 00112 } 00113 } 00114 00115 uint32_t kernelNameSize = 0; 00116 GTReplay_GetKernelName(kernel, &kernelNameSize, nullptr); 00117 00118 char* buf = new char[kernelNameSize + 1](); 00119 GTReplay_GetKernelName(kernel, &kernelNameSize, buf); 00120 00121 kernelName = std::string(buf); 00122 00123 delete[] buf; 00124 } 00125 00126 /* 00127 * GTReplay_Entry - tool entry point 00128 */ 00129 extern "C" 00130 DLLEXP void FASTCALL GTReplay_Entry(int argc, const char *argv[]) 00131 { 00132 // configure GTReplay 00133 ConfigureGTReplay(argc, argv); 00134 00135 // register OnKernelBuild and OnKernelComplete callbacks 00136 GTReplay_RegisterOnKernelBuildCallback(OnKernelBuild); 00137 GTReplay_RegisterOnKernelCompleteCallback(OnKernelComplete); 00138 00139 // Start GTReplay 00140 GTReplay_Start(); 00141 }
(Back to the list of all GTReplay Sample Tools)
Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT
1.7.4