GTPin
GTPin: Memory_check Sample Tool

Memory_check is a GTPin tool for profiling uninitialized memory read accesses. The tool leverages the High Level Instrumentation interface (HLIF).

Running the Memory_check tool

To run the Memory_check tool, use the following command:

Profilers/Bin/gtpin -t memory_check [memory_check args] [GTPin args]  -- app [application args]

Configuration options

Example Output

The following example output shows the profiling results for one kernel.

------------------------------------------------------------------------------------------------------------------------
   0:    read___CS_asm53eca482ddd1b38a_simd32_53eca482ddd1b38a_0
------------------------------------------------------------------------------------------------------------------------
         Dispatch ID      Instruction ID     #UMR violations                          Execution descriptor
------------------------------------------------------------------------------------------------------------------------
                   0                  37                  46               0         3         0         0
                   0                  38                  32               0         3         0         0

This means that memory load instruction #37 performed 46 reads from uninitialized memory, and instruction #38 performed 32.

(Back to the list of all GTPin Sample Tools)

memory_check.h - Data structures and HLI function declarations.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2026 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file A tool that detects uninitialized memory read accesses in kernels
00009  */
00010 
00011 #ifndef MEMORY_CHECK_H_
00012 #define MEMORY_CHECK_H_
00013 
00014 #include "hlif_basic_defs.h"
00015 
00016 #if defined(__cplusplus)
00017 #include "gtpin_api.h"
00018 using namespace gtpin;
00019 #endif
00020 
00021 #pragma pack(push, 8)
00022 
00023 #define UNINITIALIZED_MEMORY_PATTERN 0xdeadbeef
00024 
00025 /* ============================================================================================= */
00026 // Struct MemoryCheckArgs
00027 /* ============================================================================================= */
00028 /// Common arguments of HLI functions that detect uninitialized memory read accesses
00029 typedef struct MemoryCheckArgs
00030 {
00031     struct
00032     {
00033         uint32_t    numAccesses;   ///< Number of elements accessed by the instruction
00034         uint32_t    dataSize;      ///< Size, in bytes, of the memory element
00035     } in;                          ///< Inputs describing the memory access instruction
00036     struct
00037     {
00038         uint32_t    umrCount;     ///< Counter of detected uninitialized memory read accesses
00039     } out;                         ///< Results of the memory check
00040 
00041     #if defined(__cplusplus)
00042     /// Constructor (C++ only): zero-initializes all input and output fields
00043     MemoryCheckArgs() : in({0, 0}), out({0}) {}
00044     #endif
00045 } MemoryCheckArgs;
00046 
00047 
00048 /* ============================================================================================= */
00049 // Struct SlmInitArg
00050 /* ============================================================================================= */
00051 /// Argument of the HLI function that initializes SLM memory
00052 typedef struct SlmInitArg
00053 {
00054     struct
00055     {
00056         uint32_t    size;  ///< SLM size per working group
00057     } in;                  ///< Input of the SLM initialization function
00058 
00059 #if defined(__cplusplus)
00060     /// Constructor (C++ only): stores @p size, which defaults to 0, in in.size
00061     SlmInitArg(uint32_t size = 0) : in({size}) {}
00062 #endif
00063 } SlmInitArg;
00064 
00065 /* ============================================================================================= */
00066 // Function CheckReadAccess
00067 /* ============================================================================================= */
00068 /*!
00069  * @brief HLI function that detects uninitialized memory read accesses
00070  *        Only accesses whose data size is a multiple of a dword are supported
00071  * @param[in]       dstPayload          Array of data elements read by the instruction
00072  * @param[in]       accessMask          Per-channel mask of memory accesses
00073  * @param[in,out]   memoryCheckArgs     Information about memory access instruction and results of the memory check
00074  */
00075 IGC_STACK_CALL void CheckReadAccess(__global const uint32_t* dstPayload,
00076                                     uint32_t accessMask,
00077                                     __global MemoryCheckArgs* memoryCheckArgs);
00078 #if defined(__cplusplus)
00079 using CheckReadAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, MemoryCheckArgs*>; ///< C++-only alias mirroring the signature of CheckReadAccess
00080 #endif
00081 
00082 /*!
00083  * @brief HLI function that detects uninitialized Shared Local Memory read accesses
00084  *        Only accesses whose data size is a multiple of a dword are supported
00085  * @param[in]       offsets             Array of offsets into SLM memory
00086  * @param[in]       accessMask          Per-channel mask of memory accesses
00087  * @param[in,out]   memoryCheckArgs     Information about memory access instruction and results of the memory check
00088  * @param[in]       slm                 Pointer to the local memory (passed as uint32_t in the C++ alias below)
00089  */
00090 IGC_STACK_CALL void CheckSlmReadAccess(__global const uint32_t* offsets,
00091                                        uint32_t accessMask,
00092                                        __global MemoryCheckArgs* memoryCheckArgs,
00093                                        __local uint32_t* slm);
00094 #if defined(__cplusplus)
00095 using CheckSlmReadAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, MemoryCheckArgs*, uint32_t>; ///< C++-only alias for CheckSlmReadAccess
00096 #endif
00097 
00098 /*!
00099  * @brief HLI function that detects uninitialized global memory read accesses done in A64 mode
00100  *        Only accesses whose data size is a multiple of a dword are supported
00101  * @param[in]       addresses           Array of 64-bit addresses
00102  * @param[in]       accessMask          Per-channel mask of memory accesses
00103  * @param[in,out]   memoryCheckArgs     Information about memory access instruction and results of the memory check
00104  */
00105 IGC_STACK_CALL void CheckUgmA64ReadAccess(__global const uint64_t* addresses,
00106                                           uint32_t accessMask,
00107                                           __global MemoryCheckArgs* memoryCheckArgs);
00108 
00109 #if defined(__cplusplus)
00110     using CheckUgmA64ReadAccessFunc = GtHliFunction<void, const uint64_t*, uint32_t, MemoryCheckArgs*>; ///< C++-only alias for CheckUgmA64ReadAccess
00111 #endif
00112 
00113 /*!
00114  * @brief HLI function that initializes shared local memory
00115  * @param[in]  arg     Information about SLM memory (see SlmInitArg)
00116  * @param[in]  slm     Pointer to the local memory (passed as uint32_t in the C++ alias below)
00117  */
00118 void InitSlm(__global const SlmInitArg* arg, __local uint32_t* slm); // NOTE(review): no IGC_STACK_CALL here, unlike the Check* functions - confirm this is intentional
00119 
00120 #if defined(__cplusplus)
00121 using InitSlmFunc = GtHliFunction<void, const SlmInitArg*, uint32_t>; ///< C++-only alias for InitSlm
00122 #endif
00123 
00124 #pragma pack(pop)
00125 
00126 #endif

memory_check.cpp - Tool implementation, instrumentation logic, and result aggregation.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2025 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file A tool that detects uninitialized memory accesses in kernels
00009  *
00010  *       The tool supports the following Read-Only and Read-Modify-Write accesses to global memory and SLM
00011  * 
00012  *       SLM:     any RO and RMW access
00013  *       Global memory: 
00014  *         - Any RO and RMW access done with A64 addressing model
00015  *         - Only RO accesses done with any other addressing model
00016  */
00017 
00018 #include <cstring>
00019 #include <map>
00020 #include <algorithm>
00021 
00022 #include "memory_check.h"
00023 #include "gen_send_decoder.h"
00024 
00025 #include "gtpin_api.h"
00026 #include "gtpin_tool_utils.h"
00027 #include "ged.h"
00028 
00029 /* ============================================================================================= */
00030 // Configuration
00031 /* ============================================================================================= */
00032 Knob<bool> KNOB_NO_COUT("no_cout", false, "Do not send profiling results to standard output device"); // Knob "no_cout"; the 'false' argument is presumably its default value - TODO confirm
00033 
00034 /* ============================================================================================= */
00035 // Struct MemAccess
00036 /* ============================================================================================= */
00037 /// Information about a memory access instruction, decoded once by the constructor
00038 struct MemAccess
00039 {
00040     /// Constructor. If memory access is unsupported, IsValid() is false and the reason can be queried by Error()
00041     explicit MemAccess(const IGtIns &ins);
00042 
00043     bool         IsValid()      const { return _isValid; }            ///< @return true for a supported memory access
00044     InsId        Id()           const { return _insId; }              ///< @return Instruction ID
00045     GtAccessType AccessType()   const { return _accessType; }         ///< @return Access type: read, write or read-write
00046     GtRegNum     FirstDstReg()  const { return _firstDstReg; }        ///< @return First register in the destination payload
00047     GtRegNum     FirstAddrReg() const { return _firstAddrReg; }       ///< @return First register in the address payload
00048     uint32_t     NumAccesses()  const { return _numAccesses; }        ///< @return Number of elements in the address payload
00049     uint32_t     DataSize()     const { return _dataSize; }           ///< @return Data size, in bytes, per each address
00050     bool         IsSlm()        const { return _isSlm; }              ///< @return true for SLM access
00051     const GtMemoryAddrModel& AddrModel() const { return _addrModel; } ///< @return Address model of the memory access
00052     const std::string&       Error()     const { return _errMsg; }    ///< @return Error message on unsupported memory access
00053 
00054     /*!
00055      * @return Arguments of the HLI function that detects uninitialized memory reads (UMR) in this memory access
00056      * @note This object owns the MemoryCheckArgs structure, but its content is controlled externally
00057      */
00058     const MemoryCheckArgs& GetMemoryCheckArgs() const { return _mcArgs; }
00059     MemoryCheckArgs&       GetMemoryCheckArgs()       { return _mcArgs; }
00060 
00061 private:
00062     bool                _isValid = false;       ///< True, if this structure represents supported memory access
00063     InsId               _insId;                 ///< ID of the memory access instruction
00064     GtAccessType        _accessType;            ///< Access type: read-only, write-only or read-write
00065     GtMemoryAddrModel   _addrModel;             ///< Address model of the memory access
00066     GtRegNum            _firstDstReg;           ///< First register in the destination payload of the instruction
00067     GtRegNum            _firstAddrReg;          ///< First register in the address payload of the instruction
00068     uint32_t            _numAccesses = 0;       ///< Number of elements in the address payload of the instruction
00069     uint32_t            _dataSize = 0;          ///< Size, in bytes, of the memory range referenced by a single address
00070     MemoryCheckArgs     _mcArgs;                ///< Common arguments of memory check functions
00071 
00072     std::string         _errMsg;                ///< Error message on unsupported memory access
00073     bool                _isSlm = false;         ///< True if the memory access is to SLM
00074 };
00075 
00076 /* ============================================================================================= */
00077 // Struct ProfileResults
00078 /* ============================================================================================= */
00079 /*!
00080  * Profile results per kernel dispatch / per instruction
00081  */
00082 struct ProfileResults
00083 {
00084     ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess); ///< Capture the results of @p memAccess for the given dispatch
00085 
00086     uint64_t          dispatchId;     ///< Unique ID of the kernel dispatch assigned by GTPin
00087     GtKernelExecDesc  kernelExecDesc; ///< Kernel execution descriptor
00088     InsId             insId;          ///< ID of the memory access instruction
00089     uint32_t          umrCount;       ///< Counter of UMR violations in memory accesses performed by the instruction
00090 };
00091 
00092 /* ============================================================================================= */
00093 // Class KernelProfile
00094 /* ============================================================================================= */
00095 /// Static properties of the kernel, and its profile data updated on each kernel run
00096 class KernelProfile
00097 {
00098 public:
00099     using MemAccessMap = std::map<InsId, MemAccess>;              ///< Information about memory accesses by kernel instructions
00100 
00101 public:
00102     KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg);    ///< Constructor: collects the memory accesses found in @p cfg
00103 
00104     inline GtKernelId             Id()                 const;        ///< @return Unique identifier of the kernel
00105     inline GtGpuPlatform          Platform()           const;        ///< @return Kernel's platform
00106     inline const std::string&     Name()               const;        ///< @return Name of the kernel
00107     inline const std::string&     UniqueName()         const;        ///< @return Unique name of the kernel
00108     inline std::string            MemoryCheckResults() const;        ///< @return Profiling results of kernel runs, in text format
00109     inline void                   DumpAsm()            const;        ///< Store kernel's assembly text in the file
00110     inline const MemAccessMap&    GetMemAccessMap()    const;        ///< @return Information about memory accesses in the kernel
00111     inline MemAccessMap&          GetMemAccessMap();                 ///< @return Information about memory accesses in the kernel
00112     inline const SlmInitArg&      GetSlmInitArg()      const;        ///< @return argument for SLM initialization function
00113     inline SlmInitArg&            GetSlmInitArg();                   ///< @return argument for SLM initialization function
00114 
00115     void RecordMemoryCheckResults(IGtKernelDispatch& dispatcher); ///< Update profile data with the latest memory check results
00116 
00117     void RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg); ///< Record unsupported instruction
00118     void RecordUnhandledAccess(InsId insId, const std::string& errMsg);              ///< Record unhandled memory access
00119 
00120 private:
00121     GtKernelId      _id;                        ///< Unique identifier of the kernel
00122     GtGpuPlatform   _platform;                  ///< Kernel's platform
00123     std::string     _name;                      ///< Name of the kernel
00124     std::string     _uniqueName;                ///< Unique name of the kernel
00125     std::string     _asmText;                   ///< Assembly text of the kernel
00126     std::string     _unhandledAccesses;         ///< List of unhandled memory accesses, in text format
00127 
00128     SlmInitArg                 _slmInitMemArg;  ///< Argument for SLM initialization function
00129     std::map<InsId, MemAccess> _memAccessMap;   ///< Map: Instruction ID to memory access information
00130     std::list<ProfileResults>  _profileResults; ///< Profile results per kernel dispatch / per instruction
00131 };
00132 
00133 /* ============================================================================================= */
00134 // Class MemoryCheck
00135 /* ============================================================================================= */
00136 /*!
00137  * A tool that detects uninitialized memory read (UMR) accesses in kernels
00138  */
00139 class MemoryCheck : public GtTool
00140 {
00141 public:
00142     // Implementation of the IGtTool interface
00143     const char* Name()          const                   override { return "Memory check"; }
00144     void        OnKernelBuild(IGtKernelInstrument&)     override;
00145     void        OnKernelRun(IGtKernelDispatch&)         override;
00146     void        OnKernelComplete(IGtKernelDispatch&)    override;
00147 
00148     void                LoadHliLibrary();                   ///< Compile and load library of HLI functions
00149     static MemoryCheck* Instance();                         ///< Return single instance of this class
00150     static void         OnFini()    { Instance()->Fini(); } ///< Termination handler registered with atexit()
00151 
00152 private:
00153     
00154     MemoryCheck();                                          ///< Default constructor
00155     MemoryCheck(const MemoryCheck&) = delete;               ///< Disabled copy constructor
00156     MemoryCheck& operator = (const MemoryCheck&) = delete;  ///< Disabled assignment operator
00157     ~MemoryCheck();                                         ///< Destructor
00158     void Fini();                                            ///< Post process and dump profiling data
00159 
00160     /*!
00161      * Insert a call to HLI function that detects uninitialized memory read (UMR) accesses in the specified instruction
00162      * @param[in]   ins             The memory access instruction
00163      * @param[in]   memAccess       Information about memory access
00164      * @param[in]   instrumentor    Instrumentation interface
00165      * @return true - success, false - the instruction or memory operation is not supported
00166      */
00167     bool InsertMemoryCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor);
00168     bool InsertSlmInit(const IGtIns& ins, const SlmInitArg& slmInitArg, IGtKernelInstrument& instrumentor); ///< Presumably inserts a call to the SLM initialization HLI function at @p ins - definition not shown here
00169 
00170 private:
00171     // Memory check functions: one handle per HLI function declared in memory_check.h
00172     CheckReadAccessFunc                 _checkReadAccessFunc;
00173     CheckSlmReadAccessFunc              _checkSlmReadAccessFunc;
00174     CheckUgmA64ReadAccessFunc           _checkUgmA64ReadAccessFunc;
00175     InitSlmFunc                         _initSlmFunc;
00176 
00177     IGtHliModuleHandle                  _hliModule = nullptr;        ///< Module of HLI functions
00178     std::map<GtKernelId, KernelProfile> _kernels;                    ///< Collection of kernel profiles
00179 };
00180 
00181 /* ============================================================================================= */
00182 // MemAccess implementation
00183 /* ============================================================================================= */
00184 MemAccess::MemAccess(const IGtIns &ins) : _insId(ins.Id())
00185 {
00186     // Get and check data port (SFID): only UGM, SLM and the HDC ports DC0/DC1 are accepted
00187     GtSfid sfid = ins.Sfid();
00188     if ((sfid != GED_SFID_UGM) && (sfid != GED_SFID_SLM) && (sfid != GED_SFID_DP_DC0) && (sfid != GED_SFID_DP_DC1))
00189     {
00190         _errMsg = "Unsupported data port " + std::string(sfid.ToString());
00191         return;
00192     }
00193 
00194     bool isHdc = (sfid == GED_SFID_DP_DC0) || (sfid == GED_SFID_DP_DC1);
00195 
00196     _isSlm = (sfid == GED_SFID_SLM);
00197 
00198     // Retrieve message descriptor; it must be an immediate to be decoded here
00199     if (!ins.MsgDescRegFile().IsImm())
00200     {
00201         _errMsg = "SEND message descriptor is not immediate";
00202         return;
00203     }
00204 
00205     // Classify the operation. A non-atomic HDC message is a load when its destination is a GRF, and a store when it has no destination or a null one
00206     GED_DP_OPCODE opcode = ins.DPOpCode();
00207     bool          isAtomic   = ins.IsAtomic();
00208     bool          isLoad = (isHdc && !isAtomic && ins.HasDstOperand() && ins.DstRegFile().IsGrf()) || (opcode == GED_DP_OPCODE_LOAD);
00209     bool          isStore = (isHdc && !isAtomic && (!ins.HasDstOperand() || ins.DstOperand().Reg().IsNullReg())) || (opcode == GED_DP_OPCODE_STORE); // '||' so that DstOperand() is queried only when a destination exists
00210     _accessType = (isLoad ? GT_ACCESS_READ : (isStore ? GT_ACCESS_WRITE : GT_ACCESS_READ_WRITE));
00211 
00212     if (!isLoad && !isAtomic)
00213     {
00214         _errMsg = "Unsupported SEND operation (not load/atomic)";
00215         return;
00216     }
00217 
00218     // Initialize address model
00219     _addrModel = ins.MemAddrModel();
00220     if (!_addrModel.IsValid())
00221     {
00222         _errMsg = "Unsupported/unknown address model"; // @fixme Support BSS model
00223         return;
00224     }
00225 
00226     // Finally, initialize the rest of data members...
00227     GTPIN_ASSERT(ins.SrcRegFile(0).IsGrf());
00228     _firstDstReg  = ins.HasDstOperand() ? ins.DstOperand().Reg().RegNum() : GtRegNum();
00229     _firstAddrReg = ins.SrcRegOperand(0).Reg().RegNum();
00230     _numAccesses  = ins.NumAccesses(); GTPIN_ASSERT(_numAccesses != 0);
00231 
00232     DcSendMsg msg = DcSendMsg::Decode(ins.GetGedIns());
00233     _dataSize = msg.ElementSize() * msg.NumElements(); // Bytes accessed per address
00234 
00235     if (_dataSize == 0)
00236     {
00237         _errMsg = "Unsupported SEND operation";
00238         return;
00239     }
00240 
00241     if (_accessType == GT_ACCESS_READ_WRITE) // Read-modify-write: dword/qword only, and only for SLM or A64 address models
00242     {
00243         if ((_dataSize != 4 && _dataSize != 8) || (!_addrModel.IsSlm() && !_addrModel.IsA64()))
00244         {
00245             _errMsg = "Unsupported Read-Modify-Write instruction: only Dword and Qword data sizes are supported for SLM and A64 addressing modes";
00246             return;
00247         }
00248     }
00249 
00250     if (_dataSize & 0x3) // Data size must be a multiple of 4 bytes (one dword)
00251     {
00252         _errMsg = "Unsupported data size: " + DecStr(_dataSize) + "(should be multiplication of dword)";
00253         return;
00254     }
00255 
00256     _isValid = true; // All checks passed: the access is supported
00257 }
00258 
00259 /* ============================================================================================= */
00260 // ProfileResults implementation
00261 /* ============================================================================================= */
00262 ProfileResults::ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess) :
00263     dispatchId(dispatcher.DispatchId()), insId(memAccess.Id()), umrCount(memAccess.GetMemoryCheckArgs().out.umrCount)
00264 {
00265     dispatcher.GetExecDescriptor(kernelExecDesc); // kernelExecDesc is passed as the output argument of the query
00266 }
00267 
00268 /* ============================================================================================= */
00269 // KernelProfile implementation
00270 /* ============================================================================================= */
00271 KernelProfile::KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg) :
00272     _id(kernel.Id()), _platform(kernel.GpuPlatform()), _name(GlueString(kernel.Name())), _uniqueName(kernel.UniqueName()),
00273     _asmText(CfgAsmText(cfg))
00274 {
00275     // Populate this object with the information about memory accesses
00276     for (auto bblPtr : cfg.Bbls())
00277     {
00278         for (auto insPtr : bblPtr->Instructions())
00279         {
00280             const IGtIns& ins = *insPtr;
00281             if (ins.IsMemAccess() && !ins.IsEot())
00282             {
00283                 MemAccess memAccess(ins);
00284 
00285                 if (!memAccess.IsValid() && memAccess.AccessType().IsWriteOnly())
00286                 {
00287                     continue;
00288                 }
00289 
00290                 if (memAccess.IsValid())
00291                 {
00292                     _memAccessMap.emplace(ins.Id(), memAccess);
00293                 }
00294                 else
00295                 {
00296                     RecordUnsupportedInstruction(ins, memAccess.Error());
00297                 }
00298             }
00299         }
00300     }
00301 }
00302 
00303 GtKernelId         KernelProfile::Id()          const { return _id; }
00304 GtGpuPlatform      KernelProfile::Platform()    const { return _platform; }
00305 const std::string& KernelProfile::Name()        const { return _name; }
00306 const std::string& KernelProfile::UniqueName()  const { return _uniqueName; }
00307 void               KernelProfile::DumpAsm()     const { DumpKernelAsmText(_name, _uniqueName, _asmText); } // Delegates to DumpKernelAsmText()
00308 const KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() const { return _memAccessMap; }
00309 KernelProfile::MemAccessMap&       KernelProfile::GetMemAccessMap()       { return _memAccessMap; } // Mutable overload: lets callers update the per-access arguments
00310 const SlmInitArg& KernelProfile::GetSlmInitArg() const { return _slmInitMemArg; }
00311 SlmInitArg&       KernelProfile::GetSlmInitArg()       { return _slmInitMemArg; } // Mutable overload: lets callers set the SLM size
00312 
00313 std::string KernelProfile::MemoryCheckResults() const
00314 {
00315     std::ostringstream os;
00316 
00317     os << std::string(120, '-') << std::endl;
00318     os << std::setw(4) << _id << ":    " << _name << "___" << _uniqueName << std::endl; // Kernel header: "<id>:    <name>___<unique name>"
00319     os << std::string(120, '-') << std::endl;
00320 
00321     if (!_unhandledAccesses.empty()) // Optional section listing the accesses that were recorded as unhandled
00322     {
00323         os << " Unhandled memory accesses:" << std::endl;
00324         os << " --------------------------" << std::endl;
00325         os << _unhandledAccesses << std::endl;
00326         os << std::string(120, '-') << std::endl;
00327     }
00328 
00329     uint64_t umrTotal = 0; // Sum of all reported counters; also tells whether the table header was already printed
00330     for (const auto& res : _profileResults)
00331     {
00332         if (res.umrCount != 0) // Entries without violations are not printed
00333         {
00334             if (umrTotal == 0) // First reported entry: print the table header once
00335             {
00336                 os << std::setw(20) << "Dispatch ID" << std::setw(20) << "Instruction ID" << std::setw(20) << "#UMR violations";
00337                 os << " " << std::setw(45) << "Execution descriptor" << std::endl;
00338                 os << std::string(120, '-') << std::endl;
00339             }
00340             os << std::setw(20) << res.dispatchId << std::setw(20) << res.insId << std::setw(20) << res.umrCount;
00341             os << " " << std::setw(45) << res.kernelExecDesc.ToString(_platform, ExecDescAlignedFormat()) << std::endl;
00342 
00343             umrTotal += res.umrCount;
00344         }
00345     }
00346     if (umrTotal == 0) // Nothing was reported for any dispatch
00347     {
00348         os << "No UMR accesses detected" << std::endl;
00349     }
00350     return os.str();
00351 }
00352 
00353 void KernelProfile::RecordMemoryCheckResults(IGtKernelDispatch& dispatcher)
00354 {
00355     // Append one ProfileResults entry per tracked memory access for this dispatch
00356     for (auto it = _memAccessMap.begin(); it != _memAccessMap.end(); ++it)
00357     {
00358         _profileResults.emplace_back(dispatcher, it->second);
00359     }
00360 }
00361 
00362 void KernelProfile::RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg)
00363 {
00364     // Nothing is recorded when no reason is given
00365     if (errMsg.empty()) { return; }
00366     // Entry format: "<reason>: [<instruction ID>] <instruction text>"
00367     std::ostringstream line;
00368     line << errMsg << ": [" << std::setw(3) << ins.Id() << "] " << ins.ToString() << std::endl;
00369     _unhandledAccesses += line.str();
00370 }
00371 
00372 void KernelProfile::RecordUnhandledAccess(InsId insId, const std::string& errMsg)
00373 {
00374     // Nothing is recorded when no reason is given
00375     if (errMsg.empty()) { return; }
00376     // Entry format: "<reason>: Instruction ID: [<instruction ID>] "
00377     std::ostringstream line;
00378     line << errMsg << ": Instruction ID: [" << std::setw(3) << insId << "] " << std::endl;
00379     _unhandledAccesses += line.str();
00380 }
00381 
00382 /* ============================================================================================= */
00383 // MemoryCheck implementation
00384 /* ============================================================================================= */
00385 MemoryCheck::MemoryCheck() :
00386     _checkReadAccessFunc("CheckReadAccess"),               // Each handle is constructed with the name of the HLI function declared in memory_check.h
00387     _checkSlmReadAccessFunc("CheckSlmReadAccess"),
00388     _checkUgmA64ReadAccessFunc("CheckUgmA64ReadAccess"),
00389     _initSlmFunc("InitSlm") {}
00390 MemoryCheck::~MemoryCheck() {}                             // Nothing to release explicitly
00391 
00392 MemoryCheck* MemoryCheck::Instance()
00393 {
00394     static MemoryCheck instance; // Function-local static: constructed on first call, the same object on every call
00395     return &instance;
00396 }
00397 
00398 void MemoryCheck::OnKernelBuild(IGtKernelInstrument& instrumentor)
00399 {
00400     const IGtKernel& kernel     = instrumentor.Kernel();
00401     const IGtCfg&    cfg        = instrumentor.Cfg();
00402     IGtMemoryMapper& memMapper  = instrumentor.MemoryMapper();
00403 
00404     // Create profile for this kernel; its constructor collects the supported memory accesses from the CFG
00405     auto result = _kernels.emplace(std::piecewise_construct,
00406                                    std::forward_as_tuple(instrumentor.Kernel().Id()),
00407                                    std::forward_as_tuple(kernel, cfg));
00408     KernelProfile& kernelProfile = result.first->second; // If a profile with this ID already exists, emplace() keeps it and it is reused here
00409 
00410     bool hasSlm = false; // Set when at least one instrumented access targets SLM
00411 
00412     // Instrument memory accesses and share per-access arguments with HLI functions
00413     for (const auto& entry : kernelProfile.GetMemAccessMap())
00414     {
00415         const auto& memAccess = entry.second;
00416         auto        insId     = entry.first;
00417 
00418         if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) // Skip instructions outside the [knobMinInstrumentIns, knobMaxInstrumentIns] ID range
00419         {
00420             continue;
00421         }
00422         const IGtIns& ins = cfg.GetInstruction(insId);
00423 
00424         hasSlm |= memAccess.IsSlm();
00425 
00426         auto accessType = memAccess.AccessType();
00427         if (accessType == GT_ACCESS_READ || memAccess.IsSlm() || memAccess.AddrModel().IsA64()) // Reads with any address model; other access types only for SLM or A64
00428         {
00429             InsertMemoryCheck(ins, memAccess, instrumentor); // NOTE(review): the bool result is ignored; the arguments are mapped below regardless
00430         }
00431         else
00432         {
00433             GTPIN_ERROR(); // Presumably unreachable: the MemAccess constructor rejects read-modify-write accesses that are neither SLM nor A64
00434         }
00435 
00436         // Share per-access HLI arguments.
00437         // They will be initialized at the start of the kernel, and copied back to the host memory at completion of the kernel
00438         memMapper.Map(memAccess.GetMemoryCheckArgs(), GT_MMAP_SHARE);
00439     }
00440 
00441     if (hasSlm) // SLM accesses were instrumented: add SLM initialization at every entry basic block
00442     {
00443         for (auto& bblPtr : cfg.EntryBbls())
00444         {
00445             const IGtIns& ins = bblPtr->FirstIns();
00446             InsertSlmInit(ins, kernelProfile.GetSlmInitArg(), instrumentor);
00447             memMapper.Map(kernelProfile.GetSlmInitArg(), GT_MMAP_SHARE); // NOTE(review): the same object is mapped once per entry BBL - confirm repeated mapping is intended
00448         }
00449     }
00450 
00451     // Link the kernel with the library of HLI functions
00452     instrumentor.LinkHliModule(_hliModule);
00453 }
00454 
00455 void MemoryCheck::OnKernelRun(IGtKernelDispatch& dispatcher)
00456 {
00457     const IGtKernel& kernel        = dispatcher.Kernel();
00458     KernelProfile&   kernelProfile = _kernels.at(kernel.Id()); // at() throws std::out_of_range if no profile exists for this kernel
00459 
00460     if (dispatcher.ExecStage().IsDispatch())
00461     {
00462         GtKernelExecDesc execDesc; dispatcher.GetExecDescriptor(execDesc);
00463         if (kernel.IsInstrumented() && IsKernelExecProfileEnabled(execDesc, kernel.GpuPlatform(), kernel.Name().Get()))
00464         {
00465             dispatcher.SetProfilingMode(true);  // Enable instrumentation
00466 
00467             //// This tool needs accurate information about memory allocations, which is available on the final dispatch stage.
00468             //// So, on the initial dispatch stage, we only enable instrumentation, and request GTPin to invoke MemoryCheck::OnKernelRun
00469             //// one more time, on the final dispatch stage. If this request is accepted, the initialization of the profile buffer will
00470             //// be done on the final dispatch stage, otherwise - on the initial dispatch stage.
00471             //if (dispatcher.ReportFinalDispatchStage())
00472             //{
00473             //    return;
00474             //}
00475         }
00476         else
00477         {
00478             dispatcher.SetProfilingMode(false); // Disable instrumentation
00479             return;
00480         }
00481     }
00482 
00483     IGtMemoryMapper& memMapper = dispatcher.MemoryMapper();
00484 
00485     bool hasSlm = false; // Set when at least one initialized access targets SLM
00486 
00487     // Initialize per-access arguments of HLI functions
00488     for (auto& entry: kernelProfile.GetMemAccessMap())
00489     {
00490         auto        insId = entry.first;
00491 
00492         if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId)) // Same instruction ID range filter as in OnKernelBuild
00493         {
00494             continue;
00495         }
00496 
00497         MemAccess&        memAccess = entry.second;
00498         MemoryCheckArgs&  mcArgs    = memAccess.GetMemoryCheckArgs();
00499 
00500         hasSlm |= memAccess.IsSlm();
00501 
00502         mcArgs.out.umrCount    = 0; // Reset the counter for this dispatch
00503         mcArgs.in.dataSize     = memAccess.DataSize();
00504         mcArgs.in.numAccesses  = memAccess.NumAccesses();
00505 
00506         memMapper.Write(&mcArgs, sizeof(mcArgs)); // Pass the initialized arguments to the memory mapper
00507     }
00508 
00509     if (hasSlm)
00510     {
00511         uint32_t slmSize = dispatcher.SlmSize(); GTPIN_ASSERT(slmSize);
00512 
00513         SlmInitArg& slmInitMemArgs = kernelProfile.GetSlmInitArg();
00514         slmInitMemArgs.in.size = slmSize; // SLM size reported by the dispatcher for this dispatch
00515 
00516         memMapper.Write(&slmInitMemArgs, sizeof(SlmInitArg));
00517     }
00518 }
00519 // Completion callback: passes a profiled dispatch to its kernel profile so the check results get recorded
00520 void MemoryCheck::OnKernelComplete(IGtKernelDispatch& dispatcher)
00521 {
00522     if (dispatcher.IsProfilingEnabled()) // Dispatches that ran without profiling are ignored
00523     {
00524         KernelProfile& kernelProfile = _kernels.at(dispatcher.Kernel().Id());
00525         kernelProfile.RecordMemoryCheckResults(dispatcher);
00526     }
00527 }
00528 // Inserts the HLI check call matching the access kind of 'ins'. Returns false if the access has nothing to check, true otherwise
00529 bool MemoryCheck::InsertMemoryCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor)
00530 {
00531     GTPIN_ASSERT(memAccess.IsValid() && (memAccess.Id() == ins.Id()));
00532 
00533     uint32_t numAccesses = memAccess.NumAccesses();
00534     if (numAccesses == 0)
00535     {
00536         return false; // Nothing to check
00537     }
00538 
00539     const IGtKernel&         kernel               = instrumentor.Kernel();
00540     const IGtGenModel&       genModel             = kernel.GenModel();
00541     uint32_t                 regSize              = genModel.GrfRegSize();
00542     const GtMemoryAddrModel& addrModel            = memAccess.AddrModel();
00543     uint32_t                 addrSize             = addrModel.PtrSize();
00544     GtReg                    firstAddrReg         = GrfReg(memAccess.FirstAddrReg(), 0, regSize);
00545     GtReg                    firstDstReg          = GrfReg(memAccess.FirstDstReg(), 0, regSize);
00546     uint32_t                 dataSize             = memAccess.DataSize();
00547     uint32_t                 numOfElements        = memAccess.NumAccesses();
00548     uint32_t                 numDstRegs           = RoundUp(numOfElements * dataSize, regSize) / regSize; // GRF registers covering the loaded data
00549     uint32_t                 numAddrRegs          = RoundUp(numOfElements * addrSize, regSize) / regSize; // GRF registers covering the addresses
00550     MemoryCheckArgs*         checkArgs            = const_cast<MemoryCheckArgs*>(&memAccess.GetMemoryCheckArgs()); // HLI functions update out.umrCount
00551     IargConstGrfRange        dstPayload(firstDstReg.RegNum(), numDstRegs);
00552     IargConstGrfRange        addrPayload(firstAddrReg.RegNum(), numAddrRegs);
00553     IargInsOpMask            accessMask(ins);
00554     IargSlmPtr               slmPtr;
00555 
00556     if (memAccess.IsSlm()) // SLM access: the check runs before the instruction and reads SLM at the payload offsets
00557     {
00558         _checkSlmReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00559             NullReg(),          // Unused return value
00560             addrPayload,        // arg[1]: GRF range holding the per-channel SLM offsets
00561             accessMask,         // arg[2]: Per-channel mask of memory accesses
00562             checkArgs,          // arg[3]: Memory check arguments
00563             slmPtr              // arg[4]: SLM pointer
00564         );
00565     }
00566     else if (memAccess.AddrModel().IsA64()) // A64 access: the check runs before the instruction and reads memory at the payload addresses
00567     {
00568         _checkUgmA64ReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00569             NullReg(),          // Unused return value
00570             addrPayload,        // arg[1]: GRF range holding the per-channel 64-bit addresses
00571             accessMask,         // arg[2]: Per-channel mask of memory accesses
00572             checkArgs           // arg[3]: Memory check arguments
00573         );
00574     }
00575     else // Any other access: the check runs after the instruction and inspects the loaded data in the destination registers
00576     {
00577         _checkReadAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::After(),
00578             NullReg(),          // Unused return value
00579             dstPayload,         // arg[1]: GRF range holding the data loaded by the instruction
00580             accessMask,         // arg[2]: Per-channel mask of memory accesses
00581             checkArgs           // arg[3]: Memory check arguments
00582         );
00583     }
00584     return true;
00585 }
00586 // Inserts a call to the SLM initialization HLI function before 'ins'. Always returns true
00587 bool MemoryCheck::InsertSlmInit(const IGtIns& ins, const SlmInitArg& slmInitArg, IGtKernelInstrument& instrumentor)
00588 {
00589     IargSlmPtr slmPtr;
00590 
00591     _initSlmFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00592         NullReg(),          // Unused return value
00593         &slmInitArg,        // arg[1]: Slm initialization argument
00594         slmPtr              // arg[2]: SLM pointer
00595     );
00596 
00597     return true;
00598 }
00599 // Compiles the HLI module from Examples/memory_check.cl under the "installDir" knob value; asserts if that fails
00600 void MemoryCheck::LoadHliLibrary()
00601 {
00602     std::string modulePath = JoinPath(GetKnobValue<std::string>("installDir"), "Examples", "memory_check.cl");
00603     _hliModule = GTPin_GetCore()->HliLibrary().CompileModuleFromFile(modulePath.c_str());
00604     GTPIN_ASSERT_MSG(_hliModule != nullptr, "Could not load HLI module " + modulePath);
00605 }
00606 // Writes the collected results of all kernels to memory_check.txt in the profile directory and, unless suppressed, to stdout
00607 void MemoryCheck::Fini()
00608 {
00609     std::string str; // Accumulates the textual results of every kernel
00610 
00611     // Dump profiling results and assembly code of all kernels
00612     for (const auto& entry : _kernels)
00613     {
00614         const auto& kernelProfile = entry.second;
00615         str += kernelProfile.MemoryCheckResults();
00616         kernelProfile.DumpAsm();
00617     }
00618 
00619     std::ofstream fs(JoinPath(GTPin_GetCore()->ProfileDir(), "memory_check.txt"));
00620     GTPIN_ASSERT(fs.is_open()); // The report file must be writable
00621     fs << str;
00622 
00623     if (!KNOB_NO_COUT) // Echo the same report to the console unless KNOB_NO_COUT is set
00624     {
00625         std::cout << str;
00626     }
00627 }
00628 
00629 /* ============================================================================================= */
00630 // GTPin_Entry - exported tool entry point: configures GTPin, registers the tool and loads its HLI library
00631 /* ============================================================================================= */
00632 EXPORT_C_FUNC void GTPin_Entry(int argc, const char *argv[])
00633 {
00634     SetKnobValue<bool>(true, "uninitialized_buffers_check_on"); // NOTE(review): set before ConfigureGTPin, presumably so command-line arguments can still override it - confirm
00635     ConfigureGTPin(argc, argv);
00636 
00637     // Register the tool (callbacks) with the GTPin core
00638     MemoryCheck::Instance()->Register();
00639 
00640     // Compile and load library of HLI functions
00641     MemoryCheck::Instance()->LoadHliLibrary();
00642 
00643     // Register the termination function
00644     atexit(MemoryCheck::OnFini);
00645 }

memory_check.cl - HLI function implementations in OpenCL.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2026 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file Library of High-Level Instrumentation (HLI) functions used by the memory_check tool
00009  */
00010 
00011 #include "hlif_basic_defs.h"
00012 #include "memory_check.h"
00013 
00014 /*!
00015  * @brief HLI function that fills shared local memory with UNINITIALIZED_MEMORY_PATTERN, one dword at a time
00016  * @see memory_check.h for details
00017  */
00018 IGC_STACK_CALL void InitSlm(__global const SlmInitArg* arg, __local uint32_t* slm)
00019 {
00020     uint32_t numOfDwordElements = arg->in.size >> 2; // Assumes in.size is in bytes; a remainder of size % 4 is not written
00021 
00022     for (uint32_t i = 0; i != numOfDwordElements; ++i) // Not partitioned by work-item: each invocation writes the full range
00023     {
00024         slm[i] = UNINITIALIZED_MEMORY_PATTERN;
00025     }
00026 
00027     barrier(CLK_LOCAL_MEM_FENCE); // Work-group barrier with a local-memory fence after the fill
00028 }
00029 
00030 /*!
00031  * @brief HLI function that detects uninitialized Shared Local Memory read accesses.
00032  *        Only accesses whose data size is a multiple of a dword are checked; other sizes are skipped
00033  * @see memory_check.h for details
00034  */
00035 IGC_STACK_CALL void CheckSlmReadAccess(__global const uint32_t* offsets,           // Per-channel SLM offsets
00036                                        uint32_t accessMask,                        // Bit N set => channel N performs the access
00037                                        __global MemoryCheckArgs* memoryCheckArgs,  // in: access shape, out: violation counter
00038                                        __local uint32_t* slm)                      // Base of the shared local memory
00039 {
00040     if (accessMask != 0)
00041     {
00042         uint32_t numOfElements = memoryCheckArgs->in.numAccesses;
00043         uint32_t dataSize      = memoryCheckArgs->in.dataSize;
00044 
00045         if (dataSize & 0x3) // Not a whole number of dwords: unsupported, nothing is counted
00046         {
00047             return;
00048         }
00049 
00050         for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx)
00051         {
00052             if ((accessMask & (1u << eIndx)) != 0) // Unsigned literal: keeps the shift well-defined for channel 31
00053             {
00054                 uint32_t numOfDwordSubElements = dataSize >> 2;
00055 
00056                 uint32_t offset = offsets[eIndx];
00057                 uint32_t index = offset >> 2; // Offset converted to a dword index into slm
00058 
00059                 for (uint32_t i = 0; i < numOfDwordSubElements; ++i)
00060                 {
00061                     uint32_t data = slm[index + i];
00062                     if (data == UNINITIALIZED_MEMORY_PATTERN) // One violation is counted per matching dword
00063                     {
00064                         atomic_inc(&(memoryCheckArgs->out.umrCount));
00065                     }
00066                 }
00067             }
00068         }
00069     }
00070 }
00071 
00072 /*!
00073  * @brief HLI function that detects uninitialized global memory read accesses done in A64 mode.
00074  *        Only accesses whose data size is a multiple of a dword are checked; other sizes are skipped
00075  * @see memory_check.h for details
00076  */
00077 IGC_STACK_CALL void CheckUgmA64ReadAccess(__global const uint64_t* addresses,        // Per-channel 64-bit addresses
00078                                           uint32_t accessMask,                       // Bit N set => channel N performs the access
00079                                           __global MemoryCheckArgs* memoryCheckArgs) // in: access shape, out: violation counter
00080 {
00081     if (accessMask != 0)
00082     {
00083         uint32_t numOfElements = memoryCheckArgs->in.numAccesses;
00084         uint32_t dataSize      = memoryCheckArgs->in.dataSize;
00085 
00086         if (dataSize & 0x3) // Not a whole number of dwords: unsupported, nothing is counted
00087         {
00088             return;
00089         }
00090 
00091         for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx)
00092         {
00093             if ((accessMask & (1u << eIndx)) != 0) // Unsigned literal: keeps the shift well-defined for channel 31
00094             {
00095                 uint32_t numOfDwordSubElements = dataSize >> 2;
00096 
00097                 uint64_t addr = addresses[eIndx];
00098 
00099                 for (uint32_t i = 0; i < numOfDwordSubElements; ++i)
00100                 {
00101                     uint32_t data = *(__global const uint32_t*)addr; // Explicit __global: addr is a raw global-memory address
00102                     if (data == UNINITIALIZED_MEMORY_PATTERN) // One violation is counted per matching dword
00103                     {
00104                         atomic_inc(&(memoryCheckArgs->out.umrCount));
00105                     }
00106                     addr += 4; // Advance to the next dword of this element
00107                 }
00108             }
00109         }
00110     }
00111 }
00112 
00113 /*!
00114  * @brief HLI function that detects uninitialized memory read accesses by inspecting the loaded data.
00115  *        Only accesses whose data size is a multiple of a dword are checked; other sizes are skipped
00116  * @see memory_check.h for details
00117  */
00118 IGC_STACK_CALL void CheckReadAccess(__global const uint32_t* dstPayload,       // Loaded data, element after element
00119                                     uint32_t accessMask,                       // Bit N set => channel N performs the access
00120                                     __global MemoryCheckArgs* memoryCheckArgs) // in: access shape, out: violation counter
00121 {
00122     if (accessMask != 0)
00123     {
00124         uint32_t numOfElements = memoryCheckArgs->in.numAccesses;
00125         uint32_t dataSize      = memoryCheckArgs->in.dataSize;
00126 
00127         if (dataSize & 0x3) // Not a whole number of dwords: unsupported, nothing is counted
00128         {
00129             return;
00130         }
00131 
00132         for (uint32_t eIndx = 0; eIndx != numOfElements; ++eIndx)
00133         {
00134             if ((accessMask & (1u << eIndx)) != 0) // Unsigned literal: keeps the shift well-defined for channel 31
00135             {
00136                 uint32_t numOfDwordSubElements = dataSize >> 2;
00137 
00138                 for (uint32_t i = 0; i < numOfDwordSubElements; ++i)
00139                 {
00140                     uint32_t data = dstPayload[eIndx * numOfDwordSubElements + i]; // Element eIndx occupies numOfDwordSubElements consecutive dwords
00141                     if (data == UNINITIALIZED_MEMORY_PATTERN) // One violation is counted per matching dword
00142                     {
00143                         atomic_inc(&(memoryCheckArgs->out.umrCount));
00144                     }
00145                 }
00146             }
00147         }
00148     }
00149 }

(Back to the list of all GTPin Sample Tools)


 All Data Structures Functions Variables Typedefs Enumerations Enumerator


  Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT