GTPin
GTPin: Bounds_check Sample Tool

The Bounds_check is a GTPin tool for profiling out-of-bounds accesses to memory buffers in GPU kernels. The tool leverages High Level Instrumentation interface (HLIF).

Running the Bounds_check tool

To run Bounds_check tool use the following command:

Profilers/Bin/gtpin -t bounds_check [bounds_check args] [GTPin args]  -- app [application args]

Configuration options

Example Output

The output example below shows the results of profiling the following kernel

__kernel void vector_copy(__global char* outB, __global char* inB, int oob_offset)
{
  uint gid = get_global_id(0);
  outB[gid] = inB[gid + oob_offset];
}

which was run with oob_offset value 45:

------------------------------------------------------------------------------------------------------------------------
   0:    vector_copy___CS_asm950d7dfbd2b6e54b_simd32_950d7dfbd2b6e54b_0
------------------------------------------------------------------------------------------------------------------------
         Dispatch ID      Instruction ID     #OOB violations                          Execution descriptor
------------------------------------------------------------------------------------------------------------------------
                   0                  43                  16               0         3         0         0
                   0                  44                  29               0         3         0         0

The instruction #43 performed 16 and instruction #44 performed 29 out-of-bounds accesses. Overall, the kernel accessed out of buffer bounds 45 times.

(Back to the list of all GTPin Sample Tools)

bounds_check.h - Data structures and HLI function declarations.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2026 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file A tool that detects out-of-bounds (OOB) memory accesses in kernels
00009  */
00010 
00011 #ifndef BOUNDS_CHECK_H_
00012 #define BOUNDS_CHECK_H_
00013 
00014 #include "hlif_basic_defs.h"
00015 #include "block_2d.h"
00016 
00017 #if defined(__cplusplus)
00018 #include "gtpin_api.h"
00019 using namespace gtpin;
00020 #endif
00021 
00022 #pragma pack(push, 8)
00023 
00024 /* ============================================================================================= */
00025 // Struct MemRange
00026 /* ============================================================================================= */
00027 /// Range in the memory address space
00028 typedef struct MemRange
00029 {
00030     uintptr_t   base;   ///< Base address of the range
00031     size_t      size;   ///< Size of the range in bytes
00032 
00033     #if defined(__cplusplus)
00034     constexpr MemRange(uintptr_t b = 0, size_t s = 0) : base(b), size(s) {}     ///< Constructor
00035     constexpr MemRange(const AddrRange& r) : base(r.Base()), size(r.Size()) {}  ///< Constructor
00036     #endif
00037 } MemRange;
00038 
00039 /* ============================================================================================= */
00040 // Struct MemBounds
00041 /* ============================================================================================= */
00042 /// Flexible (variable size) array of memory ranges that represent bounds of available memory resources
00043 typedef struct MemBounds
00044 {
00045     uint32_t    numRanges;  ///< Number of elements in the 'ranges' array
00046     MemRange    ranges[1];  ///< Flexible array of memory ranges
00047 
00048     #if defined(__cplusplus)
00049     /// @return Size of this structure in bytes
00050     constexpr size_t SizeOf() const
00051     { return (sizeof(MemBounds) - sizeof(MemRange) + (numRanges * sizeof(MemRange))); }
00052     
00053     /// Given a number of memory ranges, return size of this structure in bytes
00054     static constexpr size_t SizeOf(uint32_t numRanges)
00055     { return (sizeof(MemBounds) - sizeof(MemRange) + (numRanges * sizeof(MemRange))); }
00056     #endif
00057 
00058 } MemBounds;
00059 
00060 /* ============================================================================================= */
00061 // Struct BoundsCheckArgs
00062 /* ============================================================================================= */
00063 /// Common arguments of HLI functions that detect OOB violations in memory access instructions
00064 typedef struct BoundsCheckArgs
00065 {
00066     struct
00067     {
00068         uint32_t    numAddresses;          ///< Number of elements in the address payload of the instruction
00069         uint32_t    dataSize;              ///< Size, in bytes, of the memory range referenced by the corresponding address
00070         uint32_t    surfaceSize;           ///< Size, in bytes, of the available memory space for a surface access.
00071                                            ///< This value is valid for stateful (BTS), bindless (SS), and SLM accesses only.
00072                                            ///< For other address models, it is set to UINT32_MAX.
00073     } in;
00074     struct
00075     {
00076         uint32_t    oobCount;              ///< Counter of detected OOB violations
00077     }out;
00078 
00079     #if defined(__cplusplus)
00080     /// Constructor
00081     BoundsCheckArgs(uint32_t nAddr = 0, uint32_t dSize = 0, uint32_t sSize = 0) :
00082         in({ nAddr, dSize, sSize }), out({ 0 }) {}
00083     #endif
00084 } BoundsCheckArgs;
00085 
00086 /* ============================================================================================= */
00087 // Function CheckA64Access
00088 /* ============================================================================================= */
00089 /*!
00090  * @brief HLI function that detects OOB violations in the FLAT A64 memory access
00091  * @param[in]       allocatedBuffers    Array of global global memory buffers available to the kernel
00092  * @param[in]       addr                Base address of the accessed memory range
00093  * @param[in]       accessMask          Per-channel mask of memory accesses
00094  * @param[in][out]  boundsCheckArgs     Information about memory access instruction and results of the bounds check
00095  */
00096 IGC_STACK_CALL void CheckA64Access(__global const MemBounds* allocatedBuffers,
00097                                    uint64_t addr,
00098                                    uint32_t accessMask,
00099                                    __global BoundsCheckArgs* boundsCheckArgs);
00100 #if defined(__cplusplus)
00101 using CheckA64AccessFunc = GtHliFunction<void, const MemBounds*, uint64_t, uint32_t, BoundsCheckArgs*>;
00102 #endif
00103 
00104 /* ============================================================================================= */
00105 // Function CheckA32Access
00106 /* ============================================================================================= */
00107 /*!
00108  * @brief HLI function that detects OOB violations in the FLAT A32 memory access
00109  * @copydetails CheckA64Access
00110  */
00111 IGC_STACK_CALL void CheckA32Access(__global const MemBounds* allocatedBuffers,
00112                                    uint32_t addr,
00113                                    uint32_t accessMask,
00114                                    __global BoundsCheckArgs* boundsCheckArgs);
00115 #if defined(__cplusplus)
00116 using CheckA32AccessFunc = GtHliFunction<void, const MemBounds*, uint32_t, uint32_t, BoundsCheckArgs*>;
00117 #endif
00118 
00119 /* ============================================================================================= */
00120 // Function CheckSurfaceAccess
00121 /* ============================================================================================= */
00122 /*!
00123  * @brief HLI function that detects OOB violations in the surface (BTS/SLM/scratch) memory access
00124  * @param[in]       offset              Offset of the accessed memory range within the surface space
00125  * @param[in]       accessMask          Per-channel mask of memory accesses
00126  * @param[in][out]  boundsCheckArgs     Information about memory access instruction and results of the bounds check
00127  */
00128 IGC_STACK_CALL void CheckSurfaceAccess(uint32_t offset, uint32_t accessMask, __global BoundsCheckArgs* boundsCheckArgs);
00129 #if defined(__cplusplus)
00130 using CheckSurfaceAccessFunc = GtHliFunction<void, uint32_t, uint32_t, BoundsCheckArgs*>;
00131 #endif
00132 
00133 /* ============================================================================================= */
00134 // Function CheckA64ScatteredAccess
00135 /* ============================================================================================= */
00136 /*!
00137  * @brief HLI function that detects OOB violations in the scattered FLAT A64 memory access
00138  * @param[in]       allocatedBuffers    Array of global memory buffers available to the kernel
00139  * @param[in]       addrPayload         Array of base addresses of accessed memory ranges
00140  * @param[in]       accessMask          Per-channel mask of memory accesses
00141  * @param[in][out]  boundsCheckArgs     Information about memory access instruction and results of the bounds check
00142  */
00143 IGC_STACK_CALL void CheckA64ScatteredAccess(__global const MemBounds* allocatedBuffers,
00144                                             __global const uint64_t* addrPayload,
00145                                             uint32_t accessMask,
00146                                             __global BoundsCheckArgs* boundsCheckArgs);
00147 #if defined(__cplusplus)
00148 using CheckA64ScatteredAccessFunc = GtHliFunction<void, const MemBounds*, const uint64_t*, uint32_t, BoundsCheckArgs*>;
00149 #endif
00150 
00151 /* ============================================================================================= */
00152 // Function CheckA32ScatteredAccess
00153 /* ============================================================================================= */
00154 /*!
00155  * @brief HLI function that detects OOB violations in the scattered FLAT A32 memory access
00156  * @copydetails CheckA64ScatteredAccess
00157  */
00158 IGC_STACK_CALL void CheckA32ScatteredAccess(__global const MemBounds* allocatedBuffers,
00159                                             __global const uint32_t* addrPayload,
00160                                             uint32_t accessMask,
00161                                             __global BoundsCheckArgs* boundsCheckArgs);
00162 #if defined(__cplusplus)
00163 using CheckA32ScatteredAccessFunc = GtHliFunction<void, const MemBounds*, const uint32_t*, uint32_t, BoundsCheckArgs*>;
00164 #endif
00165 
00166 /* ============================================================================================= */
00167 // Function CheckSurfaceScatteredAccess
00168 /* ============================================================================================= */
00169 /*!
00170  * @brief HLI function that detects OOB violations in the scattered surface (BTS/SLM/scratch) memory access
00171  * @param[in]       offsets         Array of offsets of accessed memory ranges within the surface space
00172  * @param[in]       accessMask          Per-channel mask of memory accesses
00173  * @param[in][out]  boundsCheckArgs     Information about memory access instruction and results of the bounds check
00174  */
00175 IGC_STACK_CALL void CheckSurfaceScatteredAccess(__global const uint32_t* offsets,
00176                                                 uint32_t accessMask,
00177                                                 __global BoundsCheckArgs* boundsCheckArgs);
00178 #if defined(__cplusplus)
00179 using CheckSurfaceScatteredAccessFunc = GtHliFunction<void, const uint32_t*, uint32_t, BoundsCheckArgs*>;
00180 #endif
00181 
00182 /*!
00183  * @brief HLI function that detects OOB violations in the block 2D memory access
00184  * @param[in]       allocatedBuffers    Array of global memory buffers available to the kernel
00185  * @param[in]       block               Pointer to the block 2D structure representing the accessed memory range
00186  * @param[in]       accessMask          Per-channel mask of memory accesses
00187  * @param[in][out]  boundsCheckArgs     Information about memory access instruction and results of the bounds check
00188  */
00189 IGC_STACK_CALL void CheckBlock2DAccess(__global const MemBounds* allocatedBuffers,
00190                                        __global const Block2D* block,
00191                                        uint32_t accessMask,
00192                                        __global BoundsCheckArgs* boundsCheckArgs);
00193 #if defined(__cplusplus)
00194 using CheckBlock2DAccessFunc = GtHliFunction<void, const MemBounds*, const uint64_t*, uint32_t, BoundsCheckArgs*>;
00195 #endif
00196 
00197 #pragma pack(pop)
00198 
00199 #endif

bounds_check.cpp - Tool implementation, instrumentation logic, and result aggregation.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2025 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file A tool that detects out-of-bounds (OOB) memory accesses in kernels
00009  */
00010 
00011 #include <cstring>
00012 #include <map>
00013 #include <vector>
00014 #include <set>
00015 
00016 #include "bounds_check.h"
00017 
00018 #include "gtpin_api.h"
00019 #include "gtpin_tool_utils.h"
00020 #include "gen_send_decoder.h"
00021 #include "ged.h"
00022 
00023 /* ============================================================================================= */
00024 // Configuration
00025 /* ============================================================================================= */
00026 Knob<int>  KNOB_MAX_KERNEL_BUFFERS("max_kernel_buffers", 256, "Maximum number of global buffers available to the kernel");
00027 Knob<bool> KNOB_CHECK_ACCESS_TYPE("check_access_type", false, "Check access type compatibility (readonly vs. write)");
00028 Knob<bool> KNOB_NO_COUT("no_cout", false, "Do not send profiling results to standard output device");
00029 
00030 /* ============================================================================================= */
00031 // Class MemAccess
00032 /* ============================================================================================= */
00033 /// Information about memory access instruction
00034 struct MemAccess
00035 {
00036     /// Constructor. If memory access is unsupported, the reason can be queried by Error()
00037     explicit MemAccess(const IGtIns &ins, const DcSendMsg &msg);
00038 
00039     bool         IsValid()      const { return _isValid; }            ///< @return true for a supported memory access
00040     InsId        Id()           const { return _insId; }              ///< @return Instruction ID
00041     GtAccessType AccessType()   const { return _accessType; }         ///< @return Access type: read, write or read-write
00042     GtRegNum     FirstAddrReg() const { return _firstAddrReg; }       ///< @return First register in the address payload
00043     uint32_t     NumAddresses() const { return _numAddresses; }       ///< @return Number of elements in the address payload
00044     uint32_t     DataSize()     const { return _dataSize; }           ///< @return Data size, in bytes, per each address
00045     const GtMemoryAddrModel& AddrModel() const { return _addrModel; } ///< @return Address model of the memory access
00046     const std::string&       Error()     const { return _errMsg; }    ///< @return Error message on unsupported memory access
00047 
00048     /*!
00049      * @return Arguments of the HLI function that detects OOB violations in this memory access
00050      * @note This object owns the BoundsCheckArgs structure, but its content is controlled externally
00051      */
00052     const BoundsCheckArgs& GetBoundsCheckArgs() const { return _bcArgs; }
00053     BoundsCheckArgs&       GetBoundsCheckArgs()       { return _bcArgs; }
00054 
00055 private:
00056     bool                _isValid = false;       ///< True, if this structure represents supported memory access
00057     InsId               _insId;                 ///< ID of the memory access instruction
00058     GtAccessType        _accessType;            ///< Access type: read-only, write-only or read-write
00059     GtMemoryAddrModel   _addrModel;             ///< Address model of the memory access
00060     GtRegNum            _firstAddrReg;          ///< First register in the address payload of the instruction
00061     uint32_t            _numAddresses = 0;      ///< Number of elements in the address payload of the instruction
00062     uint32_t            _dataSize = 0;          ///< Size, in bytes, of the memory range referenced by a single address
00063     BoundsCheckArgs     _bcArgs;                ///< Common arguments of bounds check functions
00064     std::string         _errMsg;                ///< Error message on unsupported memory access
00065 };
00066 
00067 /* ============================================================================================= */
00068 // Struct ProfileResults
00069 /* ============================================================================================= */
00070 /*!
00071  * Profile results per kernel dispatch / per instuction
00072  */
00073 struct ProfileResults
00074 {
00075     ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess);
00076 
00077     uint64_t          dispatchId;          ///< Unique ID of the kernel dispatch assigned by GTPin
00078     GtKernelExecDesc  kernelExecDesc;      ///< Kernel execution descriptor
00079     InsId             insId;               ///< ID of the memory access instruction
00080     uint32_t          surfaceSize;         ///< Size, in bytes, of the available memory space for a surface access
00081     uint32_t          oobCount;            ///< Counter of OOB violations in memory accesses performed by the instruction
00082 };
00083 
00084 /* ============================================================================================= */
00085 // Class KernelProfile
00086 /* ============================================================================================= */
00087 /// Static properties of the kernel, and its profile data updated on each kernel run
00088 class KernelProfile
00089 {
00090 public:
00091     using MemAccessMap = std::map<InsId, MemAccess>;              ///< Information about memory accesses by kernel instructions
00092 
00093 public:
00094     KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg);    ///< Constructor
00095 
00096     inline GtKernelId          Id()                 const;        ///< @return Unique identifier of the kernel
00097     inline GtGpuPlatform       Platform()           const;        ///< @return Kernel's platform
00098     inline const std::string&  Name()               const;        ///< @return Name of the kernel
00099     inline const std::string&  UniqueName()         const;        ///< @return Unique name of the kernel
00100     inline std::string         BoundsCheckResults() const;        ///< @return Profiling results of kernel runs, in text format
00101     inline void                DumpAsm()            const;        ///< Store kernel's assembly text in the file
00102     inline const MemAccessMap& GetMemAccessMap()    const;        ///< @return Information about memory accesses in the kernel
00103     inline MemAccessMap&       GetMemAccessMap();                 ///< @return Information about memory accesses in the kernel
00104 
00105     void RecordBoundsCheckResults(IGtKernelDispatch& dispatcher); ///< Update profile data with the latest bounds check results
00106 
00107     void RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg); ///< Record unsupported instruction
00108 private:
00109     GtKernelId      _id;                                                  ///< Unique identifier of the kernel
00110     GtGpuPlatform   _platform;                                            ///< Kernel's platform
00111     std::string     _name;                                                ///< Name of the kernel
00112     std::string     _uniqueName;                                          ///< Unique name of the kernel
00113     std::string     _asmText;                                             ///< Assembly text of the kernel
00114     std::vector<std::pair<InsId, std::string>> _unsupportedInstructions;  ///< Instructions that could not be instrumented
00115 
00116     std::map<InsId, MemAccess> _memAccessMap;   ///< Map: Instruction ID to memory access information
00117     std::list<ProfileResults>  _profileResults; ///< Profile results per kernel dispatch / per instuction 
00118 };
00119 
00120 /* ============================================================================================= */
00121 // Class BoundsCheck
00122 /* ============================================================================================= */
00123 /*!
00124  * A tool that detects out-of-bounds (OOB) memory accesses in kernels
00125  */
00126 class BoundsCheck : public GtTool
00127 {
00128 public:
00129     // Implementation of the IGtTool interface
00130     const char* Name()          const                   override { return "Bounds check"; }
00131     void        OnKernelBuild(IGtKernelInstrument&)     override;
00132     void        OnKernelRun(IGtKernelDispatch&)         override;
00133     void        OnKernelComplete(IGtKernelDispatch&)    override;
00134 
00135     void                LoadHliLibrary();                   ///< Compile and load library of HLI functions
00136     static BoundsCheck* Instance();                         ///< Return single instance of this class
00137     static void         OnFini()    { Instance()->Fini(); } ///< Termination handler registered with atexit()
00138 
00139 private:
00140     
00141     BoundsCheck();                                          ///< Default constructor
00142     BoundsCheck(const BoundsCheck&) = delete;               ///< Disabled copy constructor
00143     BoundsCheck& operator = (const BoundsCheck&) = delete;  ///< Disabled assignment operator
00144     ~BoundsCheck();                                         ///< Destructor
00145     void Fini();                                            ///< Post process and dump profiling data
00146 
00147     /*!
00148      * Insert a call to HLI function that detects out-of-bounds (OOB) memory accesses in the specified instruction
00149      * @param[in]   ins             The memory access instruction
00150      * @param[in]   memAccess       Information about memory access
00151      * @param[in]   instrumentor    Instrumentation interface
00152      * @return true - success, false - the instruction or memory operation is not supported
00153      */
00154     bool InsertBoundsCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor);
00155 
00156     /*!
00157      * Given BTI index, return memory buffer referenced by the corresponding stateful argument.
00158      * Return empty buffer, if BTI index cannot be dereferenced
00159      */
00160     GtMemoryBuffer GetBtiBuffer(uint32_t bti, const GtMemoryBufferConstSpan& allocatedBuffers,
00161                                 const IGtKernelDispatch& dispatcher);
00162 
00163     /// @return Pointer to a buffer in the 'buffers' sequence that contains 'ptr', or nullptr if buffer is not found
00164     const GtMemoryBuffer* FindBuffer(const GtMemoryBufferConstSpan& buffers, uintptr_t ptr);
00165 
00166 private:
00167     // Bounds check functions
00168     CheckA64AccessFunc                  _checkA64AccessFunc;
00169     CheckA32AccessFunc                  _checkA32AccessFunc;
00170     CheckSurfaceAccessFunc              _checkSurfaceAccessFunc;
00171     CheckA64ScatteredAccessFunc         _checkA64ScatteredAccessFunc;
00172     CheckA32ScatteredAccessFunc         _checkA32ScatteredAccessFunc;
00173     CheckSurfaceScatteredAccessFunc     _checkSurfaceScatteredAccessFunc;
00174     CheckBlock2DAccessFunc              _checkBlock2DAccessFunc;
00175 
00176     /// Global memory buffers available to the kernel; shared and passed as an argument to bounds check functions
00177     MemBounds*                          _allocatedBuffers = nullptr; ///< All allocated buffers
00178     MemBounds*                          _writableBuffers  = nullptr; ///< Allocated writable buffers
00179 
00180     IGtHliModuleHandle                  _hliModule = nullptr;        ///< Module of HLI functions
00181     std::map<GtKernelId, KernelProfile> _kernels;                    ///< Collection of kernel profiles
00182 };
00183 
00184 /* ============================================================================================= */
00185 // MemAccess implementation
00186 /* ============================================================================================= */
00187 MemAccess::MemAccess(const IGtIns &ins, const DcSendMsg &msg) : _insId(ins.Id())
00188 {
00189 
00190 
00191     // Get and check data port (SFID)
00192     GtSfid sfid = ins.Sfid();
00193     bool isHdc = (sfid == GED_SFID_DP_DC0) || (sfid == GED_SFID_DP_DC1);
00194     if ((sfid != GED_SFID_UGM) && (sfid != GED_SFID_SLM) && !isHdc)
00195     {
00196         _errMsg = "Unsupported data port " + std::string(sfid.ToString());
00197         return;
00198     }
00199 
00200     // Retrieve message descriptor
00201     if (!ins.MsgDescRegFile().IsImm())
00202     {
00203         _errMsg = "SEND message descriptor is not immediate";
00204         return;
00205     }
00206 
00207     // Check opcode of the memory operation
00208     GED_DP_OPCODE opcode     = ins.DPOpCode();
00209     bool          isLoadLsc  = (opcode == GED_DP_OPCODE_LOAD) || (opcode == GED_DP_OPCODE_LOAD_2D_BLOCK);
00210     bool          isStoreLsc = (opcode == GED_DP_OPCODE_STORE) || (opcode == GED_DP_OPCODE_STORE_2D_BLOCK);
00211     bool          isAtomic   = ins.IsAtomic();
00212     bool          isLoad     = isLoadLsc || (isHdc && !isAtomic && ins.HasDstOperand() && ins.DstRegFile().IsGrf());
00213     bool          isStore    = isStoreLsc || (isHdc && !isAtomic && ins.DstOperand().Reg().IsNullReg());
00214 
00215     if (!isLoad && !isStore && !isAtomic)
00216     {
00217         _errMsg = "Unsupported SEND operation (not load/store/atomic)";
00218         return;
00219     }
00220 
00221     // Initialize address model
00222     _addrModel = ins.MemAddrModel();
00223     if (!_addrModel.IsValid() || _addrModel.IsBss())
00224     {
00225         _errMsg = "Unsupported/unknown address model"; // @fixme Support BSS model
00226         return;
00227     }
00228 
00229     // Finally, initialize the rest of data members...
00230     GTPIN_ASSERT(ins.SrcRegFile(0).IsGrf());
00231     _firstAddrReg = ins.SrcRegOperand(0).Reg().RegNum(); 
00232     _numAddresses = ins.NumAccesses();              GTPIN_ASSERT(_numAddresses != 0);
00233     _dataSize     = msg.ElementSize() * msg.NumElements();            GTPIN_ASSERT(_dataSize != 0);
00234     _accessType   = (isLoad ? GT_ACCESS_READ : (isStore ? GT_ACCESS_WRITE : GT_ACCESS_READ_WRITE));
00235 
00236     _isValid = true;
00237 }
00238 
00239 /* ============================================================================================= */
00240 // ProfileResults implementation
00241 /* ============================================================================================= */
00242 ProfileResults::ProfileResults(const IGtKernelDispatch& dispatcher, const MemAccess& memAccess) :
00243     dispatchId(dispatcher.DispatchId()), insId(memAccess.Id()),
00244     surfaceSize(memAccess.GetBoundsCheckArgs().in.surfaceSize),
00245     oobCount(memAccess.GetBoundsCheckArgs().out.oobCount)
00246 {
00247     dispatcher.GetExecDescriptor(kernelExecDesc);
00248 }
00249 
00250 /* ============================================================================================= */
00251 // KernelProfile implementation
00252 /* ============================================================================================= */
00253 KernelProfile::KernelProfile(const IGtKernel& kernel, const IGtCfg& cfg) :
00254     _id(kernel.Id()), _platform(kernel.GpuPlatform()), _name(GlueString(kernel.Name())), _uniqueName(kernel.UniqueName()),
00255     _asmText(CfgAsmText(cfg))
00256 {
00257     // Populate this object with the information about memory accesses
00258     for (auto bblPtr : cfg.Bbls())
00259     {
00260         for (auto insPtr : bblPtr->Instructions())
00261         {
00262             const IGtIns& ins = *insPtr;
00263             const DcSendMsg msg = DcSendMsg::Decode(ins.GetGedIns());
00264             // Exclude EOT and fence instructions; fence is used for synchronization, not direct memory access.
00265             if (ins.IsMemAccess() && !ins.IsEot() && !msg.IsMemFence())
00266             {
00267                 MemAccess memAccess(ins, msg);
00268                 if (memAccess.IsValid())
00269                 {
00270                     _memAccessMap.emplace(ins.Id(), memAccess);
00271                 }
00272                 else
00273                 {
00274                     RecordUnsupportedInstruction(ins, memAccess.Error());
00275                 }
00276             }
00277         }
00278     }
00279 }
00280 
00281 GtKernelId         KernelProfile::Id()          const { return _id; }
00282 GtGpuPlatform      KernelProfile::Platform()    const { return _platform; }
00283 const std::string& KernelProfile::Name()        const { return _name; }
00284 const std::string& KernelProfile::UniqueName()  const { return _uniqueName; }
00285 void               KernelProfile::DumpAsm()     const { DumpKernelAsmText(_name, _uniqueName, _asmText); }
00286 const KernelProfile::MemAccessMap& KernelProfile::GetMemAccessMap() const { return _memAccessMap; }
00287 KernelProfile::MemAccessMap&       KernelProfile::GetMemAccessMap()       { return _memAccessMap; }
00288 
00289 std::string KernelProfile::BoundsCheckResults() const
00290 {
00291     std::ostringstream os;
00292 
00293     os << std::string(120, '-') << std::endl;
00294     os << std::setw(4) << _id << ":    " << _name << "___" << _uniqueName << std::endl;
00295     os << std::string(120, '-') << std::endl;
00296 
00297     // Print suspicious instructions explanation (zero surface size), if needed
00298     bool printSuspicious = false;
00299     for (const auto& res : _profileResults)
00300     {
00301         if (res.oobCount != 0 && res.surfaceSize == 0)
00302         {
00303             os << std::endl << "Note: Z marks out-of-bounds accesses to zero-sized surfaces (stateful/BTS, bindless/SS, or SLM)." << std::endl << std::endl;
00304             printSuspicious = true;
00305             break;
00306         }
00307     }
00308 
00309     uint32_t oobTotal = 0;
00310     for (const auto& res : _profileResults)
00311     {
00312         if (res.oobCount != 0)
00313         {
00314             if (oobTotal == 0)
00315             {
00316                 os << std::setw(20) << "Dispatch ID" << std::setw(20) << "Instruction ID" << std::setw(20) << "#OOB violations";
00317                 os << " " << std::setw(45) << "Execution descriptor";
00318                 if (printSuspicious)
00319                 {
00320                     os << std::setw(20) << "0-sized Surface";
00321                 }
00322                 os << std::endl;
00323                 os << std::string(printSuspicious ? 140 : 120, '-') << std::endl;
00324             }
00325             os << std::setw(20) << res.dispatchId << std::setw(20) << res.insId << std::setw(20) << res.oobCount;
00326             os << " " << std::setw(45) << res.kernelExecDesc.ToString(_platform, ExecDescAlignedFormat());
00327             if (printSuspicious && res.surfaceSize == 0)
00328             {
00329                 os << std::setw(20) << "Z";
00330             }
00331             else
00332             {
00333                 os << std::setw(20) << " ";
00334             }
00335             os << std::endl;
00336             oobTotal += res.oobCount;
00337         }
00338     }
00339     if (oobTotal == 0)
00340     {
00341         os << "No OOB accesses detected" << std::endl;
00342     }
00343 
00344     // Print unsupported instructions
00345     if (!_unsupportedInstructions.empty())
00346     {
00347         os << std::endl;
00348         os << " ---------------------------" << std::endl;
00349         os << " Unsupported memory accesses:" << std::endl;
00350         os << " ---------------------------" << std::endl;
00351         for (const auto& entry : _unsupportedInstructions)
00352         {
00353             os << entry.second << ": Instruction ID: [" << std::setw(3) << entry.first << "]" << std::endl;
00354         }
00355         os << std::string(120, '-') << std::endl;
00356     }
00357 
00358     return os.str();
00359 }
00360 
00361 void KernelProfile::RecordBoundsCheckResults(IGtKernelDispatch& dispatcher)
00362 {
00363     for (auto& entry: _memAccessMap)
00364     {
00365         MemAccess& memAccess = entry.second;
00366         _profileResults.emplace_back(dispatcher, memAccess);
00367     }
00368 }
00369 
00370 void KernelProfile::RecordUnsupportedInstruction(const IGtIns& ins, const std::string& errMsg)
00371 {
00372     if (!errMsg.empty())
00373     {
00374         _unsupportedInstructions.emplace_back(ins.Id(), errMsg);
00375     }
00376 }
00377 
00378 /* ============================================================================================= */
00379 // BoundsCheck implementation
00380 /* ============================================================================================= */
00381 BoundsCheck::BoundsCheck() : _checkA64AccessFunc("CheckA64Access"),
00382                              _checkA32AccessFunc("CheckA32Access"),
00383                              _checkSurfaceAccessFunc("CheckSurfaceAccess"),
00384                              _checkA64ScatteredAccessFunc("CheckA64ScatteredAccess"),
00385                              _checkA32ScatteredAccessFunc("CheckA32ScatteredAccess"),
00386                              _checkSurfaceScatteredAccessFunc("CheckSurfaceScatteredAccess"),
00387                              _checkBlock2DAccessFunc("CheckBlock2DAccess")
00388 {
00389     size_t size = MemBounds::SizeOf(KNOB_MAX_KERNEL_BUFFERS);
00390     #if (_GTPIN_CPP_STD >= 17)
00391         constexpr std::align_val_t alignment { alignof(MemBounds) };
00392         _allocatedBuffers = reinterpret_cast<MemBounds*>(operator new [](size, alignment));
00393         _writableBuffers  = reinterpret_cast<MemBounds*>(operator new [](size, alignment));
00394     #else
00395         _allocatedBuffers = reinterpret_cast<MemBounds*>(operator new [](size));
00396         _writableBuffers  = reinterpret_cast<MemBounds*>(operator new [](size));
00397     #endif
00398 }
00399 
00400 BoundsCheck::~BoundsCheck()
00401 {
00402     #if (_GTPIN_CPP_STD >= 17)
00403         constexpr std::align_val_t alignment { alignof(MemBounds) };
00404         operator delete [](_allocatedBuffers, alignment);
00405         operator delete [](_writableBuffers, alignment);
00406     #else
00407         operator delete [](_allocatedBuffers);
00408         operator delete [](_writableBuffers);
00409     #endif
00410 }
00411 
00412 BoundsCheck* BoundsCheck::Instance()
00413 {
00414     static BoundsCheck instance;
00415     return &instance;
00416 }
00417 
00418 void BoundsCheck::OnKernelBuild(IGtKernelInstrument& instrumentor)
00419 {
00420     const IGtKernel& kernel     = instrumentor.Kernel();
00421     const IGtCfg&    cfg        = instrumentor.Cfg();
00422     IGtMemoryMapper& memMapper  = instrumentor.MemoryMapper();
00423 
00424     // Create profile for this kernel
00425     auto result = _kernels.emplace(std::piecewise_construct,
00426                                    std::forward_as_tuple(instrumentor.Kernel().Id()),
00427                                    std::forward_as_tuple(kernel, cfg));
00428     KernelProfile& kernelProfile = result.first->second;
00429 
00430     // Handle common instrumentation knobs
00431     HandleCommonInstrumnetationKnobs(instrumentor);
00432     // Instrument memory accesses and share per-access arguments with HLI functions
00433     for (const auto& entry : kernelProfile.GetMemAccessMap())
00434     {
00435         const auto& memAccess = entry.second;
00436         auto        insId     = entry.first;
00437 
00438         if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId))
00439         {
00440             continue;
00441         }
00442         const IGtIns& ins = cfg.GetInstruction(insId);
00443         InsertBoundsCheck(ins, memAccess, instrumentor);
00444 
00445         // Share per-access HLI arguments.
00446         // They will be initialized at the start of the kernel, and copied back to the host memory at completion of the kernel
00447         memMapper.Map(memAccess.GetBoundsCheckArgs(), GT_MMAP_SHARE);
00448     }
00449 
00450     // Map the array of global buffers available to kernel. This is an HLI argument common to all global memory accesses.
00451     // The array will be populated at the start of the kernel.
00452     uint32_t sizeOfMemBounds = (uint32_t)MemBounds::SizeOf(KNOB_MAX_KERNEL_BUFFERS);
00453     memMapper.Map(_allocatedBuffers, sizeOfMemBounds, GT_MMAP_NO_SHARE, alignof(MemBounds));
00454     if (KNOB_CHECK_ACCESS_TYPE)
00455     {
00456         memMapper.Map(_writableBuffers, sizeOfMemBounds, GT_MMAP_NO_SHARE, alignof(MemBounds));
00457     }
00458 
00459     // Link the kernel with the library of HLI functions
00460     instrumentor.LinkHliModule(_hliModule);
00461 }
00462 
00463 void BoundsCheck::OnKernelRun(IGtKernelDispatch& dispatcher)
00464 {
00465     const IGtKernel& kernel        = dispatcher.Kernel();
00466     if (_kernels.find(kernel.Id()) == _kernels.end())
00467     {
00468         return;
00469     }
00470 
00471     KernelProfile&   kernelProfile = _kernels.at(kernel.Id());
00472 
00473     if (dispatcher.ExecStage().IsDispatch())
00474     {
00475         GtKernelExecDesc execDesc; dispatcher.GetExecDescriptor(execDesc);
00476         if (kernel.IsInstrumented() && IsKernelExecProfileEnabled(execDesc, kernel.GpuPlatform(), kernel.Name().Get()))
00477         {
00478             dispatcher.SetProfilingMode(true);  // Enable instrumentation
00479 
00480             // This tool needs an accurate information about memory allocations, which is available on the final dispatch stage.
00481             // So, on the initial dispatch stage, we only enable instrumentation, and request GTPin to invoke BoundsCheck::OnKernelRun
00482             // one more time, on the final dispatch stage. If this request is accepted, the initialization of the profile buffer will
00483             // be done on the final dispatch stage, otherwise - on the initial dispatch stage.
00484             if (dispatcher.ReportFinalDispatchStage())
00485             {
00486                 return;
00487             }
00488         }
00489         else
00490         {
00491             dispatcher.SetProfilingMode(false); // Disable instrumentation
00492             return;
00493         }
00494     }
00495 
00496     IGtMemoryMapper& memMapper = dispatcher.MemoryMapper();
00497 
00498     // Populate and share array of global buffers available to the kernel
00499     GtMemoryBufferConstSpan allocatedBuffers = dispatcher.GetAllocatedBuffers();
00500     GTPIN_ASSERT_MSG(allocatedBuffers.size() <= uint32_t(KNOB_MAX_KERNEL_BUFFERS),
00501                      "Number of global buffers " + ToString(allocatedBuffers.size()) +
00502                      " exceeded " + ToString(KNOB_MAX_KERNEL_BUFFERS.GetValue()) + ". Increase max_kernel_buffers value");
00503 
00504     _allocatedBuffers->numRanges = 0;
00505     for (const auto& buffer : allocatedBuffers)
00506     {
00507         _allocatedBuffers->ranges[_allocatedBuffers->numRanges++] = buffer.Range();
00508     }
00509     memMapper.Write(_allocatedBuffers, (uint32_t)_allocatedBuffers->SizeOf());
00510 
00511     if (KNOB_CHECK_ACCESS_TYPE)
00512     {
00513         _writableBuffers->numRanges = 0;
00514         for (const auto& buffer : allocatedBuffers)
00515         {
00516             if (buffer.Access().IsWrite())
00517             {
00518                 _writableBuffers->ranges[_writableBuffers->numRanges++] = buffer.Range();
00519             }
00520         }
00521         memMapper.Write(_writableBuffers, (uint32_t)_writableBuffers->SizeOf());
00522     }
00523 
00524     // Initialize per-access arguments of HLI functions
00525     for (auto& entry: kernelProfile.GetMemAccessMap())
00526     {
00527         auto        insId = entry.first;
00528 
00529         if (int32_t(insId) < knobMinInstrumentIns || knobMaxInstrumentIns < int32_t(insId))
00530         {
00531             continue;
00532         }
00533 
00534         MemAccess&        memAccess = entry.second;
00535         GtMemoryAddrModel addrModel = memAccess.AddrModel();
00536         BoundsCheckArgs&  bcArgs    = memAccess.GetBoundsCheckArgs();
00537 
00538         bcArgs.out.oobCount    = 0;
00539         bcArgs.in.dataSize     = memAccess.DataSize();
00540         bcArgs.in.numAddresses = memAccess.NumAddresses();
00541 
00542         // Compute size of the available memory space for BTS, SLM and scratch accesses
00543         bcArgs.in.surfaceSize = UINT32_MAX;
00544         if (addrModel.IsSlm())
00545         {
00546             bcArgs.in.surfaceSize = dispatcher.SlmSize();
00547         }
00548         else if (addrModel.IsSurfaceState())
00549         {
00550             bcArgs.in.surfaceSize = dispatcher.ScratchSpaceSize(); // Assuming SS address model used in scratch access only
00551         }
00552         else if (addrModel.IsBts())
00553         {
00554             GtMemoryBuffer buffer = GetBtiBuffer(addrModel.Bti(), allocatedBuffers, dispatcher);
00555             if (!buffer.IsEmpty() && (!KNOB_CHECK_ACCESS_TYPE || buffer.Access().Includes(memAccess.AccessType())))
00556             {
00557                 bcArgs.in.surfaceSize = (uint32_t)buffer.Size(); GTPIN_ASSERT(buffer.Size() <= UINT32_MAX);
00558             }
00559         }
00560 
00561         memMapper.Write(&bcArgs, sizeof(bcArgs));
00562     }
00563 }
00564 
00565 void BoundsCheck::OnKernelComplete(IGtKernelDispatch& dispatcher)
00566 {
00567     if (dispatcher.IsProfilingEnabled())
00568     {
00569         KernelProfile& kernelProfile = _kernels.at(dispatcher.Kernel().Id());
00570         kernelProfile.RecordBoundsCheckResults(dispatcher);
00571     }
00572 }
00573 
00574 bool BoundsCheck::InsertBoundsCheck(const IGtIns &ins, const MemAccess& memAccess, IGtKernelInstrument& instrumentor)
00575 {
00576     GTPIN_ASSERT(memAccess.IsValid() && (memAccess.Id() == ins.Id()));
00577 
00578     uint32_t numAddresses = memAccess.NumAddresses();
00579     if (numAddresses == 0)
00580     {
00581         return false; // Nothing to check
00582     }
00583 
00584     const IGtKernel&         kernel               = instrumentor.Kernel();
00585     const IGtGenModel&       genModel             = kernel.GenModel();
00586     uint32_t                 regSize              = genModel.GrfRegSize();
00587     const GtMemoryAddrModel& addrModel            = memAccess.AddrModel();
00588     uint32_t                 addrSize             = addrModel.PtrSize();
00589     GtReg                    firstReg             = GrfReg(memAccess.FirstAddrReg(), 0, regSize);
00590     uint32_t                 numRegs              = RoundUp(memAccess.NumAddresses() * addrSize, regSize) / regSize;
00591     BoundsCheckArgs*         checkArgs            = const_cast<BoundsCheckArgs*>(&memAccess.GetBoundsCheckArgs());
00592     bool                     checkWritableBuffers = (KNOB_CHECK_ACCESS_TYPE && memAccess.AccessType().IsWrite());
00593     MemBounds*               globalBuffers        = (checkWritableBuffers ? _writableBuffers : _allocatedBuffers);
00594     IargConstGrfRange        addrPayload(firstReg.RegNum(), numRegs);
00595     IargInsOpMask            accessMask(ins);
00596 
00597     if (addrModel.IsA64())
00598     {
00599         GTPIN_ASSERT(addrSize == sizeof(uint64_t));
00600 
00601         if(ins.IsBlock2DAccess())
00602         {
00603             _checkBlock2DAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00604                                 NullReg(),          // Unused return value
00605                                 globalBuffers,      // arg[0]: Available global buffers
00606                                 addrPayload,        // arg[1]: Address payload
00607                                 accessMask,         // arg[2]: Per-channel mask of memory accesses
00608                                 checkArgs           // arg[3]: Bounds check arguments
00609                                 );
00610         }
00611         else if (numAddresses == 1)
00612         {
00613             _checkA64AccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00614                                 NullReg(),          // Unused return value
00615                                 globalBuffers,      // arg[0]: Available global buffers
00616                                 firstReg,           // arg[1]: Base address of the accessed memory range
00617                                 accessMask,         // arg[2]: Per-channel mask of memory accesses
00618                                 checkArgs           // arg[3]: Bounds check arguments
00619                                 );
00620         }
00621         else
00622         {
00623             _checkA64ScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00624                                 NullReg(),          // Unused return value
00625                                 globalBuffers,      // arg[0]: Available global buffers
00626                                 addrPayload,        // arg[1]: Base addresses of accessed memory ranges
00627                                 accessMask,         // arg[2]: Per-channel mask of memory accesses
00628                                 checkArgs           // arg[3]: Bounds check arguments
00629                                 );
00630         }
00631     }
00632     else if (addrModel.IsA32())
00633     {
00634         GTPIN_ASSERT(addrSize == sizeof(uint32_t));
00635 
00636         if (numAddresses == 1)
00637         {
00638             _checkA32AccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00639                                 NullReg(),          // Unused return value
00640                                 globalBuffers,      // arg[0]: Available global buffers
00641                                 firstReg,           // arg[1]: Base address of the accessed memory range
00642                                 accessMask,         // arg[2]: Per-channel mask of memory accesses
00643                                 checkArgs           // arg[3]: Bounds check arguments
00644                                 );
00645         }
00646         else
00647         {
00648             _checkA32ScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00649                                 NullReg(),          // Unused return value
00650                                 globalBuffers,      // arg[0]: Available global buffers
00651                                 addrPayload,        // arg[1]: Base addresses of accessed memory ranges
00652                                 accessMask,         // arg[2]: Per-channel mask of memory accesses
00653                                 checkArgs           // arg[3]: Bounds check arguments
00654                                 );
00655         }
00656     }
00657     else
00658     {
00659         GTPIN_ASSERT(addrSize == sizeof(uint32_t));
00660         GTPIN_ASSERT_MSG(addrModel.IsBts() || addrModel.IsSlm() || addrModel.IsBss() || addrModel.IsSurfaceState(),
00661                          "Unknown address model: " + addrModel.ToString());
00662 
00663         if (numAddresses == 1)
00664         {
00665             _checkSurfaceAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00666                                 NullReg(),          // Unused return value
00667                                 firstReg,           // arg[0]: Base address of the accessed memory range
00668                                 accessMask,         // arg[1]: Per-channel mask of memory accesses
00669                                 checkArgs           // arg[2]: Bounds check arguments
00670                                 );
00671         }
00672         else
00673         {
00674             _checkSurfaceScatteredAccessFunc.InsertCallAtInstruction(instrumentor, ins, GtIpoint::Before(),
00675                                 NullReg(),          // Unused return value
00676                                 addrPayload,        // arg[0]: Base addresses of accessed memory ranges
00677                                 accessMask,         // arg[1]: Per-channel mask of memory accesses
00678                                 checkArgs           // arg[2]: Bounds check arguments
00679                                 );
00680         }
00681     }
00682     return true;
00683 }
00684 
00685 GtMemoryBuffer BoundsCheck::GetBtiBuffer(uint32_t bti, const GtMemoryBufferConstSpan& allocatedBuffers,
00686                                          const IGtKernelDispatch& dispatcher)
00687 {
00688     Uint32Validatable<>  argIndex;
00689     UintPtrValidatable<> argPtr;
00690 
00691     for (const auto& arg : dispatcher.Kernel().PayloadArguments())
00692     {
00693         if (arg.IsMemoryPointer() && arg.IsExplicit() && arg.addrModel.IsValid() && (arg.addrModel.Bti() == bti))
00694         {
00695             ConstByteSpan argValue = dispatcher.GetPayloadArgumentValue(arg.index);
00696             if (argValue.size() == sizeof(uintptr_t))
00697             {
00698                 argPtr   = *reinterpret_cast<const uintptr_t*>(argValue.data());
00699                 argIndex = arg.index;
00700             }
00701         }
00702     }
00703     if (!argPtr.IsValid())
00704     {
00705         return {};
00706     }
00707 
00708     const GtMemoryBuffer* buffer = FindBuffer(allocatedBuffers, argPtr);
00709     if (!buffer)
00710     {
00711         return {};
00712     }
00713 
00714     // IF buffer_offset is specified THEN
00715     //    BTI_base == allocation_base_of(arg_value), AND BTI_size = allocation_size_of(arg_value)
00716     // ELSE
00717     //    BTI_base == arg_value, AND BTI_size = allocation_size_of(arg_value) - (arg_value - allocation_base_of(arg_value))
00718 
00719     for (const auto& arg : dispatcher.Kernel().PayloadArguments())
00720     {
00721         if ((arg.index == argIndex) && (std::strcmp(arg.type, "buffer_offset") == 0))
00722         {
00723             return *buffer;
00724         }
00725     }
00726 
00727     size_t offset = argPtr - buffer->Base(); GTPIN_ASSERT(offset <= buffer->Size());
00728     return GtMemoryBuffer(argPtr, buffer->Size() - offset, buffer->Access());
00729 }
00730 
00731 const GtMemoryBuffer* BoundsCheck::FindBuffer(const GtMemoryBufferConstSpan& buffers, uintptr_t ptr)
00732 {
00733     for (const auto& buffer : buffers)
00734     {
00735         if (buffer.Range().Contains(ptr))
00736         {
00737             return &buffer;
00738         }
00739     }
00740     return nullptr;
00741 }
00742 
00743 void BoundsCheck::LoadHliLibrary()
00744 {
00745     std::string modulePath = JoinPath(GetKnobValue<std::string>("installDir"), "Examples", "bounds_check.cl");
00746     _hliModule = GTPin_GetCore()->HliLibrary().CompileModuleFromFile(modulePath.c_str());
00747     GTPIN_ASSERT_MSG(_hliModule != nullptr, "Could not load HLI module " + modulePath);
00748 }
00749 
00750 void BoundsCheck::Fini()
00751 {
00752     std::string str;
00753 
00754     // Dump profiling results and assembly code of all kernels
00755     for (const auto& entry : _kernels)
00756     {
00757         const auto& kernelProfile = entry.second;
00758         str += kernelProfile.BoundsCheckResults();
00759         kernelProfile.DumpAsm();
00760     }
00761 
00762     std::ofstream fs(JoinPath(GTPin_GetCore()->ProfileDir(), "bounds_check.txt"));
00763     GTPIN_ASSERT(fs.is_open());
00764     fs << str;
00765 
00766     if (!KNOB_NO_COUT)
00767     {
00768         std::cout << str;
00769     }
00770 }
00771 
00772 /* ============================================================================================= */
00773 // GTPin_Entry
00774 /* ============================================================================================= */
00775 EXPORT_C_FUNC void GTPin_Entry(int argc, const char *argv[])
00776 {
00777     // Parse command line and configure GTPin
00778     ConfigureGTPin(argc, argv);
00779 
00780     // Register the tool (callbacks) with the GTPin core
00781     BoundsCheck::Instance()->Register();
00782 
00783     // Compile and load library of HLI functions
00784     BoundsCheck::Instance()->LoadHliLibrary();
00785 
00786     // Register the termination function
00787     atexit(BoundsCheck::OnFini);
00788 }

bounds_check.cl - HLI function implementations in OpenCL.

00001 /*========================== begin_copyright_notice ============================
00002 Copyright (C) 2024-2026 Intel Corporation
00003 
00004 SPDX-License-Identifier: MIT
00005 ============================= end_copyright_notice ===========================*/
00006 
00007 /*!
00008  * @file Library of High-Level Instrumentation (HLI) functions used by the bounds_check tool
00009  */
00010 
00011 #include "hlif_basic_defs.h"
00012 #include "bounds_check.h"
00013 
00014 /*!
00015  * @brief HLI function that detects OOB violations in the FLAT A64 memory access
00016  * @see bounds_check.h for details
00017  */
00018 IGC_STACK_CALL void CheckA64Access(__global const MemBounds* allocatedBuffers,
00019                                    uint64_t addr,
00020                                    uint32_t accessMask,
00021                                    __global BoundsCheckArgs* boundsCheckArgs)
00022 {
00023     if ((accessMask & 0x1) != 0)
00024     {
00025         uint32_t numRanges = allocatedBuffers->numRanges;
00026         uint32_t dataSize  = boundsCheckArgs->in.dataSize;
00027 
00028         uint32_t rIdx = 0;
00029         for (; rIdx != numRanges; ++rIdx)
00030         {
00031             __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]);
00032             uintptr_t base = range->base;
00033             uintptr_t end  = base + range->size;
00034             if ((addr >= base) && (addr + dataSize <= end))
00035             {
00036                 return;
00037             }
00038         }
00039         atomic_inc(&(boundsCheckArgs->out.oobCount));
00040     }
00041 }
00042 
00043 /*!
00044  * @brief HLI function that detects OOB violations in the FLAT A32 memory access
00045  * @see bounds_check.h for details
00046  */
00047 IGC_STACK_CALL void CheckA32Access(__global const MemBounds* allocatedBuffers,
00048                                    uint32_t addr,
00049                                    uint32_t accessMask,
00050                                    __global BoundsCheckArgs* boundsCheckArgs)
00051 {
00052     if ((accessMask & 0x1) != 0)
00053     {
00054         uint32_t numRanges = allocatedBuffers->numRanges;
00055         uint32_t dataSize  = boundsCheckArgs->in.dataSize;
00056 
00057         uint32_t rIdx = 0;
00058         for (; rIdx != numRanges; ++rIdx)
00059         {
00060             __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]);
00061             uintptr_t base = range->base;
00062             uintptr_t end  = base + range->size;
00063             if ((addr >= base) && (addr + dataSize <= end))
00064             {
00065                 return;
00066             }
00067         }
00068         atomic_inc(&(boundsCheckArgs->out.oobCount));
00069     }
00070 }
00071 
00072 /*!
00073  * @brief HLI function that detects OOB violations in the surface (BTS/SLM/scratch) memory access
00074  * @see bounds_check.h for details
00075  */
00076 IGC_STACK_CALL void CheckSurfaceAccess(uint32_t offset, uint32_t accessMask, __global BoundsCheckArgs* boundsCheckArgs)
00077 {
00078     if ((accessMask & 0x1) != 0)
00079     {
00080         uint32_t dataSize    = boundsCheckArgs->in.dataSize;
00081         uint32_t surfaceSize = boundsCheckArgs->in.surfaceSize;
00082         if (offset + dataSize > surfaceSize)
00083         {
00084             atomic_inc(&(boundsCheckArgs->out.oobCount));
00085         }
00086     }
00087 }
00088 
00089 /*!
00090  * @brief HLI function that detects OOB violations in the scattered FLAT A64 memory access
00091  * @see bounds_check.h for details
00092  */
00093 IGC_STACK_CALL void CheckA64ScatteredAccess(__global const MemBounds* allocatedBuffers,
00094                                             __global const uint64_t* addrPayload,
00095                                             uint32_t accessMask,
00096                                             __global BoundsCheckArgs* boundsCheckArgs)
00097 {
00098     if (accessMask != 0)
00099     {
00100         uint32_t numRanges    = allocatedBuffers->numRanges;
00101         uint32_t numAddresses = boundsCheckArgs->in.numAddresses;
00102         uint32_t dataSize     = boundsCheckArgs->in.dataSize;
00103 
00104         uint32_t oobCount = 0;
00105 
00106         for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx)
00107         {
00108             if ((accessMask & (0x1 << aIdx)) != 0)
00109             {
00110                 uint64_t addr = addrPayload[aIdx];
00111                 uint32_t rIdx = 0;
00112                 for (; rIdx != numRanges; ++rIdx)
00113                 {
00114                     __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]);
00115                     uintptr_t base = range->base;
00116                     uintptr_t end  = base + range->size;
00117                     if ((addr >= base) && (addr + dataSize <= end))
00118                     {
00119                         break;
00120                     }
00121                 }
00122                 if (rIdx == numRanges)
00123                 {
00124                     oobCount++;
00125                 }
00126             }
00127         }
00128         if (oobCount)
00129         {
00130             atomic_add(&(boundsCheckArgs->out.oobCount), oobCount);
00131         }
00132     }
00133 }
00134 
00135 /*!
00136  * @brief HLI function that detects OOB violations in the scattered FLAT A32 memory access
00137  * @see bounds_check.h for details
00138  */
00139 IGC_STACK_CALL void CheckA32ScatteredAccess(__global const MemBounds* allocatedBuffers,
00140                                             __global const uint32_t* addrPayload,
00141                                             uint32_t accessMask,
00142                                             __global BoundsCheckArgs* boundsCheckArgs)
00143 {
00144     if (accessMask != 0)
00145     {
00146         uint32_t numRanges    = allocatedBuffers->numRanges;
00147         uint32_t numAddresses = boundsCheckArgs->in.numAddresses;
00148         uint32_t dataSize     = boundsCheckArgs->in.dataSize;
00149 
00150         uint32_t oobCount = 0;
00151 
00152         for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx)
00153         {
00154             if ((accessMask & (0x1 << aIdx)) != 0)
00155             {
00156                 uint32_t addr = addrPayload[aIdx];
00157                 uint32_t rIdx = 0;
00158                 for (; rIdx != numRanges; ++rIdx)
00159                 {
00160                     __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]);
00161                     uintptr_t base = range->base;
00162                     uintptr_t end  = base + range->size;
00163                     if ((addr >= base) && (addr + dataSize <= end))
00164                     {
00165                         break;
00166                     }
00167                 }
00168                 if (rIdx == numRanges)
00169                 {
00170                     oobCount++;
00171                 }
00172             }
00173         }
00174         if (oobCount)
00175         {
00176             atomic_add(&(boundsCheckArgs->out.oobCount), oobCount);
00177         }
00178     }
00179 }
00180 
00181 /*!
00182  * @brief HLI function that detects OOB violations in the scattered surface (BTS/SLM/scratch) memory access
00183  * @see bounds_check.h for details
00184  */
00185 IGC_STACK_CALL void CheckSurfaceScatteredAccess( __global const uint32_t* offsets,
00186                                                  uint32_t accessMask,
00187                                                  __global BoundsCheckArgs* boundsCheckArgs)
00188 {
00189     if (accessMask != 0)
00190     {
00191         uint32_t numAddresses = boundsCheckArgs->in.numAddresses;
00192         uint32_t dataSize     = boundsCheckArgs->in.dataSize;
00193         uint32_t surfaceSize  = boundsCheckArgs->in.surfaceSize;
00194 
00195         for (uint32_t aIdx = 0; aIdx != numAddresses; ++aIdx)
00196         {
00197             if (((accessMask & (0x1 << aIdx)) != 0) && (offsets[aIdx] + dataSize > surfaceSize))
00198             {
00199                 atomic_inc(&(boundsCheckArgs->out.oobCount));
00200                 return;
00201             }
00202         }
00203     }
00204 }
00205 
00206 /*!
00207  * @brief HLI function that detects OOB violations in 2D block memory access
00208  *
00209  * @param[in]       allocatedBuffers Pointer to the memory bounds structure containing allocated ranges
00210  * @param[in]       block            Pointer to the 2D block structure containing block dimensions and surface details
00211  * @param[in]       accessMask Mask  Per-channel mask of memory accesses
00212  * @param[in,out]   boundsCheckArgs  Pointer to the bounds check arguments structure
00213  *
00214  */
00215 IGC_STACK_CALL void CheckBlock2DAccess(__global const MemBounds* allocatedBuffers,
00216                                        __global const Block2D* block,
00217                                        uint32_t accessMask,
00218                                        __global BoundsCheckArgs* boundsCheckArgs)
00219 {
00220     if ((accessMask & 0x1) == 0)
00221     {
00222         return;
00223     }
00224 
00225     uint32_t numRanges          = allocatedBuffers->numRanges;
00226 
00227     uint64_t surfaceBaseAddress = block->surface_base_address;
00228     uint32_t surfaceHeight      = (block->surface_height & 0x00FFFFFF) + 1;
00229     uint32_t surfaceWidth       = (block->surface_width & 0x00FFFFFF) + 1;
00230     uint32_t surfacePitch       = (block->surface_pitch & 0x00FFFFFF) + 1;
00231     uint32_t numBlocks          = (block->array_length + 1);
00232     uint32_t blockHeight        = (block->block_height + 1);
00233     uint32_t blockWidth         = (block->block_width + 1);
00234     int32_t  blockStartX        = block->block_start_x;
00235     int32_t  blockStartY        = block->block_start_y;
00236 
00237     uint32_t dataSize           = boundsCheckArgs->in.dataSize;
00238 
00239     uint32_t lastX = blockStartX + numBlocks * blockWidth - 1;
00240     if (lastX * dataSize >= surfaceWidth)
00241     {
00242         lastX = (surfaceWidth / dataSize) - 1;
00243     }
00244 
00245     uint32_t lastY = blockStartY + blockHeight - 1;
00246     if (lastY >= surfaceHeight)
00247     {
00248         lastY = surfaceHeight - 1;
00249     }
00250 
00251     // Calculate the first and last byte address of the blocks being accessed
00252     uint64_t firstByteAddr = surfaceBaseAddress + blockStartY * surfacePitch + blockStartX * dataSize;
00253     uint64_t lastByteAddr  = surfaceBaseAddress + lastY * surfacePitch + lastX * dataSize -1;
00254 
00255     // Check if the access is within any single range
00256     for (uint32_t rIdx = 0; rIdx < numRanges; ++rIdx)
00257     {
00258         __global const MemRange* range = &(allocatedBuffers->ranges[rIdx]);
00259         uintptr_t rangeBase = range->base;
00260         uintptr_t rangeEnd  = rangeBase + range->size; 
00261 
00262         if ((firstByteAddr >= rangeBase) && (lastByteAddr < rangeEnd))
00263         {
00264             return;
00265         }
00266     }
00267     atomic_inc(&(boundsCheckArgs->out.oobCount));
00268 }

(Back to the list of all GTPin Sample Tools)


 All Data Structures Functions Variables Typedefs Enumerations Enumerator


  Copyright (C) 2013-2025 Intel Corporation
SPDX-License-Identifier: MIT