From c1151dd35969de27b9b5ef1127e940b98c44987b Mon Sep 17 00:00:00 2001 From: Amit Pandey Date: Mon, 7 Apr 2025 12:12:51 +0530 Subject: [PATCH] [OpenMP][Offload][ASan] HostCall support for ASan. Patch implements ASan hostcall support via GPU libc RPC. --- clang/lib/Driver/ToolChain.cpp | 2 + clang/lib/Driver/ToolChains/Gnu.cpp | 10 +- offload/libomptarget/exports | 2 + offload/plugins-nextgen/common/CMakeLists.txt | 2 + .../plugins-nextgen/common/include/Emissary.h | 9 +- .../plugins-nextgen/common/src/Emissary.cpp | 9 +- .../common/src/EmissaryPrint.cpp | 6 - .../common/src/EmissarySanitizer.cpp | 364 ++++++++++++++++++ offload/plugins-nextgen/common/src/RPC.cpp | 6 +- openmp/device/CMakeLists.txt | 160 ++++---- openmp/device/include/EmissaryIds.h | 7 +- openmp/device/src/EmissaryPrint.cpp | 12 +- openmp/device/src/State.cpp | 2 +- 13 files changed, 501 insertions(+), 90 deletions(-) create mode 100644 offload/plugins-nextgen/common/src/EmissarySanitizer.cpp diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index b1c6ef7bb7e51..210cf4e87b7fd 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1644,6 +1644,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { Res |= SanitizerKind::ShadowCallStack; if (getTriple().isAArch64(64)) Res |= SanitizerKind::MemTag; + if (getTriple().isAMDGPU()) + Res |= SanitizerKind::Address; return Res; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 07df986463690..c05836d7cbed3 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -437,10 +437,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } // Make sure openmp finds it libomp.so before all others. 
- if (Args.hasArg(options::OPT_fopenmp) || - JA.isHostOffloading(Action::OFK_OpenMP)) { - addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); - CmdArgs.push_back(Args.MakeArgString("-L" + D.Dir + "/../lib")); + if (!Args.hasArg(options::OPT_fsanitize_EQ)) { + if (Args.hasArg(options::OPT_fopenmp) || + JA.isHostOffloading(Action::OFK_OpenMP)) { + addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); + CmdArgs.push_back(Args.MakeArgString("-L" + D.Dir + "/../lib")); + } } Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_u}); diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index cc6f7903f494c..1e80859aff534 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -62,6 +62,8 @@ VERS1.0 { __ockl_dm_alloc; __ockl_dm_dealloc; __ockl_devmem_request; + __asan_malloc_impl; + __asan_free_impl; llvm_omp_target_alloc_host; llvm_omp_target_alloc_shared; llvm_omp_target_alloc_device; diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 198e2add6b2d3..c2fbc4ad7d393 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -12,6 +12,7 @@ if(OFFLOAD_ENABLE_EMISSARY_APIS) src/Emissary.cpp src/EmissaryFortrt.cpp src/EmissaryPrint.cpp + src/EmissarySanitizer.cpp ) endif() @@ -89,6 +90,7 @@ target_include_directories(PluginCommon PUBLIC ${LIBOMPTARGET_LLVM_INCLUDE_DIRS} ${LIBOMPTARGET_BINARY_INCLUDE_DIR} ${LIBOMPTARGET_INCLUDE_DIR} + /home/ampandey/aomp-toolchain/rocm/aomp/include/hsa ) set_target_properties(PluginCommon PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/offload/plugins-nextgen/common/include/Emissary.h b/offload/plugins-nextgen/common/include/Emissary.h index 4480ed9eb5506..2850599c6e539 100644 --- a/offload/plugins-nextgen/common/include/Emissary.h +++ b/offload/plugins-nextgen/common/include/Emissary.h @@ -15,11 +15,11 @@ #define OFFLOAD_EMISSARY_H #include 
"../../../../openmp/device/include/EmissaryIds.h" - +#include extern "C" { /// Called by rpc after receiving emissary argument buffer -emis_return_t Emissary(char *data); +emis_return_t Emissary(char *data, int NumLanes, int DeviceID); /// Called by Emissary for all Fortrt emissary functions emis_return_t EmissaryFortrt(char *data, emisArgBuf_t *ab); @@ -27,6 +27,11 @@ emis_return_t EmissaryFortrt(char *data, emisArgBuf_t *ab); /// Called by Emissary for all misc print functions emis_return_t EmissaryPrint(char *data, emisArgBuf_t *ab); +/// Called by Emissary for all Sanitizer categories for reporting illegal +/// access. +emis_return_t EmissarySanitizer(emisArgBuf_t *ab, emis_argptr_t *arg[MAXVARGS], + int NumLanes, int DeviceID); + /// Called by Emissary for all MPI emissary API functions __attribute((weak)) emis_return_t EmissaryMPI(char *data, emisArgBuf_t *ab, emis_argptr_t *arg[MAXVARGS]); diff --git a/offload/plugins-nextgen/common/src/Emissary.cpp b/offload/plugins-nextgen/common/src/Emissary.cpp index a3ed958797cd8..efa0251360caa 100644 --- a/offload/plugins-nextgen/common/src/Emissary.cpp +++ b/offload/plugins-nextgen/common/src/Emissary.cpp @@ -18,7 +18,7 @@ #include "Emissary.h" -extern "C" emis_return_t Emissary(char *data) { +extern "C" emis_return_t Emissary(char *data, int NumLanes, int DeviceID) { emisArgBuf_t ab; emisExtractArgBuf(data, &ab); emis_return_t result = 0; @@ -59,6 +59,13 @@ extern "C" emis_return_t Emissary(char *data) { result = EmissaryReserve(data, &ab, args); break; } + case EMIS_ID_SANITIZER: { + if (EmissaryBuildVargs(ab.NumArgs, ab.keyptr, ab.argptr, ab.strptr, + &ab.data_not_used, &args[0]) != _RC_SUCCESS) + return (emis_return_t)0; + result = EmissarySanitizer(&ab, args, NumLanes, DeviceID); + break; + } default: fprintf(stderr, "EMIS_ID:%d fnid:%d not supported\n", ab.emisid, ab.emisfnid); diff --git a/offload/plugins-nextgen/common/src/EmissaryPrint.cpp b/offload/plugins-nextgen/common/src/EmissaryPrint.cpp index 
4b1ffacc93f7c..0e80ba624284b 100644 --- a/offload/plugins-nextgen/common/src/EmissaryPrint.cpp +++ b/offload/plugins-nextgen/common/src/EmissaryPrint.cpp @@ -38,12 +38,6 @@ extern "C" emis_return_t EmissaryPrint(char *data, emisArgBuf_t *ab) { rc = emissary_fprintf(&return_value, ab); break; } - case _ockl_asan_report_idx: { - fprintf(stderr, " asan_report not yet implemented\n"); - return_value = 0; - rc = _RC_STATUS_ERROR; - break; - } case _print_INVALID: default: { fprintf(stderr, " INVALID emissary function id (%d) for PRINT API \n",
diff --git a/offload/plugins-nextgen/common/src/EmissarySanitizer.cpp b/offload/plugins-nextgen/common/src/EmissarySanitizer.cpp
new file mode 100644
index 0000000000000..710ce4e7e924b
--- /dev/null
+++ b/offload/plugins-nextgen/common/src/EmissarySanitizer.cpp
@@ -0,0 +1,400 @@
+//===---- EmissarySanitizer.cpp - Sanitizer support via Emissary ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains source code of sanitizer support using Emissary.
+//
+//===----------------------------------------------------------------------===//
+
+/* Copyright (c) 2023 Advanced Micro Devices, Inc.
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+ */
+
+#include "Emissary.h"
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <fcntl.h>
+#include <string>
+#include <sys/stat.h>
+#include <tuple>
+#include <unistd.h>
+#include <utility>
+#include <vector>
+
+#if defined(__has_include)
+#if __has_include("hsa.h")
+#include "hsa.h"
+#include "hsa_ven_amd_loader.h"
+#elif __has_include("hsa/hsa.h")
+#include "hsa/hsa.h"
+#include "hsa/hsa_ven_amd_loader.h"
+#endif
+#else
+#include "hsa/hsa.h"
+#include "hsa/hsa_ven_amd_loader.h"
+#endif
+
+/// Maps a device program counter to the URI of the loaded code object that
+/// contains it, using the HSA AMD loader extension.
+class UriLocator {
+public:
+  struct UriInfo {
+    std::string uriPath;
+    int64_t loadAddressDiff;
+  };
+
+  struct UriRange {
+    uint64_t startAddr_, endAddr_;
+    int64_t elfDelta_;
+    std::string Uri_;
+  };
+
+  bool init_ = false;
+  std::vector<UriRange> rangeTab_;
+  hsa_ven_amd_loader_1_03_pfn_t fn_table_{};
+
+  hsa_status_t createUriRangeTable();
+
+  ~UriLocator() {}
+
+  UriInfo lookUpUri(uint64_t device_pc);
+  std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo &uri_path,
+                                                  int *uri_fd);
+};
+
+extern "C" void __asan_report_nonself_error(
+    uint64_t *callstack, uint32_t n_callstack, uint64_t *addr, uint32_t naddr,
+    uint64_t *entity_ids, uint32_t n_entities, bool is_write,
+    uint32_t access_size, bool is_abort, const char *name, int64_t vma_adjust,
+    int fd, uint64_t file_extent_size, uint64_t file_exten_start = 0);
+
+/// Open \p fname read-only and report its descriptor and size.
+///
+/// Returns false without writing anything if either output pointer is null.
+/// If the file cannot be opened or stat'ed, returns false with \p *fd_ptr set
+/// to -1. Otherwise stores the descriptor and the file size and returns true.
+static bool GetFileHandle(const char *fname, int *fd_ptr, size_t *sz_ptr) {
+  if (fd_ptr == nullptr || sz_ptr == nullptr)
+    return false;
+
+  *fd_ptr = open(fname, O_RDONLY);
+  if (*fd_ptr < 0)
+    return false;
+
+  struct stat stat_buf;
+  if (fstat(*fd_ptr, &stat_buf) != 0) {
+    close(*fd_ptr);
+    *fd_ptr = -1; // do not hand a closed descriptor back to the caller
+    return false;
+  }
+
+  *sz_ptr = stat_buf.st_size;
+  return true;
+}
+
+/// Fill rangeTab_ with one UriRange per loaded code object of every frozen
+/// executable reported by the loader extension in fn_table_.
+///
+/// Returns HSA_STATUS_ERROR if a required loader entry point is null,
+/// otherwise the status returned by the loader iteration.
+hsa_status_t UriLocator::createUriRangeTable() {
+  // Both callbacks are captureless; their context arrives through `data`,
+  // a uint64_t[2] holding {&fn_table_, &rangeTab_}.
+  auto execCb = [](hsa_executable_t exec, void *data) -> hsa_status_t {
+    int execState = 0;
+    hsa_status_t status =
+        hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState);
+    // Executables that are not frozen are skipped without stopping the walk.
+    if (status != HSA_STATUS_SUCCESS ||
+        execState != HSA_EXECUTABLE_STATE_FROZEN)
+      return status;
+
+    auto loadedCodeObjectCb = [](hsa_executable_t exec,
+                                 hsa_loaded_code_object_t lcobj,
+                                 void *data) -> hsa_status_t {
+      uint64_t *argsCb = static_cast<uint64_t *>(data);
+      auto *fnTab =
+          reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t *>(argsCb[0]);
+      auto *rangeTab = reinterpret_cast<std::vector<UriRange> *>(argsCb[1]);
+
+      if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info)
+        return HSA_STATUS_ERROR;
+
+      // Query a single attribute of this loaded code object.
+      auto getInfo = [&](auto attr, void *out) -> hsa_status_t {
+        return fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
+            lcobj, attr, out);
+      };
+
+      uint64_t loadBAddr = 0, loadSize = 0;
+      uint32_t uriLen = 0;
+      int64_t delta = 0;
+      hsa_status_t result = getInfo(
+          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &loadBAddr);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = getInfo(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
+                       &loadSize);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = getInfo(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH,
+                       &uriLen);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = getInfo(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA,
+                       &delta);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      // std::string owns the buffer, so no path out of this callback can
+      // leak it; the stored URI holds exactly uriLen characters.
+      std::string uri(uriLen, '\0');
+      result = getInfo(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, &uri[0]);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      rangeTab->push_back(UriRange{loadBAddr, loadBAddr + loadSize - 1, delta,
+                                   std::move(uri)});
+      return HSA_STATUS_SUCCESS;
+    };
+
+    uint64_t *args = static_cast<uint64_t *>(data);
+    auto *fnExtTab = reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t *>(args[0]);
+    return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
+        exec, loadedCodeObjectCb, data);
+  };
+
+  if (!fn_table_.hsa_ven_amd_loader_iterate_executables)
+    return HSA_STATUS_ERROR;
+
+  uint64_t callbackArgs[2] = {(uint64_t)&fn_table_, (uint64_t)&rangeTab_};
+  return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb,
+                                                          (void *)callbackArgs);
+}
+
+/// Decode a loaded-code-object URI, open the file it names and return the
+/// {offset, size} of the code object inside that file.
+///
+/// The URI encoding is described in
+/// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri
+/// Only `file://<percent-encoded path>[#offset=<n>&size=<n>]` URIs are opened:
+/// \p uri.uriPath is then replaced by the decoded path and \p *uri_fd receives
+/// the descriptor, or -1 if the file cannot be opened. An empty URI, one
+/// without "//" or one with a malformed range specifier yields {0, 0}.
+std::pair<uint64_t, uint64_t> UriLocator::decodeUriAndGetFd(UriInfo &uri,
+                                                            int *uri_fd) {
+  if (uri.uriPath.empty())
+    return {0, 0};
+  const auto pos = uri.uriPath.find("//");
+  if (pos == std::string::npos) {
+    uri.uriPath = "";
+    return {0, 0};
+  }
+
+  uint64_t offset = 0, size = 0;
+  auto pathEnd = uri.uriPath.size(); // one past the encoded path
+  const auto rspos = uri.uriPath.find('#');
+  if (rspos != std::string::npos) {
+    // Range specifier "#offset=<n>&size=<n>": check its shape first so a
+    // truncated URI cannot index past the string or reach std::stoull
+    // without a leading digit.
+    const std::string offprefix = "offset=", sizeprefix = "size=";
+    const auto offbeg = rspos + 1 + offprefix.size();
+    const auto sbeg = uri.uriPath.find('&', rspos);
+    const auto sizebeg =
+        sbeg == std::string::npos ? sbeg : sbeg + 1 + sizeprefix.size();
+    if (sbeg == std::string::npos || sbeg <= offbeg ||
+        sizebeg >= uri.uriPath.size() ||
+        uri.uriPath.compare(rspos + 1, offprefix.size(), offprefix) != 0 ||
+        uri.uriPath.compare(sbeg + 1, sizeprefix.size(), sizeprefix) != 0 ||
+        !isdigit((unsigned char)uri.uriPath[offbeg]) ||
+        !isdigit((unsigned char)uri.uriPath[sizebeg])) {
+      uri.uriPath = "";
+      return {0, 0};
+    }
+    offset = std::stoull(uri.uriPath.substr(offbeg, sbeg - offbeg), nullptr, 0);
+    size = std::stoull(uri.uriPath.substr(sizebeg), nullptr, 0);
+    pathEnd = rspos;
+  }
+
+  // Non-file URIs are not opened; the parsed range is returned as is.
+  if (uri.uriPath.compare(0, pos, "file:") != 0)
+    return {offset, size};
+
+  // Value of a hexadecimal digit of either case, or -1.
+  auto hexVal = [](char c) -> int {
+    if (c >= '0' && c <= '9')
+      return c - '0';
+    if (c >= 'a' && c <= 'f')
+      return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+      return c - 'A' + 10;
+    return -1;
+  };
+
+  // Percent-decode the path. Only '%' followed by two hex digits inside the
+  // path is an escape; every other character is copied unchanged.
+  std::string decoded;
+  for (auto i = pos + 2; i < pathEnd;) {
+    const char cur = uri.uriPath[i];
+    const bool escape = cur == '%' && i + 2 < pathEnd &&
+                        hexVal(uri.uriPath[i + 1]) >= 0 &&
+                        hexVal(uri.uriPath[i + 2]) >= 0;
+    if (escape) {
+      decoded.push_back((char)((hexVal(uri.uriPath[i + 1]) << 4) |
+                               hexVal(uri.uriPath[i + 2])));
+      i += 3;
+    } else {
+      decoded.push_back(cur);
+      ++i;
+    }
+  }
+  uri.uriPath = decoded;
+
+  size_t fd_size = 0;
+  if (!GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size)) {
+    if (uri_fd)
+      *uri_fd = -1; // never leave a stale descriptor behind
+    return {offset, size};
+  }
+  // The range specifier is optional; without one the code object is the
+  // whole file starting at offset 0.
+  if (size == 0)
+    size = fd_size;
+  return {offset, size};
+}
+
+/// Return the URI and load-address delta of the loaded code object whose
+/// address range contains \p device_pc, or {"", 0} if none does or the range
+/// table cannot be built. The table is built on the first successful call.
+UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) {
+  const UriInfo notFound{"", 0};
+
+  if (!init_) {
+    hsa_status_t status = hsa_system_get_major_extension_table(
+        HSA_EXTENSION_AMD_LOADER, 1, sizeof(fn_table_), &fn_table_);
+    if (status != HSA_STATUS_SUCCESS)
+      return notFound;
+    status = createUriRangeTable();
+    if (status != HSA_STATUS_SUCCESS) {
+      rangeTab_.clear();
+      return notFound;
+    }
+    init_ = true;
+  }
+
+  for (const UriRange &seg : rangeTab_) {
+    if (device_pc < seg.startAddr_ || device_pc > seg.endAddr_)
+      continue;
+    // c_str() ends at the first NUL, so a terminator that may be stored
+    // inside Uri_ does not end up in the returned path.
+    return UriInfo{seg.Uri_.c_str(), seg.elfDelta_};
+  }
+  return notFound;
+}
+
+/// Forward one device-side ASan report to the host ASan runtime.
+///
+/// \p args carries the eight values passed by __ockl_sanitizer_report, in
+/// order: address, pc, work-group id x/y/z, wave id, access info, access
+/// size. \p NumLanes, clamped to [0, 64], is the lane count reported.
+static service_rc emissary_asan(emisArgBuf_t *ab, emis_argptr_t *args[MAXVARGS],
+                                int NumLanes, int DeviceID) {
+  (void)ab; // unused; kept for a uniform handler signature
+  const int MaxLanes = 64; // capacity of the per-lane arrays below
+
+  const uint64_t Addr = (uint64_t)args[0];
+  const uint64_t PC = (uint64_t)args[1];
+  const uint64_t Wgidx = (uint64_t)args[2];
+  const uint64_t Wgidy = (uint64_t)args[3];
+  const uint64_t Wgidz = (uint64_t)args[4];
+  const uint64_t WaveID = (uint64_t)args[5];
+  const uint64_t AccessInfo = (uint64_t)args[6];
+  const uint64_t AccessSize = (uint64_t)args[7];
+
+  // Bit 0 marks a write; any bit in the upper 32 bits makes it non-fatal.
+  const bool IsWrite = (AccessInfo & 1) != 0;
+  const bool IsAbort = (AccessInfo & 0xFFFFFFFF00000000) == 0;
+
+  // The reporter is given n_activeLanes as the address count and
+  // n_activeLanes + 4 as the entity count, so keep the count within the
+  // arrays and zero the slots that are not filled in here.
+  const uint64_t n_activeLanes =
+      NumLanes < 0 ? 0 : (NumLanes > MaxLanes ? MaxLanes : NumLanes);
+
+  uint64_t callstack[1] = {PC};
+  const uint32_t n_callstack = 1;
+
+  uint64_t entity_id[MaxLanes + 4] = {};
+  entity_id[0] = DeviceID;
+  entity_id[1] = Wgidx;
+  entity_id[2] = Wgidy;
+  entity_id[3] = Wgidz;
+  entity_id[4] = WaveID;
+
+  uint64_t device_failing_addresses[MaxLanes] = {};
+  device_failing_addresses[0] = Addr;
+
+  // Locate the code object containing the faulting pc. The locator has
+  // automatic storage, so nothing is heap-allocated or leaked per report.
+  uint64_t size = 0, offset = 0;
+  int uri_fd = -1;
+  UriLocator uri_locator{};
+  UriLocator::UriInfo uri_info = uri_locator.lookUpUri(callstack[0]);
+  std::tie(offset, size) = uri_locator.decodeUriAndGetFd(uri_info, &uri_fd);
+  const int64_t loadAddrAdjust = uri_info.loadAddressDiff;
+
+  // NOTE(review): uri_fd stays open after the call; confirm whether the
+  // reporter takes ownership before adding a close() here.
+  __asan_report_nonself_error(callstack, n_callstack, device_failing_addresses,
+                              n_activeLanes, entity_id, n_activeLanes + 4,
+                              IsWrite, AccessSize, IsAbort, "amdgpu",
+                              loadAddrAdjust, uri_fd, size, offset);
+  return _RC_SUCCESS;
+}
+
+/// Entry point for all EMIS_ID_SANITIZER calls: dispatch on the emissary
+/// function id in \p ab. Always returns 0; failures are logged to stderr.
+extern "C" emis_return_t EmissarySanitizer(emisArgBuf_t *ab,
+                                           emis_argptr_t *args[MAXVARGS],
+                                           int NumLanes, int DeviceID) {
+  // Both are initialised up front so that no path returns or tests an
+  // indeterminate value.
+  emis_return_t return_value = 0;
+  service_rc rc = _RC_STATUS_ERROR;
+
+  switch (ab->emisfnid) {
+  case _asan_report_idx:
+    rc = emissary_asan(ab, args, NumLanes, DeviceID);
+    break;
+  case _unsupported_SANITIZER:
+  default:
+    fprintf(stderr, "Unsupported Sanitizer ID: (%d). \n", ab->emisfnid);
+    break;
+  }
+  if (rc != _RC_SUCCESS)
+    fprintf(stderr, "HOST failure in EmissarySanitizer. \n");
+  return return_value;
+}
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp index 9462aa2828dbf..7750320c9c1c6 100644 --- a/offload/plugins-nextgen/common/src/RPC.cpp +++ b/offload/plugins-nextgen/common/src/RPC.cpp @@ -111,7 +111,7 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device, void *Args[NumLanes] = {nullptr}; Port.recv([&](rpc::Buffer *buffer, uint32_t ID) { Args[ID] = reinterpret_cast<void *>(buffer->data[0]); - Results[ID] = Emissary((char *)Args[ID]); + Results[ID] = Emissary((char *)Args[ID], NumLanes, Device.getDeviceId()); }); Port.send([&](rpc::Buffer *Buffer, uint32_t ID) { Device.moveBusyToFree_ArgBuf(Args[ID]); diff --git a/openmp/device/CMakeLists.txt b/openmp/device/CMakeLists.txt index b88c8e6074a2e..e945954e9eb2c
100644 --- a/openmp/device/CMakeLists.txt +++ b/openmp/device/CMakeLists.txt @@ -1,52 +1,56 @@ # Ensure the compiler is a valid clang when building the GPU target. set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") -if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND - ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "${req_ver}")) - message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler " - "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' " - " is not 'Clang ${req_ver}'.") +if(LLVM_VERSION_MAJOR + AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_EQUAL "${req_ver}")) + message( + FATAL_ERROR + "Cannot build GPU device runtime. CMake compiler " + "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' " + " is not 'Clang ${req_ver}'.") endif() option(OFFLOAD_ENABLE_EMISSARY_APIS "Enable build of GPU Emissary APIs" ON) if(OFFLOAD_ENABLE_EMISSARY_APIS) add_definitions(-DOFFLOAD_ENABLE_EMISSARY_APIS) - set(emissary_sources - src/EmissaryFortrt.cpp - src/EmissaryPrint.cpp - ) + set(emissary_sources src/EmissaryFortrt.cpp src/EmissaryPrint.cpp) endif() set(src_files - ${CMAKE_CURRENT_SOURCE_DIR}/src/Allocator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Configuration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Debug.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Kernel.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/LibC.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Mapping.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Misc.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Parallelism.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Profiling.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Reduction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/State.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Synchronization.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Tasking.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/DeviceUtils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Workshare.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/ExtraMapping.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/src/Xteamr.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Memory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/Xteams.cpp - ${emissary_sources} + ${CMAKE_CURRENT_SOURCE_DIR}/src/Allocator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Configuration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Debug.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/LibC.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Mapping.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Misc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Parallelism.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Profiling.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Reduction.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/State.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Synchronization.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Tasking.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeviceUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Workshare.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/ExtraMapping.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Xteamr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/Xteams.cpp + ${emissary_sources}) -) if(NOT LLVM_TARGETS_TO_BUILD OR "AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) - find_package(AMDDeviceLibs REQUIRED CONFIG - HINTS ${CMAKE_BINARY_DIR}/../../tools/rocm-device-libs - ${CMAKE_BINARY_DIR}/../rocm-device-libs-prefix/src/rocm-device-libs-build - ${CMAKE_INSTALL_PREFIX} - ) + find_package( + AMDDeviceLibs + REQUIRED + CONFIG + HINTS + ${CMAKE_BINARY_DIR}/../../tools/rocm-device-libs + ${CMAKE_BINARY_DIR}/../rocm-device-libs-prefix/src/rocm-device-libs-build + ${CMAKE_INSTALL_PREFIX}) + get_target_property(_ocml_bc ocml IMPORTED_LOCATION) get_target_property(_ockl_bc ockl IMPORTED_LOCATION) + get_target_property(_asanrtl_bc asanrtl IMPORTED_LOCATION) if(NOT _ockl_bc) message(FATAL_ERROR "Could not find ockl.bc") @@ -54,8 +58,21 @@ if(NOT LLVM_TARGETS_TO_BUILD OR "AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) if(NOT _ocml_bc) message(FATAL_ERROR "Could not find ocml.bc") endif() - list(APPEND compile_flags "SHELL: 
-Xclang -mlink-builtin-bitcode -Xclang ${_ockl_bc}") - list(APPEND compile_flags "SHELL: -Xclang -mlink-builtin-bitcode -Xclang ${_ocml_bc}") + if(SANITIZER_AMDGPU) + if(NOT _asanrtl_bc) + message(FATAL_ERROR "Could not find asanrtl.bc") + endif() + list(APPEND compile_flags "SHELL: -DSANITIZER_AMDGPU=1") + list(APPEND compile_flags + "SHELL: -Xclang -mlink-bitcode-file -Xclang ${_asanrtl_bc}") + list(APPEND compile_flags + "SHELL: -Xclang -mlink-bitcode-file -Xclang ${_ockl_bc}") + else() + list(APPEND compile_flags + "SHELL: -Xclang -mlink-builtin-bitcode -Xclang ${_ockl_bc}") + endif() + list(APPEND compile_flags + "SHELL: -Xclang -mlink-builtin-bitcode -Xclang ${_ocml_bc}") endif() list(APPEND compile_options -flto) @@ -73,30 +90,31 @@ if(LLVM_DEFAULT_TARGET_TRIPLE) endif() # We disable the slp vectorizer during the runtime optimization to avoid -# vectorized accesses to the shared state. Generally, those are "good" but -# the optimizer pipeline (esp. Attributor) does not fully support vectorized +# vectorized accesses to the shared state. Generally, those are "good" but the +# optimizer pipeline (esp. Attributor) does not fully support vectorized # instructions yet and we end up missing out on way more important constant -# propagation. That said, we will run the vectorizer again after the runtime -# has been linked into the user program. +# propagation. That said, we will run the vectorizer again after the runtime has +# been linked into the user program. 
list(APPEND compile_flags "SHELL: -mllvm -vectorize-slp=false") -if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR - "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn") +if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" + OR "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn") set(target_name "amdgpu") list(APPEND compile_flags "SHELL:-Xclang -mcode-object-version=none") -elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR - "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx") +elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" + OR "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx") set(target_name "nvptx") list(APPEND compile_flags --cuda-feature=+ptx63) endif() # Trick to combine these into a bitcode file via the linker's LTO pass. add_executable(libompdevice ${src_files}) -set_target_properties(libompdevice PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - LINKER_LANGUAGE CXX - BUILD_RPATH "" - INSTALL_RPATH "" - RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc) +set_target_properties( + libompdevice + PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + LINKER_LANGUAGE CXX + BUILD_RPATH "" + INSTALL_RPATH "" + RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc) # If the user built with the GPU C library enabled we will use that instead. 
if(TARGET libc) @@ -104,31 +122,37 @@ if(TARGET libc) endif() target_compile_definitions(libompdevice PRIVATE SHARED_SCRATCHPAD_SIZE=512) -target_include_directories(libompdevice PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../libc - ${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include) +target_include_directories( + libompdevice + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../libc + ${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include) target_compile_options(libompdevice PRIVATE ${compile_options} ${compile_flags}) -target_link_options(libompdevice PRIVATE - "-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm") +target_link_options(libompdevice PRIVATE "-flto" "-r" "-nostdlib" + "-Wl,--lto-emit-llvm") if(LLVM_DEFAULT_TARGET_TRIPLE) - target_link_options(libompdevice PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}") + target_link_options(libompdevice PRIVATE + "--target=${LLVM_DEFAULT_TARGET_TRIPLE}") endif() -install(TARGETS libompdevice - PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ - DESTINATION ${OPENMP_INSTALL_LIBDIR}) +install( + TARGETS libompdevice + PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ + DESTINATION ${OPENMP_INSTALL_LIBDIR}) add_library(ompdevice.all_objs OBJECT IMPORTED) -set_property(TARGET ompdevice.all_objs APPEND PROPERTY IMPORTED_OBJECTS - ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-${target_name}.bc) +set_property( + TARGET ompdevice.all_objs + APPEND + PROPERTY IMPORTED_OBJECTS + ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-${target_name}.bc) # Archive all the object files generated above into a static library add_library(ompdevice STATIC) add_dependencies(ompdevice libompdevice) -set_target_properties(ompdevice PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${OPENMP_INSTALL_LIBDIR}" - ARCHIVE_OUTPUT_NAME ompdevice - LINKER_LANGUAGE CXX -) +set_target_properties( + ompdevice + PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${OPENMP_INSTALL_LIBDIR}" + ARCHIVE_OUTPUT_NAME ompdevice + 
LINKER_LANGUAGE CXX) target_link_libraries(ompdevice PRIVATE ompdevice.all_objs) install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") diff --git a/openmp/device/include/EmissaryIds.h b/openmp/device/include/EmissaryIds.h index dd820c9920395..4bc30f783f72f 100644 --- a/openmp/device/include/EmissaryIds.h +++ b/openmp/device/include/EmissaryIds.h @@ -21,13 +21,13 @@ typedef enum { EMIS_ID_MPI, EMIS_ID_HDF5, EMIS_ID_RESERVE, + EMIS_ID_SANITIZER, } offload_emis_id_t; typedef enum { _print_INVALID, _printf_idx, _fprintf_idx, - _ockl_asan_report_idx, } offload_emis_print_t; /// The vargs function used by emissary API device stubs @@ -56,6 +56,11 @@ typedef enum { _FortranAStopStatement_idx, } offload_emis_fortrt_idx; +typedef enum { + _unsupported_SANITIZER, + _asan_report_idx, +} offload_emis_sanitizer_idx; + /// This structure is created by emisExtractArgBuf to make it easier /// to get values from the data buffer passed by rpc. typedef struct { diff --git a/openmp/device/src/EmissaryPrint.cpp b/openmp/device/src/EmissaryPrint.cpp index 35c160d1ba4ce..bd452ab536d40 100644 --- a/openmp/device/src/EmissaryPrint.cpp +++ b/openmp/device/src/EmissaryPrint.cpp @@ -48,20 +48,20 @@ __attribute__((flatten, always_inline)) void f90printd_(char *s, double *d) { // override the weak symbol for __ockl_devmem_request and // __ockl_sanitizer_report in rocm device lib ockl.bc because ockl uses // hostcall but OpenMP uses rpc. 
-// +#if SANITIZER_AMDGPU +__attribute__((noinline)) uint64_t __asan_malloc_impl(uint64_t bufsz, + uint64_t pc); +__attribute__((noinline)) void __asan_free_impl(uint64_t ptr, uint64_t pc); + __attribute__((noinline)) void __ockl_sanitizer_report(uint64_t addr, uint64_t pc, uint64_t wgidx, uint64_t wgidy, uint64_t wgidz, uint64_t wave_id, uint64_t is_read, uint64_t access_size) { unsigned long long rc = - _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_PRINT, _ockl_asan_report_idx), addr, + _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_SANITIZER, _asan_report_idx), addr, pc, wgidx, wgidy, wgidz, wave_id, is_read, access_size); return; } -#if SANITIZER_AMDGPU -__attribute__((noinline)) uint64_t __asan_malloc_impl(uint64_t bufsz, - uint64_t pc); -__attribute__((noinline)) void __asan_free_impl(uint64_t ptr, uint64_t pc); #endif __attribute__((flatten, always_inline)) char *global_allocate(uint32_t bufsz) { diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp index 20fdf3c0be753..a23bb67104e46 100644 --- a/openmp/device/src/State.cpp +++ b/openmp/device/src/State.cpp @@ -108,7 +108,7 @@ __attribute__((noinline)) void internal_free(void *Ptr) { extern "C" { #ifdef __AMDGCN__ -#ifdef USE_BUMP_ALLOCATOR +#if defined(USE_BUMP_ALLOCATOR) && !defined(SANITIZER_AMDGPU) [[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); } [[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); } #else