[OpenMP][NVPTX] Replaced CUDA builtin vars with LLVM intrinsics
author Shilei Tian <tianshilei1992@gmail.com>
Wed, 20 Jan 2021 17:01:51 +0000 (12:01 -0500)
committer Shilei Tian <tianshilei1992@gmail.com>
Wed, 20 Jan 2021 17:02:06 +0000 (12:02 -0500)
Replaced CUDA builtin vars with LLVM intrinsics so that we no longer need
definitions of those builtin vars.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95013

openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu

index 8052b92..b5ef549 100644 (file)
@@ -115,10 +115,12 @@ DEVICE void __kmpc_impl_threadfence_block() { __threadfence_block(); }
 DEVICE void __kmpc_impl_threadfence_system() { __threadfence_system(); }
 
 // Calls to the NVPTX layer (assuming 1D layout)
-DEVICE int GetThreadIdInBlock() { return threadIdx.x; }
-DEVICE int GetBlockIdInKernel() { return blockIdx.x; }
-DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
-DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+DEVICE int GetThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
+DEVICE int GetBlockIdInKernel() { return __nvvm_read_ptx_sreg_ctaid_x(); }
+DEVICE int GetNumberOfBlocksInKernel() {
+  return __nvvm_read_ptx_sreg_nctaid_x();
+}
+DEVICE int GetNumberOfThreadsInBlock() { return __nvvm_read_ptx_sreg_ntid_x(); }
 DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
 DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }