9 #if defined(NANOVDB_USE_CUDA)    10 #include <cuda_runtime_api.h>    13 #if defined(NANOVDB_USE_TBB)    14 #include <tbb/parallel_for.h>    15 #include <tbb/blocked_range.h>    24 template<std::size_t...>
    29 template<std::size_t N, std::size_t... Is>
    34 template<std::size_t... Is>
    // checkCUDA: reports a failed CUDA runtime call. When `result` is not cudaSuccess it
    // writes the numeric error code, the call site (`file`, `line`) and the
    // cudaGetErrorString() text to std::cerr. It is declared to return bool; the return
    // statements are not visible in this excerpt. Guarded by __CUDACC__.
    40 #if defined(__CUDACC__)    42 static inline bool checkCUDA(cudaError_t result, 
const char* file, 
const int line)
    44     if (result != cudaSuccess) {
    45         std::cerr << 
"CUDA Runtime API error " << result << 
" in file " << file << 
", line " << line << 
" : " << cudaGetErrorString(result) << 
".\n";
    // NANOVDB_CUDA_SAFE_CALL(x) forwards x to checkCUDA() together with the __FILE__ and
    // __LINE__ of the place where the macro is expanded.
    //
    // checkErrorCUDA: void counterpart of checkCUDA(). When `result` is not cudaSuccess it
    // prints the same diagnostic (error code, file, line, error string) to std::cerr.
    // Whatever it does after printing is not visible in this excerpt.
    51 #define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__)    53 static inline void checkErrorCUDA(cudaError_t result, 
const char* file, 
const int line)
    55     if (result != cudaSuccess) {
    56         std::cerr << 
"CUDA Runtime API error " << result << 
" in file " << file << 
", line " << line << 
" : " << cudaGetErrorString(result) << 
".\n";
    // NANOVDB_CUDA_CHECK_ERROR(result, file, line) passes its three arguments unchanged to
    // checkErrorCUDA().
    //
    // ApplyFunc<Fn, Args...>: callable wrapper that keeps the extra arguments in a
    // std::tuple (mArgs) and invokes mFunc(start, end, args...) on an index range.
    // The constructor takes (count, blockSize, fn, args...) and stores blockSize in
    // mBlockSize; the other member initialisers are not visible in this excerpt.
    61 #define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line)    65 template<
typename Fn, 
typename... Args>
    69     ApplyFunc(
int count, 
int blockSize, 
const Fn& fn, Args... args)
    71         , mBlockSize(blockSize)
    77     template<std::size_t... Is>
    // Unpacks the stored tuple into individual arguments via the index pack Is.
    80         mFunc(start, end, std::get<Is>(mArgs)...);
    // Block index i maps to the range starting at i * mBlockSize and spanning mBlockSize
    // items; any clamping of `end` is not visible in this excerpt.
    85         int start = i * mBlockSize;
    86         int end = i * mBlockSize + mBlockSize;
    92 #if defined(NANOVDB_USE_TBB)    93     void operator()(
const tbb::blocked_range<int>& r)
 const    95         int start = r.begin();
   107     std::tuple<Args...> mArgs;
   // parallelForKernel: CUDA kernel (guarded by __CUDACC__) in which every thread performs
   // WorkPerThread iterations. In iteration j the thread's item index is
   //   threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x
   // so consecutive iterations of one thread are blockDim.x * gridDim.x items apart.
   // For that index the functor is called on the single-item range [i, i + 1) with the
   // forwarded args. The statement between the index computation and the call
   // (presumably a bounds check against numItems) is not visible in this excerpt.
   // NOTE(review): i is a 32-bit int; confirm the launch sizes used keep the index
   // expression within int range.
   110 #if defined(__CUDACC__)   112 template<
int WorkPerThread, 
typename FnT, 
typename... Args>
   113 __global__ void parallelForKernel(
int numItems, FnT f, Args... args)
   115     for (
int j=0;j<WorkPerThread;++j)
   117         int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
   119             f(i, i + 1, args...);
   // Fragment that appears to belong to computeSync (its signature is not visible in this
   // excerpt): under __CUDACC__ it calls cudaDeviceSynchronize() and hands the returned
   // status, together with the caller-supplied file and line, to NANOVDB_CUDA_CHECK_ERROR.
   127 #if defined(__CUDACC__)   129         NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
   /// @brief Sets @c size bytes starting at @c data to the byte @c value.
   ///
   /// @param useCuda  When true the buffer is filled with cudaMemset (so @c data must be
   ///                 a pointer cudaMemset accepts); when false it is filled on the host
   ///                 with std::memset.
   /// @param data     Start of the buffer to fill.
   /// @param value    Byte written to every position of the buffer.
   /// @param size     Number of bytes to fill; 0 is a no-op.
   ///
   /// @note In a translation unit compiled without __CUDACC__, a call with
   ///       useCuda == true performs no work.
   inline void computeFill(bool useCuda, void* data, uint8_t value, size_t size)
   {
       // Nothing to do for an empty range; this also avoids passing a possibly-null
       // pointer to memset.
       if (size == 0)
           return;

       if (useCuda) {
   #if defined(__CUDACC__)
           // cudaMemset reports failure only through its return value, so route it
           // through the file's checker instead of discarding it.
           NANOVDB_CUDA_SAFE_CALL(cudaMemset(data, value, size));
   #endif
       } else {
           std::memset(data, value, size);
       }
   }
   // computeForEach: dispatches the functor `op` over numItems items using one of three
   // back ends. The run-time conditions that choose between them (and any early-out for
   // an empty range) are not visible in this excerpt; only the preprocessor guards are.
   //   - __CUDACC__: launches parallelForKernel with blockSize threads per block.
   //   - NANOVDB_USE_TBB: builds a tbb::blocked_range<int> over [0, numItems) with
   //     blockSize as its third argument; the call that consumes it is not visible here.
   //   - otherwise: a serial loop calling op(i, i + 1, args...) for each i.
   // `file` and `line` identify the caller and are only used for CUDA error reporting.
   145 template<
typename FunctorT, 
typename... Args>
   146 inline void computeForEach(
bool useCuda, 
int numItems, 
int blockSize, 
const char* file, 
int line, 
const FunctorT& 
op, Args... args)
   152 #if defined(__CUDACC__)   153         static const int WorkPerThread = 1;
   // Rounds up so that blockCount * blockSize covers numItems / WorkPerThread threads.
   154         int blockCount = ((numItems/WorkPerThread) + (blockSize - 1)) / blockSize;
   // Launch arguments: blockCount blocks, blockSize threads, 0 bytes of dynamic shared
   // memory, stream 0.
   155         parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, 
op, args...);
   // A kernel launch returns no status itself; cudaGetLastError() retrieves it.
   156         NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line);
   159 #if defined(NANOVDB_USE_TBB)   160         tbb::blocked_range<int> range(0, numItems, blockSize);
   163         for (
int i = 0; i < numItems; ++i)
   164             op(i, i + 1, args...);
   // Fragment that appears to belong to computeDownload (its signature is not visible in
   // this excerpt): the __CUDACC__ branch copies `size` bytes from src to dst with
   // cudaMemcpyDeviceToHost, the other branch uses std::memcpy.
   // NOTE(review): the cudaMemcpy return value is discarded here, so a failed copy is
   // silent; consider wrapping it in NANOVDB_CUDA_SAFE_CALL.
   172 #if defined(__CUDACC__)   173         cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
   176         std::memcpy(dst, src, size);
   /// @brief Copies @c size bytes from @c src to @c dst.
   ///
   /// @param useCuda  When true the copy is issued as cudaMemcpy with
   ///                 cudaMemcpyDeviceToDevice; when false it is a host std::memcpy.
   /// @param dst      Destination buffer.
   /// @param src      Source buffer; as with std::memcpy, it must not overlap @c dst.
   /// @param size     Number of bytes to copy; 0 is a no-op.
   ///
   /// @note In a translation unit compiled without __CUDACC__, a call with
   ///       useCuda == true performs no work.
   inline void computeCopy(bool useCuda, void* dst, const void* src, size_t size)
   {
       // Nothing to do for an empty range; this also avoids passing possibly-null
       // pointers to memcpy.
       if (size == 0)
           return;

       if (useCuda) {
   #if defined(__CUDACC__)
           // cudaMemcpy reports failure only through its return value, so route it
           // through the file's checker instead of discarding it.
           NANOVDB_CUDA_SAFE_CALL(cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice));
   #endif
       } else {
           std::memcpy(dst, src, size);
       }
   }
 Definition: ComputePrimitives.h:23
void computeDownload(bool useCuda, void *dst, const void *src, size_t size)
Definition: ComputePrimitives.h:169
Definition: ComputePrimitives.h:30
void computeForEach(bool useCuda, int numItems, int blockSize, const char *file, int line, const FunctorT &op, Args...args)
Definition: ComputePrimitives.h:146
void computeFill(bool useCuda, void *data, uint8_t value, size_t size)
Definition: ComputePrimitives.h:134
void computeSync(bool useCuda, const char *file, int line)
Definition: ComputePrimitives.h:125
Definition: ComputePrimitives.h:25
void call(int start, int end, cxx14::index_sequence< Is... >) const 
Definition: ComputePrimitives.h:78
void operator()(int i) const 
Definition: ComputePrimitives.h:83
ApplyFunc(int count, int blockSize, const Fn &fn, Args...args)
Definition: ComputePrimitives.h:69
Definition: ComputePrimitives.h:66
#define __global__
Definition: Util.h:76
void computeCopy(bool useCuda, void *dst, const void *src, size_t size)
Definition: ComputePrimitives.h:180