9#if defined(NANOVDB_USE_CUDA) 
   10#include <cuda_runtime_api.h> 
   13#if defined(NANOVDB_USE_TBB) 
   14#include <tbb/parallel_for.h> 
   15#include <tbb/blocked_range.h> 
   24template<std::size_t...>
 
   29template<std::size_t N, std::size_t... Is>
 
   34template<std::size_t... Is>
 
 
   40#if defined(__CUDACC__) 
   42static inline bool checkCUDA(cudaError_t result, 
const char* file, 
const int line)
 
   44    if (result != cudaSuccess) {
 
   45        std::cerr << 
"CUDA Runtime API error " << result << 
" in file " << file << 
", line " << line << 
" : " << cudaGetErrorString(result) << 
".\n";
 
   51#define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__) 
   53static inline void checkErrorCUDA(cudaError_t result, 
const char* file, 
const int line)
 
   55    if (result != cudaSuccess) {
 
   56        std::cerr << 
"CUDA Runtime API error " << result << 
" in file " << file << 
", line " << line << 
" : " << cudaGetErrorString(result) << 
".\n";
 
   61#define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line) 
   65template<
typename Fn, 
typename... Args>
 
   69    ApplyFunc(
int count, 
int blockSize, 
const Fn& fn, Args... args)
 
   71        , mBlockSize(blockSize)
 
 
   77    template<std::size_t... Is>
 
   80        mFunc(start, end, std::get<Is>(mArgs)...);
 
 
   85        int start = i * mBlockSize;
 
   86        int end = i * mBlockSize + mBlockSize;
 
 
   92#if defined(NANOVDB_USE_TBB) 
   93    void operator()(
const tbb::blocked_range<int>& r)
 const 
   95        int start = r.begin();
 
  107    std::tuple<Args...> mArgs;
 
 
  110#if defined(__CUDACC__) 
  112template<
int WorkPerThread, 
typename FnT, 
typename... Args>
 
  113__global__ void parallelForKernel(
int numItems, FnT f, Args... args)
 
  115    for (
int j=0;j<WorkPerThread;++j)
 
  117        int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
 
  119            f(i, i + 1, args...);
 
  127#if defined(__CUDACC__) 
  129        NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
 
 
  134inline void computeFill(
bool useCuda, 
void* data, uint8_t value, 
size_t size)
 
  137#if defined(__CUDACC__) 
  138        cudaMemset(data, value, size);
 
  141        std::memset(data, value, size);
 
 
  145template<
typename FunctorT, 
typename... Args>
 
  146inline void computeForEach(
bool useCuda, 
int numItems, 
int blockSize, 
const char* file, 
int line, 
const FunctorT& op, Args... args)
 
  152#if defined(__CUDACC__) 
  153        static const int WorkPerThread = 1;
 
  154        int blockCount = ((numItems/WorkPerThread) + (blockSize - 1)) / blockSize;
 
  155        parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, op, args...);
 
  156        NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line);
 
  159#if defined(NANOVDB_USE_TBB) 
  160        tbb::blocked_range<int> range(0, numItems, blockSize);
 
  163        for (
int i = 0; i < numItems; ++i)
 
  164            op(i, i + 1, args...);
 
 
  172#if defined(__CUDACC__) 
  173        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
 
  176        std::memcpy(dst, src, size);
 
 
  180inline void computeCopy(
bool useCuda, 
void* dst, 
const void* src, 
size_t size)
 
  183#if defined(__CUDACC__) 
  184        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice);
 
  187        std::memcpy(dst, src, size);
 
 
void computeForEach(bool useCuda, int numItems, int blockSize, const char *file, int line, const FunctorT &op, Args... args)
Definition ComputePrimitives.h:146
void computeDownload(bool useCuda, void *dst, const void *src, size_t size)
Definition ComputePrimitives.h:169
void computeSync(bool useCuda, const char *file, int line)
Definition ComputePrimitives.h:125
void computeFill(bool useCuda, void *data, uint8_t value, size_t size)
Definition ComputePrimitives.h:134
void computeCopy(bool useCuda, void *dst, const void *src, size_t size)
Definition ComputePrimitives.h:180
Definition ComputePrimitives.h:67
void call(int start, int end, cxx14::index_sequence< Is... >) const
Definition ComputePrimitives.h:78
void operator()(int i) const
Definition ComputePrimitives.h:83
ApplyFunc(int count, int blockSize, const Fn &fn, Args... args)
Definition ComputePrimitives.h:69
Definition ComputePrimitives.h:23
#define __global__
Definition Util.h:76
Definition ComputePrimitives.h:26
Definition ComputePrimitives.h:31