OpenVDB  9.1.1
CudaDeviceBuffer.h
Go to the documentation of this file.
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: MPL-2.0

/*!
    \file CudaDeviceBuffer.h

    \author Ken Museth

    \date January 8, 2020

    \brief Implements a simple CUDA allocator!

    CudaDeviceBuffer - a class for simple cuda buffer allocation and management
*/
15 
#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
18 
#include <cstdint> // for uint64_t and uint8_t
#include <cstdio>  // for fprintf
#include <cstdlib> // for exit

#include "HostBuffer.h" // for BufferTraits

#include <cuda_runtime_api.h> // for cudaMalloc/cudaMallocManaged/cudaFree
22 
// Convenience macro for checking CUDA runtime API results;
// can be wrapped around any runtime API call. No-op in release builds.
#define cudaCheck(ans) \
    { \
        gpuAssert((ans), __FILE__, __LINE__); \
    }

// Convenience macro for checking that a pointer is non-NULL; no-op in release builds.
#define checkPtr(ptr, msg) \
    { \
        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
    }
34 
namespace nanovdb {

// ----------------------------> CudaDeviceBuffer <--------------------------------------
38 
/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
/// Obviously this class is making explicit use of CUDA so replace it with your own memory
/// allocator if you are not using CUDA.
/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device
/// it is significantly slower than cached (un-pinned) memory on the host.
class CudaDeviceBuffer
{
    uint64_t mSize;               // total number of bytes for the NanoVDB grid.
    uint8_t *mCpuData, *mGpuData; // raw host and device buffers for the NanoVDB grid.

    /// @brief Reports a CUDA runtime error to stderr in debug builds and optionally calls exit().
    /// @return false if @c code is an error (debug builds, abort == false); true otherwise.
    /// @note No-op in release builds: the error code is ignored and true is returned.
    static inline bool gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
    {
#if defined(DEBUG) || defined(_DEBUG)
        if (code != cudaSuccess) {
            fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort)
                exit(code);
            return false;
        }
#endif
        return true;
    }

#if defined(DEBUG) || defined(_DEBUG)
    /// @brief Reports a NULL pointer to stderr and optionally calls exit(). Debug builds only.
    static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
    {
        if (ptr == nullptr) {
            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
            if (abort)
                exit(1);
        }
    }
#else
    /// @brief No-op in release builds.
    static inline void ptrAssert(void*, const char*, const char*, int, bool = true)
    {
    }
#endif

public:
    /// @brief Constructor; allocates @c size bytes of pinned host memory (no device memory yet).
    CudaDeviceBuffer(uint64_t size = 0)
        : mSize(0)
        , mCpuData(nullptr)
        , mGpuData(nullptr)
    {
        this->init(size);
    }
    /// @brief Disallow copy-construction
    CudaDeviceBuffer(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy-constructor
    CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept
        : mSize(other.mSize)
        , mCpuData(other.mCpuData)
        , mGpuData(other.mGpuData)
    {
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
    }
    /// @brief Disallow copy assignment operation
    CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy assignment operation
    CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept
    {
        // Guard against self-move, which would otherwise free the buffers and
        // then "steal" the already-freed pointers from itself.
        if (this != &other) {
            clear();
            mSize = other.mSize;
            mCpuData = other.mCpuData;
            mGpuData = other.mGpuData;
            other.mSize = 0;
            other.mCpuData = nullptr;
            other.mGpuData = nullptr;
        }
        return *this;
    }
    /// @brief Destructor frees memory on both the host and device
    ~CudaDeviceBuffer() { this->clear(); }

    /// @brief Allocates @c size bytes of un-managed pinned host memory (see implementation below).
    void init(uint64_t size);

    /// @brief Returns a pointer to the raw host memory buffer managed by this allocator.
    ///
    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
    uint8_t* data() const { return mCpuData; }
    /// @brief Returns a pointer to the device memory buffer, or NULL if deviceUpload was never called.
    uint8_t* deviceData() const { return mGpuData; }

    /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous!
    ///
    /// @note This will allocate memory on the GPU/device if it is not already allocated
    void deviceUpload(void* stream = 0, bool sync = true) const;

    /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous!
    void deviceDownload(void* stream = 0, bool sync = true) const;

    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
    uint64_t size() const { return mSize; }

    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
    bool empty() const { return mSize == 0; }

    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
    void clear();

    /// @brief Static factory method returning a buffer of @c size bytes; @c context is unused.
    static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr);

}; // CudaDeviceBuffer class
142 
/// @brief Specialization of BufferTraits (declared in HostBuffer.h) advertising
/// that CudaDeviceBuffer maintains dual host/device buffers.
template<>
struct BufferTraits<CudaDeviceBuffer>
{
    static const bool hasDeviceDual = true;
};
148 
// --------------------------> Implementations below <------------------------------------

/// @brief Static factory method returning a CudaDeviceBuffer of @c size bytes.
/// @note The context argument is unused here; it exists so this signature matches
/// the create() API expected of NanoVDB buffer types.
inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer* /*context*/)
{
    return CudaDeviceBuffer(size);
}
155 
/// @brief (Re)allocates the pinned host buffer to hold @c size bytes.
/// A no-op when @c size equals the current size; otherwise any existing
/// allocation (host and device) is released first.
/// @note Device memory is NOT allocated here - deviceUpload() allocates it lazily.
inline void CudaDeviceBuffer::init(uint64_t size)
{
    if (size == mSize) // already sized correctly
        return;
    if (mSize > 0) // release existing host and device buffers
        this->clear();
    if (size == 0)
        return;
    mSize = size;
    cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!)
    checkPtr(mCpuData, "failed to allocate host data");
} // CudaDeviceBuffer::init
168 
/// @brief Copies the host buffer to the device, allocating device memory on demand.
/// If @c sync is false the copy is asynchronous on the given stream.
inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    if (mGpuData == nullptr)
        cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device
        // note: the C-style cast above drops the const that this const method puts on mGpuData
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceUpload
179 
/// @brief Copies the device buffer back to the host. Unlike deviceUpload this never
/// allocates: both buffers must already exist (checked in debug builds).
/// If @c sync is false the copy is asynchronous on the given stream.
inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceDownload
188 
/// @brief De-allocates the device buffer (cudaFree) and the pinned host buffer
/// (cudaFreeHost) and resets this buffer to the empty state.
inline void CudaDeviceBuffer::clear()
{
    if (mGpuData)
        cudaCheck(cudaFree(mGpuData));
    if (mCpuData)
        cudaCheck(cudaFreeHost(mCpuData));
    mCpuData = mGpuData = nullptr;
    mSize = 0;
} // CudaDeviceBuffer::clear
198 
} // namespace nanovdb

#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
uint8_t * deviceData() const
Definition: CudaDeviceBuffer.h:120
uint64_t size() const
Returns the size in bytes of the raw memory buffer managed by this allocator.
Definition: CudaDeviceBuffer.h:131
Definition: HostBuffer.h:94
HostBuffer - a buffer that contains a shared or private bump pool to either externally or internally ...
void init(uint64_t size)
Definition: CudaDeviceBuffer.h:156
static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer *context=nullptr)
Definition: CudaDeviceBuffer.h:151
void deviceDownload(void *stream=0, bool sync=true) const
Copy grid from the GPU/device to the CPU/host. If sync is false the memory copy is asynchronous! ...
Definition: CudaDeviceBuffer.h:180
#define checkPtr(ptr, msg)
Definition: CudaDeviceBuffer.h:30
Definition: NanoVDB.h:184
CudaDeviceBuffer & operator=(const CudaDeviceBuffer &)=delete
Disallow copy assignment operation.
CudaDeviceBuffer(uint64_t size=0)
Definition: CudaDeviceBuffer.h:78
CudaDeviceBuffer(CudaDeviceBuffer &&other) noexcept
Move copy-constructor.
Definition: CudaDeviceBuffer.h:88
~CudaDeviceBuffer()
Destructor frees memory on both the host and device.
Definition: CudaDeviceBuffer.h:112
Simple memory buffer using un-managed pinned host memory when compiled with NVCC. Obviously this clas...
Definition: CudaDeviceBuffer.h:44
bool empty() const
Returns true if this allocator is empty, i.e. has no allocated memory.
Definition: CudaDeviceBuffer.h:134
uint8_t * data() const
Definition: CudaDeviceBuffer.h:119
CudaDeviceBuffer & operator=(CudaDeviceBuffer &&other) noexcept
Move copy assignment operation.
Definition: CudaDeviceBuffer.h:100
#define cudaCheck(ans)
Definition: CudaDeviceBuffer.h:25
void deviceUpload(void *stream=0, bool sync=true) const
Copy grid from the CPU/host to the GPU/device. If sync is false the memory copy is asynchronous! ...
Definition: CudaDeviceBuffer.h:169
void clear()
De-allocate all memory managed by this allocator and set all pointer to NULL.
Definition: CudaDeviceBuffer.h:189