25 #include <kvikio/error.hpp>
26 #include <kvikio/shim/cuda.hpp>
27 #include <kvikio/utils.hpp>
// Size in bytes of one bounce buffer: 2 << 23 == 2^24 == 16 MiB (16,777,216 bytes).
// Used both as the host allocation size and as the per-iteration chunk size of device I/O.
31 inline constexpr std::size_t posix_bounce_buffer_size = 2 << 23;
// Stack of currently unused host allocations. `get()` hands out the top entry when the stack is
// non-empty; the visible code only touches this member while holding `_mutex`.
43 std::stack<void*> _free_allocs;
// Wrap a raw allocation together with the manager it is handed back to on destruction.
// `manager`: manager whose `put()` receives the pointer when this handle is destroyed.
// `alloc`:   raw pointer exposed through `get()`.
53 Alloc(
AllocRetain* manager,
void* alloc) : _manager(manager), _alloc{alloc} {}
// On destruction the wrapped pointer is passed to the manager's `put()`; it is not freed here.
58 ~
Alloc() noexcept { _manager->put(_alloc); }
// Access the wrapped raw pointer. The handle still hands it to the manager when destroyed.
59 void* get() noexcept {
return _alloc; }
/**
 * @brief Obtain an allocation handle, reusing an entry from `_free_allocs` when one exists.
 *
 * @return An `Alloc` bound to this manager (`this` is passed as the handle's manager).
 */
63 [[nodiscard]]
Alloc get()
// Serialize access to `_free_allocs`.
65 const std::lock_guard lock(_mutex);
// Reuse path: take the allocation currently on top of the free stack.
67 if (!_free_allocs.empty()) {
68 void* ret = _free_allocs.top();
70 return Alloc(
this, ret);
// Allocation path: request a new host allocation of `posix_bounce_buffer_size` bytes.
// NOTE(review): the flag is spelled CU_MEMHOSTREGISTER_PORTABLE although the call is
// MemHostAlloc; the CUDA driver API documents CU_MEMHOSTALLOC_PORTABLE for cuMemHostAlloc.
// Presumably both macros have the same value -- confirm before renaming.
76 CUDA_DRIVER_TRY(cudaAPI::instance().MemHostAlloc(
77 &alloc, posix_bounce_buffer_size, CU_MEMHOSTREGISTER_PORTABLE));
78 return Alloc(
this, alloc);
// Push `alloc` onto the stack of reusable allocations while holding `_mutex`.
// NOTE(review): presumably the body of `put()`, which `Alloc`'s destructor calls -- confirm.
83 const std::lock_guard lock(_mutex);
84 _free_allocs.push(alloc);
// Release pooled allocations with MemFreeHost while holding `_mutex`.
// NOTE(review): the loop terminates only if each freed entry is also removed from
// `_free_allocs`; confirm that a `pop()` follows the free call.
89 const std::lock_guard lock(_mutex);
90 while (!_free_allocs.empty()) {
91 CUDA_DRIVER_TRY(cudaAPI::instance().MemFreeHost(_free_allocs.top()));
// The manager is neither copyable nor movable: all four copy/move operations are deleted.
// `Alloc` handles store a raw `AllocRetain*`, presumably the reason the manager must stay in place.
96 AllocRetain(
const AllocRetain&) =
delete;
97 AllocRetain& operator=(AllocRetain
const&) =
delete;
98 AllocRetain(AllocRetain&& o) =
delete;
99 AllocRetain& operator=(AllocRetain&& o) =
delete;
// Defaulted destructor: it makes no MemFreeHost call, so pointers still stored in
// `_free_allocs` at destruction time are not released by it.
100 ~AllocRetain() noexcept = default;
// Shared `AllocRetain` instance; `posix_device_io` obtains its bounce buffer from it.
103 inline AllocRetain manager;
/**
 * @brief Read from or write to a file descriptor with `::pread` / `::pwrite`.
 *
 * @tparam IsReadOperation `true` selects `::pread`, `false` selects `::pwrite`.
 * @param fd File descriptor passed to `pread`/`pwrite`.
 * @param buf Buffer handed to `pread`/`pwrite` (declared const for both directions).
 * @param count Number of bytes to transfer.
 * @param offset File offset of the first byte.
 * @param partial When true, return right after the first call that did not throw.
 * @return The byte count reported by that first call when `partial` is true; otherwise `count`
 * converted with `convert_size2ssize`.
 * @throws CUfileException for the POSIX error cases below, and with an "EOF" message in the
 * read case.
 */
116 template <
bool IsReadOperation>
117 ssize_t posix_host_io(
int fd, const
void* buf,
size_t count, off_t offset,
bool partial)
119 off_t cur_offset = offset;
120 size_t byte_remaining = count;
// `buf` is const in the signature, but `pread` needs a writable pointer, hence the const_cast.
121 char* buffer =
const_cast<char*
>(
static_cast<const char*
>(buf));
// Keep issuing calls until every requested byte is accounted for.
122 while (byte_remaining > 0) {
124 if constexpr (IsReadOperation) {
125 nbytes = ::pread(fd, buffer, byte_remaining, cur_offset);
127 nbytes = ::pwrite(fd, buffer, byte_remaining, cur_offset);
// Name of the call that was made, used only to build the error messages.
130 const std::string name = IsReadOperation ?
"pread" :
"pwrite";
// EBADF gets a dedicated message pointing at the file's open flags.
131 if (errno == EBADF) {
132 throw CUfileException{std::string{
"POSIX error on " + name +
" at: "} + __FILE__ +
":" +
133 KVIKIO_STRINGIFY(__LINE__) +
": unsupported file open flags"};
// Any other errno value is reported through strerror().
135 throw CUfileException{std::string{
"POSIX error on " + name +
" at: "} + __FILE__ +
":" +
136 KVIKIO_STRINGIFY(__LINE__) +
": " + strerror(errno)};
// Read-only case: reported as EOF.
138 if constexpr (IsReadOperation) {
140 throw CUfileException{std::string{
"POSIX error on pread at: "} + __FILE__ +
":" +
141 KVIKIO_STRINGIFY(__LINE__) +
": EOF"};
// Caller accepts a short transfer: report what this single call moved.
144 if (partial) {
return nbytes; }
// Advance the file position and shrink the remaining byte count by what was transferred.
// NOTE(review): `buffer` has to advance by `nbytes` too for the next iteration to target the
// right memory; no such statement is visible here -- confirm.
146 cur_offset += nbytes;
147 byte_remaining -= nbytes;
149 return convert_size2ssize(count);
/**
 * @brief Transfer data between a file and device memory through a host bounce buffer.
 *
 * @tparam IsReadOperation `true`: file -> bounce buffer -> device (MemcpyHtoD);
 * `false`: device -> bounce buffer (MemcpyDtoH) -> file.
 * @param fd File descriptor forwarded to `posix_host_io`.
 * @param devPtr_base Base device pointer.
 * @param file_offset File offset of the first byte.
 * @param devPtr_offset Offset added to `devPtr_base` to form the device address.
 */
163 template <
bool IsReadOperation>
164 std::size_t posix_device_io(
int fd,
165 const void* devPtr_base,
167 std::size_t file_offset,
168 std::size_t devPtr_offset)
// Take one bounce buffer from the shared manager; the handle's destructor gives it back.
170 auto alloc = manager.get();
171 CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset;
172 off_t cur_file_offset = convert_size2off(file_offset);
// `size` is the total number of bytes to transfer.
173 off_t byte_remaining = convert_size2off(size);
174 const off_t chunk_size2 = convert_size2off(posix_bounce_buffer_size);
// Work in chunks no larger than the bounce buffer.
176 while (byte_remaining > 0) {
177 const off_t nbytes_requested = std::min(chunk_size2, byte_remaining);
// Stays equal to the requested size on the write path, which never reassigns it.
178 ssize_t nbytes_got = nbytes_requested;
179 if constexpr (IsReadOperation) {
// Partial reads are allowed (partial == true); only the bytes actually read go to the device.
180 nbytes_got = posix_host_io<true>(fd, alloc.get(), nbytes_requested, cur_file_offset,
true);
181 CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoD(devPtr, alloc.get(), nbytes_got));
// Write path: stage the chunk in the bounce buffer, then write it with partial == false.
183 CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyDtoH(alloc.get(), devPtr, nbytes_requested));
184 posix_host_io<false>(fd, alloc.get(), nbytes_requested, cur_file_offset,
false);
// Advance the file offset and device address, and shrink the remaining count, by the bytes handled.
186 cur_file_offset += nbytes_got;
187 devPtr += nbytes_got;
188 byte_remaining -= nbytes_got;
/**
 * @brief Read from `fd` into `buf` by forwarding to `detail::posix_host_io<true>`.
 *
 * @param fd File descriptor to read from.
 * @param buf Destination buffer.
 * @param size Number of bytes to read.
 * @param file_offset File offset of the first byte; converted with `convert_size2off`.
 * @param partial Forwarded unchanged; when true the callee returns after its first call.
 * @return The callee's `ssize_t` result, converted to `std::size_t` by the return statement.
 */
208 inline std::size_t posix_host_read(
209 int fd,
void* buf, std::size_t size, std::size_t file_offset,
bool partial)
211 return detail::posix_host_io<true>(fd, buf, size, convert_size2off(file_offset), partial);
/**
 * @brief Write from `buf` to `fd` by forwarding to `detail::posix_host_io<false>`.
 *
 * @param fd File descriptor to write to.
 * @param buf Source buffer.
 * @param size Number of bytes to write.
 * @param file_offset File offset of the first byte; converted with `convert_size2off`.
 * @param partial Forwarded unchanged; when true the callee returns after its first call.
 * @return The callee's `ssize_t` result, converted to `std::size_t` by the return statement.
 */
227 inline std::size_t posix_host_write(
228 int fd,
const void* buf, std::size_t size, std::size_t file_offset,
bool partial)
230 return detail::posix_host_io<false>(fd, buf, size, convert_size2off(file_offset), partial);
/**
 * @brief Read from `fd` into device memory by forwarding to `detail::posix_device_io<true>`.
 *
 * @param fd File descriptor to read from.
 * @param devPtr_base Base device pointer; declared const although the read path copies data
 * to the device (MemcpyHtoD).
 * @param file_offset File offset of the first byte.
 * @param devPtr_offset Offset applied to `devPtr_base`.
 * @return Whatever `detail::posix_device_io<true>` returns.
 */
246 inline std::size_t posix_device_read(
int fd,
247 const void* devPtr_base,
249 std::size_t file_offset,
250 std::size_t devPtr_offset)
252 return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset);
/**
 * @brief Write device memory to `fd` by forwarding to `detail::posix_device_io<false>`.
 *
 * @param fd File descriptor to write to.
 * @param devPtr_base Base device pointer of the source data.
 * @param file_offset File offset of the first byte.
 * @param devPtr_offset Offset applied to `devPtr_base`.
 * @return Whatever `detail::posix_device_io<false>` returns.
 */
268 inline std::size_t posix_device_write(
int fd,
269 const void* devPtr_base,
271 std::size_t file_offset,
272 std::size_t devPtr_offset)
274 return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset);
Class to retain host memory allocations.