libkvikio  23.12.00
posix_io.hpp
1 /*
2  * Copyright (c) 2022-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <unistd.h>
19 #include <cstddef>
20 #include <cstdlib>
21 #include <mutex>
22 #include <stack>
23 
24 #include <cstring>
25 #include <kvikio/error.hpp>
26 #include <kvikio/shim/cuda.hpp>
27 #include <kvikio/utils.hpp>
28 
29 namespace kvikio {
30 
/// Size in bytes of each host bounce buffer used by the POSIX device I/O path (16 MiB).
inline constexpr std::size_t posix_bounce_buffer_size = std::size_t{16} << 20;  // 16 * 2^20 bytes
32 
33 namespace detail {
34 
41 class AllocRetain {
42  private:
43  std::stack<void*> _free_allocs;
44  std::mutex _mutex;
45 
46  public:
47  class Alloc {
48  private:
49  AllocRetain* _manager;
50  void* _alloc;
51 
52  public:
53  Alloc(AllocRetain* manager, void* alloc) : _manager(manager), _alloc{alloc} {}
54  Alloc(const Alloc&) = delete;
55  Alloc& operator=(Alloc const&) = delete;
56  Alloc(Alloc&& o) = delete;
57  Alloc& operator=(Alloc&& o) = delete;
58  ~Alloc() noexcept { _manager->put(_alloc); }
59  void* get() noexcept { return _alloc; }
60  };
61 
62  AllocRetain() = default;
63  [[nodiscard]] Alloc get()
64  {
65  const std::lock_guard lock(_mutex);
66  // Check if we have an allocation available
67  if (!_free_allocs.empty()) {
68  void* ret = _free_allocs.top();
69  _free_allocs.pop();
70  return Alloc(this, ret);
71  }
72 
73  // If no available allocation, allocate and register a new one
74  void* alloc{};
75  // Allocate page-locked host memory
76  CUDA_DRIVER_TRY(cudaAPI::instance().MemHostAlloc(
77  &alloc, posix_bounce_buffer_size, CU_MEMHOSTREGISTER_PORTABLE));
78  return Alloc(this, alloc);
79  }
80 
81  void put(void* alloc)
82  {
83  const std::lock_guard lock(_mutex);
84  _free_allocs.push(alloc);
85  }
86 
87  void clear()
88  {
89  const std::lock_guard lock(_mutex);
90  while (!_free_allocs.empty()) {
91  CUDA_DRIVER_TRY(cudaAPI::instance().MemFreeHost(_free_allocs.top()));
92  _free_allocs.pop();
93  }
94  }
95 
96  AllocRetain(const AllocRetain&) = delete;
97  AllocRetain& operator=(AllocRetain const&) = delete;
98  AllocRetain(AllocRetain&& o) = delete;
99  AllocRetain& operator=(AllocRetain&& o) = delete;
100  ~AllocRetain() noexcept = default;
101 };
102 
103 inline AllocRetain manager; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
104 
116 template <bool IsReadOperation>
117 ssize_t posix_host_io(int fd, const void* buf, size_t count, off_t offset, bool partial)
118 {
119  off_t cur_offset = offset;
120  size_t byte_remaining = count;
121  char* buffer = const_cast<char*>(static_cast<const char*>(buf));
122  while (byte_remaining > 0) {
123  ssize_t nbytes = 0;
124  if constexpr (IsReadOperation) {
125  nbytes = ::pread(fd, buffer, byte_remaining, cur_offset);
126  } else {
127  nbytes = ::pwrite(fd, buffer, byte_remaining, cur_offset);
128  }
129  if (nbytes == -1) {
130  const std::string name = IsReadOperation ? "pread" : "pwrite";
131  if (errno == EBADF) {
132  throw CUfileException{std::string{"POSIX error on " + name + " at: "} + __FILE__ + ":" +
133  KVIKIO_STRINGIFY(__LINE__) + ": unsupported file open flags"};
134  }
135  throw CUfileException{std::string{"POSIX error on " + name + " at: "} + __FILE__ + ":" +
136  KVIKIO_STRINGIFY(__LINE__) + ": " + strerror(errno)};
137  }
138  if constexpr (IsReadOperation) {
139  if (nbytes == 0) {
140  throw CUfileException{std::string{"POSIX error on pread at: "} + __FILE__ + ":" +
141  KVIKIO_STRINGIFY(__LINE__) + ": EOF"};
142  }
143  }
144  if (partial) { return nbytes; }
145  buffer += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
146  cur_offset += nbytes;
147  byte_remaining -= nbytes;
148  }
149  return convert_size2ssize(count);
150 }
151 
163 template <bool IsReadOperation>
164 std::size_t posix_device_io(int fd,
165  const void* devPtr_base,
166  std::size_t size,
167  std::size_t file_offset,
168  std::size_t devPtr_offset)
169 {
170  auto alloc = manager.get();
171  CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset;
172  off_t cur_file_offset = convert_size2off(file_offset);
173  off_t byte_remaining = convert_size2off(size);
174  const off_t chunk_size2 = convert_size2off(posix_bounce_buffer_size);
175 
176  while (byte_remaining > 0) {
177  const off_t nbytes_requested = std::min(chunk_size2, byte_remaining);
178  ssize_t nbytes_got = nbytes_requested;
179  if constexpr (IsReadOperation) {
180  nbytes_got = posix_host_io<true>(fd, alloc.get(), nbytes_requested, cur_file_offset, true);
181  CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoD(devPtr, alloc.get(), nbytes_got));
182  } else { // Is a write operation
183  CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyDtoH(alloc.get(), devPtr, nbytes_requested));
184  posix_host_io<false>(fd, alloc.get(), nbytes_requested, cur_file_offset, false);
185  }
186  cur_file_offset += nbytes_got;
187  devPtr += nbytes_got;
188  byte_remaining -= nbytes_got;
189  }
190  return size;
191 }
192 
193 } // namespace detail
194 
208 inline std::size_t posix_host_read(
209  int fd, void* buf, std::size_t size, std::size_t file_offset, bool partial)
210 {
211  return detail::posix_host_io<true>(fd, buf, size, convert_size2off(file_offset), partial);
212 }
213 
227 inline std::size_t posix_host_write(
228  int fd, const void* buf, std::size_t size, std::size_t file_offset, bool partial)
229 {
230  return detail::posix_host_io<false>(fd, buf, size, convert_size2off(file_offset), partial);
231 }
232 
246 inline std::size_t posix_device_read(int fd,
247  const void* devPtr_base,
248  std::size_t size,
249  std::size_t file_offset,
250  std::size_t devPtr_offset)
251 {
252  return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset);
253 }
254 
268 inline std::size_t posix_device_write(int fd,
269  const void* devPtr_base,
270  std::size_t size,
271  std::size_t file_offset,
272  std::size_t devPtr_offset)
273 {
274  return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset);
275 }
276 
277 } // namespace kvikio
Class to retain host memory allocations.
Definition: posix_io.hpp:41