20 #include <rmm/detail/cuda_util.hpp>
21 #include <rmm/detail/dynamic_load_runtime.hpp>
22 #include <rmm/detail/error.hpp>
26 #include <rmm/detail/thrust_namespace.h>
27 #include <thrust/optional.h>
29 #include <cuda_runtime_api.h>
34 #if CUDART_VERSION >= 11020
35 #ifndef RMM_DISABLE_CUDA_MALLOC_ASYNC
36 #define RMM_CUDA_MALLOC_ASYNC_SUPPORT
90 thrust::optional<std::size_t> release_threshold = {},
91 thrust::optional<allocation_handle_type> export_handle_type = {})
93 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
95 RMM_EXPECTS(rmm::detail::async_alloc::is_supported(),
96 "cudaMallocAsync not supported with this CUDA driver/runtime version");
99 cudaMemPoolProps pool_props{};
100 pool_props.allocType = cudaMemAllocationTypePinned;
101 pool_props.handleTypes =
static_cast<cudaMemAllocationHandleType
>(
103 RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
104 "Requested IPC memory handle type not supported");
105 pool_props.location.type = cudaMemLocationTypeDevice;
107 cudaMemPool_t cuda_pool_handle{};
108 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
109 pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
114 int driver_version{};
115 RMM_CUDA_TRY(cudaDriverGetVersion(&driver_version));
116 constexpr
auto min_async_version{11050};
117 if (driver_version < min_async_version) {
119 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
120 pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
123 auto const [free, total] = rmm::detail::available_device_memory();
126 uint64_t threshold = release_threshold.value_or(total);
127 RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute(
128 pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold));
132 auto const pool_size = initial_pool_size.value_or(free / 2);
133 auto* ptr = do_allocate(pool_size, cuda_stream_default);
134 do_deallocate(ptr, pool_size, cuda_stream_default);
137 "cudaMallocAsync not supported by the version of the CUDA Toolkit used for this build");
141 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
/**
 * Returns the `cudaMemPool_t` handle for this resource by forwarding to
 * `pool_.pool_handle()`. Never throws (`noexcept`) and does not modify the
 * object (`const`).
 */
146 [[nodiscard]] cudaMemPool_t pool_handle() const noexcept {
return pool_.pool_handle(); }
151 #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)
152 RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaMemPoolDestroy(pool_handle()));
176 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
192 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
193 ptr = pool_.
allocate(bytes, stream);
211 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
212 pool_.deallocate(ptr, bytes, stream);
227 [[nodiscard]]
bool do_is_equal(device_memory_resource
const& other)
const noexcept
override
230 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
231 return (async_mr !=
nullptr) && (this->pool_handle() == async_mr->pool_handle());
233 return async_mr !=
nullptr;
244 [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
247 return std::make_pair(0, 0);
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
device_memory_resource derived class that uses cudaMallocAsync/cudaFreeAsync for allocation/deallocation.
Definition: cuda_async_memory_resource.hpp:51
allocation_handle_type
Flags for specifying memory allocation handle types.
Definition: cuda_async_memory_resource.hpp:63
@ none
Does not allow any export mechanism.
@ win32_kmt
Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
@ win32
Allows a Win32 NT handle to be used for exporting. (HANDLE)
bool supports_streams() const noexcept override
Query whether the resource supports use of non-null CUDA streams for allocation/deallocation.
Definition: cuda_async_memory_resource.hpp:166
bool supports_get_mem_info() const noexcept override
Query whether the resource supports the get_mem_info API.
Definition: cuda_async_memory_resource.hpp:173
cuda_async_memory_resource(thrust::optional< std::size_t > initial_pool_size={}, thrust::optional< std::size_t > release_threshold={}, thrust::optional< allocation_handle_type > export_handle_type={})
Constructs a cuda_async_memory_resource with the optionally specified initial pool size and release threshold.
Definition: cuda_async_memory_resource.hpp:89
device_memory_resource derived class that uses cudaMallocAsync/cudaFreeAsync for allocation/deallocation.
Definition: cuda_async_view_memory_resource.hpp:48
Base class for all RMM device memory allocation.
Definition: device_memory_resource.hpp:89
void * allocate(std::size_t bytes, cuda_stream_view stream=cuda_stream_view{})
Allocates memory of size at least bytes.
Definition: device_memory_resource.hpp:116
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:86
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:44