Interface class for providing input data to the readers.
More...
#include <datasource.hpp>
|
class | buffer |
| Interface class for buffers that the datasource returns to the caller. More...
|
|
class | non_owning_buffer |
| Implementation for non owning buffer where datasource holds buffer until destruction. More...
|
|
class | owning_buffer |
| Derived implementation of buffer that owns the data. More...
|
|
|
virtual | ~datasource () |
| Base class destructor.
|
|
virtual std::unique_ptr< datasource::buffer > | host_read (size_t offset, size_t size)=0 |
| Returns a buffer with a subset of data from the source. More...
|
|
virtual size_t | host_read (size_t offset, size_t size, uint8_t *dst)=0 |
| Reads a selected range into a preallocated buffer. More...
|
|
virtual bool | supports_device_read () const |
| Whether or not this source supports reading directly into device memory. More...
|
|
virtual bool | is_device_read_preferred (size_t size) const |
| Estimates whether a direct device read would be more optimal for the given size. More...
|
|
virtual std::unique_ptr< datasource::buffer > | device_read (size_t offset, size_t size, rmm::cuda_stream_view stream) |
| Returns a device buffer with a subset of data from the source. More...
|
|
virtual size_t | device_read (size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) |
| Reads a selected range into a preallocated device buffer. More...
|
|
virtual std::future< size_t > | device_read_async (size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) |
| Asynchronously reads a selected range into a preallocated device buffer. More...
|
|
virtual size_t | size () const =0 |
| Returns the size of the data in the source. More...
|
|
virtual bool | is_empty () const |
| Returns whether the source is empty (contains no data). More...
|
|
Interface class for providing input data to the readers.
Definition at line 41 of file datasource.hpp.
◆ create() [1/6]
Creates a source from a device memory buffer.
- Parameters
-
buffer | Device buffer object |
- Returns
- Constructed datasource object
◆ create() [2/6]
Creates a source from a host memory buffer.
- Parameters
-
[in] | buffer | Host buffer object |
- Returns
- Constructed datasource object
◆ create() [3/6]
Creates a source from a user-implemented datasource object.
- Parameters
-
[in] | source | Non-owning pointer to the datasource object |
- Returns
- Constructed datasource object
◆ create() [4/6]
Creates a source from a host memory buffer.
Deprecated: since 23.04.
- Parameters
-
[in] | buffer | Host buffer object |
- Returns
- Constructed datasource object
◆ create() [5/6]
static std::unique_ptr<datasource> cudf::io::datasource::create |
( |
std::string const & |
filepath, |
|
|
size_t |
offset = 0 , |
|
|
size_t |
size = 0 |
|
) |
| |
|
static |
Creates a source from a file path.
- Parameters
-
[in] | filepath | Path to the file to use |
[in] | offset | Bytes from the start of the file (the default is zero) |
[in] | size | Bytes from the offset; use zero for entire file (the default is zero) |
- Returns
- Constructed datasource object
◆ create() [6/6]
template<typename T >
static std::vector<std::unique_ptr<datasource> > cudf::io::datasource::create |
( |
std::vector< T > const & |
args | ) |
|
|
inline static
Creates a vector of datasources, one per element in the input vector.
- Parameters
-
[in] | args | vector of parameters |
- Returns
- Constructed vector of datasource objects
Definition at line 138 of file datasource.hpp.
◆ device_read() [1/2]
Returns a device buffer with a subset of data from the source.
For optimal performance, should only be called when is_device_read_preferred returns true. Data source implementations that don't support direct device reads don't need to override this function.
- Exceptions
-
cudf::logic_error | when the object does not support direct device reads, i.e. supports_device_read returns false . |
- Parameters
-
offset | Number of bytes from the start |
size | Number of bytes to read |
stream | CUDA stream to use |
- Returns
- The data buffer in the device memory
Definition at line 215 of file datasource.hpp.
◆ device_read() [2/2]
virtual size_t cudf::io::datasource::device_read |
( |
size_t |
offset, |
|
|
size_t |
size, |
|
|
uint8_t * |
dst, |
|
|
rmm::cuda_stream_view |
stream |
|
) |
| |
|
inline virtual
Reads a selected range into a preallocated device buffer.
For optimal performance, should only be called when is_device_read_preferred returns true. Data source implementations that don't support direct device reads don't need to override this function.
- Exceptions
-
cudf::logic_error | when the object does not support direct device reads, i.e. supports_device_read returns false . |
- Parameters
-
offset | Number of bytes from the start |
size | Number of bytes to read |
dst | Address of the existing device memory |
stream | CUDA stream to use |
- Returns
- The number of bytes read (can be smaller than size)
Definition at line 239 of file datasource.hpp.
◆ device_read_async()
virtual std::future<size_t> cudf::io::datasource::device_read_async |
( |
size_t |
offset, |
|
|
size_t |
size, |
|
|
uint8_t * |
dst, |
|
|
rmm::cuda_stream_view |
stream |
|
) |
| |
|
inline virtual
Asynchronously reads a selected range into a preallocated device buffer.
Returns a future value that contains the number of bytes read. Calling the get()
method of the returned future waits for the read to complete.
For optimal performance, should only be called when is_device_read_preferred returns true. Data source implementations that don't support direct device reads don't need to override this function.
- Exceptions
-
cudf::logic_error | when the object does not support direct device reads, i.e. supports_device_read returns false . |
- Parameters
-
offset | Number of bytes from the start |
size | Number of bytes to read |
dst | Address of the existing device memory |
stream | CUDA stream to use |
- Returns
- The number of bytes read as a future value (can be smaller than size)
Definition at line 264 of file datasource.hpp.
◆ host_read() [1/2]
virtual std::unique_ptr<datasource::buffer> cudf::io::datasource::host_read |
( |
size_t |
offset, |
|
|
size_t |
size |
|
) |
| |
|
pure virtual |
◆ host_read() [2/2]
virtual size_t cudf::io::datasource::host_read |
( |
size_t |
offset, |
|
|
size_t |
size, |
|
|
uint8_t * |
dst |
|
) |
| |
|
pure virtual |
◆ is_device_read_preferred()
virtual bool cudf::io::datasource::is_device_read_preferred |
( |
size_t |
size | ) |
const |
|
inline virtual
Estimates whether a direct device read would be more optimal for the given size.
- Parameters
-
size | Number of bytes to read |
- Returns
- whether the device read is expected to be more performant for the given size
Definition at line 194 of file datasource.hpp.
◆ is_empty()
virtual bool cudf::io::datasource::is_empty |
( |
| ) |
const |
|
inline virtual
Returns whether the source is empty (contains no data).
- Returns
- True if the source is empty (there is no data), False otherwise
Definition at line 284 of file datasource.hpp.
◆ size()
virtual size_t cudf::io::datasource::size |
( |
| ) |
const |
|
pure virtual |
◆ supports_device_read()
virtual bool cudf::io::datasource::supports_device_read |
( |
| ) |
const |
|
inline virtual
Whether or not this source supports reading directly into device memory.
If this function returns true, the datasource will receive calls to device_read() instead of host_read() when the reader processes the data on the device. Most readers will still make host_read() calls, for the parts of input that are processed on the host (e.g. metadata).
Data source implementations that don't support direct device reads don't need to override this function. The implementations that do should override it to return true.
- Returns
- bool Whether this source supports device_read() calls
Definition at line 186 of file datasource.hpp.
The documentation for this class was generated from the following file: