libcudf  24.04.00
Public Member Functions | Static Public Member Functions | List of all members
cudf::io::parquet_writer_options Class Reference

Settings for write_parquet(). More...

#include <parquet.hpp>

Public Member Functions

 parquet_writer_options ()=default
 Default constructor. More...
 
sink_info const & get_sink () const
 Returns sink info. More...
 
compression_type get_compression () const
 Returns compression format used. More...
 
statistics_freq get_stats_level () const
 Returns level of statistics requested in output file. More...
 
table_view get_table () const
 Returns table_view. More...
 
std::vector< partition_info > const & get_partitions () const
 Returns partitions. More...
 
auto const & get_metadata () const
 Returns associated metadata. More...
 
std::vector< std::map< std::string, std::string > > const & get_key_value_metadata () const
 Returns Key-Value footer metadata information. More...
 
bool is_enabled_int96_timestamps () const
 Returns true if timestamps will be written as INT96. More...
 
auto is_enabled_utc_timestamps () const
 Returns true if timestamps will be written as UTC. More...
 
std::vector< std::string > const & get_column_chunks_file_paths () const
 Returns Column chunks file paths to be set in the raw output metadata. More...
 
auto get_row_group_size_bytes () const
 Returns maximum row group size, in bytes. More...
 
auto get_row_group_size_rows () const
 Returns maximum row group size, in rows. More...
 
auto get_max_page_size_bytes () const
 Returns the maximum uncompressed page size, in bytes. More...
 
auto get_max_page_size_rows () const
 Returns maximum page size, in rows. More...
 
auto get_column_index_truncate_length () const
 Returns maximum length of min or max values in column index, in bytes. More...
 
dictionary_policy get_dictionary_policy () const
 Returns policy for dictionary use. More...
 
auto get_max_dictionary_size () const
 Returns maximum dictionary size, in bytes. More...
 
auto get_max_page_fragment_size () const
 Returns maximum page fragment size, in rows. More...
 
std::shared_ptr< writer_compression_statistics > get_compression_statistics () const
 Returns a shared pointer to the user-provided compression statistics. More...
 
auto is_enabled_write_v2_headers () const
 Returns true if V2 page headers should be written. More...
 
void set_partitions (std::vector< partition_info > partitions)
 Sets partitions. More...
 
void set_metadata (table_input_metadata metadata)
 Sets metadata. More...
 
void set_key_value_metadata (std::vector< std::map< std::string, std::string >> metadata)
 Sets Key-Value footer metadata. More...
 
void set_stats_level (statistics_freq sf)
 Sets the level of statistics. More...
 
void set_compression (compression_type compression)
 Sets compression type. More...
 
void enable_int96_timestamps (bool req)
 Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false. More...
 
void enable_utc_timestamps (bool val)
 Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true. More...
 
void set_column_chunks_file_paths (std::vector< std::string > file_paths)
 Sets column chunks file path to be set in the raw output metadata. More...
 
void set_row_group_size_bytes (size_t size_bytes)
 Sets the maximum row group size, in bytes. More...
 
void set_row_group_size_rows (size_type size_rows)
 Sets the maximum row group size, in rows. More...
 
void set_max_page_size_bytes (size_t size_bytes)
 Sets the maximum uncompressed page size, in bytes. More...
 
void set_max_page_size_rows (size_type size_rows)
 Sets the maximum page size, in rows. More...
 
void set_column_index_truncate_length (int32_t size_bytes)
 Sets the maximum length of min or max values in column index, in bytes. More...
 
void set_dictionary_policy (dictionary_policy policy)
 Sets the policy for dictionary use. More...
 
void set_max_dictionary_size (size_t size_bytes)
 Sets the maximum dictionary size, in bytes. More...
 
void set_max_page_fragment_size (size_type size_rows)
 Sets the maximum page fragment size, in rows. More...
 
void set_compression_statistics (std::shared_ptr< writer_compression_statistics > comp_stats)
 Sets the pointer to the output compression statistics. More...
 
void enable_write_v2_headers (bool val)
 Sets preference for V2 page headers. Write V2 page headers if set to true. More...
 

Static Public Member Functions

static parquet_writer_options_builder builder (sink_info const &sink, table_view const &table)
 Create builder to create parquet_writer_options. More...
 
static parquet_writer_options_builder builder ()
 Create builder to create parquet_writer_options. More...
 

Detailed Description

Settings for write_parquet().

Definition at line 523 of file parquet.hpp.

Constructor & Destructor Documentation

◆ parquet_writer_options()

cudf::io::parquet_writer_options::parquet_writer_options ( )
default

Default constructor.

This has been added since Cython requires a default constructor to create objects on stack.

Member Function Documentation

◆ builder() [1/2]

static parquet_writer_options_builder cudf::io::parquet_writer_options::builder ( )
static

Create builder to create parquet_writer_options.

Returns
parquet_writer_options_builder

◆ builder() [2/2]

static parquet_writer_options_builder cudf::io::parquet_writer_options::builder ( sink_info const &  sink,
table_view const &  table 
)
static

Create builder to create parquet_writer_options.

Parameters
sink: The sink used for writer output
table: Table to be written to output
Returns
Builder to build parquet_writer_options

◆ enable_int96_timestamps()

void cudf::io::parquet_writer_options::enable_int96_timestamps ( bool  req)
inline

Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false.

Parameters
req: Boolean value to enable/disable writing of INT96 timestamps

Definition at line 806 of file parquet.hpp.

◆ enable_utc_timestamps()

void cudf::io::parquet_writer_options::enable_utc_timestamps ( bool  val)
inline

Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true.

Parameters
val: Boolean value to enable/disable writing of timestamps as UTC.

Definition at line 813 of file parquet.hpp.

◆ enable_write_v2_headers()

void cudf::io::parquet_writer_options::enable_write_v2_headers ( bool  val)
inline

Sets preference for V2 page headers. Write V2 page headers if set to true.

Parameters
val: Boolean value to enable/disable writing of V2 page headers.

Definition at line 894 of file parquet.hpp.

◆ get_column_chunks_file_paths()

std::vector<std::string> const& cudf::io::parquet_writer_options::get_column_chunks_file_paths ( ) const
inline

Returns Column chunks file paths to be set in the raw output metadata.

Returns
Column chunks file paths to be set in the raw output metadata

Definition at line 676 of file parquet.hpp.

◆ get_column_index_truncate_length()

auto cudf::io::parquet_writer_options::get_column_index_truncate_length ( ) const
inline

Returns maximum length of min or max values in column index, in bytes.

Returns
length min/max will be truncated to

Definition at line 724 of file parquet.hpp.

◆ get_compression()

compression_type cudf::io::parquet_writer_options::get_compression ( ) const
inline

Returns compression format used.

Returns
Compression format

Definition at line 617 of file parquet.hpp.

◆ get_compression_statistics()

std::shared_ptr<writer_compression_statistics> cudf::io::parquet_writer_options::get_compression_statistics ( ) const
inline

Returns a shared pointer to the user-provided compression statistics.

Returns
Compression statistics

Definition at line 752 of file parquet.hpp.

◆ get_dictionary_policy()

dictionary_policy cudf::io::parquet_writer_options::get_dictionary_policy ( ) const
inline

Returns policy for dictionary use.

Returns
policy for dictionary use

Definition at line 731 of file parquet.hpp.

◆ get_key_value_metadata()

std::vector<std::map<std::string, std::string> > const& cudf::io::parquet_writer_options::get_key_value_metadata ( ) const
inline

Returns Key-Value footer metadata information.

Returns
Key-Value footer metadata information

Definition at line 652 of file parquet.hpp.

◆ get_max_dictionary_size()

auto cudf::io::parquet_writer_options::get_max_dictionary_size ( ) const
inline

Returns maximum dictionary size, in bytes.

Returns
Maximum dictionary size, in bytes.

Definition at line 738 of file parquet.hpp.

◆ get_max_page_fragment_size()

auto cudf::io::parquet_writer_options::get_max_page_fragment_size ( ) const
inline

Returns maximum page fragment size, in rows.

Returns
Maximum page fragment size, in rows.

Definition at line 745 of file parquet.hpp.

◆ get_max_page_size_bytes()

auto cudf::io::parquet_writer_options::get_max_page_size_bytes ( ) const
inline

Returns the maximum uncompressed page size, in bytes.

If set larger than the row group size, then this will return the row group size.

Returns
Maximum uncompressed page size, in bytes

Definition at line 702 of file parquet.hpp.

◆ get_max_page_size_rows()

auto cudf::io::parquet_writer_options::get_max_page_size_rows ( ) const
inline

Returns maximum page size, in rows.

If set larger than the row group size, then this will return the row group size.

Returns
Maximum page size, in rows

Definition at line 714 of file parquet.hpp.

◆ get_metadata()

auto const& cudf::io::parquet_writer_options::get_metadata ( ) const
inline

Returns associated metadata.

Returns
Associated metadata

Definition at line 645 of file parquet.hpp.

◆ get_partitions()

std::vector<partition_info> const& cudf::io::parquet_writer_options::get_partitions ( ) const
inline

Returns partitions.

Returns
Partitions

Definition at line 638 of file parquet.hpp.

◆ get_row_group_size_bytes()

auto cudf::io::parquet_writer_options::get_row_group_size_bytes ( ) const
inline

Returns maximum row group size, in bytes.

Returns
Maximum row group size, in bytes

Definition at line 686 of file parquet.hpp.

◆ get_row_group_size_rows()

auto cudf::io::parquet_writer_options::get_row_group_size_rows ( ) const
inline

Returns maximum row group size, in rows.

Returns
Maximum row group size, in rows

Definition at line 693 of file parquet.hpp.

◆ get_sink()

sink_info const& cudf::io::parquet_writer_options::get_sink ( ) const
inline

Returns sink info.

Returns
Sink info

Definition at line 610 of file parquet.hpp.

◆ get_stats_level()

statistics_freq cudf::io::parquet_writer_options::get_stats_level ( ) const
inline

Returns level of statistics requested in output file.

Returns
level of statistics requested in output file

Definition at line 624 of file parquet.hpp.

◆ get_table()

table_view cudf::io::parquet_writer_options::get_table ( ) const
inline

Returns table_view.

Returns
Table view

Definition at line 631 of file parquet.hpp.

◆ is_enabled_int96_timestamps()

bool cudf::io::parquet_writer_options::is_enabled_int96_timestamps ( ) const
inline

Returns true if timestamps will be written as INT96.

Returns
true if timestamps will be written as INT96

Definition at line 662 of file parquet.hpp.

◆ is_enabled_utc_timestamps()

auto cudf::io::parquet_writer_options::is_enabled_utc_timestamps ( ) const
inline

Returns true if timestamps will be written as UTC.

Returns
true if timestamps will be written as UTC

Definition at line 669 of file parquet.hpp.

◆ is_enabled_write_v2_headers()

auto cudf::io::parquet_writer_options::is_enabled_write_v2_headers ( ) const
inline

Returns true if V2 page headers should be written.

Returns
true if V2 page headers should be written.

Definition at line 762 of file parquet.hpp.

◆ set_column_chunks_file_paths()

void cudf::io::parquet_writer_options::set_column_chunks_file_paths ( std::vector< std::string >  file_paths)

Sets column chunks file path to be set in the raw output metadata.

Parameters
file_paths: Vector of strings indicating the file paths. Must be the same size as the number of data sinks in sink info

◆ set_column_index_truncate_length()

void cudf::io::parquet_writer_options::set_column_index_truncate_length ( int32_t  size_bytes)

Sets the maximum length of min or max values in column index, in bytes.

Parameters
size_bytes: Length that min/max values will be truncated to

◆ set_compression()

void cudf::io::parquet_writer_options::set_compression ( compression_type  compression)
inline

Sets compression type.

Parameters
compression: The compression type to use

Definition at line 798 of file parquet.hpp.

◆ set_compression_statistics()

void cudf::io::parquet_writer_options::set_compression_statistics ( std::shared_ptr< writer_compression_statistics >  comp_stats)
inline

Sets the pointer to the output compression statistics.

Parameters
comp_stats: Pointer to compression statistics to be updated after writing

Definition at line 884 of file parquet.hpp.

◆ set_dictionary_policy()

void cudf::io::parquet_writer_options::set_dictionary_policy ( dictionary_policy  policy)

Sets the policy for dictionary use.

Parameters
policy: Policy for dictionary use

◆ set_key_value_metadata()

void cudf::io::parquet_writer_options::set_key_value_metadata ( std::vector< std::map< std::string, std::string >>  metadata)

Sets Key-Value footer metadata.

Parameters
metadata: Key-Value footer metadata

◆ set_max_dictionary_size()

void cudf::io::parquet_writer_options::set_max_dictionary_size ( size_t  size_bytes)

Sets the maximum dictionary size, in bytes.

Parameters
size_bytes: Maximum dictionary size, in bytes

◆ set_max_page_fragment_size()

void cudf::io::parquet_writer_options::set_max_page_fragment_size ( size_type  size_rows)

Sets the maximum page fragment size, in rows.

Parameters
size_rows: Maximum page fragment size, in rows.

◆ set_max_page_size_bytes()

void cudf::io::parquet_writer_options::set_max_page_size_bytes ( size_t  size_bytes)

Sets the maximum uncompressed page size, in bytes.

Parameters
size_bytes: Maximum uncompressed page size, in bytes to set

◆ set_max_page_size_rows()

void cudf::io::parquet_writer_options::set_max_page_size_rows ( size_type  size_rows)

Sets the maximum page size, in rows.

Parameters
size_rows: Maximum page size, in rows to set

◆ set_metadata()

void cudf::io::parquet_writer_options::set_metadata ( table_input_metadata  metadata)
inline

Sets metadata.

Parameters
metadata: Associated metadata

Definition at line 777 of file parquet.hpp.

◆ set_partitions()

void cudf::io::parquet_writer_options::set_partitions ( std::vector< partition_info >  partitions)

Sets partitions.

Parameters
partitions: Partitions of input table in {start_row, num_rows} pairs. If specified, must be the same size as the number of sinks in sink_info

◆ set_row_group_size_bytes()

void cudf::io::parquet_writer_options::set_row_group_size_bytes ( size_t  size_bytes)

Sets the maximum row group size, in bytes.

Parameters
size_bytes: Maximum row group size, in bytes to set

◆ set_row_group_size_rows()

void cudf::io::parquet_writer_options::set_row_group_size_rows ( size_type  size_rows)

Sets the maximum row group size, in rows.

Parameters
size_rows: Maximum row group size, in rows to set

◆ set_stats_level()

void cudf::io::parquet_writer_options::set_stats_level ( statistics_freq  sf)
inline

Sets the level of statistics.

Parameters
sf: Level of statistics requested in the output file

Definition at line 791 of file parquet.hpp.


The documentation for this class was generated from the following file: