libcudf  24.04.00
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hashing.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/table/table.hpp>
20 
21 #include <rmm/mr/device/per_device_resource.hpp>
22 
23 namespace cudf {
24 
/**
 * @brief Type of hash value.
 *
 * Declared as an alias of `uint32_t`.
 */
35 using hash_value_type = uint32_t;
36 
/**
 * @brief Identifies the hash function to be used.
 *
 * Only HASH_IDENTITY has an explicit value (0); the remaining enumerators take the
 * implicit successive values, so their order here is significant.
 */
41 enum class hash_id {
42  HASH_IDENTITY = 0,  ///< Identity hash function that simply returns the key to be hashed.
43  HASH_MURMUR3,  ///< Murmur3 hash function.
// Listing line 44: this enumerator is documented as a member of hash_id on this page but was
// dropped from the rendered listing (the numbering skipped from 43 to 45).
44  HASH_SPARK_MURMUR3,  ///< Spark Murmur3 hash function.
45  HASH_MD5  ///< MD5 hash function.
46 };
47 
/**
 * @brief Seed value (0) that the seeded hashing declarations in this header use as the
 * default argument for their `seed` parameter.
 */
51 static constexpr uint32_t DEFAULT_HASH_SEED = 0;
52 
/**
 * @brief Computes the hash value of each row in the input set of columns.
 *
 * Declared `[[deprecated]]`; the attribute carries no message, so no replacement is named
 * at this declaration.
 *
 * @param input Table view whose rows are hashed.
 * @param hash_function Hash function to use; defaults to `hash_id::HASH_MURMUR3`.
 * @param seed Seed value; defaults to `DEFAULT_HASH_SEED`.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): presumably one hash value per input row - confirm.
 */
66 [[deprecated]] std::unique_ptr<column> hash(
67  table_view const& input,
68  hash_id hash_function = hash_id::HASH_MURMUR3,
69  uint32_t seed = DEFAULT_HASH_SEED,
70  rmm::cuda_stream_view stream = cudf::get_default_stream(),
71  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
72 
74 namespace hashing {
75 
/**
 * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table.
 *
 * @param input Table view whose rows are hashed.
 * @param seed 32-bit seed value; defaults to `DEFAULT_HASH_SEED`.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): presumably one hash value per input row - confirm.
 */
90 std::unique_ptr<column> murmurhash3_x86_32(
91  table_view const& input,
92  uint32_t seed = DEFAULT_HASH_SEED,
93  rmm::cuda_stream_view stream = cudf::get_default_stream(),
94  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
95 
/**
 * @brief Computes the MurmurHash3 64-bit hash value of each row in the given table.
 *
 * Unlike the other hashing declarations in this header, this one returns a table rather
 * than a single column.
 *
 * @param input Table view whose rows are hashed.
 * @param seed 64-bit seed value; defaults to `DEFAULT_HASH_SEED` (declared `uint32_t`, so the
 *        default is implicitly widened).
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned table - confirm.
 * @return Owning pointer to a table.
 *         NOTE(review): the `x64_128` suffix suggests a 128-bit result split across columns -
 *         the column layout is not visible here; confirm.
 */
109 std::unique_ptr<table> murmurhash3_x64_128(
110  table_view const& input,
111  uint64_t seed = DEFAULT_HASH_SEED,
112  rmm::cuda_stream_view stream = cudf::get_default_stream(),
113  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
114 
/**
 * @brief Computes the MurmurHash3 32-bit hash value of each row in the given table.
 *
 * Declared `[[deprecated]]`; the attribute carries no message, so no replacement is named
 * at this declaration.
 * NOTE(review): the `spark_` prefix suggests a Spark-specific variant of
 * `murmurhash3_x86_32`; how the results differ is not visible here - confirm.
 *
 * @param input Table view whose rows are hashed.
 * @param seed 32-bit seed value; defaults to `DEFAULT_HASH_SEED`.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): presumably one hash value per input row - confirm.
 */
130 [[deprecated]] std::unique_ptr<column> spark_murmurhash3_x86_32(
131  table_view const& input,
132  uint32_t seed = DEFAULT_HASH_SEED,
133  rmm::cuda_stream_view stream = cudf::get_default_stream(),
134  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
135 
/**
 * @brief Computes the MD5 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
145 std::unique_ptr<column> md5(
146  table_view const& input,
147  rmm::cuda_stream_view stream = cudf::get_default_stream(),
148  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
149 
/**
 * @brief Computes the SHA-1 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
159 std::unique_ptr<column> sha1(
160  table_view const& input,
161  rmm::cuda_stream_view stream = cudf::get_default_stream(),
162  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
163 
/**
 * @brief Computes the SHA-224 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
173 std::unique_ptr<column> sha224(
174  table_view const& input,
175  rmm::cuda_stream_view stream = cudf::get_default_stream(),
176  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
177 
/**
 * @brief Computes the SHA-256 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
187 std::unique_ptr<column> sha256(
188  table_view const& input,
189  rmm::cuda_stream_view stream = cudf::get_default_stream(),
190  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
191 
/**
 * @brief Computes the SHA-384 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
201 std::unique_ptr<column> sha384(
202  table_view const& input,
203  rmm::cuda_stream_view stream = cudf::get_default_stream(),
204  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
205 
/**
 * @brief Computes the SHA-512 hash value of each row in the given table.
 *
 * Takes no seed parameter.
 *
 * @param input Table view whose rows are hashed.
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): element type/encoding of the digests is not visible here - confirm.
 */
215 std::unique_ptr<column> sha512(
216  table_view const& input,
217  rmm::cuda_stream_view stream = cudf::get_default_stream(),
218  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
219 
/**
 * @brief Computes the XXHash_64 hash value of each row in the given table.
 *
 * @param input Table view whose rows are hashed.
 * @param seed 64-bit seed value; defaults to `DEFAULT_HASH_SEED` (declared `uint32_t`, so the
 *        default is implicitly widened).
 * @param stream CUDA stream view; defaults to `cudf::get_default_stream()`.
 *        NOTE(review): presumably the stream the work is ordered on - confirm in the implementation.
 * @param mr Device memory resource; defaults to `rmm::mr::get_current_device_resource()`.
 *        NOTE(review): presumably used to allocate the returned column - confirm.
 * @return Owning pointer to a column.
 *         NOTE(review): presumably one 64-bit hash value per input row - confirm.
 */
232 std::unique_ptr<column> xxhash_64(
233  table_view const& input,
234  uint64_t seed = DEFAULT_HASH_SEED,
235  rmm::cuda_stream_view stream = cudf::get_default_stream(),
236  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
237 
238 } // namespace hashing
239  // end of group
241 } // namespace cudf
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
std::unique_ptr< column > hash(table_view const &input, hash_id hash_function=hash_id::HASH_MURMUR3, uint32_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the hash value of each row in the input set of columns.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:35
hash_id
Identifies the hash function to be used.
Definition: hashing.hpp:41
@ HASH_MD5
MD5 hash function.
@ HASH_SPARK_MURMUR3
Spark Murmur3 hash function.
@ HASH_IDENTITY
Identity hash function that simply returns the key to be hashed.
@ HASH_MURMUR3
Murmur3 hash function.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< column > sha1(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the SHA-1 hash value of each row in the given table.
std::unique_ptr< column > xxhash_64(table_view const &input, uint64_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the XXHash_64 hash value of each row in the given table.
std::unique_ptr< column > murmurhash3_x86_32(table_view const &input, uint32_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the MurmurHash3 32-bit hash value of each row in the given table.
std::unique_ptr< column > sha224(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the SHA-224 hash value of each row in the given table.
std::unique_ptr< column > sha384(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the SHA-384 hash value of each row in the given table.
std::unique_ptr< column > sha256(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the SHA-256 hash value of each row in the given table.
std::unique_ptr< column > sha512(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the SHA-512 hash value of each row in the given table.
std::unique_ptr< column > spark_murmurhash3_x86_32(table_view const &input, uint32_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the Spark variant of the MurmurHash3 32-bit hash value of each row in the given table (deprecated).
std::unique_ptr< column > md5(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the MD5 hash value of each row in the given table.
std::unique_ptr< table > murmurhash3_x64_128(table_view const &input, uint64_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the MurmurHash3 (x64_128 variant) hash value of each row in the given table, using a 64-bit seed; the result is returned as a table.
cuDF interfaces
Definition: aggregation.hpp:34
Class definition for cudf::table.
Class definitions for (mutable)_table_view