libcudf  24.02.00
experimental/row_operators.cuh
1 /*
2  * Copyright (c) 2022-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/iterator.cuh>
21 #include <cudf/detail/utilities/algorithm.cuh>
22 #include <cudf/detail/utilities/assert.cuh>
23 #include <cudf/hashing/detail/default_hash.cuh>
24 #include <cudf/hashing/detail/hashing.hpp>
25 #include <cudf/lists/detail/dremel.hpp>
26 #include <cudf/lists/list_device_view.cuh>
27 #include <cudf/lists/lists_column_device_view.cuh>
28 #include <cudf/sorting.hpp>
29 #include <cudf/structs/structs_column_device_view.cuh>
30 #include <cudf/table/row_operators.cuh>
35 
36 #include <thrust/detail/use_default.h>
37 #include <thrust/equal.h>
38 #include <thrust/execution_policy.h>
39 #include <thrust/functional.h>
40 #include <thrust/iterator/counting_iterator.h>
41 #include <thrust/iterator/iterator_adaptor.h>
42 #include <thrust/iterator/iterator_categories.h>
43 #include <thrust/iterator/iterator_facade.h>
44 #include <thrust/iterator/transform_iterator.h>
45 #include <thrust/logical.h>
46 #include <thrust/swap.h>
47 #include <thrust/transform_reduce.h>
48 
49 #include <cuda/std/tuple>
50 #include <cuda/std/utility>
51 
52 #include <limits>
53 #include <memory>
54 #include <optional>
55 #include <type_traits>
56 #include <utility>
57 
58 namespace cudf {
59 
60 namespace experimental {
61 
77 template <cudf::type_id t>
80  using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
81 };
82 
83 namespace row {
84 
85 enum class lhs_index_type : size_type {};
86 enum class rhs_index_type : size_type {};
87 
101 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
102 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
103  Index,
104  thrust::use_default,
105  thrust::random_access_traversal_tag,
106  Index,
107  Underlying> {
108  using super_t =
109  thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
110 
116  explicit constexpr strong_index_iterator(Underlying n) : begin{n} {}
117 
119 
120  private:
121  __device__ constexpr void increment() { ++begin; }
122  __device__ constexpr void decrement() { --begin; }
123 
124  __device__ constexpr void advance(Underlying n) { begin += n; }
125 
126  __device__ constexpr bool equal(strong_index_iterator<Index> const& other) const noexcept
127  {
128  return begin == other.begin;
129  }
130 
131  __device__ constexpr Index dereference() const noexcept { return static_cast<Index>(begin); }
132 
133  __device__ constexpr Underlying distance_to(
134  strong_index_iterator<Index> const& other) const noexcept
135  {
136  return other.begin - begin;
137  }
138 
139  Underlying begin{};
140 };
141 
145 using lhs_iterator = strong_index_iterator<lhs_index_type>;
146 
150 using rhs_iterator = strong_index_iterator<rhs_index_type>;
151 
152 namespace lexicographic {
153 
169  template <typename Element>
170  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
171  {
172  return detail::compare_elements(lhs, rhs);
173  }
174 };
175 
189  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
190  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
191  {
192  return detail::compare_elements(lhs, rhs);
193  }
194 
202  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
203  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
204  {
205  if (isnan(lhs)) {
207  } else if (isnan(rhs)) {
208  return weak_ordering::LESS;
209  }
210 
211  return detail::compare_elements(lhs, rhs);
212  }
213 };
214 
215 using optional_dremel_view = thrust::optional<detail::dremel_device_view const>;
216 
217 // The has_nested_columns template parameter of the device_row_comparator is
218 // necessary to help the compiler optimize our code. Without it, the list and
219 // struct view specializations are present in the code paths used for primitive
220 // types, and the compiler fails to inline this nearly as well resulting in a
221 // significant performance drop. As a result, there is some minor tension in
222 // the current design between the presence of this parameter and the way that
223 // the Dremel data is passed around, first as a
224 // std::optional<device_span<dremel_device_view>> in the
225 // preprocessed_table/device_row_comparator (which is always valid when
226 // has_nested_columns and is otherwise invalid) that is then unpacked to a
227 // thrust::optional<dremel_device_view> at the element_comparator level (which
228 // is always valid for a list column and otherwise invalid). We cannot use an
229 // additional template parameter for the element_comparator on a per-column
230 // basis because we cannot conditionally define dremel_device_view member
231 // variables without jumping through extra hoops with inheritance, so the
232 // thrust::optional<dremel_device_view> member must be an optional rather than
233 // a raw dremel_device_view.
264 template <bool has_nested_columns,
265  typename Nullate,
266  typename PhysicalElementComparator = sorting_physical_element_comparator>
268  public:
269  friend class self_comparator;
270  friend class two_table_comparator;
271 
291  device_row_comparator(Nullate check_nulls,
292  table_device_view lhs,
293  table_device_view rhs,
294  device_span<detail::dremel_device_view const> l_dremel_device_views,
295  device_span<detail::dremel_device_view const> r_dremel_device_views,
296  std::optional<device_span<int const>> depth = std::nullopt,
297  std::optional<device_span<order const>> column_order = std::nullopt,
298  std::optional<device_span<null_order const>> null_precedence = std::nullopt,
299  PhysicalElementComparator comparator = {}) noexcept
300  : _lhs{lhs},
301  _rhs{rhs},
302  _l_dremel(l_dremel_device_views),
303  _r_dremel(r_dremel_device_views),
304  _check_nulls{check_nulls},
305  _depth{depth},
306  _column_order{column_order},
307  _null_precedence{null_precedence},
308  _comparator{comparator}
309  {
310  }
311 
330  template <bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
332  Nullate check_nulls,
333  table_device_view lhs,
334  table_device_view rhs,
335  std::optional<device_span<order const>> column_order = std::nullopt,
336  std::optional<device_span<null_order const>> null_precedence = std::nullopt,
337  PhysicalElementComparator comparator = {}) noexcept
338  : _lhs{lhs},
339  _rhs{rhs},
340  _l_dremel{},
341  _r_dremel{},
342  _check_nulls{check_nulls},
343  _depth{},
344  _column_order{column_order},
345  _null_precedence{null_precedence},
346  _comparator{comparator}
347  {
348  }
349 
354  public:
371  __device__ element_comparator(Nullate check_nulls,
372  column_device_view lhs,
373  column_device_view rhs,
374  null_order null_precedence = null_order::BEFORE,
375  int depth = 0,
376  PhysicalElementComparator comparator = {},
377  optional_dremel_view l_dremel_device_view = {},
378  optional_dremel_view r_dremel_device_view = {})
379  : _lhs{lhs},
380  _rhs{rhs},
381  _check_nulls{check_nulls},
382  _null_precedence{null_precedence},
383  _depth{depth},
384  _l_dremel_device_view{l_dremel_device_view},
385  _r_dremel_device_view{r_dremel_device_view},
386  _comparator{comparator}
387  {
388  }
389 
398  template <typename Element,
399  CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
400  __device__ cuda::std::pair<weak_ordering, int> operator()(
401  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
402  {
403  if (_check_nulls) {
404  bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
405  bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
406 
407  if (lhs_is_null or rhs_is_null) { // at least one is null
408  return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
409  }
410  }
411 
412  return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
413  _rhs.element<Element>(rhs_element_index)),
414  std::numeric_limits<int>::max());
415  }
416 
424  template <typename Element,
425  CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
426  (not has_nested_columns or not cudf::is_nested<Element>()))>
427  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type const,
428  size_type const) const noexcept
429  {
430  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
431  }
432 
441  template <typename Element,
442  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
443  __device__ cuda::std::pair<weak_ordering, int> operator()(
444  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
445  {
446  column_device_view lcol = _lhs;
447  column_device_view rcol = _rhs;
448  int depth = _depth;
449  while (lcol.type().id() == type_id::STRUCT) {
450  bool const lhs_is_null{lcol.is_null(lhs_element_index)};
451  bool const rhs_is_null{rcol.is_null(rhs_element_index)};
452 
453  if (lhs_is_null or rhs_is_null) { // at least one is null
454  weak_ordering state = null_compare(lhs_is_null, rhs_is_null, _null_precedence);
455  return cuda::std::pair(state, depth);
456  }
457 
458  if (lcol.num_child_columns() == 0) {
459  return cuda::std::pair(weak_ordering::EQUIVALENT, std::numeric_limits<int>::max());
460  }
461 
462  // Non-empty structs have been modified to only have 1 child when using this.
465  ++depth;
466  }
467 
468  return cudf::type_dispatcher<dispatch_void_if_nested>(
469  lcol.type(),
470  element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator},
471  lhs_element_index,
472  rhs_element_index);
473  }
474 
483  template <typename Element,
484  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
485  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type lhs_element_index,
486  size_type rhs_element_index)
487  {
488  // only order top-NULLs according to null_order
489  auto const is_l_row_null = _lhs.is_null(lhs_element_index);
490  auto const is_r_row_null = _rhs.is_null(rhs_element_index);
491  if (is_l_row_null || is_r_row_null) {
492  return cuda::std::pair(null_compare(is_l_row_null, is_r_row_null, _null_precedence),
493  _depth);
494  }
495 
496  // These are all the values from the Dremel encoding.
497  auto const l_max_def_level = _l_dremel_device_view->max_def_level;
498  auto const r_max_def_level = _r_dremel_device_view->max_def_level;
499  auto const l_def_levels = _l_dremel_device_view->def_levels;
500  auto const r_def_levels = _r_dremel_device_view->def_levels;
501  auto const l_rep_levels = _l_dremel_device_view->rep_levels;
502  auto const r_rep_levels = _r_dremel_device_view->rep_levels;
503 
504  // Traverse the nested list hierarchy to get a column device view
505  // pointing to the underlying child data.
506  column_device_view lcol = _lhs.slice(lhs_element_index, 1);
507  column_device_view rcol = _rhs.slice(rhs_element_index, 1);
508 
509  while (lcol.type().id() == type_id::LIST) {
512  }
513 
514  // These start and end values indicate the start and end points of all
515  // the elements of the lists in the current list element
516  // (`[lhs|rhs]_element_index`) that we are comparing.
517  auto const l_offsets = _l_dremel_device_view->offsets;
518  auto const r_offsets = _r_dremel_device_view->offsets;
519  auto l_start = l_offsets[lhs_element_index];
520  auto l_end = l_offsets[lhs_element_index + 1];
521  auto r_start = r_offsets[rhs_element_index];
522  auto r_end = r_offsets[rhs_element_index + 1];
523 
524  // This comparator will be used to compare leaf (non-nested) data types.
525  auto comparator =
526  element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator};
527 
528  // Loop over each element in the encoding. Note that this includes nulls
529  // and empty lists, so not every index corresponds to an actual element
530  // in the child column. The element_index is used to keep track of the current
531  // child element that we're actually comparing.
532  for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
533  l_dremel_index < l_end and r_dremel_index < r_end;
534  ++l_dremel_index, ++r_dremel_index) {
535  auto const l_rep_level = l_rep_levels[l_dremel_index];
536  auto const r_rep_level = r_rep_levels[r_dremel_index];
537 
538  // early exit for smaller sub-list
539  if (l_rep_level != r_rep_level) {
540  // the lower repetition level is a smaller sub-list
541  return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
542  : cuda::std::pair(weak_ordering::GREATER, _depth);
543  }
544 
545  // only compare if left and right are at same nesting level
546  auto const l_def_level = l_def_levels[l_dremel_index];
547  auto const r_def_level = r_def_levels[r_dremel_index];
548 
549  // either left or right are empty or NULLs of arbitrary nesting
550  if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
551  // in the fully unraveled version of the list column, only the
552  // most nested NULLs and leafs are present
553  // In this rare condition that we get to the most nested NULL, we increment
554  // element_index because either both rows have a deeply nested NULL at the
555  // same position, and we'll "continue" in our iteration, or we will early
556  // exit if only one of the rows has a deeply nested NULL
557  if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
558  (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
559  ++element_index;
560  }
561  if (l_def_level == r_def_level) { continue; }
562  // We require [] < [NULL] < [leaf] for nested nulls.
563  // The null_precedence only affects top level nulls.
564  return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
565  : cuda::std::pair(weak_ordering::GREATER, _depth);
566  }
567 
568  // finally, compare leaf to leaf
570  int last_null_depth = _depth;
571  cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
572  lcol.type(), comparator, element_index, element_index);
573  if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); }
574  ++element_index;
575  }
576 
577  // If we have reached this stage, we know that definition levels,
578  // repetition levels, and actual elements are identical in both list
579  // columns up to the `min(l_end - l_start, r_end - r_start)` element of
580  // the Dremel encoding. However, two lists can only compare equivalent if
581  // they are of the same length. Otherwise, the shorter of the two is less
582  // than the longer. This final check determines the appropriate resulting
583  // ordering by checking how many total elements each list is composed of.
584  return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
585  }
586 
587  private:
588  column_device_view const _lhs;
589  column_device_view const _rhs;
590  Nullate const _check_nulls;
591  null_order const _null_precedence;
592  int const _depth;
593  optional_dremel_view _l_dremel_device_view;
594  optional_dremel_view _r_dremel_device_view;
595  PhysicalElementComparator const _comparator;
596  };
597 
598  public:
608  __device__ constexpr weak_ordering operator()(size_type const lhs_index,
609  size_type const rhs_index) const noexcept
610  {
611  int last_null_depth = std::numeric_limits<int>::max();
612  size_type list_column_index{-1};
613  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
614  if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
615 
616  int const depth = _depth.has_value() ? (*_depth)[i] : 0;
617  if (depth > last_null_depth) { continue; }
618 
619  bool const ascending =
620  _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true;
621 
622  null_order const null_precedence =
623  _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
624 
625  // TODO: At what point do we verify that the columns of lhs and rhs are
626  // all of the same types? I assume that it's already happened before
627  // here, otherwise the current code would be failing.
628  auto const [l_dremel_i, r_dremel_i] =
629  _lhs.column(i).type().id() == type_id::LIST
630  ? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
631  optional_dremel_view(_r_dremel[list_column_index]))
632  : std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
633 
634  auto element_comp = element_comparator{_check_nulls,
635  _lhs.column(i),
636  _rhs.column(i),
637  null_precedence,
638  depth,
639  _comparator,
640  l_dremel_i,
641  r_dremel_i};
642 
643  weak_ordering state;
644  cuda::std::tie(state, last_null_depth) =
645  cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index);
646 
647  if (state == weak_ordering::EQUIVALENT) { continue; }
648 
649  return ascending
650  ? state
652  }
654  }
655 
656  private:
657  table_device_view const _lhs;
658  table_device_view const _rhs;
661  Nullate const _check_nulls;
662  std::optional<device_span<int const>> const _depth;
663  std::optional<device_span<order const>> const _column_order;
664  std::optional<device_span<null_order const>> const _null_precedence;
665  PhysicalElementComparator const _comparator;
666 }; // class device_row_comparator
667 
678 template <typename Comparator, weak_ordering... values>
679 struct weak_ordering_comparator_impl {
680  static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
681  "weak_ordering_comparator should not be used for pure equality comparisons. The "
682  "`row_equality_comparator` should be used instead");
683 
684  template <typename LhsType, typename RhsType>
685  __device__ constexpr bool operator()(LhsType const lhs_index,
686  RhsType const rhs_index) const noexcept
687  {
688  weak_ordering const result = comparator(lhs_index, rhs_index);
689  return ((result == values) || ...);
690  }
691  Comparator const comparator;
692 };
693 
700 template <typename Comparator>
701 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
707  less_comparator(Comparator const& comparator)
708  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS>{comparator}
709  {
710  }
711 };
712 
720 template <typename Comparator>
722  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
728  less_equivalent_comparator(Comparator const& comparator)
729  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT>{
730  comparator}
731  {
732  }
733 };
734 
742  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
743 
766  static std::shared_ptr<preprocessed_table> create(table_view const& table,
767  host_span<order const> column_order,
768  host_span<null_order const> null_precedence,
769  rmm::cuda_stream_view stream);
770 
792  static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>> create(
793  table_view const& lhs,
794  table_view const& rhs,
795  host_span<order const> column_order,
796  host_span<null_order const> null_precedence,
797  rmm::cuda_stream_view stream);
798 
799  private:
800  friend class self_comparator;
801  friend class two_table_comparator;
802 
822  static std::shared_ptr<preprocessed_table> create(
823  table_view const& preprocessed_input,
824  std::vector<int>&& verticalized_col_depths,
825  std::vector<std::unique_ptr<column>>&& transformed_columns,
826  host_span<order const> column_order,
827  host_span<null_order const> null_precedence,
828  bool has_ranked_children,
829  rmm::cuda_stream_view stream);
830 
858  rmm::device_uvector<order>&& column_order,
859  rmm::device_uvector<null_order>&& null_precedence,
861  std::vector<detail::dremel_data>&& dremel_data,
863  std::vector<std::unique_ptr<column>>&& transformed_columns,
864  bool has_ranked_children);
865 
867  rmm::device_uvector<order>&& column_order,
868  rmm::device_uvector<null_order>&& null_precedence,
870  std::vector<std::unique_ptr<column>>&& transformed_columns,
871  bool has_ranked_children);
872 
878  operator table_device_view() { return *_t; }
879 
886  [[nodiscard]] std::optional<device_span<order const>> column_order() const
887  {
888  return _column_order.size() ? std::optional<device_span<order const>>(_column_order)
889  : std::nullopt;
890  }
891 
899  [[nodiscard]] std::optional<device_span<null_order const>> null_precedence() const
900  {
901  return _null_precedence.size() ? std::optional<device_span<null_order const>>(_null_precedence)
902  : std::nullopt;
903  }
904 
913  [[nodiscard]] std::optional<device_span<int const>> depths() const
914  {
915  return _depths.size() ? std::optional<device_span<int const>>(_depths) : std::nullopt;
916  }
917 
918  [[nodiscard]] device_span<detail::dremel_device_view const> dremel_device_views() const
919  {
920  if (_dremel_device_views.has_value()) {
921  return device_span<detail::dremel_device_view const>(*_dremel_device_views);
922  } else {
923  return {};
924  }
925  }
926 
927  template <typename PhysicalElementComparator>
928  void check_physical_element_comparator()
929  {
930  if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
931  CUDF_EXPECTS(!_has_ranked_children,
932  "The input table has nested type children and they were transformed using a "
933  "different type of physical element comparator.");
934  }
935  }
936 
937  private:
938  table_device_view_owner const _t;
939  rmm::device_uvector<order> const _column_order;
940  rmm::device_uvector<null_order> const _null_precedence;
941  rmm::device_uvector<size_type> const _depths;
942 
943  // Dremel encoding of list columns used for the comparison algorithm
944  std::optional<std::vector<detail::dremel_data>> _dremel_data;
945  std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
946 
947  // Intermediate columns generated from transforming nested children columns into
948  // integers columns using `cudf::rank()`, need to be kept alive.
949  std::vector<std::unique_ptr<column>> _transformed_columns;
950 
951  // Flag to record if the input table was preprocessed to transform any nested children column(s)
952  // into integer column(s) using `cudf::rank`.
953  bool const _has_ranked_children;
954 };
955 
970  public:
986  host_span<order const> column_order = {},
987  host_span<null_order const> null_precedence = {},
989  : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
990  {
991  }
992 
1002  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1003 
1032  template <bool has_nested_columns,
1033  typename Nullate,
1034  typename PhysicalElementComparator = sorting_physical_element_comparator>
1035  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1036  {
1037  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1038 
1039  return less_comparator{
1040  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1041  nullate,
1042  *d_t,
1043  *d_t,
1044  d_t->dremel_device_views(),
1045  d_t->dremel_device_views(),
1046  d_t->depths(),
1047  d_t->column_order(),
1048  d_t->null_precedence(),
1049  comparator}};
1050  }
1051 
1053  template <bool has_nested_columns,
1054  typename Nullate,
1055  typename PhysicalElementComparator = sorting_physical_element_comparator>
1056  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1057  {
1058  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1059 
1060  return less_equivalent_comparator{
1061  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1062  nullate,
1063  *d_t,
1064  *d_t,
1065  d_t->dremel_device_views(),
1066  d_t->dremel_device_views(),
1067  d_t->depths(),
1068  d_t->column_order(),
1069  d_t->null_precedence(),
1070  comparator}};
1071  }
1072 
1073  private:
1074  std::shared_ptr<preprocessed_table> d_t;
1075 };
1076 
1077 // @cond
1078 template <typename Comparator>
1079 struct strong_index_comparator_adapter {
1080  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1081 
1082  __device__ constexpr weak_ordering operator()(lhs_index_type const lhs_index,
1083  rhs_index_type const rhs_index) const noexcept
1084  {
1085  return comparator(static_cast<cudf::size_type>(lhs_index),
1086  static_cast<cudf::size_type>(rhs_index));
1087  }
1088 
1089  __device__ constexpr weak_ordering operator()(rhs_index_type const rhs_index,
1090  lhs_index_type const lhs_index) const noexcept
1091  {
1092  auto const left_right_ordering =
1093  comparator(static_cast<cudf::size_type>(lhs_index), static_cast<cudf::size_type>(rhs_index));
1094 
1095  // Invert less/greater values to reflect right to left ordering
1096  if (left_right_ordering == weak_ordering::LESS) {
1097  return weak_ordering::GREATER;
1098  } else if (left_right_ordering == weak_ordering::GREATER) {
1099  return weak_ordering::LESS;
1100  }
1102  }
1103 
1104  Comparator const comparator;
1105 };
1106 // @endcond
1107 
1123  public:
1143  table_view const& right,
1144  host_span<order const> column_order = {},
1145  host_span<null_order const> null_precedence = {},
1147 
1162  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1163  std::shared_ptr<preprocessed_table> right)
1164  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1165  {
1166  }
1167 
1204  template <bool has_nested_columns,
1205  typename Nullate,
1206  typename PhysicalElementComparator = sorting_physical_element_comparator>
1207  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1208  {
1209  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1210  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1211 
1212  return less_comparator{strong_index_comparator_adapter{
1213  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1214  nullate,
1215  *d_left_table,
1216  *d_right_table,
1217  d_left_table->dremel_device_views(),
1218  d_right_table->dremel_device_views(),
1219  d_left_table->depths(),
1220  d_left_table->column_order(),
1221  d_left_table->null_precedence(),
1222  comparator}}};
1223  }
1224 
1226  template <bool has_nested_columns,
1227  typename Nullate,
1228  typename PhysicalElementComparator = sorting_physical_element_comparator>
1229  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1230  {
1231  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1232  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1233 
1234  return less_equivalent_comparator{strong_index_comparator_adapter{
1235  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1236  nullate,
1237  *d_left_table,
1238  *d_right_table,
1239  d_left_table->dremel_device_views(),
1240  d_right_table->dremel_device_views(),
1241  d_left_table->depths(),
1242  d_left_table->column_order(),
1243  d_left_table->null_precedence(),
1244  comparator}}};
1245  }
1246 
1247  private:
1248  std::shared_ptr<preprocessed_table> d_left_table;
1249  std::shared_ptr<preprocessed_table> d_right_table;
1250 };
1251 
1252 } // namespace lexicographic
1253 
1254 namespace hash {
1255 class row_hasher;
1256 } // namespace hash
1257 
1258 namespace equality {
1259 
1275  template <typename Element>
1276  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1277  {
1278  return lhs == rhs;
1279  }
1280 };
1281 
1294  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
1295  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1296  {
1297  return lhs == rhs;
1298  }
1299 
1309  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
1310  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1311  {
1312  return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1313  }
1314 };
1315 
1337 template <bool has_nested_columns,
1338  typename Nullate,
1339  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1341  friend class self_comparator;
1342  friend class two_table_comparator;
1343 
1344  public:
1353  __device__ constexpr bool operator()(size_type const lhs_index,
1354  size_type const rhs_index) const noexcept
1355  {
1356  auto equal_elements = [=](column_device_view l, column_device_view r) {
1357  return cudf::type_dispatcher(
1358  l.type(),
1359  element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1360  lhs_index,
1361  rhs_index);
1362  };
1363 
1364  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
1365  }
1366 
1367  private:
1378  device_row_comparator(Nullate check_nulls,
1379  table_device_view lhs,
1380  table_device_view rhs,
1381  null_equality nulls_are_equal = null_equality::EQUAL,
1382  PhysicalEqualityComparator comparator = {}) noexcept
1383  : lhs{lhs},
1384  rhs{rhs},
1385  check_nulls{check_nulls},
1386  nulls_are_equal{nulls_are_equal},
1387  comparator{comparator}
1388  {
1389  }
1390 
1394  class element_comparator {
1395  public:
1408  __device__ element_comparator(Nullate check_nulls,
1409  column_device_view lhs,
1410  column_device_view rhs,
1411  null_equality nulls_are_equal = null_equality::EQUAL,
1412  PhysicalEqualityComparator comparator = {}) noexcept
1413  : lhs{lhs},
1414  rhs{rhs},
1415  check_nulls{check_nulls},
1416  nulls_are_equal{nulls_are_equal},
1417  comparator{comparator}
1418  {
1419  }
1420 
1429  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1430  __device__ bool operator()(size_type const lhs_element_index,
1431  size_type const rhs_element_index) const noexcept
1432  {
1433  if (check_nulls) {
1434  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1435  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1436  if (lhs_is_null and rhs_is_null) {
1437  return nulls_are_equal == null_equality::EQUAL;
1438  } else if (lhs_is_null != rhs_is_null) {
1439  return false;
1440  }
1441  }
1442 
1443  return comparator(lhs.element<Element>(lhs_element_index),
1444  rhs.element<Element>(rhs_element_index));
1445  }
1446 
1447  template <typename Element,
1448  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1449  (not has_nested_columns or not cudf::is_nested<Element>())),
1450  typename... Args>
1451  __device__ bool operator()(Args...)
1452  {
1453  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1454  }
1455 
1456  template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1457  __device__ bool operator()(size_type const lhs_element_index,
1458  size_type const rhs_element_index) const noexcept
1459  {
1460  column_device_view lcol = lhs.slice(lhs_element_index, 1);
1461  column_device_view rcol = rhs.slice(rhs_element_index, 1);
1462  while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1463  if (check_nulls) {
1464  auto lvalid = detail::make_validity_iterator<true>(lcol);
1465  auto rvalid = detail::make_validity_iterator<true>(rcol);
1466  if (nulls_are_equal == null_equality::UNEQUAL) {
1467  if (thrust::any_of(
1468  thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not<bool>()) or
1469  thrust::any_of(
1470  thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not<bool>())) {
1471  return false;
1472  }
1473  } else {
1474  if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1475  return false;
1476  }
1477  }
1478  }
1479  if (lcol.type().id() == type_id::STRUCT) {
1480  if (lcol.num_child_columns() == 0) { return true; }
1481  // Non-empty structs are assumed to be decomposed and contain only one child
1482  lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1483  rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1484  } else if (lcol.type().id() == type_id::LIST) {
1485  auto l_list_col = detail::lists_column_device_view(lcol);
1486  auto r_list_col = detail::lists_column_device_view(rcol);
1487 
1488  auto lsizes = make_list_size_iterator(l_list_col);
1489  auto rsizes = make_list_size_iterator(r_list_col);
1490  if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1491  return false;
1492  }
1493 
1494  lcol = l_list_col.get_sliced_child();
1495  rcol = r_list_col.get_sliced_child();
1496  if (lcol.size() != rcol.size()) { return false; }
1497  }
1498  }
1499 
1500  auto comp = column_comparator{
1501  element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1502  return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1503  }
1504 
1505  private:
1513  struct column_comparator {
1514  element_comparator const comp;
1515  size_type const size;
1516 
1522  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1523  __device__ bool operator()() const noexcept
1524  {
1525  return thrust::all_of(thrust::seq,
1526  thrust::make_counting_iterator(0),
1527  thrust::make_counting_iterator(0) + size,
1528  [=](auto i) { return comp.template operator()<Element>(i, i); });
1529  }
1530 
1531  template <typename Element,
1532  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1533  typename... Args>
1534  __device__ bool operator()(Args...) const noexcept
1535  {
1536  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1537  }
1538  };
1539 
1540  column_device_view const lhs;
1541  column_device_view const rhs;
1542  Nullate const check_nulls;
1543  null_equality const nulls_are_equal;
1544  PhysicalEqualityComparator const comparator;
1545  };
1546 
1547  table_device_view const lhs;
1548  table_device_view const rhs;
1549  Nullate const check_nulls;
1550  null_equality const nulls_are_equal;
1551  PhysicalEqualityComparator const comparator;
1552 };
1553 
1571  static std::shared_ptr<preprocessed_table> create(table_view const& table,
1572  rmm::cuda_stream_view stream);
1573 
1574  private:
1575  friend class self_comparator;
1576  friend class two_table_comparator;
1577  friend class hash::row_hasher;
1578 
1579  using table_device_view_owner =
1580  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
1581 
1582  preprocessed_table(table_device_view_owner&& table,
1583  std::vector<rmm::device_buffer>&& null_buffers,
1584  std::vector<std::unique_ptr<column>>&& tmp_columns)
1585  : _t(std::move(table)),
1586  _null_buffers(std::move(null_buffers)),
1587  _tmp_columns(std::move(tmp_columns))
1588  {
1589  }
1590 
1596  operator table_device_view() { return *_t; }
1597 
1598  table_device_view_owner _t;
1599  std::vector<rmm::device_buffer> _null_buffers;
1600  std::vector<std::unique_ptr<column>> _tmp_columns;
1601 };
1602 
1608  public:
1618  : d_t(preprocessed_table::create(t, stream))
1619  {
1620  }
1621 
1631  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1632 
1658  template <bool has_nested_columns,
1659  typename Nullate,
1660  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1661  auto equal_to(Nullate nullate = {},
1662  null_equality nulls_are_equal = null_equality::EQUAL,
1663  PhysicalEqualityComparator comparator = {}) const noexcept
1664  {
1665  return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1666  nullate, *d_t, *d_t, nulls_are_equal, comparator};
1667  }
1668 
1669  private:
1670  std::shared_ptr<preprocessed_table> d_t;
1671 };
1672 
1673 // @cond
1674 template <typename Comparator>
1675 struct strong_index_comparator_adapter {
1676  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1677 
1678  __device__ constexpr bool operator()(lhs_index_type const lhs_index,
1679  rhs_index_type const rhs_index) const noexcept
1680  {
1681  return comparator(static_cast<cudf::size_type>(lhs_index),
1682  static_cast<cudf::size_type>(rhs_index));
1683  }
1684 
1685  __device__ constexpr bool operator()(rhs_index_type const rhs_index,
1686  lhs_index_type const lhs_index) const noexcept
1687  {
1688  return this->operator()(lhs_index, rhs_index);
1689  }
1690 
1691  Comparator const comparator;
1692 };
1693 // @endcond
1694 
1709  public:
1723  table_view const& right,
1724  rmm::cuda_stream_view stream);
1725 
1736  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1737  std::shared_ptr<preprocessed_table> right)
1738  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1739  {
1740  }
1741 
1772  template <bool has_nested_columns,
1773  typename Nullate,
1774  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1775  auto equal_to(Nullate nullate = {},
1776  null_equality nulls_are_equal = null_equality::EQUAL,
1777  PhysicalEqualityComparator comparator = {}) const noexcept
1778  {
1779  return strong_index_comparator_adapter{
1780  device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1781  nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1782  }
1783 
1784  private:
1785  std::shared_ptr<preprocessed_table> d_left_table;
1786  std::shared_ptr<preprocessed_table> d_right_table;
1787 };
1788 
1789 } // namespace equality
1790 
1791 namespace hash {
1792 
1799 template <template <typename> class hash_function, typename Nullate>
1801  public:
1809  __device__ element_hasher(
1810  Nullate nulls,
1811  uint32_t seed = DEFAULT_HASH_SEED,
1812  hash_value_type null_hash = std::numeric_limits<hash_value_type>::max()) noexcept
1813  : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1814  {
1815  }
1816 
1825  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1826  __device__ hash_value_type operator()(column_device_view const& col,
1827  size_type row_index) const noexcept
1828  {
1829  if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
1830  return hash_function<T>{_seed}(col.element<T>(row_index));
1831  }
1832 
1841  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1842  __device__ hash_value_type operator()(column_device_view const& col,
1843  size_type row_index) const noexcept
1844  {
1845  CUDF_UNREACHABLE("Unsupported type in hash.");
1846  }
1847 
1848  Nullate _check_nulls;
1849  uint32_t _seed;
1851 };
1852 
1859 template <template <typename> class hash_function, typename Nullate>
1861  friend class row_hasher;
1862 
1863  public:
1870  __device__ auto operator()(size_type row_index) const noexcept
1871  {
1872  auto it = thrust::make_transform_iterator(_table.begin(), [=](auto const& column) {
1873  return cudf::type_dispatcher<dispatch_storage_type>(
1874  column.type(),
1875  element_hasher_adapter<hash_function>{_check_nulls, _seed},
1876  column,
1877  row_index);
1878  });
1879 
1880  // Hash each element and combine all the hash values together
1881  return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
1882  return cudf::hashing::detail::hash_combine(hash, h);
1883  });
1884  }
1885 
1886  private:
1894  template <template <typename> class hash_fn>
1895  class element_hasher_adapter {
1896  static constexpr hash_value_type NULL_HASH = std::numeric_limits<hash_value_type>::max();
1897  static constexpr hash_value_type NON_NULL_HASH = 0;
1898 
1899  public:
1900  __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1901  : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1902  {
1903  }
1904 
1905  template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1906  __device__ hash_value_type operator()(column_device_view const& col,
1907  size_type row_index) const noexcept
1908  {
1909  return _element_hasher.template operator()<T>(col, row_index);
1910  }
1911 
1912  template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1913  __device__ hash_value_type operator()(column_device_view const& col,
1914  size_type row_index) const noexcept
1915  {
1916  auto hash = hash_value_type{0};
1917  column_device_view curr_col = col.slice(row_index, 1);
1918  while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1919  if (_check_nulls) {
1920  auto validity_it = detail::make_validity_iterator<true>(curr_col);
1921  hash = detail::accumulate(
1922  validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
1923  return cudf::hashing::detail::hash_combine(hash,
1924  is_valid ? NON_NULL_HASH : NULL_HASH);
1925  });
1926  }
1927  if (curr_col.type().id() == type_id::STRUCT) {
1928  if (curr_col.num_child_columns() == 0) { return hash; }
1929  // Non-empty structs are assumed to be decomposed and contain only one child
1930  curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
1931  } else if (curr_col.type().id() == type_id::LIST) {
1932  auto list_col = detail::lists_column_device_view(curr_col);
1933  auto list_sizes = make_list_size_iterator(list_col);
1934  hash = detail::accumulate(
1935  list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
1936  return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
1937  });
1938  curr_col = list_col.get_sliced_child();
1939  }
1940  }
1941  for (int i = 0; i < curr_col.size(); ++i) {
1942  hash = cudf::hashing::detail::hash_combine(
1943  hash,
1944  type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
1945  }
1946  return hash;
1947  }
1948 
1949  element_hasher<hash_fn, Nullate> const _element_hasher;
1950  Nullate const _check_nulls;
1951  };
1952 
1953  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
1954  table_device_view t,
1955  uint32_t seed = DEFAULT_HASH_SEED) noexcept
1956  : _check_nulls{check_nulls}, _table{t}, _seed(seed)
1957  {
1958  }
1959 
1960  Nullate const _check_nulls;
1961  table_device_view const _table;
1962  uint32_t const _seed;
1963 };
1964 
1965 // Inject row::equality::preprocessed_table into the row::hash namespace
1966 // As a result, row::equality::preprocessed_table and row::hash::preprocessed table are the same
1967 // type and are interchangeable.
1968 using preprocessed_table = row::equality::preprocessed_table;
1969 
1974 class row_hasher {
1975  public:
1984  : d_t(preprocessed_table::create(t, stream))
1985  {
1986  }
1987 
1997  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1998 
2011  template <template <typename> class hash_function = cudf::hashing::detail::default_hash,
2012  template <template <typename> class, typename>
2013  class DeviceRowHasher = device_row_hasher,
2014  typename Nullate>
2015  DeviceRowHasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
2016  uint32_t seed = DEFAULT_HASH_SEED) const
2017  {
2018  return DeviceRowHasher<hash_function, Nullate>(nullate, *d_t, seed);
2019  }
2020 
2021  private:
2022  std::shared_ptr<preprocessed_table> d_t;
2023 };
2024 
2025 } // namespace hash
2026 
2027 } // namespace row
2028 
2029 } // namespace experimental
2030 } // namespace cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
Definition: column.hpp:48
constexpr type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:285
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
ColumnDeviceView const & column(size_type column_index) const noexcept
Returns a reference to the view of the specified column.
ColumnDeviceView * begin() const noexcept
Returns an iterator to the first view in the table.
size_type num_columns() const noexcept
Returns the number of columns.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, std::optional< device_span< int const >> depth=std::nullopt, std::optional< device_span< order const >> column_order=std::nullopt, std::optional< device_span< null_order const >> null_precedence=std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
static auto create(table_view source_view, rmm::cuda_stream_view stream=cudf::get_default_stream())
Factory to construct a table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
A set of cudf::column's of the same size.
Definition: table.hpp:40
std::size_t size() const noexcept
Column device view class definitions.
std::unique_ptr< column > hash(table_view const &input, hash_id hash_function=hash_id::HASH_MURMUR3, uint32_t seed=DEFAULT_HASH_SEED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Computes the hash value of each row in the input set of columns.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:35
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:176
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:157
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:50
@ BEFORE
NULL values ordered before all other values.
@ EQUAL
nulls compare equal
@ UNEQUAL
nulls compare unequal
@ ASCENDING
Elements ordered from small to large.
@ LIST
List elements.
@ STRUCT
Struct elements.
cuDF interfaces
Definition: aggregation.hpp:34
weak_ordering
Result type of the element_relational_comparator function object.
@ LESS
Indicates a is less than (ordered before) b
@ EQUIVALENT
Indicates a is ordered neither before nor after b
@ GREATER
Indicates a is greater than (ordered after) b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:291
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
friend class thrust::iterator_core_access
Allow access to the base class.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Definition: span.hpp:224
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32