libcudf  24.02.00
column_utilities.hpp
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
#include <cudf/column/column.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <thrust/host_vector.h>
#include <thrust/iterator/transform_iterator.h>

#include <algorithm>
#include <utility>
30 
31 namespace cudf::test {
32 
/**
 * @brief Selects how much debug output the column comparison utilities emit.
 */
enum class debug_output_level {
  FIRST_ERROR = 0,  ///< print first error only
  ALL_ERRORS  = 1,  ///< print all errors
  QUIET       = 2   ///< no debug output
};
41 
42 constexpr size_type default_ulp = 4;
43 
44 namespace detail {
45 
58 bool expect_column_properties_equal(cudf::column_view const& lhs,
59  cudf::column_view const& rhs,
60  debug_output_level verbosity = debug_output_level::FIRST_ERROR);
61 
78 bool expect_column_properties_equivalent(
79  cudf::column_view const& lhs,
80  cudf::column_view const& rhs,
81  debug_output_level verbosity = debug_output_level::FIRST_ERROR);
82 
97 bool expect_columns_equal(cudf::column_view const& lhs,
98  cudf::column_view const& rhs,
99  debug_output_level verbosity = debug_output_level::FIRST_ERROR);
100 
118 bool expect_columns_equivalent(cudf::column_view const& lhs,
119  cudf::column_view const& rhs,
120  debug_output_level verbosity = debug_output_level::FIRST_ERROR,
121  size_type fp_ulps = cudf::test::default_ulp);
122 
132 void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_bytes);
133 
134 } // namespace detail
135 
141 void expect_column_empty(cudf::column_view const& col);
142 
149 std::vector<bitmask_type> bitmask_to_host(cudf::column_view const& c);
150 
162 bool validate_host_masks(std::vector<bitmask_type> const& expected_mask,
163  std::vector<bitmask_type> const& got_mask_begin,
164  size_type number_of_elements);
165 
174 template <typename T, std::enable_if_t<not cudf::is_fixed_point<T>()>* = nullptr>
175 std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c)
176 {
177  thrust::host_vector<T> host_data(c.size());
178  CUDF_CUDA_TRY(cudaMemcpy(host_data.data(), c.data<T>(), c.size() * sizeof(T), cudaMemcpyDefault));
179  return {host_data, bitmask_to_host(c)};
180 }
181 
182 // This signature is identical to the above overload apart from SFINAE so
183 // doxygen sees it as a duplicate.
185 
196 template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
197 std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c)
198 {
199  using namespace numeric;
200  using Rep = typename T::rep;
201 
202  auto host_rep_types = thrust::host_vector<Rep>(c.size());
203 
205  cudaMemcpy(host_rep_types.data(), c.begin<Rep>(), c.size() * sizeof(Rep), cudaMemcpyDefault));
206 
207  auto to_fp = [&](Rep val) { return T{scaled_integer<Rep>{val, scale_type{c.type().scale()}}}; };
208  auto begin = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp);
209  auto const host_fixed_points = thrust::host_vector<T>(begin, begin + c.size());
210 
211  return {host_fixed_points, bitmask_to_host(c)};
212 }
214 
225 template <>
226 inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c)
227 {
228  thrust::host_vector<std::string> host_data(c.size());
229  auto stream = cudf::get_default_stream();
230  if (c.size() > c.null_count()) {
231  auto const scv = strings_column_view(c);
232  auto const h_chars = cudf::detail::make_std_vector_sync<char>(
233  cudf::device_span<char const>(scv.chars_begin(stream), scv.chars_size(stream)), stream);
234  auto const h_offsets = cudf::detail::make_std_vector_sync(
235  cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
236  scv.size() + 1),
237  stream);
238 
239  // build std::string vector from chars and offsets
241  std::begin(h_offsets),
242  std::end(h_offsets) - 1,
243  std::begin(h_offsets) + 1,
244  host_data.begin(),
245  [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); });
246  }
247  return {std::move(host_data), bitmask_to_host(c)};
248 }
249 
250 } // namespace cudf::test
251 
252 // Macros for showing line of failure.
/**
 * @brief Opens a `SCOPED_TRACE` for the invoking line, then calls
 * `cudf::test::detail::expect_column_properties_equal(lhs, rhs)`.
 *
 * The `bool` returned by the call is discarded.
 */
#define CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUAL(lhs, rhs)          \
  do {                                                              \
    SCOPED_TRACE(" <-- line of failure\n");                         \
    cudf::test::detail::expect_column_properties_equal(lhs, rhs);   \
  } while (false)
258 
/**
 * @brief Opens a `SCOPED_TRACE` for the invoking line, then calls
 * `cudf::test::detail::expect_column_properties_equivalent(lhs, rhs)`.
 *
 * The `bool` returned by the call is discarded.
 */
#define CUDF_TEST_EXPECT_COLUMN_PROPERTIES_EQUIVALENT(lhs, rhs)          \
  do {                                                                   \
    SCOPED_TRACE(" <-- line of failure\n");                              \
    cudf::test::detail::expect_column_properties_equivalent(lhs, rhs);   \
  } while (false)
264 
/**
 * @brief Opens a `SCOPED_TRACE` for the invoking line, then calls
 * `cudf::test::detail::expect_columns_equal` with all macro arguments.
 *
 * The arguments after `lhs` are forwarded unchanged, so the optional trailing
 * parameters of the function (e.g. `verbosity`) can be supplied. At least one
 * argument (the right-hand column) must follow `lhs`. The `bool` returned by
 * the call is discarded.
 *
 * Uses the standard `...` / `__VA_ARGS__` form rather than the GNU-only named
 * variadic parameter (`rhs...`).
 */
#define CUDF_TEST_EXPECT_COLUMNS_EQUAL(lhs, ...)                  \
  do {                                                            \
    SCOPED_TRACE(" <-- line of failure\n");                       \
    cudf::test::detail::expect_columns_equal(lhs, __VA_ARGS__);   \
  } while (0)
270 
/**
 * @brief Opens a `SCOPED_TRACE` for the invoking line, then calls
 * `cudf::test::detail::expect_columns_equivalent` with all macro arguments.
 *
 * The arguments after `lhs` are forwarded unchanged, so the optional trailing
 * parameters of the function (e.g. `verbosity`, `fp_ulps`) can be supplied. At
 * least one argument (the right-hand column) must follow `lhs`. The `bool`
 * returned by the call is discarded.
 *
 * Uses the standard `...` / `__VA_ARGS__` form rather than the GNU-only named
 * variadic parameter (`rhs...`).
 */
#define CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(lhs, ...)                  \
  do {                                                                 \
    SCOPED_TRACE(" <-- line of failure\n");                            \
    cudf::test::detail::expect_columns_equivalent(lhs, __VA_ARGS__);   \
  } while (0)
276 
/**
 * @brief Opens a `SCOPED_TRACE` for the invoking line, then calls
 * `cudf::test::detail::expect_equal_buffers(lhs, rhs, size_bytes)`.
 */
#define CUDF_TEST_EXPECT_EQUAL_BUFFERS(lhs, rhs, size_bytes)          \
  do {                                                                \
    SCOPED_TRACE(" <-- line of failure\n");                           \
    cudf::test::detail::expect_equal_buffers(lhs, rhs, size_bytes);   \
  } while (false)
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Class definition for cudf::column.
column view class definitions
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
scale_type
The scale type for fixed_point.
Definition: fixed_point.hpp:42
std::unique_ptr< column > transform(column_view const &input, std::string const &unary_udf, data_type output_type, bool is_ptx, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a new column by applying a unary function against every element of an input column.
#define CUDF_CUDA_TRY(call)
Error checking macro for CUDA runtime API functions.
Definition: error.hpp:262
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
fixed_point and supporting types
Definition: fixed_point.hpp:32
APIs for managing validity bitmasks.
Class definition for cudf::strings_column_view.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:291
Helper struct for constructing fixed_point when value is already shifted.
scale_type scale
The scale of the value.
Type declarations for libcudf.