NVIDIA · ustcfy · Jan 13, 2025 · Jan 13, 2025 · Jan 14, 2025 · Jan 14, 2025
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
@@ -211,6 +211,7 @@ add_library(
   src/case_when.cu
   src/cast_decimal_to_string.cu
   src/cast_float_to_string.cu
+  src/cast_long_to_binary_string.cu
   src/cast_string.cu
   src/cast_string_to_float.cu
   src/datetime_rebase.cu

diff --git a/src/main/cpp/src/CastStringJni.cpp b/src/main/cpp/src/CastStringJni.cpp
@@ -156,6 +156,21 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal
   CATCH_CAST_EXCEPTION(env, 0);
 }
 
+// JNI binding for CastStrings.fromLongToBinary: takes the address of a native
+// cudf::column_view, converts it with spark_rapids_jni::long_to_binary_string and
+// returns the released result column as a jlong handle. Returns 0 when the handle
+// is null or when the conversion throws (see JNI_NULL_CHECK / CATCH_CAST_EXCEPTION).
+JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromLongToBinary(
+  JNIEnv* env, jclass, jlong input_column)
+{
+  JNI_NULL_CHECK(env, input_column, "input column is null", 0);
+
+  try {
+    cudf::jni::auto_set_device(env);
+
+    // The jlong carries a pointer to the caller-owned column view.
+    auto const* input_view = reinterpret_cast<cudf::column_view const*>(input_column);
+    auto const stream      = cudf::get_default_stream();
+    return cudf::jni::release_as_jlong(
+      spark_rapids_jni::long_to_binary_string(*input_view, stream));
+  }
+  CATCH_CAST_EXCEPTION(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toIntegersWithBase(
   JNIEnv* env, jclass, jlong input_column, jint base, jboolean ansi_enabled, jint j_dtype)
 {

diff --git a/src/main/cpp/src/cast_long_to_binary_string.cu b/src/main/cpp/src/cast_long_to_binary_string.cu
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cast_string.hpp"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+
+namespace spark_rapids_jni {
+
+namespace detail {
+namespace {
+
+/**
+ * @brief Per-row functor that renders a 64-bit integer as its binary digits.
+ *
+ * The functor has two modes selected by `d_chars`:
+ *  - `d_chars == nullptr`: store the number of output characters for row `idx`
+ *    in `d_sizes[idx]` (0 for null rows).
+ *  - `d_chars != nullptr`: write the digits for row `idx` starting at
+ *    `d_chars + d_offsets[idx]` (null rows write nothing).
+ *
+ * The members `d_sizes`, `d_chars` and `d_offsets` are not set by this struct;
+ * they are expected to be filled in by the caller that drives the functor
+ * (here cudf::strings::detail::make_strings_children).
+ *
+ * Output has no leading zeros: 0 produces "0", and a negative value produces all
+ * 64 bits of its two's-complement pattern because its top bit is set.
+ *
+ * @tparam LongType Element type read from `d_longs`
+ */
+template <typename LongType>
+struct long_to_binary_string_fn {
+  cudf::column_device_view d_longs;
+  cudf::size_type* d_sizes;
+  char* d_chars;
+  cudf::detail::input_offsetalator d_offsets;
+
+  /**
+   * @brief Number of binary digits needed for `value`.
+   *
+   * @param value Integer to measure
+   * @return Position of the highest set bit plus one, or 1 when `value` is 0
+   */
+  __device__ cudf::size_type compute_output_size(LongType value)
+  {
+    auto const size = 64 - __clzll(value);
+    // If the value is 0, the size should be 1
+    return size > 0 ? size : 1;
+  }
+
+  /**
+   * @brief Write the binary digits of row `idx`, most significant digit first.
+   *
+   * @param idx Row index into `d_longs` / `d_offsets`
+   */
+  __device__ void long_to_binary_string(cudf::size_type idx)
+  {
+    auto const value = d_longs.element<LongType>(idx);
+    // Test bits on the unsigned pattern so that probing bit 63 never shifts a
+    // signed 1 into the sign bit.
+    auto const bits = static_cast<unsigned long long>(value);
+    // Recompute the width from the value instead of re-reading d_sizes: it is a
+    // single clz, and the write pass then no longer depends on the sizes buffer.
+    auto const width = compute_output_size(value);
+    char* d_buffer   = d_chars + d_offsets[idx];
+    for (auto i = width - 1; i >= 0; --i) {
+      *d_buffer++ = ((bits >> i) & 1ULL) ? '1' : '0';
+    }
+  }
+
+  /**
+   * @brief Process row `idx` in whichever mode `d_chars` selects.
+   *
+   * @param idx Row index
+   */
+  __device__ void operator()(cudf::size_type idx)
+  {
+    if (d_longs.is_null(idx)) {
+      // Null rows occupy no characters; only the sizing mode has anything to record.
+      if (d_chars == nullptr) { d_sizes[idx] = 0; }
+      return;
+    }
+    if (d_chars != nullptr) {
+      long_to_binary_string(idx);
+    } else {
+      d_sizes[idx] = compute_output_size(d_longs.element<LongType>(idx));
+    }
+  }
+};
+
+/**
+ * @brief Type-dispatch target that builds the binary-string column.
+ *
+ * Only `std::int64_t` input is supported; every other element type fails.
+ */
+struct dispatch_long_to_binary_string_fn {
+  // Any element type other than std::int64_t is rejected.
+  template <typename LongType, CUDF_ENABLE_IF(not std::is_same_v<LongType, std::int64_t>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
+                                           rmm::cuda_stream_view,
+                                           rmm::device_async_resource_ref) const
+  {
+    CUDF_FAIL("Values for long_to_binary_string function must be a long type.");
+  }
+
+  // std::int64_t input: build the offsets and chars children with the per-row
+  // functor, then assemble the strings column using the input's null mask.
+  template <typename LongType, CUDF_ENABLE_IF(std::is_same_v<LongType, std::int64_t>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr) const
+  {
+    auto const row_count   = input.size();
+    auto const device_view = cudf::column_device_view::create(input, stream);
+
+    auto [offsets_col, chars_buf] = cudf::strings::detail::make_strings_children(
+      long_to_binary_string_fn<LongType>{*device_view}, row_count, stream, mr);
+
+    // The output shares the input's nulls, so copy its bitmask and null count.
+    return cudf::make_strings_column(row_count,
+                                     std::move(offsets_col),
+                                     chars_buf.release(),
+                                     input.null_count(),
+                                     cudf::detail::copy_bitmask(input, stream, mr));
+  }
+};
+
+}  // namespace
+
+/**
+ * @brief Convert a column of 64-bit integers to a strings column of binary digits.
+ *
+ * An empty input yields an empty STRING column without dispatching on the type;
+ * otherwise the work is forwarded to dispatch_long_to_binary_string_fn.
+ *
+ * @param input Column to convert
+ * @param stream CUDA stream passed to the conversion
+ * @param mr Memory resource passed to the conversion
+ * @return The resulting strings column
+ */
+std::unique_ptr<cudf::column> long_to_binary_string(cudf::column_view const& input,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::device_async_resource_ref mr)
+{
+  if (not input.is_empty()) {
+    return cudf::type_dispatcher(
+      input.type(), dispatch_long_to_binary_string_fn{}, input, stream, mr);
+  }
+  return cudf::make_empty_column(cudf::type_id::STRING);
+}
+
+}  // namespace detail
+
+// external API
+// Public entry point: opens a profiling range for this function and delegates
+// to the detail implementation with the same arguments.
+std::unique_ptr<cudf::column> long_to_binary_string(cudf::column_view const& input,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  auto binary_strings = detail::long_to_binary_string(input, stream, mr);
+  return binary_strings;
+}
+
+}  // namespace spark_rapids_jni
diff --git a/src/main/cpp/src/cast_string.hpp b/src/main/cpp/src/cast_string.hpp
@@ -133,4 +133,9 @@ std::unique_ptr<cudf::column> decimal_to_non_ansi_string(
   rmm::cuda_stream_view stream,
   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Convert a column of 64-bit integers to strings of binary digits.
+ *
+ * Each non-null row becomes the binary representation of its value without
+ * leading zeros: 0 becomes "0", 10 becomes "1010", and -1 becomes a string of
+ * 64 '1' characters. Null rows stay null in the output.
+ *
+ * @param input Column of integers to convert; a non-empty column whose type is not
+ *        a 64-bit integer causes a failure
+ * @param stream CUDA stream used for the conversion
+ * @param mr Memory resource used for the returned column
+ * @return A strings column with one entry per input row
+ */
+std::unique_ptr<cudf::column> long_to_binary_string(
+  cudf::column_view const& input,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
 }  // namespace spark_rapids_jni
diff --git a/src/main/cpp/tests/CMakeLists.txt b/src/main/cpp/tests/CMakeLists.txt
@@ -58,6 +58,9 @@ ConfigureTest(FORMAT_FLOAT
 ConfigureTest(CAST_FLOAT_TO_STRING
     cast_float_to_string.cpp)
 
+ConfigureTest(CAST_LONG_TO_BINARY_STRING
+    cast_long_to_binary_string.cpp)
+
 ConfigureTest(DATETIME_REBASE
     datetime_rebase.cpp)
 

diff --git a/src/main/cpp/tests/cast_long_to_binary_string.cpp b/src/main/cpp/tests/cast_long_to_binary_string.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
+#include <rmm/device_uvector.hpp>
+
+#include <cast_string.hpp>
+
+#include <limits>
+
+using namespace cudf;
+
+// Debug output level handed to the column comparison macro in the tests below.
+// NOTE(review): FIRST_ERROR presumably limits the report to the first mismatch — confirm
+// against the cudf test utilities.
+constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR};
+
+// Test fixture for spark_rapids_jni::long_to_binary_string; adds nothing to the cudf base fixture.
+struct LongToBinaryStringTests : public cudf::test::BaseFixture {};
+
+// Checks zero, small positive values, a negated zero literal and -1 (all 64 bits set).
+TEST_F(LongToBinaryStringTests, FromLongToBinary)
+{
+  cudf::test::fixed_width_column_wrapper<int64_t> const input{0L, 1L, 10L, -0L, -1L};
+  cudf::test::strings_column_wrapper const expected{
+    "0", "1", "1010", "0", "1111111111111111111111111111111111111111111111111111111111111111"};
+
+  auto const actual = spark_rapids_jni::long_to_binary_string(input, cudf::get_default_stream());
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
+}
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java b/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java
@@ -111,6 +111,10 @@ public static ColumnVector fromDecimal(ColumnView cv) {
     return new ColumnVector(fromDecimal(cv.getNativeView()));
   }
 
+  public static ColumnVector fromLongToBinary(ColumnView cv) {
+    return new ColumnVector(fromLongToBinary(cv.getNativeView()));
+  }
+
   /**
    * Convert a string column to a given floating-point type column.
    *
@@ -160,6 +164,7 @@ private static native long toDecimal(long nativeColumnView, boolean ansi_enabled
   private static native long fromDecimal(long nativeColumnView);
   private static native long fromFloatWithFormat(long nativeColumnView, int digits);
   private static native long fromFloat(long nativeColumnView);
+  // Native long-to-binary-string conversion; takes a native column view handle and
+  // returns the handle of the resulting column.
+  private static native long fromLongToBinary(long nativeColumnView);
   private static native long toIntegersWithBase(long nativeColumnView, int base,
     boolean ansiEnabled, int dtype);
   private static native long fromIntegersWithBase(long nativeColumnView, int base);