From b2d9f259f046eeace163091c26472ddc4a4bd867 Mon Sep 17 00:00:00 2001 From: Yan Feng Date: Tue, 14 Jan 2025 17:38:02 +0800 Subject: [PATCH] Add support for Bin Signed-off-by: Yan Feng --- .../advanced_configs.md | 1 + docs/supported_ops.md | 51 +++++++++++++++++++ .../src/main/python/arithmetic_ops_test.py | 7 ++- .../nvidia/spark/rapids/GpuOverrides.scala | 8 ++- .../spark/sql/rapids/mathExpressions.scala | 14 ++++- tools/generated_files/320/operatorsScore.csv | 1 + tools/generated_files/320/supportedExprs.csv | 2 + tools/generated_files/330/operatorsScore.csv | 1 + tools/generated_files/330/supportedExprs.csv | 2 + tools/generated_files/operatorsScore.csv | 1 + tools/generated_files/supportedExprs.csv | 2 + 11 files changed, 87 insertions(+), 3 deletions(-) diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md index 5519e56b419..33d753c6b11 100644 --- a/docs/additional-functionality/advanced_configs.md +++ b/docs/additional-functionality/advanced_configs.md @@ -210,6 +210,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.Atanh|`atanh`|Inverse hyperbolic tangent|true|None| spark.rapids.sql.expression.AttributeReference| |References an input column|true|None| spark.rapids.sql.expression.BRound|`bround`|Round an expression to d decimal places using HALF_EVEN rounding mode|true|None| +spark.rapids.sql.expression.Bin|`bin`|Returns the string representation of the long value `expr` represented in binary|true|None| spark.rapids.sql.expression.BitLength|`bit_length`|The bit length of string data|true|None| spark.rapids.sql.expression.BitwiseAnd|`&`|Returns the bitwise AND of the operands|true|None| spark.rapids.sql.expression.BitwiseNot|`~`|Returns the bitwise NOT of the operands|true|None| diff --git a/docs/supported_ops.md b/docs/supported_ops.md index 2bc819b4a41..e82d959e2d1 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -4050,6 +4050,57 @@ are limited. +Bin +`bin` +Returns the string representation of the long value `expr` represented in binary +None +project +input + + + + +S + + + + + + + + + + + + + + + + + +result + + + + + + + + + +S + + + + + + + + + + + + BitLength `bit_length` The bit length of string data diff --git a/integration_tests/src/main/python/arithmetic_ops_test.py b/integration_tests/src/main/python/arithmetic_ops_test.py index d7fd941b97b..f7ac1cf4db8 100644 --- a/integration_tests/src/main/python/arithmetic_ops_test.py +++ b/integration_tests/src/main/python/arithmetic_ops_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -844,6 +844,11 @@ def test_degrees_small(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('degrees(a)')) +@pytest.mark.parametrize('data_gen', [long_gen], ids=idfn) +def test_bin_long(data_gen): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, data_gen).selectExpr('bin(a)')) + @approximate_float @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) def test_cos(data_gen): diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index c23728ba43a..04165752ea7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -983,6 +983,12 @@ object GpuOverrides extends Logging { (a, conf, p, r) => new UnaryExprMeta[ToRadians](a, conf, p, r) { override def convertToGpu(child: Expression): GpuToRadians = GpuToRadians(child) }), + expr[Bin]( + "Returns the string representation of the long value `expr` represented in binary", + ExprChecks.unaryProject(TypeSig.STRING, TypeSig.STRING, TypeSig.LONG, TypeSig.LONG), + (a, conf, p, r) => new UnaryExprMeta[Bin](a, conf, p, r) { + override def convertToGpu(child: Expression): GpuBin = GpuBin(child) + }), expr[WindowExpression]( "Calculates a return value for every input row of a table based on a group (or " + "\"window\") of rows", diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala index 24134d74add..afccd0dd8ca 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import ai.rapids.cudf._ import ai.rapids.cudf.ast.BinaryOperator import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} +import com.nvidia.spark.rapids.jni.CastStrings import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes} import org.apache.spark.sql.rapids.shims.RapidsErrorUtils @@ -68,6 +69,17 @@ case class GpuToRadians(child: Expression) extends GpuUnaryMathExpression("RADIA } } +case class GpuBin(child: Expression) extends GpuUnaryExpression + with ImplicitCastInputTypes with Serializable { + override def nullable: Boolean = true + override def inputTypes: Seq[AbstractDataType] = Seq(LongType) + override def dataType: DataType = StringType + + override def doColumnar(input: GpuColumnVector): ColumnVector = { + CastStrings.fromLongToBinary(input.getBase) + } +} + case class GpuAcoshImproved(child: Expression) extends CudfUnaryMathExpression("ACOSH") { override def unaryOp: UnaryOp = UnaryOp.ARCCOSH } diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv index d8c4ca63adc..31f37bb5168 100644 --- a/tools/generated_files/320/operatorsScore.csv +++ b/tools/generated_files/320/operatorsScore.csv @@ -72,6 +72,7 @@ Atanh,4 AttributeReference,4 Average,4 BRound,4 +Bin,4 BitLength,4 BitwiseAnd,4 BitwiseNot,4 diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv index a9b6720be13..2eca274ce5b 100644 --- a/tools/generated_files/320/supportedExprs.csv +++ b/tools/generated_files/320/supportedExprs.csv @@ -92,6 +92,8 @@ AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitwiseAnd,S,`&`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv index e86e30e606c..c06190e875b 100644 --- a/tools/generated_files/330/operatorsScore.csv +++ b/tools/generated_files/330/operatorsScore.csv @@ -73,6 +73,7 @@ Atanh,4 AttributeReference,4 Average,4 BRound,4 +Bin,4 BitLength,4 BitwiseAnd,4 BitwiseNot,4 diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv index aa744638220..551f50d0888 100644 --- a/tools/generated_files/330/supportedExprs.csv +++ b/tools/generated_files/330/supportedExprs.csv @@ -92,6 +92,8 @@ AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitwiseAnd,S,`&`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/operatorsScore.csv b/tools/generated_files/operatorsScore.csv index d8c4ca63adc..31f37bb5168 100644 --- a/tools/generated_files/operatorsScore.csv +++ b/tools/generated_files/operatorsScore.csv @@ -72,6 +72,7 @@ Atanh,4 AttributeReference,4 Average,4 BRound,4 +Bin,4 BitLength,4 BitwiseAnd,4 BitwiseNot,4 diff --git a/tools/generated_files/supportedExprs.csv b/tools/generated_files/supportedExprs.csv index a9b6720be13..2eca274ce5b 100644 --- a/tools/generated_files/supportedExprs.csv +++ b/tools/generated_files/supportedExprs.csv @@ -92,6 +92,8 @@ AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Bin,S,`bin`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA BitLength,S,`bit_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA BitwiseAnd,S,`&`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA