Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into merge-c2c
Browse files Browse the repository at this point in the history
  • Loading branch information
Chong Gao committed Jan 14, 2025
2 parents d718d8c + 60b6806 commit 4e79c2b
Show file tree
Hide file tree
Showing 474 changed files with 9,654 additions and 10,946 deletions.
14 changes: 8 additions & 6 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -9,8 +9,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.


/jenkins/ @jlowe @revans2 @tgravescs @GaryShen2008 @NvTimLiu @gerashegalov
pom.xml @jlowe @revans2 @tgravescs @GaryShen2008 @NvTimLiu @gerashegalov
/dist/ @jlowe @revans2 @tgravescs @GaryShen2008 @NvTimLiu @gerashegalov
/.github/ @jlowe @revans2 @tgravescs @GaryShen2008 @NvTimLiu @gerashegalov
# Build-related
/jenkins/ @NVIDIA/sparkrapids-cicd-codeowners
pom.xml @NVIDIA/sparkrapids-cicd-codeowners
/dist/ @NVIDIA/sparkrapids-cicd-codeowners
/.github/ @NVIDIA/sparkrapids-cicd-codeowners
/build/ @NVIDIA/sparkrapids-cicd-codeowners
/scripts/ @NVIDIA/sparkrapids-cicd-codeowners
5 changes: 2 additions & 3 deletions .github/workflows/blossom-ci.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -74,7 +74,6 @@ jobs:
github.actor == 'binmahone' ||
github.actor == 'zpuller' ||
github.actor == 'pxLi' ||
github.actor == 'Feng-Jiang28' ||
github.actor == 'SurajAralihalli' ||
github.actor == 'jihoonson' ||
github.actor == 'ustcfy' ||
Expand All @@ -91,7 +90,7 @@ jobs:
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
runs-on: vulnerability-scan
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/mvn-verify-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,15 @@ jobs:
echo "dailyCacheKey=$cacheKey" | tee $GITHUB_ENV $GITHUB_OUTPUT
- name: Cache local Maven repository
id: cache
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ env.dailyCacheKey }}
restore-keys: ${{ runner.os }}-maven-
- name: populate-daily-cache
timeout-minutes: 30
continue-on-error: true
if: steps.cache.outputs.cache-hit != 'true'
env:
SCALA_VER: '2.12'
Expand Down Expand Up @@ -118,6 +121,7 @@ jobs:
java-version: 8

- name: Cache local Maven repository
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
Expand Down Expand Up @@ -171,12 +175,15 @@ jobs:
echo "scala213dailyCacheKey=$cacheKey" | tee $GITHUB_ENV $GITHUB_OUTPUT
- name: Cache local Maven repository
id: cache
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ env.scala213dailyCacheKey }}
restore-keys: ${{ runner.os }}-maven-
- name: populate-daily-cache
timeout-minutes: 30
continue-on-error: true
if: steps.cache.outputs.cache-hit != 'true'
env:
SCALA_VER: '2.13'
Expand Down Expand Up @@ -223,6 +230,7 @@ jobs:
java-version: 17

- name: Cache local Maven repository
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
Expand Down Expand Up @@ -278,6 +286,7 @@ jobs:
java-version: 17

- name: Cache local Maven repository
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
Expand Down Expand Up @@ -332,6 +341,7 @@ jobs:
java-version: ${{ matrix.java-version }}

- name: Cache local Maven repository
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
Expand Down Expand Up @@ -379,6 +389,7 @@ jobs:
java-version: 11

- name: Cache local Maven repository
continue-on-error: true
uses: actions/cache@v4
with:
path: ~/.m2
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ scalastyle-output.xml
scalastyle.txt
target/
cufile.log
cudf_log.txt
build/*.class
399 changes: 187 additions & 212 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
RAPIDS plugin for Apache Spark
Copyright (c) 2019-2024, NVIDIA CORPORATION
Copyright (c) 2019-2025, NVIDIA CORPORATION

--------------------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion NOTICE-binary
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
RAPIDS plugin for Apache Spark
Copyright (c) 2019-2024, NVIDIA CORPORATION
Copyright (c) 2019-2025, NVIDIA CORPORATION

// ------------------------------------------------------------------
// NOTICE file corresponding to the section 4d of The Apache License,
Expand Down
2 changes: 1 addition & 1 deletion aggregator/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2021-2024, NVIDIA CORPORATION.
Copyright (c) 2021-2025, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion api_validation/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020-2024, NVIDIA CORPORATION.
Copyright (c) 2020-2025, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion datagen/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023-2024 NVIDIA CORPORATION.
Copyright (c) 2023-2025 NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,6 +41,7 @@
{"spark": "351"}
{"spark": "352"}
{"spark": "353"}
{"spark": "354"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.tests.datagen

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,8 +63,7 @@ abstract class GpuDeltaParquetFileFormatBase extends GpuReadParquetFileFormat {
pushedFilters,
fileScan.rapidsConf,
fileScan.allMetrics,
fileScan.queryUsesInputFile,
fileScan.alluxioPathsMap)
fileScan.queryUsesInputFile)
}

override def buildReaderWithPartitionValuesAndMetrics(
Expand All @@ -75,8 +74,7 @@ abstract class GpuDeltaParquetFileFormatBase extends GpuReadParquetFileFormat {
filters: Seq[Filter],
options: Map[String, String],
hadoopConf: Configuration,
metrics: Map[String, GpuMetric],
alluxioPathReplacementMap: Option[Map[String, String]])
metrics: Map[String, GpuMetric])
: PartitionedFile => Iterator[InternalRow] = {
super.buildReaderWithPartitionValuesAndMetrics(
sparkSession,
Expand All @@ -86,8 +84,7 @@ abstract class GpuDeltaParquetFileFormatBase extends GpuReadParquetFileFormat {
filters,
options,
hadoopConf,
metrics,
alluxioPathReplacementMap)
metrics)
}

override def supportFieldName(name: String): Boolean = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,8 +63,7 @@ trait GpuDeltaParquetFileFormat extends GpuReadParquetFileFormat {
pushedFilters,
fileScan.rapidsConf,
fileScan.allMetrics,
fileScan.queryUsesInputFile,
fileScan.alluxioPathsMap)
fileScan.queryUsesInputFile)
}

override def buildReaderWithPartitionValuesAndMetrics(
Expand All @@ -75,8 +74,7 @@ trait GpuDeltaParquetFileFormat extends GpuReadParquetFileFormat {
filters: Seq[Filter],
options: Map[String, String],
hadoopConf: Configuration,
metrics: Map[String, GpuMetric],
alluxioPathReplacementMap: Option[Map[String, String]])
metrics: Map[String, GpuMetric])
: PartitionedFile => Iterator[InternalRow] = {
super.buildReaderWithPartitionValuesAndMetrics(
sparkSession,
Expand All @@ -86,8 +84,7 @@ trait GpuDeltaParquetFileFormat extends GpuReadParquetFileFormat {
filters,
options,
hadoopConf,
metrics,
alluxioPathReplacementMap)
metrics)
}

override def supportFieldName(name: String): Boolean = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy, UnaryExecNode}
import org.apache.spark.sql.execution.datasources.v2.V2CommandExec
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.rapids.GpuWriteJobStatsTracker
import org.apache.spark.sql.vectorized.ColumnarBatch

Expand Down Expand Up @@ -106,11 +105,11 @@ case class GpuRapidsDeltaWriteExec(child: SparkPlan) extends V2CommandExec
with UnaryExecNode with GpuExec {
override def output: Seq[Attribute] = child.output

lazy val basicMetrics: Map[String, SQLMetric] = GpuWriteJobStatsTracker.basicMetrics
lazy val taskMetrics: Map[String, SQLMetric] = GpuWriteJobStatsTracker.taskMetrics
lazy val basicMetrics: Map[String, GpuMetric] = GpuWriteJobStatsTracker.basicMetrics
lazy val taskMetrics: Map[String, GpuMetric] = GpuWriteJobStatsTracker.taskMetrics

override lazy val allMetrics: Map[String, GpuMetric] =
GpuMetric.wrap(basicMetrics ++ taskMetrics)
basicMetrics ++ taskMetrics

override def internalDoExecuteColumnar(): RDD[ColumnarBatch] = {
// This is just a stub node for planning purposes and does not actually perform
Expand Down
2 changes: 1 addition & 1 deletion delta-lake/delta-20x/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023-2024 NVIDIA CORPORATION.
Copyright (c) 2023-2025 NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
Expand Down Expand Up @@ -179,8 +179,8 @@ class GpuOptimisticTransaction
val serializableHadoopConf = new SerializableConfiguration(hadoopConf)
val basicWriteJobStatsTracker = new BasicColumnarWriteJobStatsTracker(
serializableHadoopConf,
BasicWriteJobStatsTracker.metrics)
registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics)
GpuMetric.wrap(BasicWriteJobStatsTracker.metrics))
registerSQLMetrics(spark, GpuMetric.unwrap(basicWriteJobStatsTracker.driverSideMetrics))
statsTrackers.append(basicWriteJobStatsTracker)
gpuRapidsWrite.foreach { grw =>
val tracker = new GpuWriteJobStatsTracker(serializableHadoopConf,
Expand Down Expand Up @@ -219,8 +219,9 @@ class GpuOptimisticTransaction
bucketSpec = None,
statsTrackers = optionalStatsTracker.toSeq ++ statsTrackers,
options = options,
rapidsConf.stableSort,
rapidsConf.concurrentWriterPartitionFlushSize)
useStableSort = rapidsConf.stableSort,
concurrentWriterPartitionFlushSize = rapidsConf.concurrentWriterPartitionFlushSize,
baseDebugOutputPath = rapidsConf.outputDebugDumpPrefix)
} catch {
case s: SparkException =>
// Pull an InvariantViolationException up to the top level if it was the root cause.
Expand Down
2 changes: 1 addition & 1 deletion delta-lake/delta-21x/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023-2024 NVIDIA CORPORATION.
Copyright (c) 2023-2025 NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
Expand Down Expand Up @@ -179,8 +179,8 @@ class GpuOptimisticTransaction
val serializableHadoopConf = new SerializableConfiguration(hadoopConf)
val basicWriteJobStatsTracker = new BasicColumnarWriteJobStatsTracker(
serializableHadoopConf,
BasicWriteJobStatsTracker.metrics)
registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics)
GpuMetric.wrap(BasicWriteJobStatsTracker.metrics))
registerSQLMetrics(spark, GpuMetric.unwrap(basicWriteJobStatsTracker.driverSideMetrics))
statsTrackers.append(basicWriteJobStatsTracker)
gpuRapidsWrite.foreach { grw =>
val tracker = new GpuWriteJobStatsTracker(serializableHadoopConf,
Expand Down Expand Up @@ -219,8 +219,9 @@ class GpuOptimisticTransaction
bucketSpec = None,
statsTrackers = optionalStatsTracker.toSeq ++ statsTrackers,
options = options,
rapidsConf.stableSort,
rapidsConf.concurrentWriterPartitionFlushSize)
useStableSort = rapidsConf.stableSort,
concurrentWriterPartitionFlushSize = rapidsConf.concurrentWriterPartitionFlushSize,
baseDebugOutputPath = rapidsConf.outputDebugDumpPrefix)
} catch {
case s: SparkException =>
// Pull an InvariantViolationException up to the top level if it was the root cause.
Expand Down
2 changes: 1 addition & 1 deletion delta-lake/delta-22x/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023-2024 NVIDIA CORPORATION.
Copyright (c) 2023-2025 NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala
* in the Delta Lake project at https://github.com/delta-io/delta.
Expand Down Expand Up @@ -201,8 +201,8 @@ class GpuOptimisticTransaction
val serializableHadoopConf = new SerializableConfiguration(hadoopConf)
val basicWriteJobStatsTracker = new BasicColumnarWriteJobStatsTracker(
serializableHadoopConf,
BasicWriteJobStatsTracker.metrics)
registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics)
GpuMetric.wrap(BasicWriteJobStatsTracker.metrics))
registerSQLMetrics(spark, GpuMetric.unwrap(basicWriteJobStatsTracker.driverSideMetrics))
statsTrackers.append(basicWriteJobStatsTracker)
gpuRapidsWrite.foreach { grw =>
val tracker = new GpuWriteJobStatsTracker(serializableHadoopConf,
Expand Down Expand Up @@ -241,8 +241,9 @@ class GpuOptimisticTransaction
bucketSpec = None,
statsTrackers = optionalStatsTracker.toSeq ++ statsTrackers,
options = options,
rapidsConf.stableSort,
rapidsConf.concurrentWriterPartitionFlushSize)
useStableSort = rapidsConf.stableSort,
concurrentWriterPartitionFlushSize = rapidsConf.concurrentWriterPartitionFlushSize,
baseDebugOutputPath = rapidsConf.outputDebugDumpPrefix)
} catch {
case s: SparkException =>
// Pull an InvariantViolationException up to the top level if it was the root cause.
Expand Down
Loading

0 comments on commit 4e79c2b

Please sign in to comment.