jenkins/Jenkinsfile-blossom.premerge-databricks

#!/usr/local/env groovy
/*
 * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *
 * Jenkinsfile for building rapids-plugin on Databricks cluster
 *
 */

@Library('blossom-lib')
@Library('blossom-github-lib@master')

import ipp.blossom.*

// Blossom GitHub helper; assigned in the "Init githubHelper" stage from GITHUB_TOKEN and params.GITHUB_DATA
def githubHelper // blossom github helper
// Value returned by pod.getCPUYAML for the rapids-databricks image; despite the name it is passed as
// the `yaml` of both kubernetes agents in the pipeline below
def IMAGE_DB = pod.getCPUYAML("${common.ARTIFACTORY_NAME}/sw-spark-docker/spark:rapids-databricks")

// Declarative premerge pipeline: checks out and stashes the PR source once, then runs
// databricksBuild() for every Databricks runtime listed in the matrix below.
pipeline {
    // Pipeline-level agent; the "Init githubHelper" stage declares no agent of its own and runs here
    agent {
        kubernetes {
            label "premerge-init-${BUILD_TAG}"
            cloud "${common.CLOUD_NAME}"
            yaml "${IMAGE_DB}"
        }
    }

    options {
        ansiColor('xterm')
        buildDiscarder(logRotator(numToKeepStr: '50'))
        // The source is checked out explicitly by checkoutCode() instead of the implicit checkout
        skipDefaultCheckout true
        timeout(time: 12, unit: 'HOURS')
    }

    // NOTE: params.DATABRICKS_TYPE (getDbType) and params.SPARK_CONF (databricksBuild) are also read
    // by this file but are not declared here
    parameters {
        // Put a default value for REF to avoid error when running the pipeline manually
        string(name: 'REF', defaultValue: 'main',
            description: 'Merged commit of specific PR')
        string(name: 'GITHUB_DATA', defaultValue: '',
            description: 'Json-formatted github data from upstream blossom-ci')
        choice(name: 'TEST_MODE', choices: ['CI_PART1', 'CI_PART2'],
            description: 'Separate integration tests into 2 parts, and run each part in parallell')
    }

    environment {
        GITHUB_TOKEN = credentials("github-token")
        // Credential holding the claim name used for the Build stage's workspace volume
        PVC = credentials("pvc")
        CUSTOM_WORKSPACE = "/home/jenkins/agent/workspace/${BUILD_TAG}"
        // DB related ENVs
        // IDLE_TIMEOUT and NUM_WORKERS are passed to create.py as -i and -e respectively
        IDLE_TIMEOUT = '390' // 6.5 hours
        NUM_WORKERS = '0'
        // 'aws' unless params.DATABRICKS_TYPE is set (see getDbType)
        DB_TYPE = getDbType()
        // The DbUtils lookups below are keyed by DB_TYPE; their exact results come from the shared
        // library and are not visible in this file
        DATABRICKS_HOST = DbUtils.getHost("$DB_TYPE")
        DATABRICKS_TOKEN = credentials("${DbUtils.getToken("$DB_TYPE")}")
        DATABRICKS_PUBKEY = credentials("SPARK_DATABRICKS_PUBKEY")
        DATABRICKS_DRIVER = DbUtils.getDriver("$DB_TYPE")
        DATABRICKS_WORKER = DbUtils.getWorker("$DB_TYPE")
        // Per-build base path; databricksBuild() appends "-$DB_TYPE" to form the init script upload path
        INIT_SCRIPTS_DIR = "/databricks/init_scripts/${BUILD_TAG}"
    }

    stages {
        stage("Init githubHelper") {
            steps {
                script {
                    githubHelper = GithubHelper.getInstance("${GITHUB_TOKEN}", params.GITHUB_DATA)
                    // desc contains the PR ID and can be accessed from different builds
                    currentBuild.description = githubHelper.getBuildDescription()
                    // Also stashes the checked-out tree as 'source_tree' for the Build stage
                    checkoutCode(githubHelper.getCloneUrl(), githubHelper.getMergedSHA())
                }
            }
        } // end of Init githubHelper

        stage('Databricks') {
            failFast true // Abort running if one branch failed
            matrix {
                axes {
                    axis {
                        // 'name' and 'values' only support literal strings in declarative Jenkins pipelines
                        // Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
                        name 'DB_RUNTIME'
                        values '11.3', '12.2', '13.3'
                    }
                }
                stages {
                    stage('Build') {
                        // Each matrix cell gets its own pod; the workspace lives on the volume claim
                        // named by the PVC credential, under a per-runtime custom workspace path
                        agent {
                            kubernetes {
                                label "premerge-ci-${DB_RUNTIME}-${BUILD_NUMBER}"
                                cloud "${common.CLOUD_NAME}"
                                yaml "${IMAGE_DB}"
                                workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
                                customWorkspace "${CUSTOM_WORKSPACE}-${DB_RUNTIME}-${BUILD_NUMBER}"
                            }
                        }
                        // Per-runtime values looked up from DB_RUNTIME; all four are read by databricksBuild()
                        environment {
                            DATABRICKS_RUNTIME = DbUtils.getRuntime("$DB_RUNTIME")
                            BASE_SPARK_VERSION = DbUtils.getSparkVer("$DB_RUNTIME")
                            BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS = DbUtils.getInstallVer("$DB_RUNTIME")
                            INIT_SCRIPTS = DbUtils.getInitScripts("$DB_RUNTIME")
                        }
                        steps {
                            script {
                                // Restore the tree stashed by checkoutCode() in the init stage
                                unstash('source_tree')
                                databricksBuild()
                                deleteDir() // cleanup content if no error
                            }
                        }
                    }
                }
            } // end of matrix
        } // end of stage Databricks
    } // end of stages
} // end of pipeline

/**
 * Resolves the Databricks deployment type for this build.
 *
 * params.DATABRICKS_TYPE ('aws' or 'azure') can be defined through the Jenkins web UI.
 *
 * @return params.DATABRICKS_TYPE when it is set to a non-empty value, otherwise 'aws'
 */
String getDbType() {
    // Elvis operator: falls back to 'aws' when the parameter is null or empty
    return params.DATABRICKS_TYPE ?: 'aws'
}

/**
 * Creates a Databricks cluster, builds and tests against it, and finally shuts the cluster down.
 *
 * Runs three nested stages ("Create", "Build against", "Test agaist") inside the 'cpu' container.
 * Reads its settings from the enclosing stage's environment (DATABRICKS_*, BASE_SPARK_VERSION*,
 * INIT_SCRIPTS*, DB_TYPE, IDLE_TIMEOUT, NUM_WORKERS, TEST_MODE) and from params.SPARK_CONF.
 *
 * Secrets (DATABRICKS_TOKEN, DATABRICKS_PUBKEY, DATABRICKS_PRIVKEY) are deliberately NOT
 * interpolated by Groovy. They are written as literal shell references so that the shell expands
 * them from the environment, as recommended by the Jenkins documentation on string interpolation.
 *
 * Any failure in a stage propagates to the caller after the cleanup in the finally block has run.
 */
void databricksBuild() {
    def CLUSTER_ID = ''
    def SPARK_MAJOR = BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS.replace('.', '')
    def ws_path = "$INIT_SCRIPTS_DIR-$DB_TYPE"
    // Single-quoted on purpose: the shell, not Groovy, resolves these credential variables
    def TOKEN_ARG = '-t "$DATABRICKS_TOKEN"'
    def PUBKEY_ARG = '-k "$DATABRICKS_PUBKEY"'
    def PRIVKEY_ARG = '-p "$DATABRICKS_PRIVKEY"'
    try {
        stage("Create $SPARK_MAJOR DB") {
            script {
                container('cpu') {
                    // Package the workspace; the archive is passed to run-build.py via -z in the build stage
                    sh "rm -rf spark-rapids-ci.tgz"
                    sh "tar -zcf spark-rapids-ci.tgz *"
                    def CREATE_PARAMS = " -r $DATABRICKS_RUNTIME -w $DATABRICKS_HOST $TOKEN_ARG" +
                            " -s $DB_TYPE -n CI-${BUILD_TAG}-${BASE_SPARK_VERSION} $PUBKEY_ARG -i $IDLE_TIMEOUT" +
                            " -d $DATABRICKS_DRIVER -o $DATABRICKS_WORKER -e $NUM_WORKERS"

                    // handle init scripts if exist
                    if (env.INIT_SCRIPTS) {
                        // foo.sh,bar.sh --> /path/foo.sh,/path/bar.sh
                        CREATE_PARAMS += " -f " + DbUtils.uploadFiles(this, env.INIT_SCRIPTS, ws_path)
                    }

                    // create.py's trimmed stdout is used as the cluster id by every later step
                    CLUSTER_ID = sh(script: "python3 ./jenkins/databricks/create.py $CREATE_PARAMS",
                            returnStdout: true).trim()
                    echo CLUSTER_ID
                }
            }
        }

        stage("Build against $SPARK_MAJOR DB") {
            script {
                container('cpu') {
                    // DATABRICKS_PRIVKEY is only bound inside this withCredentials scope
                    withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
                        def BUILD_PARAMS = " -w $DATABRICKS_HOST $TOKEN_ARG -c $CLUSTER_ID -z ./spark-rapids-ci.tgz" +
                                " $PRIVKEY_ARG -l ./jenkins/databricks/build.sh -d /home/ubuntu/build.sh" +
                                " -v $BASE_SPARK_VERSION -i $BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS"

                        //  add retry for build step to try
                        //  mitigate the issue of downloading dependencies while maven/sonatype is quite unstable
                        retry(3) {
                            sh "python3 ./jenkins/databricks/run-build.py $BUILD_PARAMS"
                        }
                    }
                }
            }
        }

        // NOTE(review): "agaist" is a typo for "against"; the name is kept unchanged because
        // STAGE_NAME is passed to common.publishPytestResult below and may be referenced elsewhere
        stage("Test agaist $SPARK_MAJOR DB") {
            script {
                container('cpu') {
                    try {
                        withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
                            def TEST_PARAMS = " -w $DATABRICKS_HOST $TOKEN_ARG -c $CLUSTER_ID  -e TEST_MODE=$TEST_MODE" +
                                " $PRIVKEY_ARG -l ./jenkins/databricks/test.sh -v $BASE_SPARK_VERSION -d /home/ubuntu/test.sh"
                            // Optional extra argument, forwarded to run-tests.py via -f
                            if (params.SPARK_CONF) {
                                TEST_PARAMS += " -f ${params.SPARK_CONF}"
                            }
                            sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS"
                        }
                    } finally {
                        // Publish test results whether the tests passed or failed
                        common.publishPytestResult(this, "${STAGE_NAME}")
                    }
                }
            }
        }

    } finally {
        // Cleanup only runs once a cluster id was obtained.
        // NOTE(review): if DbUtils.uploadFiles succeeded but create.py failed, CLUSTER_ID is still
        // empty and ws_path is presumably left behind — confirm against DbUtils.
        if (CLUSTER_ID) {
            container('cpu') {
                // NOTE(review): a shutdown failure re-runs DbUtils.cleanUp on the retry as well;
                // assumes cleanUp tolerates being called again — TODO confirm
                retry(3) {
                    if (env.INIT_SCRIPTS) {
                        DbUtils.cleanUp(this, ws_path)
                    }
                    sh "python3 ./jenkins/databricks/shutdown.py -s $DATABRICKS_HOST $TOKEN_ARG -c $CLUSTER_ID -d"
                }
            }
        }
    }
}

/**
 * Checks out the given commit, initializes git submodules and stashes the result.
 *
 * The whole workspace, including .git, is stashed as 'source_tree' so that later stages can
 * unstash it instead of cloning again.
 *
 * @param url clone URL of the repository
 * @param sha commit to check out; fetched through the pull-request merge refspec
 */
void checkoutCode(String url, String sha) {
    def remoteConfig = [
        credentialsId: 'github-token',
        url          : url,
        refspec      : '+refs/pull/*/merge:refs/remotes/origin/pr/*'
    ]
    def gitScm = [
        $class           : 'GitSCM',
        branches         : [[name: sha]],
        userRemoteConfigs: [remoteConfig],
        extensions       : [[$class: 'CloneOption', shallow: true]]
    ]
    checkout(changelog: false, poll: true, scm: gitScm)

    sh 'git submodule update --init'
    // Abort the build when the shared-library check reports the submodule as not initialized
    def submoduleReady = common.isSubmoduleInit(this)
    if (!submoduleReady) {
        error "Failed to clone submodule : thirdparty/parquet-testing"
    }

    // useDefaultExcludes: false keeps .git (and other default-excluded files) in the stash
    stash(name: 'source_tree', includes: '**,.git/**', useDefaultExcludes: false)
}