diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..e005367822 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "integration/xgboost/encryption_plugins/cuda_plugin/CGBN"] + path = integration/xgboost/encryption_plugins/cuda_plugin/CGBN + url = https://github.com/NVlabs/CGBN.git diff --git a/integration/xgboost/encryption_plugins/CMakeLists.txt b/integration/xgboost/encryption_plugins/CMakeLists.txt index f5d71dd61c..f7701e5742 100644 --- a/integration/xgboost/encryption_plugins/CMakeLists.txt +++ b/integration/xgboost/encryption_plugins/CMakeLists.txt @@ -1,41 +1,32 @@ cmake_minimum_required(VERSION 3.19) -project(xgb_nvflare LANGUAGES CXX C VERSION 1.0) +set(CMAKE_C_STANDARD 17) set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CUDA_STANDARD 17) set(CMAKE_BUILD_TYPE Debug) +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}") +cmake_policy(VERSION ${CMAKE_VERSION}) +message(STATUS "CMAKE_SOURCE_DIR: ${CMAKE_SOURCE_DIR}") +message(STATUS "CMAKE_VERSION: ${CMAKE_VERSION}") -option(GOOGLE_TEST "Build google tests" OFF) -file(GLOB_RECURSE LIB_SRC "src/*.cc") - -add_library(nvflare SHARED ${LIB_SRC}) -set_target_properties(nvflare PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - ENABLE_EXPORTS ON -) -target_include_directories(nvflare PRIVATE ${xgb_nvflare_SOURCE_DIR}/src/include) - -if (APPLE) - add_link_options("LINKER:-object_path_lto,$_lto.o") - add_link_options("LINKER:-cache_path_lto,${CMAKE_BINARY_DIR}/LTOCache") -endif () - -#-- Unit Tests -if(GOOGLE_TEST) - find_package(GTest REQUIRED) - enable_testing() - add_executable(nvflare_test) - target_link_libraries(nvflare_test PRIVATE nvflare) - - - target_include_directories(nvflare_test PRIVATE ${xgb_nvflare_SOURCE_DIR}/src/include) +# this has to be set before project() +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70) +endif() - add_subdirectory(${xgb_nvflare_SOURCE_DIR}/tests) +project(xgb_plugins LANGUAGES CUDA CXX VERSION 1.0) +option(BUILD_CUDA_PLUGIN "Build CUDA plugin" ON) +option(BUILD_NVFLARE_PLUGIN "Build NVFlare plugin" ON) - add_test( - NAME TestNvflarePlugins - COMMAND nvflare_test - WORKING_DIRECTORY ${xgb_nvflare_BINARY_DIR}) +if (BUILD_CUDA_PLUGIN) + add_subdirectory(cuda_plugin) +else() + message(STATUS "Skipping CUDA plugin") +endif() +if (BUILD_NVFLARE_PLUGIN) + add_subdirectory(nvflare_plugin) +else() + message(STATUS "Skipping NVFLARE plugin") endif() + diff --git a/integration/xgboost/encryption_plugins/README.md b/integration/xgboost/encryption_plugins/README.md index 57f2c4621e..be92cf4b40 100644 --- a/integration/xgboost/encryption_plugins/README.md +++ b/integration/xgboost/encryption_plugins/README.md @@ -1,9 +1,23 @@ -# Build Instruction +# XGBoost plugins -cd NVFlare/integration/xgboost/encryption_plugins + +## Install required dependencies for CUDA plugin +If you want to build CUDA plugin, you need to install the following libraries: +Require `libgmp-dev`, CUDA runtime >= 12.1, CUDA driver >= 12.1, NVIDIA GPU Driver >= 535 +Compute Compatibility >= 7.0 + +## Build instructions + +``` mkdir build cd build cmake .. make +``` + +## Disable build of CUDA plugin +You can pass option to cmake to disable the build of CUDA plugin if you don't have the environment: +``` +cmake -DBUILD_CUDA_PLUGIN=OFF .. +``` -The library is libxgb_nvflare.so diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/CGBN b/integration/xgboost/encryption_plugins/cuda_plugin/CGBN new file mode 160000 index 0000000000..e8b9d265c7 --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/CGBN @@ -0,0 +1 @@ +Subproject commit e8b9d265c7b84077d02340b0986f3c91b2eb02fb diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/CMakeLists.txt b/integration/xgboost/encryption_plugins/cuda_plugin/CMakeLists.txt new file mode 100644 index 0000000000..c93780d4bf --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/CMakeLists.txt @@ -0,0 +1,52 @@ +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") +find_package(gmp REQUIRED) + +# Set NVCC compiler +find_program(NVCC nvcc) +if(NOT NVCC) + message(FATAL_ERROR "NVCC not found! Please make sure CUDA is installed.") +endif() + +file(GLOB_RECURSE LIB_SRC + ${CMAKE_SOURCE_DIR}/shared/dam/*.cc + ${CMAKE_SOURCE_DIR}/shared/plugins/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc +) +message(STATUS "LIB_SRC files: ${LIB_SRC}") + +file(GLOB_RECURSE CUDA_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/CGBN/include/cgbn/cgbn.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h +) +message(STATUS "CUDA_SRC files: ${CUDA_SRC}") + +set(TARGET_NAME cuda_paillier) + +set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/src/delegated_plugin.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin_main.cc + ${CUDA_SRC} + PROPERTIES LANGUAGE CUDA +) + +add_library(${TARGET_NAME} SHARED ${LIB_SRC}) +set_target_properties(${TARGET_NAME} + PROPERTIES + CUDA_RUNTIME_LIBRARY Shared +) +set_target_properties(${TARGET_NAME} + PROPERTIES + LINKER_LANGUAGE CUDA +) + +target_include_directories(${TARGET_NAME} PRIVATE + ${CMAKE_SOURCE_DIR}/shared/include + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}/CGBN/include/cgbn +) + +target_compile_features(${TARGET_NAME} PRIVATE cuda_std_17) +target_link_libraries(${TARGET_NAME} PRIVATE gmp::gmpc) +target_link_libraries(${TARGET_NAME} PRIVATE gmp::gmpxx) +target_link_libraries(${TARGET_NAME} PRIVATE gmp::gmp) + diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/Findgmp.cmake b/integration/xgboost/encryption_plugins/cuda_plugin/Findgmp.cmake new file mode 100644 index 0000000000..452fee2bba --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/Findgmp.cmake @@ -0,0 +1,87 @@ +#################################################### +# Looking for main header file in standard locations +find_path(gmpc_INCLUDE_DIR gmp.h) +find_path(gmpxx_INCLUDE_DIR gmpxx.h) + +############################################ +# Looking for binaries in standard locations +find_library(gmpc_LIBRARY NAMES gmp libgmp) +find_library(gmpxx_LIBRARY NAMES gmpxx libgmpxx) + +###################################################################################### +# QUESTION: IS ALL THAT NECESSARY OR find_package_handle_standard_args DOES THE JOB? # +###################################################################################### +IF (gmpc_INCLUDE_DIR STREQUAL "gmpc_INCLUDE_DIR-NOTFOUND") + MESSAGE(WARNING "GMP c headers not found") + SET(GMP_DETECTION_ERROR TRUE) +ELSEIF(gmpxx_INCLUDE_DIR STREQUAL "gmpxx_INCLUDE_DIR-NOTFOUND") + MESSAGE(WARNING "GMP c++ headers not found") + SET(GMP_DETECTION_ERROR TRUE) +ELSEIF(gmpc_LIBRARY STREQUAL "gmpc_LIBRARY-NOTFOUND") + MESSAGE(WARNING "GMP c library not found") + SET(GMP_DETECTION_ERROR TRUE) +ELSEIF(gmpxx_LIBRARY STREQUAL "gmpxx_LIBRARY-NOTFOUND") + MESSAGE(WARNING "GMP c++ library not found") + SET(GMP_DETECTION_ERROR TRUE) +ENDIF() + +IF (NOT GMP_DETECTION_ERROR) + + mark_as_advanced(gmpc_INCLUDE_DIR gmpc_LIBRARY gmpxx_INCLUDE_DIR gmpxx_LIBRARY) + + ############################# + # Setting find_package output + # gmp_FOUND + # Cache variables + # gmp_INCLUDE_DIR + # gmp_LIBRARY + # CMakeLists variables + # gmp_INCLUDE_DIRS + # gmp_LIBRARIES + include( FindPackageHandleStandardArgs ) + find_package_handle_standard_args(gmp REQUIRED_VARS + gmpc_LIBRARY + gmpxx_LIBRARY + gmpc_INCLUDE_DIR + gmpxx_INCLUDE_DIR + ) + + IF (gmp_FOUND) + + SET(gmpc_LIBRARIES ${gmpc_LIBRARY}) + SET(gmpc_INCLUDE_DIRS ${gmpc_INCLUDE_DIR}) + SET(gmpxx_LIBRARIES ${gmpxx_LIBRARY}) + SET(gmpxx_INCLUDE_DIRS ${gmpxx_INCLUDE_DIR}) + + ################################## + # Setting gmp::gmp + IF (NOT TARGET gmp::gmpc) + add_library(gmp::gmpc UNKNOWN IMPORTED) + set_target_properties(gmp::gmpc PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${gmpc_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${gmpc_INCLUDE_DIR}" + ) + ENDIF() + #SET(GMPC_TARGET "gmp::gmpc") + IF (NOT TARGET gmp::gmpxx) + add_library(gmp::gmpxx UNKNOWN IMPORTED) + set_target_properties(gmp::gmpxx PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${gmpxx_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${gmpxx_INCLUDE_DIR}" + ) + ENDIF() + #SET(GMPXX_TARGET "gmp::gmpxx") + IF (NOT TARGET gmp::gmp) + add_library(gmp::gmp INTERFACE IMPORTED) + #SET(GMP_TARGET "${GMPC_TARGET};${GMPXX_TARGET}") + set_target_properties(gmp::gmp PROPERTIES + LINK_INTERFACE_LIBRARIES "gmp::gmpc;gmp::gmpxx" + IMPORTED_LOCATION "${gmpc_LIBRARY};${gmpxx_LIBRARY}") + ENDIF() + + ENDIF() + +ENDIF() + diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/README.md b/integration/xgboost/encryption_plugins/cuda_plugin/README.md new file mode 100644 index 0000000000..fd3354bba1 --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/README.md @@ -0,0 +1,4 @@ +# CUDA plugin + +Use CUDA to do paillier encryption and addition. + diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_plugin.h b/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_plugin.h new file mode 100755 index 0000000000..539e513f47 --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_plugin.h @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CUDA_PLUGIN_H +#define CUDA_PLUGIN_H + +#pragma once +#include +#include +#include +#include "paillier.h" +#include "base_plugin.h" +#include "local_plugin.h" +#include "endec.h" + +#define PRECISION 1e6 + +namespace nvflare { + +// Define a structured header for the buffer +struct BufferHeader { + bool has_key; + size_t key_size; + size_t rand_seed_size; +}; + +class CUDAPlugin: public LocalPlugin { + private: + PaillierCipher* paillier_cipher_ptr_ = nullptr; + CgbnPair* encrypted_gh_pairs_ = nullptr; + Endec* endec_ptr_ = nullptr; + + public: + explicit CUDAPlugin(std::vector> const &args): LocalPlugin(args) { + bool fix_seed = get_bool(args, "fix_seed"); + paillier_cipher_ptr_ = new PaillierCipher(bits/2, fix_seed, debug_); + encrypted_gh_pairs_ = nullptr; + } + + ~CUDAPlugin() { + delete paillier_cipher_ptr_; + if (endec_ptr_ != nullptr) { + delete endec_ptr_; + endec_ptr_ = nullptr; + } + } + + void setGHPairs() { + if (debug_) std::cout << "setGHPairs is called" << std::endl; + const std::uint8_t* pointer = encrypted_gh_.data(); + + // Retrieve header + BufferHeader header; + std::memcpy(&header, pointer, sizeof(BufferHeader)); + pointer += sizeof(BufferHeader); + + // Get key and n (if present) + cgbn_mem_t* key_ptr; + if (header.has_key) { + mpz_t n; + mpz_init(n); + key_ptr = (cgbn_mem_t* )malloc(header.key_size); + if (!key_ptr) { + std::cout << "bad alloc with key_ptr" << std::endl; + throw std::bad_alloc(); + } + memcpy(key_ptr, pointer, header.key_size); + store2Gmp(n, key_ptr); + pointer += header.key_size; + + if (header.rand_seed_size != sizeof(uint64_t)) { + free(key_ptr); + mpz_clear(n); + std::cout << "rand_seed_size " << header.rand_seed_size << " is wrong " << std::endl; + throw std::runtime_error("Invalid random seed size"); + } + uint64_t rand_seed; + memcpy(&rand_seed, pointer, header.rand_seed_size); + pointer += header.rand_seed_size; + + if (!paillier_cipher_ptr_->has_pub_key) { + paillier_cipher_ptr_->set_pub_key(n, rand_seed); + } + mpz_clear(n); + free(key_ptr); + } + + // Access payload + std::vector payload(pointer, pointer + (encrypted_gh_.size() - (pointer - encrypted_gh_.data()))); + + ck(cudaMalloc((void **)&encrypted_gh_pairs_, payload.size())); + cudaMemcpy(encrypted_gh_pairs_, payload.data(), payload.size(), cudaMemcpyHostToDevice); + } + + void clearGHPairs() { + if (debug_) std::cout << "clearGHPairs is called" << std::endl; + if (encrypted_gh_pairs_) { + cudaFree(encrypted_gh_pairs_); + encrypted_gh_pairs_ = nullptr; + } + } + + Buffer createBuffer( + bool has_key_flag, + cgbn_mem_t* key_ptr, + size_t key_size, + uint64_t rand_seed, + size_t rand_seed_size, + cgbn_mem_t* d_ciphers_ptr, + size_t payload_size + ) { + if (debug_) std::cout << "createBuffer is called" << std::endl; + // Calculate header size and total buffer size + size_t header_size = sizeof(BufferHeader); + size_t mem_size = header_size + key_size + rand_seed_size + payload_size; + + // Allocate buffer + void* buffer = malloc(mem_size); + if (!buffer) { + std::cout << "bad alloc with buffer" << std::endl; + throw std::bad_alloc(); + } + + // Construct header + BufferHeader header; + header.has_key = has_key_flag; + header.key_size = key_size; + header.rand_seed_size = rand_seed_size; + + // Copy header to buffer + memcpy(buffer, &header, header_size); + + // Copy the key (if present) + if (has_key_flag) { + memcpy((char*)buffer + header_size, key_ptr, key_size); + memcpy((char*)buffer + header_size + key_size, &rand_seed, rand_seed_size); + } + + // Copy the payload + cudaMemcpy((char*)buffer + header_size + key_size + rand_seed_size, d_ciphers_ptr, payload_size, cudaMemcpyDeviceToHost); + + Buffer result(buffer, mem_size, true); + + return result; + } + + Buffer EncryptVector(const std::vector& cleartext) override { + if (debug_) std::cout << "Calling EncryptVector with count " << cleartext.size() << std::endl; + if (endec_ptr_ != nullptr) { + delete endec_ptr_; + } + endec_ptr_ = new Endec(PRECISION); + + size_t count = cleartext.size(); + int byte_length = bits / 8; + size_t mem_size = sizeof(cgbn_mem_t) * count; + cgbn_mem_t* h_ptr=(cgbn_mem_t* )malloc(mem_size); + if (debug_) std::cout << "h_ptr size is " << mem_size << " indata size is " << count * byte_length << std::endl; + for (size_t i = 0; i < count; ++i) { + mpz_t n; + mpz_init(n); + + endec_ptr_->encode(n, cleartext[i]); + store2Cgbn(h_ptr + i, n); + + mpz_clear(n); + } + + cgbn_mem_t* d_plains_ptr; + cgbn_mem_t* d_ciphers_ptr; + ck(cudaMalloc((void **)&d_plains_ptr, mem_size)); + ck(cudaMalloc((void **)&d_ciphers_ptr, mem_size)); + cudaMemcpy(d_plains_ptr, h_ptr, mem_size, cudaMemcpyHostToDevice); + + if (!paillier_cipher_ptr_->has_prv_key) { +#ifdef TIME + CudaTimer cuda_timer(0); + float gen_time=0; + cuda_timer.start(); +#endif + if (debug_) std::cout<<"Gen KeyPair with bits: " << bits << std::endl; + paillier_cipher_ptr_->genKeypair(); +#ifdef TIME + gen_time += cuda_timer.stop(); + std::cout<<"Gen KeyPair Time "<< gen_time <<" MS"<encrypt(d_plains_ptr, d_ciphers_ptr, count); + + // get pub_key n + mpz_t n; + mpz_init(n); + size_t key_size = sizeof(cgbn_mem_t); + paillier_cipher_ptr_->getN(n); + store2Cgbn(h_ptr, n); + mpz_clear(n); + + // get rand_seed + size_t rand_seed_size = sizeof(uint64_t); + uint64_t rand_seed = paillier_cipher_ptr_->get_rand_seed(); + + Buffer result = createBuffer(true, h_ptr, key_size, rand_seed, rand_seed_size, d_ciphers_ptr, mem_size); + + void* buffer = malloc(mem_size); + cudaMemcpy(buffer, d_ciphers_ptr, mem_size, cudaMemcpyDeviceToHost); + cudaFree(d_plains_ptr); + cudaFree(d_ciphers_ptr); + free(h_ptr); + + //Buffer result(buffer, mem_size, true); + + return result; + } + + std::vector DecryptVector(const std::vector& ciphertext) override { + if (debug_) std::cout << "Calling DecryptVector" << std::endl; + size_t mem_size = 0; + for (int i = 0; i < ciphertext.size(); ++i) { + mem_size += ciphertext[i].buf_size; + if (ciphertext[i].buf_size != 2 * sizeof(cgbn_mem_t)) { + std::cout << "buf_size is " << ciphertext[i].buf_size << std::endl; + std::cout << "expected buf_size is " << 2 * sizeof(cgbn_mem_t) << std::endl; + std::cout << "Fatal Error" << std::endl; + } + } + + size_t count = mem_size / sizeof(cgbn_mem_t); + cgbn_mem_t* h_ptr=(cgbn_mem_t* )malloc(mem_size); + if (debug_) std::cout << "h_ptr size is " << mem_size << " how many gh is " << count << std::endl; + + + cgbn_mem_t* d_plains_ptr; + cgbn_mem_t* d_ciphers_ptr; + ck(cudaMalloc((void **)&d_plains_ptr, mem_size)); + ck(cudaMalloc((void **)&d_ciphers_ptr, mem_size)); + + size_t offset = 0; + for (int i = 0; i < ciphertext.size(); ++i) { + cudaMemcpy(d_ciphers_ptr + offset, ciphertext[i].buffer, ciphertext[i].buf_size, cudaMemcpyHostToDevice); + offset += ciphertext[i].buf_size / sizeof(cgbn_mem_t); + } + + if (!paillier_cipher_ptr_->has_prv_key) { + std::cout << "Can't call DecryptVector if paillier does not have private key." << std::endl; + throw std::runtime_error("Can't call DecryptVector if paillier does not have private key."); + } + + paillier_cipher_ptr_->decrypt(d_ciphers_ptr, d_plains_ptr, count); + + + cudaMemcpy(h_ptr, d_plains_ptr, mem_size, cudaMemcpyDeviceToHost); + std::vector result; + for (size_t i = 0; i < count; ++i) { + mpz_t n; + mpz_init(n); + store2Gmp(n, h_ptr + i); + double output_num = endec_ptr_->decode(n); + result.push_back(output_num); + mpz_clear(n); + } + cudaFree(d_plains_ptr); + cudaFree(d_ciphers_ptr); + free(h_ptr); + return result; + } + + std::map AddGHPairs(const std::map>& sample_ids) override{ + if (debug_) std::cout << "Calling AddGHPairs with sample_ids size " << sample_ids.size() << std::endl; + if (!encrypted_gh_pairs_) { + setGHPairs(); + } + std::map result; + + CgbnPair* d_res_ptr; + size_t mem_size = sizeof(CgbnPair); + if (mem_size != 2*sizeof(cgbn_mem_t)) { + std::cout << "Fatal Error" << std::endl; + } + ck(cudaMalloc((void **)&d_res_ptr, mem_size)); + cgbn_mem_t* d_plains_ptr; + ck(cudaMalloc((void **)&d_plains_ptr, mem_size)); + + if (!paillier_cipher_ptr_->has_pub_key) { + std::cout << "Can't call AddGHPairs if paillier does not have public key." << std::endl; + throw std::runtime_error("Can't call AddGHPairs if paillier does not have public key."); + } + + // Iterate through the map + for (auto& pair : sample_ids) { + int key = pair.first; + const int* sample_id = pair.second.data(); + int count = pair.second.size(); + + int* sample_id_d; + ck(cudaMalloc((void **)&sample_id_d, sizeof(int) * count)); + cudaMemcpy(sample_id_d, sample_id, sizeof(int) * count, cudaMemcpyHostToDevice); + + paillier_cipher_ptr_->sum(d_res_ptr, encrypted_gh_pairs_, sample_id_d, count); + + void* data = malloc(mem_size); + cudaMemcpy(data, d_res_ptr, mem_size, cudaMemcpyDeviceToHost); + Buffer buffer(data, mem_size, true); + result[key] = buffer; // Add the Buffer object to the result map + cudaFree(sample_id_d); + } + cudaFree(d_res_ptr); + cudaFree(d_plains_ptr); + if (debug_) std::cout << "Finish AddGHPairs" << std::endl; + if (encrypted_gh_pairs_) { + clearGHPairs(); + } + return result; + } +}; +} // namespace nvflare + +#endif // CUDA_PLUGIN_H diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_utils.h b/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_utils.h new file mode 100755 index 0000000000..f0911a664e --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/src/cuda_utils.h @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CUDA_UTILS_H +#define CUDA_UTILS_H + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cgbn.h" +#include // For rand() function +#include // For time() function + +/********** Constant Values **************/ +const static unsigned int bits=2048; +const static unsigned int key_len=1024; + +//const static unsigned int bits=4096; +//const static unsigned int key_len=2048; + +//const static unsigned int bits=6144; +//const static unsigned int key_len=3072; + + +const static int TPB=512; +const static int TPI=32; +const static int window_bits=5; + +/** Class **/ +struct CgbnPair { + cgbn_mem_t g; + cgbn_mem_t h; +}; + +/*************Error Handling**************/ +bool check(cudaError_t e, int iLine, const char *szFile) { + if (e != cudaSuccess) { + std::cout << "CUDA runtime API error " << cudaGetErrorString(e) << " at line " << iLine << " in file " << szFile << std::endl; + exit(0); + return false; + } + return true; +} +#define ck(call) check(call, __LINE__, __FILE__) + +void cgbn_check(cgbn_error_report_t *report, const char *file=NULL, int32_t line=0) { + // check for cgbn errors + if(cgbn_error_report_check(report)) { + printf("\n"); + printf("CGBN error occurred: %s\n", cgbn_error_string(report)); + + if(report->_instance!=0xFFFFFFFF) { + printf("Error reported by instance %d", report->_instance); + if(report->_blockIdx.x!=0xFFFFFFFF || report->_threadIdx.x!=0xFFFFFFFF) + printf(", "); + if(report->_blockIdx.x!=0xFFFFFFFF) + printf("blockIdx=(%d, %d, %d) ", report->_blockIdx.x, report->_blockIdx.y, report->_blockIdx.z); + if(report->_threadIdx.x!=0xFFFFFFFF) + printf("threadIdx=(%d, %d, %d)", report->_threadIdx.x, report->_threadIdx.y, report->_threadIdx.z); + printf("\n"); + } + else { + printf("Error reported by blockIdx=(%d %d %d)", report->_blockIdx.x, report->_blockIdx.y, report->_blockIdx.z); + printf("threadIdx=(%d %d %d)\n", report->_threadIdx.x, report->_threadIdx.y, report->_threadIdx.z); + } + if(file!=NULL) + printf("file %s, line %d\n", file, line); + exit(1); + } +} +#define CGBN_CHECK(report) cgbn_check(report, __FILE__, __LINE__) + +/*************Time Handling**************/ +class CudaTimer{ + private: + cudaEvent_t event_start; + cudaEvent_t event_stop; + cudaStream_t stream; + float time; + public: + CudaTimer(cudaStream_t stream){ + this->stream=stream; + } + void start(){ + ck(cudaEventCreate(&event_start)); + ck(cudaEventCreate(&event_stop)); + ck(cudaEventRecord(event_start, stream)); + } + float stop(){ + ck(cudaEventRecord(event_stop,stream)); + ck(cudaEventSynchronize(event_stop)); + ck(cudaEventElapsedTime(&time, event_start, event_stop)); + ck(cudaEventDestroy(event_start)); + ck(cudaEventDestroy(event_stop)); + return time; + } + ~CudaTimer(){ + } +}; + +/**********GMP and CGBN functions***************/ +void getPrimeOver(mpz_t rop, int bits, uint64_t &seed_start){ + gmp_randstate_t state; + gmp_randinit_default(state); + gmp_randseed_ui(state, seed_start); + seed_start++; + mpz_t rand_num; + mpz_init(rand_num); + mpz_urandomb(rand_num, state, bits); + //gmp_printf("rand_num:%Zd\n", rand_num); + mpz_setbit(rand_num, bits-1); + mpz_nextprime(rop, rand_num); + mpz_clear(rand_num); +} + +template +void store2Cgbn(cgbn_mem_t *address, mpz_t z) { + size_t words; + if(mpz_sizeinbase(z, 2) > BITS) { + printf("mpz_sizeinbase: %lu exceeds %d\n", mpz_sizeinbase(z, 2), BITS); + exit(1); + } + + mpz_export((uint32_t *)address, &words, -1, sizeof(uint32_t), 0, 0, z); + while(words<(BITS+31)/32) + ((uint32_t *)address)[words++]=0; +} + +template +void store2Gmp(mpz_t z, cgbn_mem_t *address ) { + mpz_import(z, (BITS+31)/32, -1, sizeof(uint32_t), 0, 0, (uint32_t *)address); +} + +template +void initArr(cgbn_mem_t *address, int count, int default_value = 0, bool randomize = false){ + + for(int i = 0; i < count; i++){ + int value; + mpz_t n; + mpz_init(n); + if (randomize) { + value = i; // rand(); + } else { + value = default_value; + } + mpz_set_si(n, value); + + store2Cgbn(address + i, n); + + gmp_printf("input%d:%Zd\n", i, n); + mpz_clear(n); + } +} +template +void printCgbn(cgbn_mem_t *h_ptr, int print_count){ + for(int i = 0; i < print_count; i++){ + mpz_t n; + mpz_init(n); + store2Gmp(n, h_ptr + i); + gmp_printf("printCgbn [%d]:%Zd\n",i, n); + mpz_clear(n); + } +} + +template +void printDevCgbn(cgbn_mem_t *d_ptr, int print_count, std::string name="cipher"){ + + int mem_size=sizeof(cgbn_mem_t)*print_count; + cgbn_mem_t* h_plains_ptr=(cgbn_mem_t* )malloc(mem_size); + cudaMemcpy(h_plains_ptr, d_ptr, mem_size, cudaMemcpyDeviceToHost); + + for(int i = 0; i < print_count; i++){ + mpz_t n; + mpz_init(n); + store2Gmp(n, h_plains_ptr + i); + gmp_printf("printDevCgbn %s[%d]:%Zd\n",name,i, n); + mpz_clear(n); + } + + + free(h_plains_ptr); +} + +template +void printDevGH(CgbnPair *d_ptr, int print_count, std::string name="cipher"){ + + int mem_size=sizeof(CgbnPair) * print_count; + CgbnPair* h_plains_ptr=(CgbnPair *)malloc(mem_size); + cudaMemcpy(h_plains_ptr, d_ptr, mem_size, cudaMemcpyDeviceToHost); + + for(int i = 0; i < print_count; i++){ + mpz_t g, h; + mpz_init(g); + mpz_init(h); + CgbnPair p = *(h_plains_ptr +i); + store2Gmp(g, &p.g); + store2Gmp(h, &p.h); + gmp_printf("printDevCgbn %s[%d]:g %Zd, h %Zd\n",name, i, g, h); + mpz_clear(g); + mpz_clear(h); + } + + free(h_plains_ptr); +} + +template +void compArr(cgbn_mem_t *a, cgbn_mem_t *b,int count){ + int mem_size=sizeof(cgbn_mem_t)*count; + cgbn_mem_t* ha=(cgbn_mem_t* )malloc(mem_size); + cudaMemcpy(ha, a, mem_size, cudaMemcpyDeviceToHost); + + cgbn_mem_t* hb=(cgbn_mem_t* )malloc(mem_size); + cudaMemcpy(hb, b, mem_size, cudaMemcpyDeviceToHost); + + + for(int i = 0; i < count; i++){ + int res=0; + mpz_t na, nb; + mpz_init(na); + store2Gmp(na, ha + i); + + mpz_init(nb); + store2Gmp(nb, hb + i); + + res=mpz_cmp(na, nb); + if(res!=0){ + std::cout<<"res= "< &a, const std::vector &b, double eps=1e-6) { + if (a.size() != b.size()) return false; + for (auto i = 0; i < a.size(); ++i) { + if (fabs(a[i] - b[i]) >= eps) { + std::cout << "Fatal Error at position " << i << " " << a[i] << " " << b[i] << std::endl; + return false; + } + } + return true; +} + +#endif // CUDA_UTILS_H diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/src/delegated_plugin.cc b/integration/xgboost/encryption_plugins/cuda_plugin/src/delegated_plugin.cc new file mode 100755 index 0000000000..6b348b3cb6 --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/src/delegated_plugin.cc @@ -0,0 +1,32 @@ +/** + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "delegated_plugin.h" +#include "cuda_plugin.h" + +namespace nvflare { + +DelegatedPlugin::DelegatedPlugin(std::vector> const &args): + BasePlugin(args) { + + auto name = get_string(args, "name"); + if (name == "cuda_paillier") { + plugin_ = new CUDAPlugin(args); + } else { + throw std::invalid_argument{"Unknown plugin name: " + name}; + } +} + +} // namespace nvflare diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/src/endec.h b/integration/xgboost/encryption_plugins/cuda_plugin/src/endec.h new file mode 100755 index 0000000000..206746371c --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/src/endec.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENDEC_H +#define ENDEC_H + +#include "gmp.h" + +class Endec { + private: + bool debug_ = false; + double precision_; + + public: + Endec(double p, bool debug = false): debug_(debug), precision_(p) {} + + void encode(mpz_t& result, const double& number) { + int64_t temp = static_cast(number * precision_); + uint64_t output_number = static_cast(temp); + + mpz_set_ui(result, output_number); + if (debug_) printf("Encoding using (p %f): input %f, output %lu\n", precision_, number, output_number); + + } + + double decode(const mpz_t& number) { + uint64_t output_num = mpz_get_ui(number); + int64_t sint = static_cast(output_num); + double result = sint / precision_; + + if (debug_) gmp_printf("Decoding using (p %f): input %Zd, output %f\n", precision_, number, result); + return result; + } +}; + +#endif // ENDEC_H diff --git a/integration/xgboost/encryption_plugins/cuda_plugin/src/paillier.h b/integration/xgboost/encryption_plugins/cuda_plugin/src/paillier.h new file mode 100755 index 0000000000..8226f4cfa0 --- /dev/null +++ b/integration/xgboost/encryption_plugins/cuda_plugin/src/paillier.h @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PAILLIER_H +#define PAILLIER_H + +#pragma once + +#include +#include "cuda_utils.h" + + +/***********************Declare*************************/ + +template +__global__ void gpu_encrypt(cgbn_error_report_t *report, cgbn_mem_t *plains, cgbn_mem_t * ciphers, int count); + +template +__global__ void gpu_decrypt(cgbn_error_report_t *report, cgbn_mem_t * plains, cgbn_mem_t *ciphers, int count); + +template +__global__ void reduce_sum(cgbn_error_report_t *report, CgbnPair* result, CgbnPair* arr, int count, CgbnPair* one); + +template +__global__ void reduce_sum_with_index(cgbn_error_report_t *report, CgbnPair* result, CgbnPair* arr, +int* sample_bin, int count, CgbnPair* one); + + +/***********************Class**********************/ +template +struct PaillierPubKey{ + cgbn_mem_t n; + cgbn_mem_t n_1; + cgbn_mem_t n_square; + cgbn_mem_t limit_int; + cgbn_mem_t rand_seed; +}; + +template +struct PaillierPrvKey{ + cgbn_mem_t lamda; + cgbn_mem_t u; +}; + + + +__constant__ PaillierPrvKey c_PriKey; +__constant__ PaillierPubKey c_PubKey; + + +template +class PaillierCipher{ + private: + mpz_t n_, p_, q_; + uint64_t _rand_seed; + bool fix_seed_ = false; + + public: + int key_len; + bool debug_ = false; + bool has_pub_key = false; + bool has_prv_key = false; + PaillierPubKey pub_key; + PaillierPrvKey prv_key; + CgbnPair _one; + + public: + PaillierCipher(int key_len, bool fix_seed = false, bool debug = false){ + this->key_len=key_len; + debug_ = debug; + fix_seed_ = fix_seed; + mpz_init(n_); + mpz_init(p_); + mpz_init(q_); + + if (debug_) std::cout<<"Construct PaillierCipher"< distribution(0, UINT64_MAX); + + // Generate a random number + _rand_seed = distribution(gen); + if (fix_seed_) _rand_seed = 12345; + + uint64_t seed_start = _rand_seed; + int n_len = 0; + while(n_len!=key_len){ + getPrimeOver(p, key_len/2, seed_start); + mpz_set(q, p); + while(mpz_cmp(p, q) == 0){ + getPrimeOver(q, key_len/2, seed_start); + mpz_mul(n, p, q); + n_len = mpz_sizeinbase(n, 2); + } + } + + // Set Key + set_keys(n, _rand_seed, p, q); + + + if (debug_) { + printf("Rand bits for n: %lu, key_len %d\n", mpz_sizeinbase(n, 2), key_len); + std::cout<<"The size of data is:" < + int encrypt(cgbn_mem_t* d_plains_ptr, cgbn_mem_t* d_ciphers_ptr, int count){ + int IPB=TPB/TPI; + cgbn_error_report_t *report; + ck(cgbn_error_report_alloc(&report)); + +#ifdef DEBUG + ck(cudaDeviceSynchronize()); + ck(cudaGetLastError()); + std::cout<< "numBlocks: "<< (count+IPB-1)/IPB << ", threadsPerBlock: "<< TPB<<<<(count+IPB-1)/IPB, TPB>>>(report, d_plains_ptr, d_ciphers_ptr, count); + +#ifdef DEBUG + ck(cudaDeviceSynchronize()); + ck(cudaGetLastError()); + CGBN_CHECK(report); +#endif +#ifdef TIME + float encrypt_time=cuda_timer.stop(); + std::cout<<"Encrypt Time (TPI="< + int decrypt(cgbn_mem_t* d_ciphers_ptr, cgbn_mem_t* d_plains_ptr,int count){ + int IPB=TPB/TPI; + + cgbn_error_report_t *report; + ck(cgbn_error_report_alloc(&report)); +#ifdef TIME + CudaTimer cuda_timer(0); + cuda_timer.start(); +#endif + gpu_decrypt<<<(count+IPB-1)/IPB, TPB>>>(report, d_plains_ptr, d_ciphers_ptr, count); +#ifdef DEBUG + ck(cudaDeviceSynchronize()); + ck(cudaGetLastError()); + CGBN_CHECK(report); + ck(cgbn_error_report_free(report)); +#endif + +#ifdef TIME + float decrypt_time=cuda_timer.stop(); + std::cout<<"Decrypt Time (TPI="< + int sum(CgbnPair* d_res_ptr, CgbnPair* d_arr_ptr, int* sample_bin, int count) { + int IPB=TPB/TPI; + int maxBlocks = 2560; + int numBlocks = min((count - 1) / IPB + 1, maxBlocks); + int mem_size = numBlocks * sizeof(CgbnPair); + if (count == 0) { + cudaMemcpy(d_res_ptr, &_one, sizeof(CgbnPair), cudaMemcpyHostToDevice); + return 0; + } + + cgbn_error_report_t *report; + ck(cgbn_error_report_alloc(&report)); +#ifdef TIME + CudaTimer cuda_timer(0); + cuda_timer.start(); +#endif + + CgbnPair* d_res_ptr_2; + ck(cudaMalloc((void **)&d_res_ptr_2, mem_size)); + CgbnPair* d_one; + ck(cudaMalloc((void **)&d_one, sizeof(CgbnPair))); + cudaMemcpy(d_one, &_one, sizeof(CgbnPair), cudaMemcpyHostToDevice); + + typedef cgbn_context_t context_t; + typedef cgbn_env_t env_t; + typedef typename env_t::cgbn_t bn_t; + int shmem_size = IPB * sizeof(CgbnPair); + +#ifdef DEBUG + std::cout << "before calling reduce_sum with CgbnPair and sample_bin" << std::endl; + std::cout << "before calling reduce_sum count: " << count << " shm_size: " << shmem_size << " numBlocks: " << numBlocks << std::endl; + std::cout << "before calling reduce_sum TPI: " << TPI << " TPB: " << TPB << " IPB: " << IPB << std::endl; +#endif + + reduce_sum_with_index<<>>(report, d_res_ptr_2, d_arr_ptr, sample_bin, count, d_one); + ck(cudaDeviceSynchronize()); + +#ifdef DEBUG + std::cout << "after calling reduce_sum" << std::endl; +#endif + + // final reduction + if (numBlocks != 1) { + reduce_sum<<<1, TPB, shmem_size>>>(report, d_res_ptr, d_res_ptr_2, numBlocks, d_one); + } else { + cudaMemcpy(d_res_ptr, d_res_ptr_2, mem_size, cudaMemcpyDeviceToDevice); + } + ck(cudaDeviceSynchronize()); + +#ifdef DEBUG + ck(cudaGetLastError()); + CGBN_CHECK(report); + ck(cgbn_error_report_free(report)); +#endif + +#ifdef TIME + float add_time=cuda_timer.stop(); + std::cout<<"Add Time (TPI="< +__global__ +void gpu_encrypt(cgbn_error_report_t *report, cgbn_mem_t *plains, cgbn_mem_t * ciphers, int count) { + int tid=(blockIdx.x*blockDim.x + threadIdx.x)/T_TPI; + if(tid>=count) + return; + + static const uint32_t TPI=T_TPI; + static const uint32_t BITS=T_BITS; + typedef cgbn_context_t context_t; + typedef cgbn_env_t env_t; + typedef typename env_t::cgbn_t bn_t; + typedef typename env_t::cgbn_wide_t bn_w_t; + + context_t bn_context(cgbn_report_monitor, report, tid); // construct a context + env_t bn_env(bn_context); // construct an environment for 1024-bit math + + bn_t t1, t2, t3; // define a, b, r as 1024-bit bignums + cgbn_load(bn_env, t1, &(c_PubKey.n));//tn_ + cgbn_load(bn_env, t2, &(c_PubKey.limit_int));//t_tmp + cgbn_load(bn_env, t3, plains+tid);//t_p + + int compare=cgbn_compare(bn_env, t3, t2); + if( (compare>=0) &&(cgbn_compare(bn_env, t3, t1) < 0)){ + cgbn_sub(bn_env, t2, t1, t3); + cgbn_mul(bn_env, t2, t1, t2); + cgbn_add_ui32(bn_env, t2, t2, 1); + + cgbn_load(bn_env, t3, &(c_PubKey.n_square)); + cgbn_rem(bn_env, t2, t2, t3); + cgbn_modular_inverse(bn_env, t2, t2, t3); + }else{ + cgbn_mul(bn_env, t2, t1, t3); + cgbn_add_ui32(bn_env, t2, t2, 1); + + cgbn_load(bn_env, t3, &(c_PubKey.n_square)); + cgbn_rem(bn_env, t2, t2, t3); + } + + cgbn_load(bn_env, t1, &(c_PubKey.rand_seed)); + + bn_w_t r; + cgbn_mul_wide(bn_env,r,t2, t1); + cgbn_rem_wide(bn_env,t2,r,t3); + cgbn_store(bn_env, ciphers + tid, t2); +} + +template +__device__ __forceinline__ void fixed_window_powm_odd(env_t _env, + typename env_t::cgbn_t &result, const typename env_t::cgbn_t &x, + const typename env_t::cgbn_t &power, const typename env_t::cgbn_t &modulus) { + typename env_t::cgbn_t t; + typename env_t::cgbn_local_t window[1<0) { + // square the result window_bits times + #pragma nounroll + for(int sqr_count=0;sqr_count +__device__ __forceinline__ void sliding_window_powm_odd(env_t _env, + typename env_t::cgbn_t &result, const typename env_t::cgbn_t &x, + const typename env_t::cgbn_t &power, const typename env_t::cgbn_t &modulus) { + typename env_t::cgbn_t t, starts; + int32_t index, position, leading; + uint32_t mont_inv; + typename env_t::cgbn_local_t odd_powers[1<=0) { + // convert x into Montgomery space, store in the odd powers table + mont_inv=cgbn_bn2mont(_env, result, x, modulus); + + // compute t=x^2 mod modulus + cgbn_mont_sqr(_env, t, result, modulus, mont_inv); + + // compute odd powers window table: x^1, x^3, x^5, ... + cgbn_store(_env, odd_powers, result); + #pragma nounroll + for(index=1;index<(1<leading) + break; + position=position+window_bits; + } + } + + // load first window. Note, since the window index must be odd, we have to + // divide it by two before indexing the window table. Instead, we just don't + // load the index LSB from power + index=cgbn_extract_bits_ui32(_env, power, position+1, window_bits-1); + cgbn_load(_env, result, odd_powers+index); + position--; + + // Process remaining windows + while(position>=0) { + cgbn_mont_sqr(_env, result, result, modulus, mont_inv); + if(cgbn_extract_bits_ui32(_env, starts, position, 1)==1) { + // found a window, load the index + index=cgbn_extract_bits_ui32(_env, power, position+1, window_bits-1); + cgbn_load(_env, t, odd_powers+index); + cgbn_mont_mul(_env, result, result, t, modulus, mont_inv); + } + position--; + } + + // convert result from Montgomery space + cgbn_mont2bn(_env, result, result, modulus, mont_inv); + } + else { + // p=0, thus x^p mod modulus=1 + cgbn_set_ui32(_env, result, 1); + } + } + + +template +__global__ void gpu_decrypt(cgbn_error_report_t *report, cgbn_mem_t * plains, cgbn_mem_t *ciphers, int count) { + int tid=(blockIdx.x*blockDim.x + threadIdx.x)/TPI; + if(tid>=count) + return; + + cgbn_context_t bn_context(cgbn_report_monitor, report, tid); + cgbn_env_t, BITS> bn_env(bn_context); + + typename cgbn_env_t, BITS>::cgbn_t t, p; + typename cgbn_env_t, BITS>::cgbn_t n; + + cgbn_load(bn_env, t, ciphers + tid); + cgbn_load(bn_env, p, &(c_PriKey.lamda)); + cgbn_load(bn_env, n, &(c_PubKey.n_square)); + + //cgbn_modular_power(bn_env, t, t,p, n); + //fixed_window_powm_odd(bn_env,t, t, p, n); + sliding_window_powm_odd(bn_env,t, t, p, n); + + + cgbn_load(bn_env, n, &(c_PubKey.n)); + cgbn_sub_ui32(bn_env, t, t, 1); + + cgbn_load(bn_env, p, &(c_PriKey.u)); + + cgbn_div(bn_env, t, t, n); + cgbn_mul(bn_env, t, t, p); + cgbn_rem(bn_env, t, t, n); + + cgbn_store(bn_env, plains + tid, t); +} + + +template +__global__ void reduce_sum(cgbn_error_report_t* report, CgbnPair* result, CgbnPair* arr, int count, CgbnPair* one) { + typedef cgbn_context_t context_t; + typedef cgbn_env_t env_t; + typedef typename env_t::cgbn_t bn_t; + typedef typename env_t::cgbn_wide_t bn_w_t; + + int id = (blockIdx.x * blockDim.x + threadIdx.x) / TPI; + int shm_id = threadIdx.x / TPI; + int IPB = blockDim.x / TPI; + + context_t bn_context(cgbn_report_monitor, report, id); + env_t bn_env(bn_context); + + extern __shared__ CgbnPair sdata3[]; + bn_t a, b, c, tmp_g, tmp_h; + bn_t n_square; + bn_w_t r; + + int total_windows = (count - 1) / (IPB * gridDim.x) + 1; + cgbn_load(bn_env, n_square, &c_PubKey.n_square); + for (unsigned int window = 0; window < total_windows; window++) { + int global_position = id + window * IPB * gridDim.x; + if (global_position >= count) { + // Load rand_seed into sdata3 directly for positions exceeding count + sdata3[shm_id] = one[0]; + } else { + // Load pairs of elements from arr into sdata3 + sdata3[shm_id] = arr[global_position]; + } + __syncthreads(); + + // Perform reduction in shared memory + for (unsigned int s = IPB / 2; s > 0; s >>= 1) { + if (shm_id < s) { + // Load pairs of elements from shared memory and perform reduction + cgbn_load(bn_env, a, &(sdata3[shm_id].g)); + cgbn_load(bn_env, b, &(sdata3[shm_id + s].g)); + cgbn_mul_wide(bn_env, r, a, b); + cgbn_rem_wide(bn_env, c, r, n_square); + cgbn_store(bn_env, &(sdata3[shm_id].g), c); + + cgbn_load(bn_env, a, &(sdata3[shm_id].h)); + cgbn_load(bn_env, b, &(sdata3[shm_id + s].h)); + cgbn_mul_wide(bn_env, r, a, b); + cgbn_rem_wide(bn_env, c, r, n_square); + cgbn_store(bn_env, &(sdata3[shm_id].h), c); + } + __syncthreads(); + } + + if (shm_id == 0) { + if (window == 0) { + // Store the result of the first window into tmp + cgbn_load(bn_env, tmp_g, &(sdata3[0].g)); + cgbn_load(bn_env, tmp_h, &(sdata3[0].h)); + } else { + // Add the result of subsequent windows to tmp + cgbn_load(bn_env, a, &(sdata3[0].g)); + cgbn_mul_wide(bn_env, r, a, tmp_g); + cgbn_rem_wide(bn_env, tmp_g, r, n_square); + + cgbn_load(bn_env, a, &(sdata3[0].h)); + cgbn_mul_wide(bn_env, r, a, tmp_h); + cgbn_rem_wide(bn_env, tmp_h, r, n_square); + } + } + __syncthreads(); + } + + __syncthreads(); + + // Write the final result for this block to global memory + if (shm_id == 0) { + cgbn_store(bn_env, &(result[blockIdx.x].g), tmp_g); + cgbn_store(bn_env, &(result[blockIdx.x].h), tmp_h); + } + __syncthreads(); +} + + +template +__global__ void reduce_sum_with_index(cgbn_error_report_t* report, CgbnPair* result, CgbnPair* arr, int* sample_bin, int count, CgbnPair* one) { + + typedef cgbn_context_t context_t; + typedef cgbn_env_t env_t; + typedef typename env_t::cgbn_t bn_t; + typedef typename env_t::cgbn_wide_t bn_w_t; + + int id = (blockIdx.x * blockDim.x + threadIdx.x) / TPI; + int shm_id = threadIdx.x / TPI; + int IPB = blockDim.x / TPI; + + context_t bn_context(cgbn_report_monitor, report, id); + env_t bn_env(bn_context); + + extern __shared__ CgbnPair sdata4[]; + bn_t a, b, c, tmp_g, tmp_h; + bn_t n_square; + bn_w_t r; + + int total_windows = (count - 1) / (IPB * gridDim.x) + 1; + cgbn_load(bn_env, n_square, &c_PubKey.n_square); + for (unsigned int window = 0; window < total_windows; window++) { + int global_position = id + window * IPB * gridDim.x; + +#ifdef DEBUG + printf("id %d shm_id %d IPB %d threadIdx.x %d blockIdx.x %d gridDim.x %d window %d total_windows %d global_position %d \n", id, shm_id, IPB, threadIdx.x, blockIdx.x, gridDim.x, window, total_windows, global_position); +#endif + + if (global_position >= count) { + // Load rand_seed into sdata4 directly for positions exceeding count + sdata4[shm_id] = one[0]; + } else { + int sample_id = sample_bin[global_position]; + //printf("loading global position %d sample id %d", global_position, sample_id); + // each shm_id copy one instance from global to shared mem + sdata4[shm_id] = arr[sample_id]; + } + __syncthreads(); + + // Perform reduction in shared memory + for (unsigned int s = IPB / 2; s > 0; s >>= 1) { + if (shm_id < s) { + // Load pairs of elements from shared memory and perform reduction + cgbn_load(bn_env, a, &(sdata4[shm_id].g)); + cgbn_load(bn_env, b, &(sdata4[shm_id + s].g)); + cgbn_mul_wide(bn_env, r, a, b); + cgbn_rem_wide(bn_env, c, r, n_square); + cgbn_store(bn_env, &(sdata4[shm_id].g), c); + + cgbn_load(bn_env, a, &(sdata4[shm_id].h)); + cgbn_load(bn_env, b, &(sdata4[shm_id + s].h)); + cgbn_mul_wide(bn_env, r, a, b); + cgbn_rem_wide(bn_env, c, r, n_square); + cgbn_store(bn_env, &(sdata4[shm_id].h), c); + } + __syncthreads(); + } + + if (shm_id == 0) { + if (window == 0) { + // Store the result of the first window into tmp + cgbn_load(bn_env, tmp_g, &(sdata4[0].g)); + cgbn_load(bn_env, tmp_h, &(sdata4[0].h)); + } else { + // Add the result of subsequent windows to tmp + cgbn_load(bn_env, a, &(sdata4[0].g)); + cgbn_mul_wide(bn_env, r, a, tmp_g); + cgbn_rem_wide(bn_env, tmp_g, r, n_square); + + cgbn_load(bn_env, a, &(sdata4[0].h)); + cgbn_mul_wide(bn_env, r, a, tmp_h); + cgbn_rem_wide(bn_env, tmp_h, r, n_square); + } + } + __syncthreads(); + } + + __syncthreads(); + + // Write the final result for this block to global memory + if (shm_id == 0) { + cgbn_store(bn_env, &(result[blockIdx.x].g), tmp_g); + cgbn_store(bn_env, &(result[blockIdx.x].h), tmp_h); + } + __syncthreads(); +} + +#endif // PAILLIER_H diff --git a/integration/xgboost/encryption_plugins/.editorconfig b/integration/xgboost/encryption_plugins/nvflare_plugin/.editorconfig similarity index 100% rename from integration/xgboost/encryption_plugins/.editorconfig rename to integration/xgboost/encryption_plugins/nvflare_plugin/.editorconfig diff --git a/integration/xgboost/encryption_plugins/nvflare_plugin/CMakeLists.txt b/integration/xgboost/encryption_plugins/nvflare_plugin/CMakeLists.txt new file mode 100644 index 0000000000..b834206f4f --- /dev/null +++ b/integration/xgboost/encryption_plugins/nvflare_plugin/CMakeLists.txt @@ -0,0 +1,49 @@ + +option(GOOGLE_TEST "Build google tests" OFF) + +file(GLOB_RECURSE LIB_SRC + ${CMAKE_SOURCE_DIR}/shared/dam/*.cc + ${CMAKE_SOURCE_DIR}/shared/plugins/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc +) +message(STATUS "LIB_SRC files: ${LIB_SRC}") + +set(TARGET_NAME nvflare) +add_library(${TARGET_NAME} SHARED ${LIB_SRC}) +set_target_properties(${TARGET_NAME} PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ENABLE_EXPORTS ON +) +target_include_directories(${TARGET_NAME} PRIVATE + ${CMAKE_SOURCE_DIR}/shared/include + ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +if (APPLE) + add_link_options("LINKER:-object_path_lto,$_lto.o") + add_link_options("LINKER:-cache_path_lto,${CMAKE_BINARY_DIR}/LTOCache") +endif () + +#-- Unit Tests +if(GOOGLE_TEST) + find_package(GTest REQUIRED) + enable_testing() + add_executable(nvflare_test) + target_link_libraries(nvflare_test PRIVATE ${TARGET_NAME}) + + + target_include_directories(nvflare_test PRIVATE + ${CMAKE_SOURCE_DIR}/shared/include + ${CMAKE_CURRENT_SOURCE_DIR}/src + ) + + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) + + add_test( + NAME TestNvflarePlugins + COMMAND nvflare_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +endif() diff --git a/integration/xgboost/encryption_plugins/nvflare_plugin/README.md b/integration/xgboost/encryption_plugins/nvflare_plugin/README.md new file mode 100644 index 0000000000..a2c20a5461 --- /dev/null +++ b/integration/xgboost/encryption_plugins/nvflare_plugin/README.md @@ -0,0 +1,5 @@ +# XGBoost nvflare plugin + +Includes: + - nvflare plugin (that does not do encryption in the C++ code but rely on NVFlare python side to do it) + - pass-thru plugin (that just pass through to showcase how to write a plugin) diff --git a/integration/xgboost/encryption_plugins/src/README.md b/integration/xgboost/encryption_plugins/nvflare_plugin/src/README.md similarity index 100% rename from integration/xgboost/encryption_plugins/src/README.md rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/README.md diff --git a/integration/xgboost/encryption_plugins/src/plugins/delegated_plugin.cc b/integration/xgboost/encryption_plugins/nvflare_plugin/src/delegated_plugin.cc similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/delegated_plugin.cc rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/delegated_plugin.cc diff --git a/integration/xgboost/encryption_plugins/src/plugins/nvflare_plugin.cc b/integration/xgboost/encryption_plugins/nvflare_plugin/src/nvflare_plugin.cc similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/nvflare_plugin.cc rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/nvflare_plugin.cc diff --git a/integration/xgboost/encryption_plugins/src/include/nvflare_plugin.h b/integration/xgboost/encryption_plugins/nvflare_plugin/src/nvflare_plugin.h similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/nvflare_plugin.h rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/nvflare_plugin.h diff --git a/integration/xgboost/encryption_plugins/src/plugins/pass_thru_plugin.cc b/integration/xgboost/encryption_plugins/nvflare_plugin/src/pass_thru_plugin.cc similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/pass_thru_plugin.cc rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/pass_thru_plugin.cc diff --git a/integration/xgboost/encryption_plugins/src/include/pass_thru_plugin.h b/integration/xgboost/encryption_plugins/nvflare_plugin/src/pass_thru_plugin.h similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/pass_thru_plugin.h rename to integration/xgboost/encryption_plugins/nvflare_plugin/src/pass_thru_plugin.h diff --git a/integration/xgboost/encryption_plugins/tests/CMakeLists.txt b/integration/xgboost/encryption_plugins/nvflare_plugin/tests/CMakeLists.txt similarity index 100% rename from integration/xgboost/encryption_plugins/tests/CMakeLists.txt rename to integration/xgboost/encryption_plugins/nvflare_plugin/tests/CMakeLists.txt diff --git a/integration/xgboost/encryption_plugins/tests/test_dam.cc b/integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_dam.cc similarity index 100% rename from integration/xgboost/encryption_plugins/tests/test_dam.cc rename to integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_dam.cc diff --git a/integration/xgboost/encryption_plugins/tests/test_main.cc b/integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_main.cc similarity index 100% rename from integration/xgboost/encryption_plugins/tests/test_main.cc rename to integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_main.cc diff --git a/integration/xgboost/encryption_plugins/tests/test_tenseal.py b/integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_tenseal.py similarity index 100% rename from integration/xgboost/encryption_plugins/tests/test_tenseal.py rename to integration/xgboost/encryption_plugins/nvflare_plugin/tests/test_tenseal.py diff --git a/integration/xgboost/encryption_plugins/src/dam/README.md b/integration/xgboost/encryption_plugins/shared/dam/README.md old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/dam/README.md rename to integration/xgboost/encryption_plugins/shared/dam/README.md diff --git a/integration/xgboost/encryption_plugins/src/dam/dam.cc b/integration/xgboost/encryption_plugins/shared/dam/dam.cc old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/dam/dam.cc rename to integration/xgboost/encryption_plugins/shared/dam/dam.cc diff --git a/integration/xgboost/encryption_plugins/src/include/base_plugin.h b/integration/xgboost/encryption_plugins/shared/include/base_plugin.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/base_plugin.h rename to integration/xgboost/encryption_plugins/shared/include/base_plugin.h diff --git a/integration/xgboost/encryption_plugins/src/include/dam.h b/integration/xgboost/encryption_plugins/shared/include/dam.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/dam.h rename to integration/xgboost/encryption_plugins/shared/include/dam.h diff --git a/integration/xgboost/encryption_plugins/src/include/data_set_ids.h b/integration/xgboost/encryption_plugins/shared/include/data_set_ids.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/data_set_ids.h rename to integration/xgboost/encryption_plugins/shared/include/data_set_ids.h diff --git a/integration/xgboost/encryption_plugins/src/include/delegated_plugin.h b/integration/xgboost/encryption_plugins/shared/include/delegated_plugin.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/delegated_plugin.h rename to integration/xgboost/encryption_plugins/shared/include/delegated_plugin.h diff --git a/integration/xgboost/encryption_plugins/src/include/local_plugin.h b/integration/xgboost/encryption_plugins/shared/include/local_plugin.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/local_plugin.h rename to integration/xgboost/encryption_plugins/shared/include/local_plugin.h diff --git a/integration/xgboost/encryption_plugins/src/include/util.h b/integration/xgboost/encryption_plugins/shared/include/util.h old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/include/util.h rename to integration/xgboost/encryption_plugins/shared/include/util.h diff --git a/integration/xgboost/encryption_plugins/src/plugins/local_plugin.cc b/integration/xgboost/encryption_plugins/shared/plugins/local_plugin.cc old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/local_plugin.cc rename to integration/xgboost/encryption_plugins/shared/plugins/local_plugin.cc diff --git a/integration/xgboost/encryption_plugins/src/plugins/plugin_main.cc b/integration/xgboost/encryption_plugins/shared/plugins/plugin_main.cc old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/plugin_main.cc rename to integration/xgboost/encryption_plugins/shared/plugins/plugin_main.cc diff --git a/integration/xgboost/encryption_plugins/src/plugins/util.cc b/integration/xgboost/encryption_plugins/shared/plugins/util.cc old mode 100644 new mode 100755 similarity index 100% rename from integration/xgboost/encryption_plugins/src/plugins/util.cc rename to integration/xgboost/encryption_plugins/shared/plugins/util.cc