[onnxruntime/build] Add new flag enable_generic_interface to build primary EPs by default #23342

Open · wants to merge 24 commits into base: main
24 changes: 15 additions & 9 deletions cmake/CMakeLists.txt
@@ -259,6 +259,12 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)

option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF)
option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF)
option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF)
option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF)
option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF)

# ENABLE_TRAINING includes all training functionality
# The following 2 entry points
# 1. ORTModule
@@ -703,7 +709,7 @@ if (WIN32)
# structure was padded due to __declspec(align())
list(APPEND ORT_WARNING_FLAGS "/wd4324")
# warning C4800: Implicit conversion from 'X' to bool. Possible information loss
if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
list(APPEND ORT_WARNING_FLAGS "/wd4800")
endif()
# operator 'operator-name': deprecated between enumerations of different types
@@ -864,7 +870,7 @@ else()
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

if (onnxruntime_USE_CUDA)
if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda)
@@ -888,7 +894,7 @@ if (onnxruntime_USE_CUDA)
endif()
endif()

if (onnxruntime_USE_VITISAI)
if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai)
@@ -898,12 +904,12 @@ if (onnxruntime_USE_DNNL)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1)
endif()
if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1)
endif()
if (onnxruntime_USE_TENSORRT)
if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1)
#TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API.
list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt)
@@ -929,7 +935,7 @@ if (onnxruntime_USE_JSEP)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES js)
endif()
if (onnxruntime_USE_QNN)
if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn)
@@ -957,7 +963,7 @@ if (onnxruntime_USE_QNN)
endif()
endif()

if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so"
"${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll"
"${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so"
@@ -1416,7 +1422,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS)
)
endif()

if (onnxruntime_USE_OPENVINO)
if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)

add_definitions(-DUSE_OPENVINO=1)

@@ -1429,7 +1435,7 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_GPU=1)
endif()

if (onnxruntime_USE_OPENVINO_CPU)
if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built.
add_definitions(-DOPENVINO_CONFIG_CPU=1)
endif()

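The net effect of the CMakeLists.txt changes above: each provider block now also fires when its *_INTERFACE option is set, so the shared library is compiled with the same USE_* defines and provider names without requiring the full EP build. A rough Python restatement of that gating, for illustration only (the CMake above is the authoritative logic):

def enabled_providers(use_flags, interface_flags):
    # Providers whose USE_<EP> define and provider-name entry the CMake above would set.
    eps = ("cuda", "vitisai", "openvino", "tensorrt", "qnn")
    return [ep for ep in eps if use_flags.get(ep) or interface_flags.get(ep)]

# A generic-interface build turns on every *_INTERFACE option without any full EP build:
interface_only = {ep: True for ep in ("cuda", "vitisai", "openvino", "tensorrt", "qnn")}
print(enabled_providers({}, interface_only))
# -> ['cuda', 'vitisai', 'openvino', 'tensorrt', 'qnn']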
@@ -178,7 +178,6 @@ struct ProviderHost {
virtual std::string demangle(const char* name) = 0;
virtual std::string demangle(const std::string& name) = 0;

#ifdef USE_CUDA
virtual std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) = 0;
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;
@@ -190,7 +189,6 @@ struct ProviderHost {

virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
#endif

#ifdef USE_MIGRAPHX
virtual std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0;
@@ -200,7 +198,6 @@ struct ProviderHost {
#ifdef USE_ROCM
virtual std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(const char* name) = 0;
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;

virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0;
virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0;
@@ -1256,9 +1253,7 @@ struct ProviderHost {
virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0;
#endif

#if defined(USE_CUDA) || defined(USE_ROCM)
virtual PhiloxGenerator& PhiloxGenerator__Default() = 0;
#endif

#ifdef ENABLE_TRAINING_TORCH_INTEROP
virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0;
7 changes: 2 additions & 5 deletions onnxruntime/core/session/provider_bridge_ort.cc
@@ -258,10 +258,8 @@ struct ProviderHostImpl : ProviderHost {
void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); }
void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); }

#ifdef USE_CUDA
std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); }
std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); }
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }

void cuda__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
void cuda__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
@@ -271,7 +269,6 @@ struct ProviderHostImpl : ProviderHost {

Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); }
void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); }
#endif

#ifdef USE_MIGRAPHX
std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); }
@@ -291,6 +288,8 @@ struct ProviderHostImpl : ProviderHost {

Status RocmCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_ROCM().RocmCall_false(retCode, exprString, libName, successCode, msg, file, line); }
void RocmCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_ROCM().RocmCall_true(retCode, exprString, libName, successCode, msg, file, line); }
#else
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }
#endif

std::string GetEnvironmentVar(const std::string& var_name) override { return Env::Default().GetEnvironmentVar(var_name); }
@@ -1560,9 +1559,7 @@ struct ProviderHostImpl : ProviderHost {
training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); }
#endif

#if defined(USE_CUDA) || defined(USE_ROCM)
PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); }
#endif

#ifdef ENABLE_TRAINING_TORCH_INTEROP
void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); }
40 changes: 35 additions & 5 deletions tools/ci_build/build.py
@@ -782,6 +782,12 @@
parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels")
parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.")

parser.add_argument(
"--enable_generic_interface",
Contributor:
I think we should have a CI build that builds onnxruntime with this --enable_generic_interface option (without explicitly enabling any EPs). This would help us ensure that the basic build works and would catch future regressions. Let me see if I can think of where we could add this.

Contributor:
Maybe we can add a new build stage to the existing Windows CPU CI Pipeline: https://github.com/microsoft/onnxruntime/blob/06fc73b7d4d80bd97e140776590d98b868c7bc3a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml#L160C1-L179C1

Maybe something like:

- stage: x64_release_ep_generic_interface
  dependsOn: []
  jobs:
    - template: templates/jobs/win-ci-vs-2022-job.yml
      parameters:
        BuildConfig: 'RelWithDebInfo'
        buildArch: x64
        additionalBuildFlags: --enable_generic_interface
        msbuildPlatform: x64
        isX86: false
        job_name_suffix: x64_release_ep_generic_interface
        RunOnnxRuntimeTests: false  # --enable_generic_interface does not build tests
        EnablePython: false
        isTraining: false
        ORT_EP_NAME: CPU
        GenerateDocumentation: false
        WITH_CACHE: false
        MachinePool: 'onnxruntime-Win-CPU-2022'

Do you think this is appropriate @snnn ?

Contributor:
And maybe in a future PR, we could enable --enable_generic_interface with each EP combination and run unit tests. That is:

  • build with --enable_generic_interface --use_tensorrt and run unit tests
  • build with --enable_generic_interface --use_qnn and run unit tests
  • etc.

We would have to allow building unit tests with --enable_generic_interface as long as we're only building with one non-cpu EP.

Just a thought.

Author:
That's a good consideration.
A few questions:

  1. Do we want to enable generic build testing as part of this PR?
  2. What tools/ci_build/build.py build argument corresponds to ORT_EP_NAME: CPU (templates/jobs/win-ci-vs-2022-job.yml)?
  3. Currently the generic build has tests disabled. I guess we would have to relax that restriction when an EP (cpu or non-cpu) is also built with the interface.
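For illustration, a rough sketch of the per-EP matrix described in this thread, assuming build.py keeps accepting --enable_generic_interface together with a single EP flag (hypothetical driver script, not part of this PR):

# Hypothetical driver for the per-EP matrix sketched above; not part of this PR.
# In practice each EP flag needs its own SDK/location arguments (e.g. a QNN or TensorRT home),
# which are omitted here.
import subprocess
import sys

EP_FLAGS = ["--use_tensorrt", "--use_qnn", "--use_openvino", "--use_vitisai"]

for ep_flag in EP_FLAGS:
    cmd = [
        sys.executable, "tools/ci_build/build.py",
        "--enable_generic_interface", ep_flag,
        "--config", "RelWithDebInfo",
        "--build", "--test",  # would require relaxing the current "tests disabled" restriction
    ]
    print("Running:", " ".join(cmd))
    subprocess.run(cmd, check=True)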

action="store_true",
help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests",
)

if not is_windows():
parser.add_argument(
"--allow_running_as_root",
@@ -1042,6 +1048,12 @@
"-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
"-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER="
+ ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"),
# interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs
"-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_CUDA_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
"-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
# set vars for migraphx
"-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
"-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"),
@@ -1372,6 +1384,8 @@
cmake_args += ["-Donnxruntime_BUILD_QNN_EP_STATIC_LIB=ON"]
if args.android and args.use_qnn != "static_lib":
raise BuildError("Only support Android + QNN builds with QNN EP built as a static library.")
if args.use_qnn == "static_lib" and args.enable_generic_interface:
raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.")

if args.use_coreml:
cmake_args += ["-Donnxruntime_USE_COREML=ON"]
@@ -1529,6 +1543,12 @@
"-Donnxruntime_USE_FULL_PROTOBUF=ON",
]

# When this flag is enabled, we only build the ONNXRuntime shared library, expecting a compatible EP
# shared lib to be built in a separate process. So we skip the tests for now, as the ONNXRuntime shared lib built under
# this flag is not expected to work alone
if args.enable_generic_interface:
cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"]

if args.enable_lazy_tensor:
import torch

@@ -2649,6 +2669,9 @@
# Disable ONNX Runtime's builtin memory checker
args.disable_memleak_checker = True

if args.enable_generic_interface:
args.test = False

# If there was no explicit argument saying what to do, default
# to update, build and test (for native builds).
if not (args.update or args.clean or args.build or args.test or args.gen_doc):
@@ -2752,7 +2775,10 @@
source_dir = os.path.normpath(os.path.join(script_dir, "..", ".."))

# if using cuda, setup cuda paths and env vars
cuda_home, cudnn_home = setup_cuda_vars(args)
cuda_home = ""
cudnn_home = ""
if args.use_cuda:
cuda_home, cudnn_home = setup_cuda_vars(args)

mpi_home = args.mpi_home
nccl_home = args.nccl_home
@@ -2765,10 +2791,14 @@
armnn_home = args.armnn_home
armnn_libs = args.armnn_libs

qnn_home = args.qnn_home
qnn_home = ""
if args.use_qnn:
qnn_home = args.qnn_home

# if using tensorrt, setup tensorrt paths
tensorrt_home = setup_tensorrt_vars(args)
tensorrt_home = ""
if args.use_tensorrt:
tensorrt_home = setup_tensorrt_vars(args)

# if using migraphx, setup migraphx paths
migraphx_home = setup_migraphx_vars(args)
@@ -2853,9 +2883,9 @@
toolset = "host=" + host_arch + ",version=" + args.msvc_toolset
else:
toolset = "host=" + host_arch
if args.cuda_version:
if args.use_cuda and args.cuda_version:
toolset += ",cuda=" + args.cuda_version
elif args.cuda_home:
elif args.use_cuda and args.cuda_home:
toolset += ",cuda=" + args.cuda_home
if args.windows_sdk_version:
target_arch += ",version=" + args.windows_sdk_version
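In summary, the new flag only changes how build.py composes the CMake command line and which steps run afterwards. A condensed sketch of that mapping, based on the diff above (illustration only, not the full generate_build_tree logic):

def interface_cmake_args(enable_generic_interface):
    # Mirrors the five -Donnxruntime_USE_*_INTERFACE settings added in the diff above.
    state = "ON" if enable_generic_interface else "OFF"
    return [
        "-Donnxruntime_USE_TENSORRT_INTERFACE=" + state,
        "-Donnxruntime_USE_CUDA_INTERFACE=" + state,
        "-Donnxruntime_USE_OPENVINO_INTERFACE=" + state,
        "-Donnxruntime_USE_VITISAI_INTERFACE=" + state,
        "-Donnxruntime_USE_QNN_INTERFACE=" + state,
    ]

print(interface_cmake_args(True))
# The flag additionally forces -Donnxruntime_BUILD_UNIT_TESTS=OFF, sets args.test to False, and
# skips CUDA/QNN/TensorRT home discovery, since no EP SDK is needed for this shared-library-only build.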