diff --git a/docs/install/install_for_cpp.md b/docs/install/install_for_cpp.md index 7d86e8bd5..f0850712b 100644 --- a/docs/install/install_for_cpp.md +++ b/docs/install/install_for_cpp.md @@ -169,6 +169,12 @@ $ bazel build -c opt --config=gpu //itex:libitex_gpu_cc.so CC library location: `/bazel-bin/itex/libitex_gpu_cc.so` +NOTE: `libitex_gpu_cc.so` depends on `libitex_gpu_xetla.so`, so `libitex_gpu_xetla.so` should be copied to the same directory as `libitex_gpu_cc.so` +```bash +$ cd <path to intel-extension-for-tensorflow> +$ cp bazel-out/k8-opt-ST-*/bin/itex/core/kernels/gpu/libitex_gpu_xetla.so bazel-bin/itex/ +``` + For CPU support ```bash diff --git a/itex/BUILD b/itex/BUILD index 4c7a343e2..fbd50115e 100644 --- a/itex/BUILD +++ b/itex/BUILD @@ -132,6 +132,7 @@ itex_xpu_binary( "//itex/core/graph:xpu_graph", "//itex/core/kernels:xpu_kernel_cc", "//itex/core/profiler:gpu_profiler", + "//itex/core/kernels/gpu:libitex_gpu_xetla", ], ) + [ "//itex/core:protos_all_cc", diff --git a/itex/core/kernels/gpu/BUILD b/itex/core/kernels/gpu/BUILD index 4ac0c00fd..fb6b16b48 100644 --- a/itex/core/kernels/gpu/BUILD +++ b/itex/core/kernels/gpu/BUILD @@ -16,6 +16,7 @@ itex_xetla_binary( set_target = "gpu_orig_backend", visibility = ["//visibility:public"], deps = [ + "//itex/core/kernels/gpu/xetla:fused_einsum_impl", "//itex/core/kernels/gpu/xetla:mha_op", "//itex/core/kernels/gpu/xetla:mlp_op", ], diff --git a/itex/core/kernels/gpu/linalg/BUILD b/itex/core/kernels/gpu/linalg/BUILD index b97793fe3..d2dbd0864 100644 --- a/itex/core/kernels/gpu/linalg/BUILD +++ b/itex/core/kernels/gpu/linalg/BUILD @@ -1,4 +1,4 @@ -load("//itex:itex.bzl", "itex_xetla_library", "itex_xpu_library", "tf_copts") +load("//itex:itex.bzl", "itex_xpu_library", "tf_copts") itex_xpu_library( name = "linalg", @@ -37,19 +37,14 @@ itex_xpu_library( alwayslink = True, ) -itex_xetla_library( - name = "fused_einsum_impl", - srcs = ["fused_einsum_impl.cc"], - hdrs = [ - "einsum_helper.h", - "fused_einsum_impl.h", - ], +itex_xpu_library( + name = 
"fused_einsum_hdrs", + hdrs = ["fused_einsum_helper.h"], copts = tf_copts(), linkstatic = 1, visibility = ["//visibility:public"], deps = [ "//itex:core", - "@xetla//:xetla_header", ], alwayslink = True, ) @@ -58,7 +53,6 @@ itex_xpu_library( name = "einsum_op_impl", srcs = ["einsum_op_impl.cc"], hdrs = [ - "einsum_helper.h", "//itex/core/kernels/common:einsum_hdrs", ], copts = tf_copts(), @@ -66,9 +60,10 @@ itex_xpu_library( visibility = ["//visibility:public"], deps = [ "//itex/core/kernels/common:fill_functor", + "//itex/core/kernels/gpu:libitex_gpu_xetla", "//itex/core/kernels/gpu:matmul_op", "//itex/core/kernels/gpu:reduction_ops", - "//itex/core/kernels/gpu/linalg:fused_einsum_impl", + "//itex/core/kernels/gpu/linalg:fused_einsum_hdrs", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], diff --git a/itex/core/kernels/gpu/linalg/einsum_op_impl.cc b/itex/core/kernels/gpu/linalg/einsum_op_impl.cc index 6091458bb..9143a1b6f 100644 --- a/itex/core/kernels/gpu/linalg/einsum_op_impl.cc +++ b/itex/core/kernels/gpu/linalg/einsum_op_impl.cc @@ -17,7 +17,7 @@ limitations under the License. #include "itex/core/kernels/common/einsum_op_impl.h" -#include "itex/core/kernels/gpu/linalg/einsum_helper.h" +#include "itex/core/kernels/gpu/linalg/fused_einsum_helper.h" namespace itex { diff --git a/itex/core/kernels/gpu/linalg/einsum_helper.h b/itex/core/kernels/gpu/linalg/fused_einsum_helper.h similarity index 95% rename from itex/core/kernels/gpu/linalg/einsum_helper.h rename to itex/core/kernels/gpu/linalg/fused_einsum_helper.h index ccee56d5d..07f5b1c1d 100644 --- a/itex/core/kernels/gpu/linalg/einsum_helper.h +++ b/itex/core/kernels/gpu/linalg/fused_einsum_helper.h @@ -14,8 +14,8 @@ * limitations under the License. 
******************************************************************************/ -#ifndef ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_ -#define ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_ +#ifndef ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_ +#define ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_ #include "itex/core/utils/op_kernel.h" #include "itex/core/utils/plugin_tensor.h" @@ -122,4 +122,4 @@ void Dispatch(Args& args) { // NOLINT } // namespace functor } // namespace itex -#endif // ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_ +#endif // ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_ diff --git a/itex/core/kernels/gpu/xetla/BUILD b/itex/core/kernels/gpu/xetla/BUILD index 0f8cb49bb..a8f8e04ef 100644 --- a/itex/core/kernels/gpu/xetla/BUILD +++ b/itex/core/kernels/gpu/xetla/BUILD @@ -43,3 +43,20 @@ itex_xetla_library( ], alwayslink = True, ) + +itex_xetla_library( + name = "fused_einsum_impl", + srcs = ["fused_einsum_impl.cc"], + hdrs = [ + "fused_einsum_impl.h", + ], + copts = tf_copts(), + linkstatic = 1, + visibility = ["//visibility:public"], + deps = [ + "//itex:core", + "//itex/core/kernels/gpu/linalg:fused_einsum_hdrs", + "@xetla//:xetla_header", + ], + alwayslink = True, +) diff --git a/itex/core/kernels/gpu/linalg/fused_einsum_impl.cc b/itex/core/kernels/gpu/xetla/fused_einsum_impl.cc similarity index 97% rename from itex/core/kernels/gpu/linalg/fused_einsum_impl.cc rename to itex/core/kernels/gpu/xetla/fused_einsum_impl.cc index 1f4d01473..31237b2d9 100644 --- a/itex/core/kernels/gpu/linalg/fused_einsum_impl.cc +++ b/itex/core/kernels/gpu/xetla/fused_einsum_impl.cc @@ -14,9 +14,9 @@ * limitations under the License. 
******************************************************************************/ -#include "itex/core/kernels/gpu/linalg/fused_einsum_impl.h" +#include "itex/core/kernels/gpu/xetla/fused_einsum_impl.h" -#include "itex/core/kernels/gpu/linalg/einsum_helper.h" +#include "itex/core/kernels/gpu/linalg/fused_einsum_helper.h" #include "itex/core/utils/op_requires.h" namespace itex { diff --git a/itex/core/kernels/gpu/linalg/fused_einsum_impl.h b/itex/core/kernels/gpu/xetla/fused_einsum_impl.h similarity index 98% rename from itex/core/kernels/gpu/linalg/fused_einsum_impl.h rename to itex/core/kernels/gpu/xetla/fused_einsum_impl.h index 369e1fe14..fa60a91f0 100644 --- a/itex/core/kernels/gpu/linalg/fused_einsum_impl.h +++ b/itex/core/kernels/gpu/xetla/fused_einsum_impl.h @@ -14,8 +14,8 @@ * limitations under the License. ******************************************************************************/ -#ifndef ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_ -#define ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_ +#ifndef ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_ +#define ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_ #include @@ -236,4 +236,4 @@ class FusedEinsumKernel { } // namespace gpu::xetla -#endif // ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_ +#endif // ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_