[GPU] Fix aot issue caused by fused einsum build (#2450)

intel · Oct 26, 2023 · ab902a7 · ab902a7
1 parent 949b526
commit ab902a7
Show file tree

Hide file tree

Showing 9 changed files with 40 additions and 20 deletions.
diff --git a/docs/install/install_for_cpp.md b/docs/install/install_for_cpp.md
@@ -169,6 +169,12 @@ $ bazel build -c opt --config=gpu //itex:libitex_gpu_cc.so
 
 CC library location: `<Path to intel-extension-for-tensorflow>/bazel-bin/itex/libitex_gpu_cc.so`
 
+NOTE: `libitex_gpu_cc.so` depends on `libitex_gpu_xetla.so`, so `libitex_gpu_xetla.so` should be copied to the same directory as `libitex_gpu_cc.so`:
+```bash
+$ cd <Path to intel-extension-for-tensorflow>
+$ cp bazel-out/k8-opt-ST-*/bin/itex/core/kernels/gpu/libitex_gpu_xetla.so bazel-bin/itex/
+```
+
 For CPU support
 
 ```bash

diff --git a/itex/BUILD b/itex/BUILD
@@ -132,6 +132,7 @@ itex_xpu_binary(
             "//itex/core/graph:xpu_graph",
             "//itex/core/kernels:xpu_kernel_cc",
             "//itex/core/profiler:gpu_profiler",
+            "//itex/core/kernels/gpu:libitex_gpu_xetla",
         ],
     ) + [
         "//itex/core:protos_all_cc",

diff --git a/itex/core/kernels/gpu/BUILD b/itex/core/kernels/gpu/BUILD
@@ -16,6 +16,7 @@ itex_xetla_binary(
     set_target = "gpu_orig_backend",
     visibility = ["//visibility:public"],
     deps = [
+        "//itex/core/kernels/gpu/xetla:fused_einsum_impl",
         "//itex/core/kernels/gpu/xetla:mha_op",
         "//itex/core/kernels/gpu/xetla:mlp_op",
     ],

diff --git a/itex/core/kernels/gpu/linalg/BUILD b/itex/core/kernels/gpu/linalg/BUILD
@@ -1,4 +1,4 @@
-load("//itex:itex.bzl", "itex_xetla_library", "itex_xpu_library", "tf_copts")
+load("//itex:itex.bzl", "itex_xpu_library", "tf_copts")
 
 itex_xpu_library(
     name = "linalg",
@@ -37,19 +37,14 @@ itex_xpu_library(
     alwayslink = True,
 )
 
-itex_xetla_library(
-    name = "fused_einsum_impl",
-    srcs = ["fused_einsum_impl.cc"],
-    hdrs = [
-        "einsum_helper.h",
-        "fused_einsum_impl.h",
-    ],
+itex_xpu_library(
+    name = "fused_einsum_hdrs",
+    hdrs = ["fused_einsum_helper.h"],
     copts = tf_copts(),
     linkstatic = 1,
     visibility = ["//visibility:public"],
     deps = [
         "//itex:core",
-        "@xetla//:xetla_header",
     ],
     alwayslink = True,
 )
@@ -58,17 +53,17 @@ itex_xpu_library(
     name = "einsum_op_impl",
     srcs = ["einsum_op_impl.cc"],
     hdrs = [
-        "einsum_helper.h",
         "//itex/core/kernels/common:einsum_hdrs",
     ],
     copts = tf_copts(),
     linkstatic = 1,
     visibility = ["//visibility:public"],
     deps = [
         "//itex/core/kernels/common:fill_functor",
+        "//itex/core/kernels/gpu:libitex_gpu_xetla",
         "//itex/core/kernels/gpu:matmul_op",
         "//itex/core/kernels/gpu:reduction_ops",
-        "//itex/core/kernels/gpu/linalg:fused_einsum_impl",
+        "//itex/core/kernels/gpu/linalg:fused_einsum_hdrs",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],

diff --git a/itex/core/kernels/gpu/linalg/einsum_op_impl.cc b/itex/core/kernels/gpu/linalg/einsum_op_impl.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include "itex/core/kernels/common/einsum_op_impl.h"
 
-#include "itex/core/kernels/gpu/linalg/einsum_helper.h"
+#include "itex/core/kernels/gpu/linalg/fused_einsum_helper.h"
 
 namespace itex {
 

diff --git a/itex/core/kernels/gpu/linalg/einsum_helper.h → .../kernels/gpu/linalg/fused_einsum_helper.h b/itex/core/kernels/gpu/linalg/einsum_helper.h → .../kernels/gpu/linalg/fused_einsum_helper.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  ******************************************************************************/
 
-#ifndef ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_
-#define ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_
+#ifndef ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_
+#define ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_
 
 #include "itex/core/utils/op_kernel.h"
 #include "itex/core/utils/plugin_tensor.h"
@@ -122,4 +122,4 @@ void Dispatch(Args<T>& args) {  // NOLINT
 }  // namespace functor
 }  // namespace itex
 
-#endif  // ITEX_CORE_KERNELS_GPU_LINALG_EINSUM_HELPER_H_
+#endif  // ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_HELPER_H_
diff --git a/itex/core/kernels/gpu/xetla/BUILD b/itex/core/kernels/gpu/xetla/BUILD
@@ -43,3 +43,20 @@ itex_xetla_library(
     ],
     alwayslink = True,
 )
+
+itex_xetla_library(
+    name = "fused_einsum_impl",
+    srcs = ["fused_einsum_impl.cc"],
+    hdrs = [
+        "fused_einsum_impl.h",
+    ],
+    copts = tf_copts(),
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//itex:core",
+        "//itex/core/kernels/gpu/linalg:fused_einsum_hdrs",
+        "@xetla//:xetla_header",
+    ],
+    alwayslink = True,
+)
diff --git a/...e/kernels/gpu/linalg/fused_einsum_impl.cc → ...re/kernels/gpu/xetla/fused_einsum_impl.cc b/...e/kernels/gpu/linalg/fused_einsum_impl.cc → ...re/kernels/gpu/xetla/fused_einsum_impl.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
  ******************************************************************************/
 
-#include "itex/core/kernels/gpu/linalg/fused_einsum_impl.h"
+#include "itex/core/kernels/gpu/xetla/fused_einsum_impl.h"
 
-#include "itex/core/kernels/gpu/linalg/einsum_helper.h"
+#include "itex/core/kernels/gpu/linalg/fused_einsum_helper.h"
 #include "itex/core/utils/op_requires.h"
 
 namespace itex {

diff --git a/...re/kernels/gpu/linalg/fused_einsum_impl.h → ...ore/kernels/gpu/xetla/fused_einsum_impl.h b/...re/kernels/gpu/linalg/fused_einsum_impl.h → ...ore/kernels/gpu/xetla/fused_einsum_impl.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  ******************************************************************************/
 
-#ifndef ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_
-#define ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_
+#ifndef ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_
+#define ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_
 
 #include <xetla.hpp>
 
@@ -236,4 +236,4 @@ class FusedEinsumKernel {
 
 }  // namespace gpu::xetla
 
-#endif  // ITEX_CORE_KERNELS_GPU_LINALG_FUSED_EINSUM_IMPL_H_
+#endif  // ITEX_CORE_KERNELS_GPU_XETLA_FUSED_EINSUM_IMPL_H_