Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring Fusion Executor, pulling out compiled kernel #3468

Open
wants to merge 41 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
6bdd7f5
Redraft pulling compiled kernel out of kernel executor.
csarofeen Nov 24, 2024
a7fca2e
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Nov 30, 2024
dbb0554
Cleanup and preparation to cleanup executor_utils.h
csarofeen Nov 30, 2024
b7c9e7c
Move compilation logic out of executor_utils into compiled_kernel.
csarofeen Nov 30, 2024
b61ac3a
Remove compilation profiling from compiled kernel as it's still calle…
csarofeen Nov 30, 2024
6177db2
cleanup
csarofeen Nov 30, 2024
0e3cdd9
Fix input binding in executor.
csarofeen Dec 1, 2024
3961181
Kernel executor doesn't instantiate compiled kernel unless compiled, …
csarofeen Dec 2, 2024
f2b00bb
Fix type consistency in st matrix testing.
csarofeen Dec 3, 2024
56dda45
Fix build.
csarofeen Dec 3, 2024
5999480
Need to be consistent with types for fusion.manage.
csarofeen Dec 7, 2024
5920d34
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Dec 7, 2024
a7ad429
Repair serialization.
csarofeen Dec 8, 2024
d89d155
Fix check that disables parameter cache, the check was valid before l…
csarofeen Dec 15, 2024
2933509
Merge branch 'main' into compiled_kernel_2
csarofeen Dec 15, 2024
b222303
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Dec 18, 2024
239d652
Merge.
csarofeen Dec 18, 2024
8137228
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Dec 23, 2024
a9733b0
Merge conflicts.
csarofeen Dec 23, 2024
da1452c
Fix lowering hooks, rename compileFusion to compile.
csarofeen Dec 23, 2024
e1fcd7f
Fix param cache check with validation.
csarofeen Dec 25, 2024
b66e6c5
Remove refactor validation.
csarofeen Dec 25, 2024
7dc8d09
Merge branch 'main' into compiled_kernel_2
csarofeen Dec 25, 2024
90a76e4
Remove CompileOptions, make scheduler_type, fusion_id, concrete_id, r…
csarofeen Dec 31, 2024
5d20a08
Cleanup TODO
csarofeen Dec 31, 2024
102e72e
Separate out compiling and running a kernel through RTC directly from…
csarofeen Dec 31, 2024
d993887
Remove default constructor for CompiledKernel.
csarofeen Dec 31, 2024
d78d8b2
Cleanup TODO
csarofeen Dec 31, 2024
1209520
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Dec 31, 2024
495cbf7
Merge conflict and clang tidy.
csarofeen Dec 31, 2024
bd1d312
Fix string construction.
csarofeen Jan 1, 2025
b458c7e
Cleanup disableLaunchParamCache.
csarofeen Jan 1, 2025
c199020
Remove scheduler_type from KernelExecutor.
csarofeen Jan 1, 2025
e008bc7
PR review and minor cleanup.
csarofeen Jan 11, 2025
fb38b77
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Jan 11, 2025
7ac7121
Merge conflicts.
csarofeen Jan 11, 2025
2836d60
Pass block size to codegen.
csarofeen Jan 11, 2025
df789cb
Revert constexpr-const change in tests/cpp/utils.h, needed for clang …
csarofeen Jan 12, 2025
ffb901f
Merge branch 'main' into compiled_kernel_2
csarofeen Jan 13, 2025
c5939f0
Merge branch 'main' of https://github.com/NVIDIA/Fuser into compiled_…
csarofeen Jan 20, 2025
9163101
Fix merge conflicts.
csarofeen Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ list(APPEND NVFUSER_SRCS
${NVFUSER_SRCS_DIR}/preseg_passes/segment_inplace_update.cpp
${NVFUSER_SRCS_DIR}/rng.cpp
${NVFUSER_SRCS_DIR}/runtime/allocations.cpp
${NVFUSER_SRCS_DIR}/runtime/compiled_kernel.cpp
${NVFUSER_SRCS_DIR}/runtime/executor.cpp
${NVFUSER_SRCS_DIR}/runtime/executor_dispatch.cpp
${NVFUSER_SRCS_DIR}/runtime/executor_kernel_arg.cpp
Expand Down
9 changes: 6 additions & 3 deletions benchmarks/cpp/matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ static void SingleMatmulBase(
KernelExecutor ke;
ke.compile(fusion, args, launch_constraints, cparams);
NVF_CHECK(
getBankConflictInfo(ke.kernel(), launch_constraints).empty(),
getBankConflictInfo(ke.compiledKernel()->kernel(), launch_constraints)
.empty(),
"Shared memory bank conflict not removed.");

std::vector<c10::IValue> aten_inputs({inputs.first, inputs.second});
Expand Down Expand Up @@ -358,7 +359,7 @@ static void SingleMatmulPartitionedK(
auto lparams = LaunchParams();
ke.compile(fusion, args, lparams, cparams);
NVF_CHECK(
getBankConflictInfo(ke.kernel(), lparams).empty(),
getBankConflictInfo(ke.compiledKernel()->kernel(), lparams).empty(),
"Shared memory bank conflict not removed.");

// Warm up run
Expand Down Expand Up @@ -471,7 +472,9 @@ static void NvFuserScheduler_MatmulSplitKReduction(
fusion, args, heuristic_params->lparams, heuristic_params->cparams);

NVF_CHECK(
getBankConflictInfo(ke.kernel(), heuristic_params->lparams).empty(),
getBankConflictInfo(
ke.compiledKernel()->kernel(), heuristic_params->lparams)
.empty(),
"Shared memory bank conflict not removed.");

// Warm up run
Expand Down
1 change: 1 addition & 0 deletions csrc/cuda_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <cuda_runtime.h>
#include <driver_api.h>
#include <exceptions.h>
#include <nvrtc.h>

#define NVFUSER_NVRTC_SAFE_CALL(x) \
do { \
Expand Down
2 changes: 2 additions & 0 deletions csrc/fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ void Fusion::removeVal(Val* val) {
void Fusion::addInput(Val* input) {
assertInContainer(input, "Cannot register input ");

std::cout << "Registering input: " << input->toString() << std::endl;
csarofeen marked this conversation as resolved.
Show resolved Hide resolved

if (input->getValType().value() == ValType::TensorView) {
auto tv = input->as<TensorView>();
tv->setMemoryType(MemoryType::Global);
Expand Down
6 changes: 2 additions & 4 deletions csrc/polymorphic_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// clang-format on
#include <polymorphic_value.h>
#include <type.h>
#include <utils.h>

#include <string>

Expand Down Expand Up @@ -44,10 +45,7 @@ namespace PolymorphicValue_functions {
std::string toString(const PolymorphicValue& v) {
std::stringstream ss;
if (v.is<at::Tensor>()) {
const auto& t = v.as<at::Tensor>();
ss << "Tensor(sizes=" << t.sizes() << ", "
<< "stride=" << t.strides() << ", dtype=" << t.dtype()
<< ", device=" << t.device() << ", data_ptr=" << t.data_ptr() << ")";
ss << debug_str(v.as<at::Tensor>());
} else if (v.is<std::monostate>()) {
ss << "std::monostate";
} else if (v.is<StructHandle>()) {
Expand Down
14 changes: 8 additions & 6 deletions csrc/python_frontend/fusion_definition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,10 +487,11 @@ std::string FusionDefinition::lastCudaCode(

if (!override_user_schedule && (user_exec != nullptr)) {
if (intrinsic_code) {
result = user_exec->getStructuredCode(
user_exec->kernelString(), user_exec->kernel()->indexType());
result = user_exec->compiledKernel()->getStructuredCode(
user_exec->compiledKernel()->kernelString(),
user_exec->compiledKernel()->kernel()->indexType());
} else {
result = user_exec->kernelString();
result = user_exec->compiledKernel()->kernelString();
}
} else {
result = scheds->auto_gen_schedules->getMostRecentCode(intrinsic_code);
Expand All @@ -516,10 +517,11 @@ std::string FusionDefinition::cudaCodeFor(
scheds, user_sched_id.value(), device);
auto user_exec = user_sched.executor.get();
if (intrinsic_code) {
return user_exec->getStructuredCode(
user_exec->kernelString(), user_exec->kernel()->indexType());
return user_exec->compiledKernel()->getStructuredCode(
user_exec->compiledKernel()->kernelString(),
user_exec->compiledKernel()->kernel()->indexType());
} else {
return user_exec->kernelString();
return user_exec->compiledKernel()->kernelString();
}
}
}
Expand Down
Loading
Loading