Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into hopper_warptile_split
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobhinkle committed Jan 17, 2025
2 parents 9de3202 + bf66a0c commit 41e2b94
Show file tree
Hide file tree
Showing 91 changed files with 4,431 additions and 489 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env:

jobs:
clang-build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -37,7 +37,7 @@ jobs:
python setup.py build
dynamic-type-meson:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env:

jobs:
check-license:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -28,7 +28,7 @@ jobs:
test ! -s missing-header-files.txt
clang-tidy:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -72,7 +72,7 @@ jobs:
git --no-pager diff --diff-filter=d --name-only $head_commit | grep -e "csrc/.*\.cpp" -e "csrc/.*\.h" | xargs lintrunner --take CLANGTIDY --force-color
lintrunner:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand Down
31 changes: 28 additions & 3 deletions .github/workflows/nvfuser-ci-trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,34 @@ jobs:
args: ${{ env.args }}

# This job only runs for pull request comments
if: |
( startsWith(github.event.comment.body, '!build') || startsWith(github.event.comment.body, '!test') ) &&
(github.actor == 'xwang233' || github.actor == 'jjsjann123' || github.actor == 'chang-l' || github.actor == 'csarofeen' || github.actor == 'drzejan2' || github.actor == 'IvanYashchuk' || github.actor == 'jacobhinkle' || github.actor == 'kevinstephano' || github.actor == 'liqiangxl' || github.actor == 'mmigdal-nv' || github.actor == 'naoyam' || github.actor == 'ptrblck' || github.actor == 'rdspring1' || github.actor == 'samnordmann' || github.actor == 'zasdfgbnm' || github.actor == 'crcrpar' || github.actor == 'nWEIdia' || github.actor == 'Priya2698' || github.actor == 'wujingyue' || github.actor == 'tfogal' || github.actor == 'protonu' || github.actor == 'cowanmeg' || github.actor == 'nsarka')
if: >-
( startsWith(github.event.comment.body, '!build') ||
startsWith(github.event.comment.body, '!test')
) &&
( github.actor == 'xwang233' ||
github.actor == 'jjsjann123' ||
github.actor == 'chang-l' ||
github.actor == 'csarofeen' ||
github.actor == 'drzejan2' ||
github.actor == 'IvanYashchuk' ||
github.actor == 'jacobhinkle' ||
github.actor == 'kevinstephano' ||
github.actor == 'liqiangxl' ||
github.actor == 'mmigdal-nv' ||
github.actor == 'naoyam' ||
github.actor == 'ptrblck' ||
github.actor == 'rdspring1' ||
github.actor == 'samnordmann' ||
github.actor == 'zasdfgbnm' ||
github.actor == 'crcrpar' ||
github.actor == 'nWEIdia' ||
github.actor == 'Priya2698' ||
github.actor == 'wujingyue' ||
github.actor == 'tfogal' ||
github.actor == 'protonu' ||
github.actor == 'cowanmeg' ||
github.actor == 'nsarka'
)
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
Expand Down
48 changes: 48 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ name: pull
on:
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id}}
cancel-in-progress: true

run-name: CI status hello ${{ github.event.pull_request.number }} - ${{ github.event.pull_request.head.sha }}
jobs:
status_hello:
Expand All @@ -23,3 +27,47 @@ jobs:
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha }} \
-d "{\"state\":\"success\",\"target_url\":\"https://github.com/NVIDIA/Fuser/wiki/Bot-Commands\",\"description\":\"Authorized users: comment !build or !test to trigger CI pipelines. See wiki.\",\"context\":\"CI notes\"}"
pr-agent-tools:
name: PR Agent tools
runs-on: ubuntu-latest
permissions:
pull-requests: write
issues: write
packages: read
container:
image: ghcr.io/nvidia/fuser:ci-llm-workflow
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
env:
GITHUB__USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CONFIG__PUBLISH_OUTPUT: true

OPENAI__KEY: ${{ secrets.LLM_OPENAI__KEY }}
OPENAI__API_BASE: ${{ secrets.LLM_OPENAI__API_BASE }}
CONFIG__MODEL: ${{ secrets.LLM_CONFIG__MODEL }}
CONFIG__CUSTOM_MODEL_MAX_TOKENS: 131072

CONFIG__MAX_MODEL_TOKENS: 65536
CONFIG__PUBLISH_OUTPUT_PROGRESS: false

PR_REVIEWER__REQUIRE_SCORE_REVIEW: false
PR_REVIEWER__REQUIRE_TESTS_REVIEW: true
PR_REVIEWER__REQUIRE_ESTIMATE_EFFORT_TO_REVIEW: true
PR_REVIEWER__REQUIRE_CAN_BE_SPLIT_REVIEW: false
PR_REVIEWER__REQUIRE_SECURITY_REVIEW: false
PR_REVIEWER__REQUIRE_TICKET_ANALYSIS_REVIEW: false

PR_REVIEWER__ENABLE_REVIEW_LABELS_EFFORT: false
PR_REVIEWER__ENABLE_REVIEW_LABELS_SECURITY: false

PR_REVIEWER__PERSISTENT_COMMENT: true
PR_REVIEWER__FINAL_UPDATE_MESSAGE: false

PR_REVIEWER__EXTRA_INSTRUCTIONS: |
Focus on potential logic change, especially on changes to function signatures.
steps:
- name: PR Agent review
run: python /app/pr_agent/cli.py --pr_url ${{ github.event.pull_request.html_url }} review
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ list(APPEND NVFUSER_SRCS
${NVFUSER_SRCS_DIR}/scheduler/tools/loop_domain_scheduler.cpp
${NVFUSER_SRCS_DIR}/scheduler/tools/maxinfo_propagator.cpp
${NVFUSER_SRCS_DIR}/scheduler/tools/resize_utils.cpp
${NVFUSER_SRCS_DIR}/scheduler/tools/static_repeat.cpp
${NVFUSER_SRCS_DIR}/scheduler/transpose.cpp
${NVFUSER_SRCS_DIR}/scheduler/utils.cpp
${NVFUSER_SRCS_DIR}/scheduler/vectorize_helper.cpp
Expand Down Expand Up @@ -446,6 +447,7 @@ if(BUILD_PYTHON)
# nvfuser python API sources
set(NVFUSER_PYTHON_SRCS)
list(APPEND NVFUSER_PYTHON_SRCS
${NVFUSER_SRCS_DIR}/python_frontend/communicator_bindings.cpp
${NVFUSER_SRCS_DIR}/python_frontend/python_bindings.cpp
${NVFUSER_SRCS_DIR}/python_frontend/python_bindings_extension.cpp
${NVFUSER_SRCS_DIR}/python_frontend/schedule_bindings.cpp
Expand Down Expand Up @@ -698,7 +700,12 @@ if(BUILD_TEST)
add_test(tutorial "${NVFUSER_ROOT}/tests/cpp/test_tutorial.cpp" "")
list(APPEND TEST_BINARIES tutorial)

add_test(test_host_ir "${NVFUSER_ROOT}/tests/cpp/test_host_irs.cpp" "")
set(HOSTIR_TEST_SRCS)
list(APPEND HOSTIR_TEST_SRCS
${NVFUSER_ROOT}/tests/cpp/test_host_irs.cpp
${NVFUSER_ROOT}/tests/cpp/test_host_ir_integration.cpp
)
add_test(test_host_ir "${HOSTIR_TEST_SRCS}" "")
list(APPEND TEST_BINARIES test_host_ir)

if(BUILD_PYTHON)
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/cpp/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,14 @@ int64_t runBenchmarkIterations(
->groups()
.size() > 1;

const auto& compile_log = executor_cache->getMostRecentExecutorInfo();
auto params = toString(compile_log.params);
auto lparams = toString(
compile_log.fusion_executor->as<KernelExecutor>()->lastLaunchParams());
// Only set if not segmented. In the case of segmented fusions,
// this could be confusing as the log would reflect only the last
// segment. Revisit if necessary.
if (!segmented) {
const auto& compile_log = executor_cache->getMostRecentExecutorInfo();
auto params = toString(compile_log.params);
auto lparams = toString(
compile_log.fusion_executor->as<KernelExecutor>()->lastLaunchParams());
benchmark_state.SetLabel(params + lparams);
}

Expand Down
9 changes: 7 additions & 2 deletions benchmarks/python/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def pytest_addoption(parser):
action="store_true",
help="Benchmarks torch.compile mode.",
)
parser.addoption(
"--benchmark-thunder-torchcompile",
action="store_true",
help="Benchmarks torch.compile mode.",
)

# pytest-benchmark does not have CLI options to set rounds/warmup_rounds for benchmark.pedantic.
# The following two options are used to overwrite the default values through CLI.
Expand Down Expand Up @@ -104,14 +109,14 @@ def pytest_collection_modifyitems(session, config, items):

from nvfuser.pytorch_utils import retry_on_oom_or_skip_test

executors = ["eager", "torchcompile", "thunder"]
executors = ["eager", "torchcompile", "thunder", "thunder-torchcompile"]

def get_test_executor(item) -> str | None:
if hasattr(item, "callspec") and "executor" in item.callspec.params:
test_executor = item.callspec.params["executor"]
assert (
test_executor in executors
), f"Expected executor to be one of 'eager', 'torchcompile', 'thunder', found {test_executor}."
), f"Expected executor to be one of 'eager', 'torchcompile', 'thunder', 'thunder-torchcompile', found {test_executor}."
return test_executor
return None

Expand Down
17 changes: 11 additions & 6 deletions benchmarks/python/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-present NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
from collections.abc import Iterable
import pytest_benchmark
import torch
from torch.autograd import DeviceType
Expand Down Expand Up @@ -47,14 +48,18 @@ def unary_bwd_torch(inputs: List): # [output, grad_out]
inputs[0].backward(inputs[1], retain_graph=True)


def with_executor(executor: str, fwd_fn: Callable) -> Callable:
assert executor in ["eager", "torchcompile", "thunder"]
def with_executor(executor: str, fwd_fn: Callable, **kwargs) -> Callable:
assert executor in ["eager", "torchcompile", "thunder", "thunder-torchcompile"]
if executor == "eager":
return fwd_fn
if executor == "torchcompile":
return torch.compile(fwd_fn)
return torch.compile(fwd_fn, **kwargs)
if executor == "thunder":
return thunder.jit(fwd_fn, nv_enable_bookend=False, executors=[nvfuserex])
return thunder.jit(
fwd_fn, nv_enable_bookend=False, executors=[nvfuserex], **kwargs
)
if executor == "thunder-torchcompile":
return thunder.jit(fwd_fn, executors=["torchcompile"], **kwargs)


def compute_total_iobytes(
Expand Down Expand Up @@ -221,9 +226,9 @@ def set_metrics(
% Peak Bandwidth (SOL): 100 * Bandwidth /PEAK_BANDWIDTH
"""
if not iobytes:
if isinstance(inputs, torch.Tensor):
if not isinstance(inputs, Iterable):
inputs = [inputs]
if isinstance(outputs, torch.Tensor):
if not isinstance(outputs, Iterable):
outputs = [outputs]

iobytes = 0
Expand Down
1 change: 0 additions & 1 deletion benchmarks/python/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,6 @@ def norm_bwd_baseline_benchmark(

norm_fwd_fn = batchnorm_fwd_fn if norm == "batch_norm" else instancenorm_fwd_fn

# Compile the fwd fn for torchcompile
fwd_fn = with_executor(executor, norm_fwd_fn)
fwd_inputs = [inputs, weight, bias, running_mean, running_var]
outputs = fwd_fn(fwd_inputs)
Expand Down
Loading

0 comments on commit 41e2b94

Please sign in to comment.