Skip to content

Commit

Permalink
import
Browse files Browse the repository at this point in the history
  • Loading branch information
Priya2698 committed Dec 10, 2024
1 parent 15a9f50 commit 810fcb3
Show file tree
Hide file tree
Showing 31 changed files with 104 additions and 60 deletions.
4 changes: 2 additions & 2 deletions benchmarks/python/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
import pytest
from .core import BENCHMARK_CONFIG
from .core import BENCHMARK_CONFIG, DEFAULT_EXECUTORS
from nvfuser.pytorch_utils import DEVICE_PROPERTIES
from .global_params import DEFAULT_EXECUTORS


def pytest_addoption(parser):
parser.addoption(
Expand Down
16 changes: 4 additions & 12 deletions benchmarks/python/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import warnings
import thunder
from thunder.executors.nvfuserex import nvfuserex
from .global_params import DEFAULT_EXECUTORS

# These variables can be overwritten through CLI commands
# --benchmark-rounds=rounds --benchmark-warmup-rounds=warmup_rounds
Expand All @@ -22,6 +21,9 @@
L2_CACHE_SIZE = DEVICE_PROPERTIES["gpu_l2_bytes"]
PEAK_BANDWIDTH_GBPS = DEVICE_PROPERTIES["gpu_peak_bandwidth_gbps"]

# Default executors
DEFAULT_EXECUTORS = ["eager", "torchcompile", "thunder"]


def clear_l2_cache() -> None:
"""
Expand All @@ -46,19 +48,9 @@ def clear_dynamo_cache() -> None:
def unary_bwd_torch(inputs: List):  # [output, grad_out]
    """Benchmark helper: backpropagate an upstream gradient through a forward output.

    inputs[0] is the forward-pass output tensor, inputs[1] the gradient to
    feed into its backward pass.
    """
    output, grad_out = inputs[0], inputs[1]
    # retain_graph=True so the autograd graph survives repeated benchmark rounds
    output.backward(grad_out, retain_graph=True)

def with_executor(executor: str, fwd_fn: Callable) -> Callable:
    """Wrap *fwd_fn* for the requested executor backend.

    executor must be one of DEFAULT_EXECUTORS:
      - "eager": return fwd_fn unchanged
      - "torchcompile": compile with torch.compile
      - "thunder": jit with thunder using the nvFuser executor
    """
    assert executor in DEFAULT_EXECUTORS
    if executor == "eager":
        return fwd_fn
    elif executor == "torchcompile":
        return torch.compile(fwd_fn)
    elif executor == "thunder":
        # bookend disabled so nvFuser sees the whole trace
        return thunder.jit(fwd_fn, nv_enable_bookend=False, executors=[nvfuserex])

def with_executor(executor: str, fwd_fn: Callable) -> Callable:
assert executor in ["eager", "torchcompile", "thunder"]
assert executor in DEFAULT_EXECUTORS
if executor == "eager":
return fwd_fn
if executor == "torchcompile":
Expand Down
2 changes: 0 additions & 2 deletions benchmarks/python/global_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
# Datatypes that will be promoted to Datatype.Float in Fusion Definitions
PROMOTE_DTYPES = [DataType.BFloat16, DataType.Half]

# Default executors
DEFAULT_EXECUTORS = ["eager", "torchcompile", "thunder"]
# Model Parameters from LLMs (GPT2/3, PaLM, LLama)

# Embedding size: d_model, d_ff = 4 * d_model
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_batchnorm_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
# SPDX-License-Identifier: BSD-3-Clause
import pytest
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES
from .normalization import norm_bwd_nvf_benchmark, norm_bwd_baseline_benchmark
from .core import DEFAULT_EXECUTORS


@pytest.mark.parametrize("size", generate_input_sizes(dims=4))
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_batchnorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
# SPDX-License-Identifier: BSD-3-Clause
import pytest
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES
from .normalization import norm_fwd_nvf_benchmark, norm_fwd_baseline_benchmark
from .core import DEFAULT_EXECUTORS


@pytest.mark.parametrize("size", generate_input_sizes(dims=4))
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_broadcast_add_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES


def bcast_add_fusion(
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_dropout_layernorm_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
unary_bwd_torch,
compute_total_iobytes,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import dropout_layernorm


Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_dropout_layernorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
clear_dynamo_cache,
compute_total_iobytes,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import dropout_layernorm


Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_dropout_rmsnorm_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
unary_bwd_torch,
compute_total_iobytes,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import dropout_rmsnorm


Expand Down
3 changes: 2 additions & 1 deletion benchmarks/python/test_dropout_rmsnorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
clear_dynamo_cache,
compute_total_iobytes,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import dropout_rmsnorm


Expand Down
10 changes: 8 additions & 2 deletions benchmarks/python/test_gelu_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import gelu

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/python/test_gelu_bwd_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np


Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_gelu_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import gelu


Expand Down
2 changes: 1 addition & 1 deletion benchmarks/python/test_groupnorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES

Expand Down
8 changes: 7 additions & 1 deletion benchmarks/python/test_huggingface_attn_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_attn_inputs, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import huggingface_attn
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/python/test_huggingface_attn_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_attn_inputs, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import huggingface_attn
Expand Down
10 changes: 8 additions & 2 deletions benchmarks/python/test_layernorm_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import layernorm

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_layernorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import layernorm

Expand Down
8 changes: 7 additions & 1 deletion benchmarks/python/test_nanogpt_attn_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_attn_inputs, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import nanogpt_attn
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/python/test_nanogpt_attn_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_attn_inputs, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import nanogpt_attn
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_pointwise_mul.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES


def pointwise_mul_fusion(
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES


def reduction_fusion(
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_reduction_epilogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES

# test the influence of epilogue on the performance of reduction.
# current reduction scheduler only allows epilogue to be fused with outer reduction without post reduction broadcast.
Expand Down
10 changes: 8 additions & 2 deletions benchmarks/python/test_rmsnorm_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import rmsnorm

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_rmsnorm_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import rmsnorm

Expand Down
10 changes: 8 additions & 2 deletions benchmarks/python/test_scale_bias_relu_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import scale_bias_relu

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_scale_bias_relu_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import scale_bias_relu

Expand Down
10 changes: 8 additions & 2 deletions benchmarks/python/test_silu_mul_bwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, unary_bwd_torch, with_executor
from .core import (
run_benchmark,
clear_dynamo_cache,
unary_bwd_torch,
with_executor,
DEFAULT_EXECUTORS,
)
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
import numpy as np
from .torch_ops import silu_mul

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/python/test_silu_mul_fwd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
from nvfuser import FusionDefinition, DataType
from nvfuser.pytorch_utils import torch_dtype_to_nvfuser_dtype
from .core import run_benchmark, clear_dynamo_cache, with_executor
from .core import run_benchmark, clear_dynamo_cache, with_executor, DEFAULT_EXECUTORS
import torch
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES, DEFAULT_EXECUTORS
from .global_params import generate_input_sizes, FLOAT_DTYPES, PROMOTE_DTYPES
from .torch_ops import silu_mul


Expand Down
Loading

0 comments on commit 810fcb3

Please sign in to comment.