Update on "stop double-installing ExecuTorch in one-off linux jobs"
setup-linux.sh already installs ExecuTorch with XNNPACK (and it passes use-pt-pinned-commit as it should).

Differential Revision: [D67996460](https://our.internmc.facebook.com/intern/diff/D67996460/)

[ghstack-poisoned]
swolchok committed Jan 17, 2025
2 parents d09a372 + 93fcdc2 commit e1862fc
Showing 123 changed files with 3,944 additions and 3,905 deletions.
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -1,4 +1,5 @@
# The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
tracking_issue: 7679
ciflow_push_tags:
- ciflow/android
- ciflow/apple
4 changes: 2 additions & 2 deletions .github/workflows/android-perf.yml
@@ -260,7 +260,7 @@ jobs:
--output_name="${OUT_ET_MODEL_NAME}.pte"
ls -lh "${OUT_ET_MODEL_NAME}.pte"
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
export PYTHONPATH=$(pwd)/..
@@ -347,7 +347,7 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
export ANDROID_ABIS="arm64-v8a"
PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
# Let's see how expensive this job is; we might want to tone it down by running it periodically
benchmark-on-device:
22 changes: 22 additions & 0 deletions .github/workflows/pull.yml
@@ -330,6 +330,9 @@ jobs:

unittest-arm:
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -392,6 +395,25 @@
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
test-qnn-models-linux:
name: test-qnn-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 180
script: |
# The generic Linux job chooses to use the base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
# placeholder for running test_qnn_delegate.py; a job matrix could trigger different variants here (see test-llama-runner-qnn-linux)
# reminder: make sure each job runs fast
test-phi-3-mini-runner-linux:
name: test-phi-3-mini-runner-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
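A note on the conda trick in the new test-qnn-models-linux job above: `conda env list --json` prints a JSON object whose `envs` array lists environment paths, and the jq filter `.envs | .[-1]` selects the last one (the image-provisioned env rather than base, assuming the Docker image creates exactly one extra env). A minimal Python sketch of the same selection, with hypothetical paths:

```python
import json

# Hypothetical `conda env list --json` output inside the CI image: the base
# env is listed first, the image-provisioned env last.
sample = '{"envs": ["/opt/conda", "/opt/conda/envs/ci"]}'

# Python equivalent of the jq filter `.envs | .[-1]`: take the last entry.
conda_env = json.loads(sample)["envs"][-1]
print(conda_env)  # /opt/conda/envs/ci
```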
6 changes: 6 additions & 0 deletions .github/workflows/trunk.yml
@@ -132,6 +132,9 @@ jobs:
test-arm-backend-delegation:
name: test-arm-backend-delegation
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -159,6 +162,9 @@ jobs:
test-arm-reference-delegation:
name: test-arm-reference-delegation
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
5 changes: 1 addition & 4 deletions .lintrunner.toml
@@ -78,8 +78,6 @@ exclude_patterns = [
# File contains @generated
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
# Want to be able to keep c10 in sync with PyTorch core.
'runtime/core/portable_type/c10/**',
]
command = [
'python',
@@ -263,8 +261,6 @@ exclude_patterns = [
'extension/**',
'kernels/optimized/**',
'runtime/core/exec_aten/**',
# Want to be able to keep c10 in sync with PyTorch core.
'runtime/core/portable_type/c10/**',
'runtime/executor/tensor_parser_aten.cpp',
'scripts/**',
'test/**',
@@ -298,6 +294,7 @@ include_patterns = [
'build/**/*.py',
'codegen/**/*.py',
# 'devtools/**/*.py',
'devtools/visualization/**/*.py',
'docs/**/*.py',
# 'examples/**/*.py',
# 'exir/**/*.py',
13 changes: 1 addition & 12 deletions CMakeLists.txt
@@ -361,7 +361,7 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")
"fix for this restriction."
)
endif()
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type)
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)

#
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -544,7 +544,6 @@ endif()
target_include_directories(
executorch_core PUBLIC ${_common_include_directories}
)
target_compile_definitions(executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
target_compile_options(executorch_core PUBLIC ${_common_compile_options})
if(MAX_KERNEL_NUM)
target_compile_definitions(
@@ -565,7 +564,6 @@ if(EXECUTORCH_BUILD_PYBIND AND APPLE)
target_include_directories(
executorch_core_shared PUBLIC ${_common_include_directories}
)
target_compile_definitions(executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
target_compile_options(
executorch_core_shared PUBLIC ${_common_compile_options}
)
@@ -586,7 +584,6 @@ endif()
add_library(executorch ${_executorch__srcs})
target_link_libraries(executorch PRIVATE executorch_core)
target_include_directories(executorch PUBLIC ${_common_include_directories})
target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
target_compile_options(executorch PUBLIC ${_common_compile_options})
target_link_options_shared_lib(executorch)

@@ -620,12 +617,6 @@ endif()

# Install `executorch` library as well as `executorch-config.cmake` under
# ${CMAKE_INSTALL_PREFIX}/
install(DIRECTORY runtime/core/ DESTINATION include/executorch/runtime/core FILES_MATCHING PATTERN "*.h")
install(DIRECTORY runtime/kernel/ DESTINATION include/executorch/runtime/kernel FILES_MATCHING PATTERN "*.h")
install(DIRECTORY runtime/platform/ DESTINATION include/executorch/runtime/platform FILES_MATCHING PATTERN "*.h")
install(DIRECTORY extension/kernel_util/ DESTINATION include/executorch/extension/kernel_util FILES_MATCHING PATTERN "*.h")
install(DIRECTORY extension/tensor/ DESTINATION include/executorch/extension/tensor FILES_MATCHING PATTERN "*.h")
install(DIRECTORY extension/threadpool/ DESTINATION include/executorch/extension/threadpool FILES_MATCHING PATTERN "*.h")
install(
TARGETS executorch executorch_core
DESTINATION lib
@@ -784,8 +775,6 @@ if(EXECUTORCH_BUILD_PYBIND)
target_include_directories(
util PUBLIC ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
)
target_compile_definitions(util PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)

target_compile_options(util PUBLIC ${_pybind_compile_options})
target_link_libraries(util PRIVATE torch c10 executorch extension_tensor)

2 changes: 0 additions & 2 deletions backends/apple/coreml/CMakeLists.txt
@@ -134,8 +134,6 @@ target_include_directories(
coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
)
target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/..)
target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/runtime/core/portable_type)
target_compile_definitions(coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS)
target_link_libraries(coremldelegate PRIVATE executorch_core)

if(EXECUTORCH_BUILD_DEVTOOLS)
6 changes: 3 additions & 3 deletions backends/apple/coreml/README.md
@@ -93,14 +93,14 @@ class Model(torch.nn.Module):
source_model = Model()
example_inputs = (torch.randn((1, 3, 256, 256)), )

pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()

quantization_config = LinearQuantizerConfig.from_dict(
{
"global_config": {
"quantization_scheme": QuantizationScheme.symmetric,
"activation_dtype": torch.uint8,
"weight_dtype": torch.int8,
"activation_dtype": torch.quint8,
"weight_dtype": torch.qint8,
"weight_per_channel": True,
}
}
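A side note on the README fix above: the config change swaps plain integer dtypes (torch.uint8, torch.int8) for PyTorch's quantized dtypes (torch.quint8, torch.qint8), which carry scale and zero-point metadata. A standalone PyTorch sketch of the distinction (not part of this diff):

```python
import torch

x = torch.randn(4)

# torch.uint8 is a plain storage dtype: just bytes, no quantization params.
plain = (x.abs() * 100).to(torch.uint8)

# torch.quint8 is a quantized dtype: the tensor carries scale and zero-point.
q = torch.quantize_per_tensor(x, scale=0.1, zero_point=128, dtype=torch.quint8)
print(q.q_scale(), q.q_zero_point())  # 0.1 128
```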
@@ -830,7 +830,6 @@
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"C10_USING_CUSTOM_GENERATED_MACROS",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@@ -912,7 +911,6 @@
DEVELOPMENT_TEAM = "";
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"C10_USING_CUSTOM_GENERATED_MACROS",
"ET_EVENT_TRACER_ENABLED=1",
"$(inherited)",
);
@@ -922,7 +920,6 @@
"$(SRCROOT)/../kvstore",
"$(SRCROOT)/../inmemoryfs",
"$(SRCROOT)/../include",
"$(SRCROOT)/../include/executorch/runtime/core/portable_type",
"$(SRCROOT)/../sdk",
"$(SRCROOT)/../util",
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",
@@ -954,7 +951,6 @@
"$(SRCROOT)/../kvstore",
"$(SRCROOT)/../inmemoryfs",
"$(SRCROOT)/../include",
"$(SRCROOT)/../include/executorch/runtime/core/portable_type",
"$(SRCROOT)/../sdk",
"$(SRCROOT)/../util",
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",
3 changes: 1 addition & 2 deletions backends/arm/CMakeLists.txt
@@ -14,8 +14,7 @@ endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type)
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Third-party folder and Ethos-U driver included
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
115 changes: 63 additions & 52 deletions backends/arm/_passes/arm_pass_manager.py
@@ -7,7 +7,6 @@

# pyre-unsafe

import torch
from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
AnnotateChannelsLastDimOrder,
)
@@ -47,7 +46,7 @@
)
from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
ConvertMeanDimToAveragePool,
ConvertMeanDimToAveragePoolPass,
)
from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
@@ -61,86 +60,98 @@
from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
UnsqueezeScalarPlaceholdersPass,
)
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_manager import PassManager
from torch.fx import GraphModule


class ArmPassManager(PassManager):

def _transform(self, graph_module: torch.fx.GraphModule):
def __init__(self, tosa_spec: TosaSpecification) -> None:
self.tosa_spec = tosa_spec
super().__init__()

def _transform(self, graph_module: GraphModule):
return self(graph_module).graph_module

def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
"""Apply passes before transforming program to backend"""
def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
self.add_pass(FuseQuantizedActivationPass())
self.add_pass(RemoveGetItemPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(ConvertMeanDimToAveragePoolPass())

self.add_pass(AnnotateDecomposedMatmulPass())
self.add_pass(QuantizeFullArgument())
self.add_pass(FoldAndAnnotateQParamsPass())
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(InsertTableOpsPass(exported_program))

self.add_pass(RemoveClonePass())
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(ConvertExpandCopyToRepeatPass())
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(MatchArgRanksPass(exported_program))
self.add_pass(KeepDimsFalseToSqueezePass())
self.add_pass(Conv1dUnsqueezePass(exported_program))
self.add_pass(DecomposeSelectPass())

self.add_pass(AnnotateChannelsLastDimOrder())

return self._transform(exported_program.graph_module)

def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:

self.add_pass(FuseQuantizedActivationPass())
self.add_pass(RemoveGetItemPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(DecomposeLayerNormPass())
self.add_pass(DecomposeVarPass())
self.add_pass(ConvertMeanDimToAveragePool())
self.add_pass(DecomposeMeanDimPass())
self.add_pass(ConvertSplitToSlicePass())
self.add_pass(ConvertMmToBmmPass())
# TODO MLETORCH-558
self.add_pass(ConvertMeanDimToAveragePoolPass())
self.add_pass(DecomposeDivPass())
self.add_pass(DecomposeSoftmaxesPass())

self.add_pass(AnnotateDecomposedMatmulPass())
self.add_pass(QuantizeFullArgument())
self.add_pass(
FoldAndAnnotateQParamsPass(
[
exir_ops.edge.aten.minimum.default,
exir_ops.edge.aten.maximum.default,
exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.avg_pool2d.default,
exir_ops.edge.aten.bmm.default,
exir_ops.edge.aten.cat.default,
exir_ops.edge.aten.convolution.default,
exir_ops.edge.aten.clone.default,
exir_ops.edge.aten.exp.default,
exir_ops.edge.aten.expand_copy.default,
exir_ops.edge.aten.full.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.log.default,
exir_ops.edge.aten.max_pool2d.default,
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.permute_copy.default,
exir_ops.edge.aten.reciprocal.default,
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten.repeat.default,
exir_ops.edge.aten.rsqrt.default,
exir_ops.edge.aten.select_copy.int,
exir_ops.edge.aten.sigmoid.default,
exir_ops.edge.aten.slice_copy.Tensor,
exir_ops.edge.aten.squeeze_copy.dims,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.sum.dim_IntList,
exir_ops.edge.aten.tanh.default,
exir_ops.edge.aten.unsqueeze_copy.default,
exir_ops.edge.aten.upsample_nearest2d.vec,
exir_ops.edge.aten.view_copy.default,
]
)
)
self.add_pass(FoldAndAnnotateQParamsPass())
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(InsertTableOpsPass(exported_program))

self.add_pass(RemoveClonePass())
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(ConvertExpandCopyToRepeatPass())
self.add_pass(UnsqueezeBeforeRepeatPass())
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
self.add_pass(SizeAdjustConv2DPass())
self.add_pass(RemoveClonePass())
self.add_pass(CastInt64ToInt32Pass(exported_program))
self.add_pass(MatchArgRanksPass(exported_program))
self.add_pass(DecomposeDivPass())
self.add_pass(KeepDimsFalseToSqueezePass())
self.add_pass(Conv1dUnsqueezePass(exported_program))
self.add_pass(DecomposeSoftmaxesPass())
self.add_pass(DecomposeSelectPass())

self.add_pass(AnnotateChannelsLastDimOrder())

return self._transform(exported_program.graph_module)

def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
"""Apply passes before transforming program to backend"""
if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
return self._tosa_080_BI_pipeline(exported_program)
elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
return self._tosa_080_MI_pipeline(exported_program)
else:
raise NotImplementedError(
f"No pass pipeline implemented for {self.tosa_spec=}"
)

def transform_for_annotation_pipeline(self, graph_module: GraphModule):
self.add_pass(ScalarsToAttributePass())
self.add_pass(DecomposeLayerNormPass())
self.add_pass(DecomposeVarPass())
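For orientation, a hedged usage sketch of the refactored ArmPassManager above, using only names that appear in this diff; the input ExportedProgram is assumed to come from an earlier export step:

```python
from executorch.backends.arm._passes.arm_pass_manager import ArmPassManager
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.exir import ExportedProgram
from torch.fx import GraphModule


def lower_for_tosa(exported_program: ExportedProgram) -> GraphModule:
    # "TOSA-0.80.0+BI" selects the base-inference pipeline; "TOSA-0.80.0+MI"
    # selects main-inference; any other spec raises NotImplementedError.
    tosa_spec = TosaSpecification.create_from_string("TOSA-0.80.0+BI")
    pass_manager = ArmPassManager(tosa_spec)
    return pass_manager.transform_to_backend_pipeline(exported_program)
```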