Skip to content

Commit

Permalink
Measure time taken for each step and print number of symbols (#3039)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #3039

- Measure time taken for each step on Nova
- print number of symbols

Reviewed By: q10

Differential Revision: D61816235

fbshipit-source-id: 1dfef16e116f7b15f2014115a39a4365a1389089
  • Loading branch information
spcyppt authored and facebook-github-bot committed Aug 28, 2024
1 parent 745f466 commit dba7263
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 1 deletion.
9 changes: 8 additions & 1 deletion .github/scripts/fbgemm_gpu_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,15 @@ __print_library_infos () {
echo "[CHECK] Listing out the GLIBCXX versions referenced:"
print_glibc_info "${library}"

echo "[CHECK] Checking symbols: "
print_exec "nm -gDC ${library} > symbols"
echo "[CHECK] Number of symbols in ${library}: $(wc -l < symbols)"
echo "[CHECK] Number of fbgemm symbols: $(grep -c fbgemm symbols)"

print_exec "nm -gDCu ${library} > usymbols"
echo "[CHECK] Number of undefined symbols: $(wc -l < usymbols)"
echo "[CHECK] Listing out undefined symbols:"
print_exec "nm -gDCu ${library} | sort"
print_exec "sort usymbols"

echo "[CHECK] Listing out external shared libraries linked:"
print_exec ldd "${library}"
Expand Down
6 changes: 6 additions & 0 deletions .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,14 @@ run_python_test () {

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")
# shellcheck disable=SC2155
local start=$(date +%s)

# shellcheck disable=SC2086
if print_exec conda run --no-capture-output ${env_prefix} python -m pytest "${pytest_args[@]}" --cache-clear "${python_test_file}"; then
echo "[TEST] Python test suite PASSED: ${python_test_file}"
local test_time=$(($(date +%s)-start))
echo "[TEST] Python test time for ${python_test_file}: ${test_time} seconds"
echo ""
echo ""
echo ""
Expand All @@ -52,6 +56,8 @@ run_python_test () {
# shellcheck disable=SC2086
if exec_with_retries 2 conda run --no-capture-output ${env_prefix} python -m pytest "${pytest_args[@]}" --lf --last-failed-no-failures none "${python_test_file}"; then
echo "[TEST] Python test suite PASSED with retries: ${python_test_file}"
local test_time=$(($(date +%s)-start))
echo "[TEST] Python test time with retries for ${python_test_file}: ${test_time} seconds"
echo ""
echo ""
echo ""
Expand Down
15 changes: 15 additions & 0 deletions .github/scripts/nova_postscript.bash
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,22 @@ echo "[NOVA] Current working directory: $(pwd)"
# shellcheck source=.github/scripts/setup_env.bash
. "${PRELUDE}";

# Record time for each step
start_time=$(date +%s)

# Collect PyTorch environment information
collect_pytorch_env_info "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to collect PyTorch environment information: ${runtime} seconds"

# Install the wheel
install_fbgemm_gpu_wheel "${BUILD_ENV_NAME}" fbgemm_gpu/dist/*.whl
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to install wheel: ${runtime} seconds"

# Test with PyTest
echo "[NOVA] Current working directory: $(pwd)"
Expand All @@ -44,6 +55,10 @@ fi
$CONDA_RUN python3 -c "import torch; print('cuda.is_available() ', torch.cuda.is_available()); print ('device_count() ',torch.cuda.device_count());"
cd "${FBGEMM_REPO}" || { echo "[NOVA] Failed to cd to ${FBGEMM_REPO} from $(pwd)"; };
test_all_fbgemm_gpu_modules "${BUILD_ENV_NAME}" "${fbgemm_variant}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to test all unit tests: ${runtime} seconds / $(display_time ${runtime})"

# Workaround EACCES: permission denied error at checkout step
chown -R 1000:1000 /__w/FBGEMM/FBGEMM/ || echo "Unable to chown 1000:1000 from $USER, uid: $(id -u)"
43 changes: 43 additions & 0 deletions .github/scripts/nova_prescript.bash
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,59 @@ BUILD_ENV_NAME=${CONDA_ENV}
# shellcheck source=.github/scripts/setup_env.bash
. "${PRELUDE}";

# Record time for each step
start_time=$(date +%s)

# Display System Info
print_system_info
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to display System Info: ${runtime} seconds"

# Display Conda information
print_conda_info
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to display Conda information: ${runtime} seconds"

# Display GPU Info
print_gpu_info
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to display GPU Info: ${runtime} seconds"

# Install C/C++ Compilers
install_cxx_compiler "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to install C/C++ Compilers: ${runtime} seconds"

# Install Build Tools
install_build_tools "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to install Build Tools: ${runtime} seconds"

# Collect PyTorch environment information
collect_pytorch_env_info "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to collect PyTorch environment information: ${runtime} seconds"

if [[ $CU_VERSION = cu* ]]; then
# Extract the CUDA version number from CU_VERSION by stripping the leading
# "cu" (e.g. "cu118" -> "118").  Only CU_VERSION itself may feed the
# extraction: running a "[NOVA] "-prefixed string through `cut -c 3-` yields
# "OVA] cu118" instead of the version number.
cuda_version="${CU_VERSION#cu}"
install_cudnn "${BUILD_ENV_NAME}" "$(pwd)/build_only/cudnn" "${cuda_version}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to install cudnn: ${runtime} seconds"

echo "[NOVA] -------- Finding NVML_LIB_PATH -----------"
if [[ ${NVML_LIB_PATH} == "" ]]; then
Expand All @@ -58,6 +89,10 @@ if [[ $CU_VERSION = cu* ]]; then

echo "[NOVA] NVML_LIB_PATH = ${NVML_LIB_PATH}"
echo "[NOVA] ------------------------------------------"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to find NVML_LIB_PATH: ${runtime} seconds"

echo "[NOVA] Building the CUDA variant of FBGEMM_GPU ..."
export fbgemm_variant="cuda"
Expand All @@ -74,6 +109,10 @@ fi
# Install the necessary Python eggs for building
cd "${FBGEMM_REPO}/fbgemm_gpu" || exit 1
prepare_fbgemm_gpu_build "${BUILD_ENV_NAME}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to prepare the build : ${runtime} seconds / $(display_time ${runtime})"

# Reset the BUILD_FROM_NOVA flag to run setup.py for the actual build
BUILD_FROM_NOVA=0
Expand All @@ -86,6 +125,10 @@ fi

# Build the wheel
build_fbgemm_gpu_package "${BUILD_ENV_NAME}" "${CHANNEL}" "${fbgemm_variant}"
end_time=$(date +%s)
runtime=$((end_time-start_time))
start_time=${end_time}
echo "[NOVA] Time taken to build the package: ${runtime} seconds / $(display_time ${runtime})"

# Temporary workaround - copy dist/ to root repo for smoke test
echo "[NOVA] Copying dist folder to root repo ..."
Expand Down
9 changes: 9 additions & 0 deletions .github/scripts/utils_base.bash
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ exec_with_retries () {
}


# Format a duration given in whole seconds as zero-padded HH:MM:SS.
#
# Arguments: $1 - duration in seconds (integer); treated as 0 when omitted
# Outputs:   the formatted duration followed by a newline, on stdout
#
# Hours are not wrapped at 24, so a duration of 90000 seconds prints as
# 25:00:00.
display_time() {
  local seconds="${1:-0}"
  # Keep the components local so that callers' h/m/s variables are not
  # overwritten.  Arithmetic expansion is used instead of (( )) statements
  # because a (( )) statement returns status 1 whenever its value is 0,
  # which trips `set -e` / ERR traps for any duration under an hour.
  local h=$((seconds / 3600))
  local m=$(((seconds % 3600) / 60))
  local s=$((seconds % 60))
  printf "%02d:%02d:%02d\n" "${h}" "${m}" "${s}"
}


################################################################################
# Assert Functions
################################################################################
Expand Down

0 comments on commit dba7263

Please sign in to comment.