From 4b77affe6724d9573042cf8c4f98a18dc7675432 Mon Sep 17 00:00:00 2001 From: Marius Pelegrin Date: Tue, 26 Nov 2024 15:33:26 +0100 Subject: [PATCH] Add CPU masking/pinning option Add an option to specify which CPU cores the replayer is able to use. Change-Id: I1ff25355dbcc1da34830b8926dc5eb7078889ec5 --- USAGE_android.md | 11 +++- USAGE_desktop_D3D12.md | 9 ++- USAGE_desktop_Vulkan.md | 9 ++- android/scripts/gfxrecon.py | 5 ++ framework/decode/replay_options.h | 1 + framework/util/platform.h | 98 +++++++++++++++++++++++++++++++ tools/replay/replay_settings.h | 12 +++- tools/tool_settings.h | 15 +++++ 8 files changed, 154 insertions(+), 6 deletions(-) diff --git a/USAGE_android.md b/USAGE_android.md index a8b6a63d74..06d6f7e1ab 100644 --- a/USAGE_android.md +++ b/USAGE_android.md @@ -700,7 +700,7 @@ queryable permission to apply. The `gfxrecon.py replay` command has the following usage: ```text -usage: gfxrecon.py replay [-h] [--push-file LOCAL_FILE] [--version] [--pause-frame N] +usage: gfxrecon.py replay [-h] [--push-file LOCAL_FILE] [--version] [--cpu-mask ] [--pause-frame N] [--paused] [--screenshot-all] [--screenshots RANGES] [--screenshot-format FORMAT] [--screenshot-dir DIR] [--screenshot-prefix PREFIX] [--screenshot-scale SCALE] @@ -749,6 +749,13 @@ optional arguments: -p LOCAL_FILE, --push-file LOCAL_FILE Local file to push to the location on device specified by + --cpu-mask + Set of CPU cores used by the replayer. + `binary-mask` is a succession of '0' and '1' that specifies + used/unused cores. For example '1010' activates the first and + third cores and deactivates all other cores. + If the option is not set, all cores can be used. If the option + is set only for some cores, the other cores are not used. --screenshot-all Generate screenshots for all frames. When this option is specified, --screenshots is ignored (forwarded to replay tool) @@ -792,7 +799,7 @@ optional arguments: See gfxrecon-extract. 
--opcd, --omit-pipeline-cache-data Omit pipeline cache data from calls to - vkCreatePipelineCache and skip calls to + vkCreatePipelineCache and skip calls to vkGetPipelineCacheData (forwarded to replay tool) --surface-index N Restrict rendering to the Nth surface object created. Used with captures that include multiple surfaces. diff --git a/USAGE_desktop_D3D12.md b/USAGE_desktop_D3D12.md index aa707a8050..069385f146 100644 --- a/USAGE_desktop_D3D12.md +++ b/USAGE_desktop_D3D12.md @@ -197,7 +197,7 @@ The `gfxrecon-replay` tool accepts the following command line arguments: gfxrecon-replay.exe - A tool to replay GFXReconstruct capture files. Usage: - gfxrecon-replay.exe [-h | --help] [--version] [--gpu ] + gfxrecon-replay.exe [-h | --help] [--version] [--cpu-mask ] [--gpu ] [--pause-frame ] [--paused] [--sync] [--screenshot-all] [--screenshots ] [--screenshot-format ] [--screenshot-dir ] [--screenshot-prefix ] @@ -268,6 +268,13 @@ Optional arguments: --validate Enables the Khronos Vulkan validation layer when replaying a Vulkan capture or the Direct3D debug layer when replaying a Direct3D 12 capture. + --cpu-mask + Set of CPU cores used by the replayer. + `binary-mask` is a succession of '0' and '1' that specifies + used/unused cores. For example '1010' activates the first and + third cores and deactivates all other cores. + If the option is not set, all cores can be used. If the option + is set only for some cores, the other cores are not used. --gpu Use the specified device for replay, where index is the zero-based index to the array of physical devices returned by vkEnumeratePhysicalDevices or IDXGIFactory1::EnumAdapters1. 
diff --git a/USAGE_desktop_Vulkan.md b/USAGE_desktop_Vulkan.md index b7777751fe..1ca6ccf1b3 100644 --- a/USAGE_desktop_Vulkan.md +++ b/USAGE_desktop_Vulkan.md @@ -546,7 +546,7 @@ The `gfxrecon-replay` tool for desktop accepts the following command line arguments: ```text -gfxrecon-replay [-h | --help] [--version] [--gpu ] +gfxrecon-replay [-h | --help] [--version] [--cpu-mask ] [--gpu ] [--pause-frame ] [--paused] [--sync] [--screenshot-all] [--screenshots ] [--screenshot-format ] [--screenshot-dir ] [--screenshot-prefix ] @@ -587,6 +587,13 @@ Optional arguments: --log-file Write log messages to a file at the specified path. Default is: Empty string (file logging disabled). --log-debugview Log messages with OutputDebugStringA. Windows only. + --cpu-mask + Set of CPU cores used by the replayer. + `binary-mask` is a succession of '0' and '1' that specifies + used/unused cores. For example '1010' activates the first and + third cores and deactivates all other cores. + If the option is not set, all cores can be used. If the option + is set only for some cores, the other cores are not used. --gpu Use the specified device for replay, where index is the zero-based index to the array of physical devices returned by vkEnumeratePhysicalDevices. 
Replay may fail diff --git a/android/scripts/gfxrecon.py b/android/scripts/gfxrecon.py index 6f98ce82d5..0b6c03b80b 100644 --- a/android/scripts/gfxrecon.py +++ b/android/scripts/gfxrecon.py @@ -71,6 +71,7 @@ def CreateReplayParser(): parser.add_argument('--log-file', metavar='DEVICE_FILE', help='Write log messages to a file at the specified path instead of logcat (forwarded to replay tool)') parser.add_argument('--pause-frame', metavar='N', help='Pause after replaying frame number N (forwarded to replay tool)') parser.add_argument('--paused', action='store_true', default=False, help='Pause after replaying the first frame (same as "--pause-frame 1"; forwarded to replay tool)') + parser.add_argument('--cpu-mask', metavar='binary_mask', help='Set of CPU cores used by the replayer. `binary-mask` is a succession of "0" and "1" that specifies used/unused cores. For example "1010" activates the first and third cores and deactivate all other cores. If the option is not set, all cores can be used. If the option is set only for some cores, the other cores are not used. (forwarded to replay tool)') parser.add_argument('--screenshot-all', action='store_true', default=False, help='Generate screenshots for all frames. When this option is specified, --screenshots is ignored (forwarded to replay tool)') parser.add_argument('--screenshots', metavar='RANGES', help='Generate screenshots for the specified frames. Target frames are specified as a comma separated list of frame ranges. A frame range can be specified as a single value, to specify a single frame, or as two hyphenated values, to specify the first and last frames to process. Frame ranges should be specified in ascending order and cannot overlap. Note that frame numbering is 1-based (i.e. the first frame is frame 1). 
Example: 200,301-305 will generate six screenshots (forwarded to replay tool)') parser.add_argument('--screenshot-format', metavar='FORMAT', choices=['bmp', 'png'], help='Image file format to use for screenshot generation. Available formats are: bmp, png (forwarded to replay tool)') @@ -142,6 +143,10 @@ def MakeExtrasString(args): if args.paused: arg_list.append('--paused') + if args.cpu_mask: + arg_list.append('--cpu-mask') + arg_list.append('{}'.format(args.cpu_mask)) + if args.screenshot_all: arg_list.append('--screenshot-all') elif args.screenshots: diff --git a/framework/decode/replay_options.h b/framework/decode/replay_options.h index cfda8fb167..b27dfc8b6a 100644 --- a/framework/decode/replay_options.h +++ b/framework/decode/replay_options.h @@ -55,6 +55,7 @@ struct ReplayOptions bool force_windowed_origin{ false }; int32_t window_topleft_x{ 0 }; int32_t window_topleft_y{ 0 }; + std::string cpu_mask; int32_t override_gpu_index{ -1 }; std::string capture_filename; bool enable_print_block_info{ false }; diff --git a/framework/util/platform.h b/framework/util/platform.h index 7bc631a68c..09435d82c6 100644 --- a/framework/util/platform.h +++ b/framework/util/platform.h @@ -57,6 +57,10 @@ #include #endif +#ifdef __linux__ +#include <sched.h> +#endif + GFXRECON_BEGIN_NAMESPACE(gfxrecon) GFXRECON_BEGIN_NAMESPACE(util) GFXRECON_BEGIN_NAMESPACE(platform) @@ -260,6 +264,61 @@ inline int GetSystemLastErrorCode() return GetLastError(); }
+// Returns the CPU affinity of the current process as a string of '0'/'1' characters in which the
+// character at index N tells whether logical core N can be used (same format as the --cpu-mask
+// option). Returns an empty string when the affinity cannot be queried or no core is reported.
+inline std::string GetCpuAffinity()
+{
+    DWORD_PTR process_mask = 0;
+    DWORD_PTR system_mask  = 0;
+    if (!GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask))
+    {
+        return "";
+    }
+
+    // Bit N of the mask maps to core N, so emit the least significant bit first. The loop ends right
+    // after the highest set bit, which leaves no trailing '0' to trim and keeps an all-zero mask from
+    // touching an empty string.
+    std::string affinity;
+    for (DWORD_PTR mask = (process_mask & system_mask); mask != 0; mask >>= 1)
+    {
+        affinity += (mask & 1) ? '1' : '0';
+    }
+
+    return affinity;
+}
+
+// Restricts the current process to the cores selected by `affinity`, a string of '0'/'1' characters
+// in which the character at index N enables ('1') or disables ('0') logical core N. Cores past the
+// end of the string are disabled. Returns false without calling SetProcessAffinityMask() when the
+// string holds any other character or enables a core index that does not fit in a DWORD_PTR mask;
+// otherwise returns whether SetProcessAffinityMask() succeeded.
+inline bool SetCpuAffinity(const std::string& affinity)
+{
+    const size_t max_cores = sizeof(DWORD_PTR) * 8;
+
+    DWORD_PTR mask = 0;
+    for (size_t i = 0; i < affinity.size(); ++i)
+    {
+        if (affinity[i] == '1')
+        {
+            if (i >= max_cores)
+            {
+                return false;
+            }
+
+            // Character N drives bit N; shifting an accumulator instead would reverse the core order.
+            mask |= (static_cast<DWORD_PTR>(1) << i);
+        }
+        else if (affinity[i] != '0')
+        {
+            return false;
+        }
+    }
+
+    return (SetProcessAffinityMask(GetCurrentProcess(), mask) != 0);
+}
+
#else // !defined(WIN32) // Error value indicating string was truncated @@ -568,6 +627,71 @@ inline int GetSystemLastErrorCode() return errno; }
+// Returns the CPU affinity of the calling thread as a string of '0'/'1' characters in which the
+// character at index N tells whether logical core N can be used (same format as the --cpu-mask
+// option), with trailing '0' characters removed. Returns an empty string on platforms other than
+// Linux or when sched_getaffinity() fails.
+inline std::string GetCpuAffinity()
+{
+    std::string affinity;
+
+#ifdef __linux__
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    if (sched_getaffinity(0, sizeof(mask), &mask) != 0)
+    {
+        return affinity;
+    }
+
+    // A cpu_set_t describes CPU_SETSIZE cores; sizeof(mask) / CPU_ALLOC_SIZE(1) only counts the words
+    // backing the set and would hide every core past the first few.
+    affinity.reserve(CPU_SETSIZE);
+    for (int i = 0; i < CPU_SETSIZE; ++i)
+    {
+        affinity += CPU_ISSET(i, &mask) ? '1' : '0';
+    }
+
+    // Drop trailing '0' characters. find_last_not_of() yields npos for an all-zero string, and
+    // npos + 1 wraps to 0, so that case clears the string instead of reading past its end.
+    affinity.erase(affinity.find_last_not_of('0') + 1);
+#endif
+
+    return affinity;
+}
+
+// Restricts the calling thread to the cores selected by `affinity`, a string of '0'/'1' characters
+// in which the character at index N enables ('1') or disables ('0') logical core N. Cores past the
+// end of the string are disabled. Returns false without changing the affinity when the string holds
+// any other character or enables a core index of CPU_SETSIZE or more; otherwise returns whether
+// sched_setaffinity() succeeded. Always returns false on platforms other than Linux.
+inline bool SetCpuAffinity(const std::string& affinity)
+{
+#ifdef __linux__
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    for (size_t i = 0; i < affinity.size(); ++i)
+    {
+        if (affinity[i] == '1')
+        {
+            if (i >= static_cast<size_t>(CPU_SETSIZE))
+            {
+                return false;
+            }
+
+            CPU_SET(i, &mask);
+        }
+        else if (affinity[i] != '0')
+        {
+            return false;
+        }
+    }
+
+    return (sched_setaffinity(0, sizeof(mask), &mask) == 0);
+#else
+    return false;
+#endif
+}
+
#endif // WIN32 inline size_t GetAlignedSize(size_t size, size_t align_to) diff --git a/tools/replay/replay_settings.h b/tools/replay/replay_settings.h index aa02a636c8..98cd36a0c1 100644 --- a/tools/replay/replay_settings.h +++ b/tools/replay/replay_settings.h @@ -38,7 +38,7 @@ const char kOptions[] = "resources,--dump-resources-dump-all-image-subresources,--dump-resources-dump-raw-images,--dump-resources-dump-" "separate-alpha,--pbi-all,--preload-measurement-range, --add-new-pipeline-caches"; const char kArguments[] = -
"--log-level,--log-file,--gpu,--gpu-group,--pause-frame,--wsi,--surface-index,-m|--memory-translation," + "--log-level,--log-file,--cpu-mask,--gpu,--gpu-group,--pause-frame,--wsi,--surface-index,-m|--memory-translation," "--replace-shaders,--screenshots,--denied-messages,--allowed-messages,--screenshot-format,--" "screenshot-dir,--screenshot-prefix,--screenshot-size,--screenshot-scale,--mfr|--measurement-frame-range,--fw|--" "force-windowed,--fwo|--force-windowed-origin,--batching-memory-usage,--measurement-file,--swapchain,--sgfs|--skip-" @@ -59,7 +59,8 @@ static void PrintUsage(const char* exe_name) GFXRECON_WRITE_CONSOLE("\n%s - A tool to replay GFXReconstruct capture files.\n", app_name.c_str()); GFXRECON_WRITE_CONSOLE("Usage:"); - GFXRECON_WRITE_CONSOLE(" %s\t[-h | --help] [--version] [--gpu ] [--gpu-group ]", app_name.c_str()); + GFXRECON_WRITE_CONSOLE(" %s\t[-h | --help] [--version]", app_name.c_str()); + GFXRECON_WRITE_CONSOLE("\t\t\t[--cpu-mask ] [--gpu ] [--gpu-group ]"); GFXRECON_WRITE_CONSOLE("\t\t\t[--pause-frame ] [--paused] [--sync] [--screenshot-all]"); GFXRECON_WRITE_CONSOLE("\t\t\t[--screenshots ] [--screenshot-format ]"); GFXRECON_WRITE_CONSOLE("\t\t\t[--screenshot-dir ] [--screenshot-prefix ]"); @@ -159,6 +160,13 @@ static void PrintUsage(const char* exe_name) GFXRECON_WRITE_CONSOLE(" --validate\t\tEnable the Khronos Vulkan validation layer when replaying a"); GFXRECON_WRITE_CONSOLE(" \t\tVulkan capture or the Direct3D debug layer when replaying a"); GFXRECON_WRITE_CONSOLE(" \t\tDirect3D 12 capture."); + GFXRECON_WRITE_CONSOLE(" --cpu-mask "); + GFXRECON_WRITE_CONSOLE(" \t\tSet of CPU cores used by the replayer."); + GFXRECON_WRITE_CONSOLE(" \t\t`binary-mask` is a succession of '0' and '1' that specifies"); + GFXRECON_WRITE_CONSOLE(" \t\tused/unused cores. 
For example '1010' activates the first and"); + GFXRECON_WRITE_CONSOLE(" \t\tthird cores and deactivate all other cores."); + GFXRECON_WRITE_CONSOLE(" \t\tIf the option is not set, all cores can be used. If the option"); + GFXRECON_WRITE_CONSOLE(" \t\tis set only for some cores, the other cores are not used."); GFXRECON_WRITE_CONSOLE(" --gpu \t\tUse the specified device for replay, where index"); GFXRECON_WRITE_CONSOLE(" \t\tis the zero-based index to the array of physical devices"); GFXRECON_WRITE_CONSOLE(" \t\treturned by vkEnumeratePhysicalDevices or IDXGIFactory1::EnumAdapters1."); diff --git a/tools/tool_settings.h b/tools/tool_settings.h index e7013ebcb3..371f7cd485 100644 --- a/tools/tool_settings.h +++ b/tools/tool_settings.h @@ -64,6 +64,7 @@ const char kLogLevelArgument[] = "--log-level"; const char kLogFileArgument[] = "--log-file"; const char kLogDebugView[] = "--log-debugview"; const char kNoDebugPopup[] = "--no-debug-popup"; +const char kCpuMaskArgument[] = "--cpu-mask"; const char kOverrideGpuArgument[] = "--gpu"; const char kOverrideGpuGroupArgument[] = "--gpu-group"; const char kPausedOption[] = "--paused"; @@ -934,6 +935,20 @@ static void GetReplayOptions(gfxrecon::decode::ReplayOptions& options, options.num_pipeline_creation_jobs = std::stoi(arg_parser.GetArgumentValue(kNumPipelineCreationJobs)); } + options.cpu_mask = arg_parser.GetArgumentValue(kCpuMaskArgument); + if (!options.cpu_mask.empty()) + { + if (gfxrecon::util::platform::SetCpuAffinity(options.cpu_mask)) + { + GFXRECON_LOG_INFO("CPU mask successfully set: %s", gfxrecon::util::platform::GetCpuAffinity().c_str()); + } + else + { + GFXRECON_LOG_ERROR("Failed to set CPU mask: %s", options.cpu_mask.c_str()); + GFXRECON_LOG_ERROR("Resuming with CPU mask: %s", gfxrecon::util::platform::GetCpuAffinity().c_str()); + } + } + const auto& override_gpu = arg_parser.GetArgumentValue(kOverrideGpuArgument); if (!override_gpu.empty()) {