Skip to content

Commit

Permalink
chore: Rename variables
Browse files Browse the repository at this point in the history
  • Loading branch information
keehyuna committed Dec 16, 2024
1 parent 6382a0b commit b3bf3b7
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 30 deletions.
4 changes: 2 additions & 2 deletions core/runtime/TRTEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ TRTEngine::TRTEngine(
exec_ctx = make_trt(cuda_engine->createExecutionContext());
TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");

runtime_states.prev_cudagraphs_enabled = CUDAGRAPHS_MODE;
runtime_states.prev_pre_allocated_outputs_enabled = false;
runtime_states.old_cudagraphs = CUDAGRAPHS_MODE;
runtime_states.old_pre_allocated_outputs = false;

if (_in_binding_names.size() == 0 && _out_binding_names.size() == 0) {
uint64_t inputs = 0;
Expand Down
24 changes: 10 additions & 14 deletions core/runtime/TRTEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,34 +30,30 @@ using FlattenedState = std::tuple<
std::tuple<std::string, std::string>, // serialized metadata
std::tuple<std::string, std::string>>; // Platform

// Per-invocation runtime decisions computed from the current and previous
// engine-execution state. (This diff shows it being removed in favor of a
// plain std::tuple<bool, bool> return value.)
struct RuntimeStates {
// True when CUDA Graphs must (re)record the graph on this execution pass
bool need_cudagraphs_record;
// True when output tensors pre-allocated on a prior pass can be reused
bool can_use_pre_allocated_outputs;
};

// Tracks whether CUDA Graphs / pre-allocated outputs were enabled on the
// previous execute_engine call, and decides per-call whether a CUDA Graph
// must be (re)recorded and whether pre-allocated outputs may be reused.
// NOTE: the scraped diff interleaved pre- and post-commit lines (duplicate
// members, duplicate conditionals, two returns); this is the reconstructed
// post-commit version using the renamed old_* members and a tuple return.
struct TorchTRTRuntimeStates {
  // Indicates whether CUDAGraphs were enabled in the previous execute_engine
  bool old_cudagraphs;
  // Indicates whether pre-allocated output was enabled in the previous execute_engine
  bool old_pre_allocated_outputs;

  // Evaluates whether certain conditions are met to enable CUDA Graph recording or to
  // reuse pre-allocated outputs, based on the current and previous states, as well as
  // whether the input shape has changed. Updates the stored previous state as a side
  // effect.
  //
  // Returns {need_cudagraphs_record, can_use_pre_allocated_outputs}.
  std::tuple<bool, bool> set_runtime_states(bool new_cudagraphs, bool new_pre_allocated_output, bool shape_changed) {
    bool need_cudagraphs_record = false;
    bool can_use_pre_allocated_outputs = false;

    // Cudagraphs record is required if cudagraphs is switched to True regardless of shape change
    if (new_cudagraphs && (!old_cudagraphs || shape_changed)) {
      need_cudagraphs_record = true;
    }
    // Pre-allocated output can be used when previous and current state are true without shape change
    if (old_pre_allocated_outputs && new_pre_allocated_output && !shape_changed) {
      can_use_pre_allocated_outputs = true;
    }
    // Remember this call's settings for the next execute_engine pass
    old_cudagraphs = new_cudagraphs;
    old_pre_allocated_outputs = new_pre_allocated_output;

    return {need_cudagraphs_record, can_use_pre_allocated_outputs};
  }
};

Expand Down
8 changes: 5 additions & 3 deletions core/runtime/execute_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,11 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
bool shape_changed = _validate_shapes(inputs, compiled_engine);

// Whether cudagraphs needs to record the graph on this pass
RuntimeStates states = compiled_engine->runtime_states.validate_states(
auto result = compiled_engine->runtime_states.set_runtime_states(
CUDAGRAPHS_MODE, compiled_engine->use_pre_allocated_outputs, shape_changed);
bool need_cudagraphs_record = states.need_cudagraphs_record;

bool need_cudagraphs_record = std::get<0>(result);
bool can_use_pre_allocated_outputs = std::get<1>(result);

if (!CUDAGRAPHS_MODE || shape_changed) {
compiled_engine->cudagraph.reset();
Expand Down Expand Up @@ -290,7 +292,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
output_profiler_guard =
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->output_profile_path);
}
if (states.can_use_pre_allocated_outputs) {
if (can_use_pre_allocated_outputs) {
outputs = compiled_engine->pre_allocated_outputs;
} else {
outputs = create_output_tensors(compiled_engine);
Expand Down
4 changes: 3 additions & 1 deletion examples/dynamo/pre_allocated_output_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ def test_module_perf(model, *input):
# Enable/Disable pre-allocated output buffer feature using runtime api
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# We can enable the pre-allocated output buffer with a context manager
# Enable pre-allocated output buffer using a context manager
with torch_tensorrt.runtime.enable_pre_allocated_outputs(optimized_model):
out_trt = optimized_model(*inputs)
# Subsequent inferences can use the pre-allocated output buffer (no shape change)
out_trt = optimized_model(*inputs)

# Alternatively, we can enable the feature using a context object
pre_allocated_output_ctx = torch_tensorrt.runtime.enable_pre_allocated_outputs(
Expand Down
22 changes: 12 additions & 10 deletions py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@


class TorchTRTRuntimeStates:
    """Tracks CUDA Graph and pre-allocated-output enablement across
    execute_engine calls for the Python runtime module.

    NOTE: the scraped diff interleaved pre- and post-commit lines (duplicate
    __init__ signatures, parameters, conditionals, and state writes); this is
    the reconstructed post-commit version using the renamed old_* attributes.
    """

    def __init__(self, new_cudagraphs: bool, new_pre_allocated_output: bool):
        # Indicates whether CUDAGraphs were enabled in the previous execute_engine
        self.old_cudagraphs = new_cudagraphs
        # Indicates whether pre-allocated output was enabled in the previous execute_engine
        self.old_pre_allocated_outputs = new_pre_allocated_output

    def validate_states(
        self,
        new_cudagraphs: bool,
        new_pre_allocated_output: bool,
        shape_changed: bool,
    ) -> Tuple[bool, bool]:
        # Evaluates whether certain conditions are met to enable CUDA Graph
        # recording or to reuse pre-allocated outputs, based on the current and
        # previous states, as well as whether the input shape has changed.
        # Returns (need_cudagraphs_record, can_use_pre_allocated_outputs) and
        # updates the stored previous state as a side effect.
        need_cudagraphs_record = False
        can_use_pre_allocated_outputs = False

        # Cudagraphs record is required if cudagraphs is switched to True regardless of shape change
        if new_cudagraphs and (not self.old_cudagraphs or shape_changed):
            need_cudagraphs_record = True

        # Pre-allocated output can be used when previous and current state are true without shape change
        if (
            self.old_pre_allocated_outputs
            and new_pre_allocated_output
            and (not shape_changed)
        ):
            can_use_pre_allocated_outputs = True

        # Remember this call's settings for the next execute_engine pass
        self.old_cudagraphs = new_cudagraphs
        self.old_pre_allocated_outputs = new_pre_allocated_output

        return need_cudagraphs_record, can_use_pre_allocated_outputs

Expand Down

0 comments on commit b3bf3b7

Please sign in to comment.