diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp
index ccd0cf3926d..e7ea0f6b651 100644
--- a/tests/cpp/test_resize.cpp
+++ b/tests/cpp/test_resize.cpp
@@ -4130,7 +4130,7 @@ TEST_F(ResizeTest, VectorizeFactorTwo) {
   FusionExecutorCache executor_cache(std::move(fusion_ptr));
   auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
 
-  auto ref = at::pad(t0, {4, 4});
+  auto ref = at::pad(t0, {2, 2});
 
   NVF_CHECK(ref.equal(cg_outputs[0]));
 }
@@ -4161,4 +4161,33 @@ TEST_F(ResizeTest, UnrollNonInnermost) {
   NVF_CHECK(ref.equal(cg_outputs[0]));
 }
 
+TEST_F(ResizeTest, PadAndCacheUses) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  auto& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  const std::vector<int64_t> shape({1024L * 1024L});
+
+  // Using a concrete tensor to avoid dynamic reshape
+  auto tv0 = makeContigConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = pad(tv0, {IrBuilder::create<Val>(4L), IrBuilder::create<Val>(4L)});
+  fusion.addOutput(tv1);
+  auto tv2 = relu(tv0);
+  fusion.addOutput(tv2);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  auto t0 = at::randn(shape, options);
+  std::vector<c10::IValue> aten_inputs({t0});
+
+  FusionExecutorCache executor_cache(std::move(fusion_ptr));
+  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
+
+  auto ref_0 = at::pad(t0, {4, 4});
+  NVF_CHECK(ref_0.equal(cg_outputs[0]));
+
+  auto ref_1 = at::relu(t0);
+  NVF_CHECK(ref_1.equal(cg_outputs[1]));
+}
 } // namespace nvfuser