Make tensors contiguous + setup cuda streams

shader-slang · Jan 16, 2025 · 10c8d90 · 10c8d90
1 parent 09fbf8a
commit 10c8d90
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 6 deletions.
diff --git a/slangpy/torchintegration/torchfunction.py b/slangpy/torchintegration/torchfunction.py
@@ -23,7 +23,7 @@ def unpack_arg(arg: Any, tensors: list[torch.Tensor]) -> Any:
         arg = [unpack_arg(v, tensors) for v in arg]
     if isinstance(arg, torch.Tensor):
         id = len(tensors)
-        tensors.append(arg)
+        tensors.append(arg.contiguous())
         arg = WrappedTensor(id=id)
     return arg
 
@@ -84,19 +84,27 @@ def forward(
         ctx.unpacked_args = unpacked_args
         ctx.unpacked_kwargs = unpacked_kwargs
 
+        # Gather streams from tensors
+        streams: set[int] = set()
+        for tensor in tensors:
+            if tensor.is_cuda:
+                streams.add(torch.cuda.current_stream(tensor.device).cuda_stream)
+
         # Fill out the tensors before passing to the function
         populate_tensor_refs((unpacked_args, unpacked_kwargs), tensors)
 
         # Sync device with cuda
-        spy_function.module.device.sync_to_cuda()
+        for stream in streams:
+            spy_function.module.device.sync_to_cuda(stream)
 
         # Get the result (will be a slangpy wrapped tensor)
         wrapped_tensor = spy_function(*unpacked_args, **unpacked_kwargs)
         assert isinstance(wrapped_tensor, WrappedTensor)
         result = wrapped_tensor.primal
 
         # Sync cuda with device
-        spy_function.module.device.sync_to_device()
+        for stream in streams:
+            spy_function.module.device.sync_to_device(stream)
 
         # Clear the tensors after passing to the function
         clear_tensor_refs((unpacked_args, unpacked_kwargs))
@@ -119,6 +127,15 @@ def backward(ctx: Any, *args: torch.Tensor):
         result = WrappedTensor(ctx.saved_tensors[-1])
         result.grad_in = WrappedTensor(result_grad_tensor)
 
+        # Gather streams from tensors (both saved tensors + args)
+        streams: set[int] = set()
+        for tensor in ctx.saved_tensors:
+            if tensor.is_cuda:
+                streams.add(torch.cuda.current_stream(tensor.device).cuda_stream)
+        for arg in args:
+            if arg.is_cuda:
+                streams.add(torch.cuda.current_stream(arg.device).cuda_stream)
+
         # Fill out the tensors before passing to the function
         populate_tensor_refs((unpacked_args, unpacked_kwargs), ctx.saved_tensors)
 
@@ -128,14 +145,21 @@ def backward(ctx: Any, *args: torch.Tensor):
         gradients: list[Optional[torch.Tensor]] = []
         alloc_gradients((unpacked_args, unpacked_kwargs), gradients)
 
+        # Gather streams from gradients
+        for grad in gradients:
+            if grad is not None:
+                streams.add(torch.cuda.current_stream(grad.device).cuda_stream)
+
         # Sync device with cuda
-        spy_function.module.device.sync_to_cuda()
+        for stream in streams:
+            spy_function.module.device.sync_to_cuda(stream)
 
         # Run backwards pass
         spy_function.bwds(*unpacked_args, **unpacked_kwargs, _result=result)
 
         # Sync cuda with device
-        spy_function.module.device.sync_to_device()
+        for stream in streams:
+            spy_function.module.device.sync_to_device(stream)
 
         # Clear the tensors after passing to the function
         clear_tensor_refs((unpacked_args, unpacked_kwargs))

diff --git a/slangpy/torchintegration/wrappedtensor.py b/slangpy/torchintegration/wrappedtensor.py
@@ -8,7 +8,7 @@
 
 from slangpy.backend.slangpynativeemulation import AccessType, CallContext, CallMode, Shape
 from slangpy.bindings.boundvariableruntime import BoundVariableRuntime
-from slangpy.bindings.marshall import ReturnContext
+from slangpy.bindings.marshall import Marshall, ReturnContext
 from slangpy.bindings.typeregistry import PYTHON_SIGNATURES, PYTHON_TYPES
 from slangpy.builtin.tensor import TensorMarshall, is_nested_array
 from slangpy.reflection.reflectiontypes import SlangProgramLayout, SlangType, ScalarType
@@ -174,3 +174,12 @@ def hash_tensor(value: Any) -> str:
 
 PYTHON_TYPES[WrappedTensor] = create_tensor_marshall
 PYTHON_SIGNATURES[WrappedTensor] = hash_tensor
+
+
+def error_tensor_marshall(layout: SlangProgramLayout, value: Any):
+    """
+    Marshall factory registered for raw ``torch.Tensor`` values; it always raises.
+
+    :param layout: Program layout passed by the caller (unused).
+    :param value: The value being marshalled (unused).
+    :raises ValueError: Always, directing the user to the pytorch integration
+        or to a SlangPy tensor type.
+    """
+    # Adjacent string literals are used rather than backslash continuations inside
+    # a single literal: with a continuation, the next line's leading indentation
+    # becomes part of the message and shows up as long runs of spaces.
+    raise ValueError(
+        "torch.Tensor types can not be directly passed to SlangPy. Either use the "
+        "pytorch integration (via TorchModule/TorchStruct/TorchFunction) or use a SlangPy "
+        "tensor type."
+    )
+
+
+PYTHON_TYPES[torch.Tensor] = error_tensor_marshall