diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 48321459..33eaa498 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -19,7 +19,7 @@ endmacro() # d3d12 add_library(d3d12 INTERFACE) -target_link_libraries(d3d12 INTERFACE dxgi.lib d3d12.lib) +target_link_libraries(d3d12 INTERFACE dxgi.lib d3d12.lib dxguid.lib) # nanobind diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84ce7045..ba9c9604 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -91,6 +91,7 @@ target_sources(sgl PRIVATE sgl/device/framebuffer.h sgl/device/fwd.h sgl/device/helpers.h + sgl/device/helpers.cpp sgl/device/input_layout.cpp sgl/device/input_layout.h sgl/device/kernel.cpp diff --git a/src/sgl/device/helpers.cpp b/src/sgl/device/helpers.cpp new file mode 100644 index 00000000..3ce2c1da --- /dev/null +++ b/src/sgl/device/helpers.cpp @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: Apache-2.0 +#include "helpers.h" + +#include "sgl/core/config.h" +#include "sgl/core/macros.h" + +#include +#include + +#if SGL_HAS_D3D12 +#include +#include +#endif + +namespace sgl { + + +// Reads last error from graphics layer. 
+// Returns the description text of the most recent message stored in the DXGI
+// info queue. Returns an empty string when the DXGI debug interface or the
+// info queue cannot be obtained, when no messages are stored, or when the
+// message cannot be retrieved. When SGL_HAS_D3D12 is not set, always returns
+// an empty string.
+std::string get_last_gfx_layer_error()
+{
+#if SGL_HAS_D3D12
+    IDXGIDebug* dxgiDebug = nullptr;
+    DXGIGetDebugInterface1(0, IID_PPV_ARGS(&dxgiDebug));
+    if (!dxgiDebug)
+        return "";
+
+    IDXGIInfoQueue* dxgiInfoQueue = nullptr;
+    dxgiDebug->QueryInterface(IID_PPV_ARGS(&dxgiInfoQueue));
+    // The debug interface was only needed to reach the info queue; release our
+    // reference so it is not leaked on every call.
+    dxgiDebug->Release();
+    if (!dxgiInfoQueue)
+        return "";
+
+    std::string res;
+    UINT64 messageCount = dxgiInfoQueue->GetNumStoredMessages(DXGI_DEBUG_ALL);
+    if (messageCount > 0) {
+        // First call with a null buffer only queries the byte size of the last message.
+        SIZE_T messageLength = 0;
+        HRESULT hr = dxgiInfoQueue->GetMessage(DXGI_DEBUG_ALL, messageCount - 1, nullptr, &messageLength);
+        if (SUCCEEDED(hr) && messageLength > 0) {
+            DXGI_INFO_QUEUE_MESSAGE* pMessage = static_cast<DXGI_INFO_QUEUE_MESSAGE*>(malloc(messageLength));
+            if (pMessage) {
+                // Second call fills the buffer; only read it if the call succeeded.
+                hr = dxgiInfoQueue->GetMessage(DXGI_DEBUG_ALL, messageCount - 1, pMessage, &messageLength);
+                if (SUCCEEDED(hr) && pMessage->pDescription)
+                    res = pMessage->pDescription;
+                free(pMessage);
+            }
+        }
+    }
+
+    // Single exit point past here so the info queue reference is always released.
+    dxgiInfoQueue->Release();
+    return res;
+#else
+    // TODO: Get useful error information for other platforms if possible
+    return "";
+#endif
+}
+
+// Builds the user friendly message that is passed into a slang failure exception,
+// used by SLANG_CALL.
+std::string build_slang_failed_message(const char* call, SlangResult result) +{ + auto msg = std::format("Slang call {} failed with error: {}\n", call, result); + if (static_cast(result) >= 0x80000000U) { + std::string gfx_error = get_last_gfx_layer_error(); + if (!gfx_error.empty()) { + msg += "\nLast graphics layer error:\n" + gfx_error; + } + } + return msg; +} + +} // namespace sgl diff --git a/src/sgl/device/helpers.h b/src/sgl/device/helpers.h index 6b80bcbd..d515d029 100644 --- a/src/sgl/device/helpers.h +++ b/src/sgl/device/helpers.h @@ -5,10 +5,17 @@ #include "sgl/core/error.h" #include +#include + + +namespace sgl { +SGL_API std::string build_slang_failed_message(const char* call, SlangResult result); +} #define SLANG_CALL(call) \ { \ SlangResult result_ = call; \ - if (SLANG_FAILED(result_)) \ - SGL_THROW("Slang call {} failed with error: {}", #call, result_); \ + if (SLANG_FAILED(result_)) { \ + SGL_THROW(build_slang_failed_message(#call, result_)); \ + } \ } diff --git a/src/sgl/device/python/types.cpp b/src/sgl/device/python/types.cpp index 42d5c5e0..39b9a19b 100644 --- a/src/sgl/device/python/types.cpp +++ b/src/sgl/device/python/types.cpp @@ -48,8 +48,22 @@ SGL_DICT_TO_DESC_FIELD(enable_conservative_rasterization, bool) SGL_DICT_TO_DESC_FIELD(forced_sample_count, uint32_t) SGL_DICT_TO_DESC_END() +SGL_DICT_TO_DESC_BEGIN(AspectBlendDesc) +SGL_DICT_TO_DESC_FIELD(src_factor, BlendFactor) +SGL_DICT_TO_DESC_FIELD(dst_factor, BlendFactor) +SGL_DICT_TO_DESC_FIELD(op, BlendOp) +SGL_DICT_TO_DESC_END() + +SGL_DICT_TO_DESC_BEGIN(TargetBlendDesc) +SGL_DICT_TO_DESC_FIELD(enable_blend, bool) +SGL_DICT_TO_DESC_FIELD_DICT(color, AspectBlendDesc) +SGL_DICT_TO_DESC_FIELD_DICT(alpha, AspectBlendDesc) +SGL_DICT_TO_DESC_FIELD(logic_op, LogicOp) +SGL_DICT_TO_DESC_FIELD(write_mask, RenderTargetWriteMask) +SGL_DICT_TO_DESC_END() + SGL_DICT_TO_DESC_BEGIN(BlendDesc) -SGL_DICT_TO_DESC_FIELD(targets, std::vector) +SGL_DICT_TO_DESC_FIELD_LIST(targets, TargetBlendDesc) 
SGL_DICT_TO_DESC_FIELD(alpha_to_coverage_enable, bool) SGL_DICT_TO_DESC_END() @@ -178,12 +192,20 @@ SGL_PY_EXPORT(device_types) nb::class_(m, "AspectBlendDesc", D(AspectBlendDesc)) .def(nb::init<>()) + .def( + "__init__", + [](AspectBlendDesc* self, nb::dict dict) { new (self) AspectBlendDesc(dict_to_AspectBlendDesc(dict)); } + ) .def_rw("src_factor", &AspectBlendDesc::src_factor, D(AspectBlendDesc, src_factor)) .def_rw("dst_factor", &AspectBlendDesc::dst_factor, D(AspectBlendDesc, dst_factor)) .def_rw("op", &AspectBlendDesc::op, D(AspectBlendDesc, op)); nb::class_(m, "TargetBlendDesc", D(TargetBlendDesc)) .def(nb::init<>()) + .def( + "__init__", + [](TargetBlendDesc* self, nb::dict dict) { new (self) TargetBlendDesc(dict_to_TargetBlendDesc(dict)); } + ) .def_rw("color", &TargetBlendDesc::color, D(TargetBlendDesc, color)) .def_rw("alpha", &TargetBlendDesc::alpha, D(TargetBlendDesc, alpha)) .def_rw("enable_blend", &TargetBlendDesc::enable_blend, D(TargetBlendDesc, enable_blend)) @@ -192,6 +214,7 @@ SGL_PY_EXPORT(device_types) nb::class_(m, "BlendDesc", D(BlendDesc)) .def(nb::init<>()) + .def("__init__", [](BlendDesc* self, nb::dict dict) { new (self) BlendDesc(dict_to_BlendDesc(dict)); }) .def_rw("targets", &BlendDesc::targets, D(BlendDesc, targets)) .def_rw( "alpha_to_coverage_enable", diff --git a/src/sgl/device/tests/test_pipeline.py b/src/sgl/device/tests/test_pipeline.py new file mode 100644 index 00000000..53549c11 --- /dev/null +++ b/src/sgl/device/tests/test_pipeline.py @@ -0,0 +1,801 @@ +import sgl +import pytest +import numpy as np +import sys +from pathlib import Path + +sys.path.append(str(Path(__file__).parent)) +import helpers + + +class PipelineTestContext: + def __init__(self, device_type, size=128) -> None: + self.device = helpers.get_device(type=device_type) + self.output_texture = self.device.create_texture( + format=sgl.Format.rgba32_float, + width=size, + height=size, + usage=sgl.ResourceUsage.unordered_access + | 
sgl.ResourceUsage.shader_resource + | sgl.ResourceUsage.render_target, + debug_name="render_texture", + ) + self.count_buffer = self.device.create_buffer( + usage=sgl.ResourceUsage.unordered_access + | sgl.ResourceUsage.shader_resource, + size=16, + debug_name="count_buffer", + data=np.array([0, 0, 0, 0], dtype=np.uint32), + ) + + self.clear_kernel = self.device.create_compute_kernel( + self.device.load_program("test_pipeline_utils.slang", ["clear"]) + ) + self.count_kernel = self.device.create_compute_kernel( + self.device.load_program("test_pipeline_utils.slang", ["count"]) + ) + + self.clear() + + def clear(self): + self.clear_kernel.dispatch( + thread_count=[self.output_texture.width, self.output_texture.height, 1], + render_texture=self.output_texture, + ) + + def count(self): + self.count_buffer.from_numpy(np.array([0, 0, 0, 0], dtype=np.uint32)) + self.count_kernel.dispatch( + thread_count=[self.output_texture.width, self.output_texture.height, 1], + render_texture=self.output_texture, + count_buffer=self.count_buffer, + ) + + def expect_counts(self, expected): + self.count() + count = self.count_buffer.to_numpy().view(np.uint32) + assert np.all(count == expected) + + def create_quad_mesh(self): + vertices = np.array( + [-1, -1, -1, 1, -1, -1, -1, 1, -1, 1, 1, -1], dtype=np.float32 + ) + indices = np.array([0, 1, 2, 1, 3, 2], dtype=np.uint32) + + vertex_buffer = self.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="vertex_buffer", + data=vertices, + ) + input_layout = self.device.create_input_layout( + input_elements=[ + { + "semantic_name": "POSITION", + "semantic_index": 0, + "format": sgl.Format.rgb32_float, + "offset": 0, + }, + ], + vertex_streams=[{"stride": 12}], + ) + index_buffer = self.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="index_buffer", + data=indices, + ) + + return vertex_buffer, index_buffer, input_layout + + +@pytest.mark.parametrize("device_type", 
helpers.DEFAULT_DEVICE_TYPES) +def test_clear_and_count(device_type): + ctx = PipelineTestContext(device_type) + ctx.expect_counts([0, 0, 0, 0]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_compute_set_square(device_type): + ctx = PipelineTestContext(device_type) + prog = ctx.device.load_program("test_pipeline_utils.slang", ["setcolor"]) + set_kernel = ctx.device.create_compute_kernel(prog) + + pos = sgl.int2(32, 32) + size = sgl.int2(16, 16) + set_kernel.dispatch( + thread_count=[ctx.output_texture.width, ctx.output_texture.height, 1], + render_texture=ctx.output_texture, + pos=pos, + size=size, + color=sgl.float4(1, 0, 0, 1), + ) + + area = size.x * size.y + ctx.expect_counts([area, 0, 0, area]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_compute_set_and_overwrite(device_type): + ctx = PipelineTestContext(device_type) + prog = ctx.device.load_program("test_pipeline_utils.slang", ["setcolor"]) + set_kernel = ctx.device.create_compute_kernel(prog) + + pos1 = sgl.int2(0, 0) + size1 = sgl.int2(128, 128) + set_kernel.dispatch( + thread_count=[ctx.output_texture.width, ctx.output_texture.height, 1], + render_texture=ctx.output_texture, + pos=pos1, + size=size1, + color=sgl.float4(1, 0, 0, 0), + ) + + pos2 = sgl.int2(32, 32) + size2 = sgl.int2(16, 16) + set_kernel.dispatch( + thread_count=[ctx.output_texture.width, ctx.output_texture.height, 1], + render_texture=ctx.output_texture, + pos=pos2, + size=size2, + color=sgl.float4(0, 1, 0, 0), + ) + + area1 = size1.x * size1.y + area2 = size2.x * size2.y + ctx.expect_counts([area1 - area2, area2, 0, 0]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_gfx_clear(device_type): + ctx = PipelineTestContext(device_type) + + command_buffer = ctx.device.create_command_buffer() + command_buffer.clear_resource_view( + ctx.output_texture.get_rtv(), [1.0, 0.0, 1.0, 0.0] + ) + command_buffer.submit() + + area = 
ctx.output_texture.width * ctx.output_texture.height + + ctx.expect_counts([area, 0, area, 0]) + + +class GfxContext: + def __init__(self, ctx: PipelineTestContext) -> None: + self.ctx = ctx + self.program = ctx.device.load_program( + "test_pipeline_utils.slang", ["vertex_main", "fragment_main"] + ) + self.vertex_buffer, self.index_buffer, self.input_layout = ( + ctx.create_quad_mesh() + ) + self.framebuffer = ctx.device.create_framebuffer( + render_targets=[ctx.output_texture.get_rtv()] + ) + + # Draw a quad with the given pipeline and color, optionally clearing to black first. + # The quad is [-1,-1]->[1,1] so if offset/scale aren't specified will fill the whole screen. + def draw( + self, + pipeline: sgl.Pipeline, + vert_offset=sgl.float2(0, 0), + vert_scale=sgl.float2(1, 1), + vert_z=0.0, + color=sgl.float4(0, 0, 0, 0), + viewport: sgl.Viewport = None, + clear=True, + ): + command_buffer = self.ctx.device.create_command_buffer() + with command_buffer.encode_render_commands(self.framebuffer) as encoder: + if clear: + command_buffer.clear_resource_view( + self.ctx.output_texture.get_rtv(), [0.0, 0.0, 0.0, 1.0] + ) + if viewport: + encoder.set_viewport_and_scissor_rect(viewport) + else: + encoder.set_viewport_and_scissor_rect( + { + "width": self.ctx.output_texture.width, + "height": self.ctx.output_texture.height, + } + ) + shader_object = encoder.bind_pipeline(pipeline) + cursor = sgl.ShaderCursor(shader_object) + cursor.vert_offset = vert_offset + cursor.vert_scale = vert_scale + cursor.vert_z = float(vert_z) + cursor.frag_color = color + encoder.set_vertex_buffer(0, self.vertex_buffer) + encoder.set_index_buffer(self.index_buffer, sgl.Format.r32_uint, 0) + encoder.set_primitive_topology(sgl.PrimitiveTopology.triangle_list) + encoder.draw_indexed(int(self.index_buffer.size / 4)) + command_buffer.submit() + + # Helper to create pipeline with given set of args + correct program/layouts. 
+ def create_graphics_pipeline(self, **kwargs): + return self.ctx.device.create_graphics_pipeline( + program=self.program, + input_layout=self.input_layout, + framebuffer_layout=self.framebuffer.layout, + **kwargs, + ) + + # Helper to both create pipeline and then use it to draw quad. + def draw_graphics_pipeline( + self, + vert_offset=sgl.float2(0, 0), + vert_scale=sgl.float2(1, 1), + vert_z=0, + color=sgl.float4(0, 0, 0, 0), + clear=True, + viewport: sgl.Viewport = None, + **kwargs, + ): + pipeline = self.create_graphics_pipeline(**kwargs) + self.draw( + pipeline, + color=color, + clear=clear, + vert_offset=vert_offset, + vert_scale=vert_scale, + vert_z=vert_z, + viewport=viewport, + ) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_gfx_simple_primitive(device_type): + ctx = PipelineTestContext(device_type) + gfx = GfxContext(ctx) + + area = ctx.output_texture.width * ctx.output_texture.height + scale = sgl.float2(0.5) + + # Clear and fill red, then verify 1/4 pixels are red and all solid. + gfx.draw_graphics_pipeline( + color=sgl.float4(1, 0, 0, 1), + vert_scale=scale, + rasterizer={"cull_mode": sgl.CullMode.back}, + ) + ctx.expect_counts([int(area / 4), 0, 0, area]) + + # Repeat with no culling, so should get same result. + gfx.draw_graphics_pipeline( + color=sgl.float4(0, 1, 0, 1), + vert_scale=scale, + rasterizer={"cull_mode": sgl.CullMode.none}, + ) + ctx.expect_counts([0, int(area / 4), 0, area]) + + # Repeat with front face culling, so should get all black. 
+ gfx.draw_graphics_pipeline( + color=sgl.float4(1, 1, 1, 1), + vert_scale=scale, + rasterizer={"cull_mode": sgl.CullMode.front}, + ) + ctx.expect_counts([0, 0, 0, area]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_gfx_viewport(device_type): + ctx = PipelineTestContext(device_type) + gfx = GfxContext(ctx) + + area = ctx.output_texture.width * ctx.output_texture.height + scale = sgl.float2(0.5) + + # Clear and fill red, and verify it filled the whole screen. + gfx.draw_graphics_pipeline( + color=sgl.float4(1, 0, 0, 1), rasterizer={"cull_mode": sgl.CullMode.back} + ) + ctx.expect_counts([area, 0, 0, area]) + + # Use viewport to clear half the screen. + gfx.draw_graphics_pipeline( + color=sgl.float4(0, 1, 0, 1), + rasterizer={"cull_mode": sgl.CullMode.back}, + viewport=sgl.Viewport( + { + "width": int(ctx.output_texture.width / 2), + "height": ctx.output_texture.height, + } + ), + ) + ctx.expect_counts([0, int(area / 2), 0, area]) + + # Same using horizontal clip instead. + gfx.draw_graphics_pipeline( + color=sgl.float4(0, 1, 0, 1), + rasterizer={"cull_mode": sgl.CullMode.back}, + viewport=sgl.Viewport( + { + "width": ctx.output_texture.width, + "height": int(ctx.output_texture.height / 2), + } + ), + ) + ctx.expect_counts([0, int(area / 2), 0, area]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_gfx_depth(device_type): + ctx = PipelineTestContext(device_type) + gfx = GfxContext(ctx) + + # Create a depth texture and re-create frame buffer that uses depth. 
+ depth_texture = ctx.device.create_texture( + format=sgl.Format.d32_float, + width=ctx.output_texture.width, + height=ctx.output_texture.height, + usage=sgl.ResourceUsage.shader_resource | sgl.ResourceUsage.depth_stencil, + debug_name="depth_texture", + ) + gfx.framebuffer = ctx.device.create_framebuffer( + render_targets=[ctx.output_texture.get_rtv()], + depth_stencil=depth_texture.get_dsv(), + ) + + area = ctx.output_texture.width * ctx.output_texture.height + + # Manually clear both buffers and verify results. + command_buffer = ctx.device.create_command_buffer() + with command_buffer.encode_render_commands(gfx.framebuffer) as encoder: + command_buffer.clear_resource_view( + ctx.output_texture.get_rtv(), [0.0, 0.0, 0.0, 1.0] + ) + command_buffer.clear_resource_view(depth_texture.get_dsv(), 0.5, 0, True, True) + command_buffer.submit() + ctx.expect_counts([0, 0, 0, area]) + + # Write quad with z=0.25, which is closer than the z buffer clear value of 0.5 so should come through. + gfx.draw_graphics_pipeline( + color=sgl.float4(1, 0, 0, 1), + clear=False, + vert_scale=sgl.float2(0.5), + vert_z=0.25, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.less, + }, + ) + ctx.expect_counts([int(area / 4), 0, 0, area]) + + # Write a great big quad at z=0.75, which should do nothing. + gfx.draw_graphics_pipeline( + color=sgl.float4(1, 1, 1, 1), + clear=False, + vert_z=0.75, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.less, + }, + ) + ctx.expect_counts([int(area / 4), 0, 0, area]) + + # Write a great big quad at z=0.4, which should overwrite the background but not the foreground. 
+ gfx.draw_graphics_pipeline( + color=sgl.float4(1, 1, 1, 1), + clear=False, + vert_z=0.4, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.less, + }, + ) + ctx.expect_counts([area, area - int(area / 4), area - int(area / 4), area]) + + # Write a great big quad at z=0.75 with depth func always, which should just blat the lot. + gfx.draw_graphics_pipeline( + color=sgl.float4(0, 0, 1, 1), + clear=False, + vert_z=0.75, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.always, + }, + ) + ctx.expect_counts([0, 0, area, area]) + + # Quick check that the depth write happened correctly + dt = depth_texture.to_numpy() + assert np.all(dt == 0.75) + + # Try again at z=0.8, which should do nothing as z write was still enabled with the previous one. + gfx.draw_graphics_pipeline( + color=sgl.float4(1, 1, 1, 1), + clear=False, + vert_z=0.8, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.less, + }, + ) + ctx.expect_counts([0, 0, area, area]) + + # Write out a full quad at z=0.25, with z write turned off, so should work but not affect z buffer. 
+ gfx.draw_graphics_pipeline( + color=sgl.float4(1, 0, 0, 1), + clear=False, + vert_z=0.25, + rasterizer={"cull_mode": sgl.CullMode.back}, + depth_stencil={ + "depth_test_enable": True, + "depth_write_enable": True, + "depth_func": sgl.ComparisonFunc.less, + }, + ) + ctx.expect_counts([area, 0, 0, area]) + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +def test_gfx_blend(device_type): + ctx = PipelineTestContext(device_type) + gfx = GfxContext(ctx) + area = ctx.output_texture.width * ctx.output_texture.height + + # Clear and then draw semi transparent red quad, and should get 1/4 dark red pixels. + gfx.draw_graphics_pipeline( + clear=True, + color=sgl.float4(1, 0, 0, 0.5), + vert_scale=sgl.float2(0.5), + rasterizer={"cull_mode": sgl.CullMode.back}, + blend=sgl.BlendDesc( + { + "alpha_to_coverage_enable": False, + "targets": [ + { + "enable_blend": True, + "color": { + "src_factor": sgl.BlendFactor.src_alpha, + "dst_factor": sgl.BlendFactor.inv_src_alpha, + "op": sgl.BlendOp.add, + }, + "alpha": { + "src_factor": sgl.BlendFactor.zero, + "dst_factor": sgl.BlendFactor.one, + "op": sgl.BlendOp.add, + }, + } + ], + } + ), + ) + pixels = ctx.output_texture.to_numpy() + is_pixel_red = np.all(pixels[:, :, :3] == [0.5, 0, 0], axis=2) + assert np.sum(is_pixel_red) == int(area / 4) + + +# On Vulkan using 50% alpha coverage we get a checkerboard effect. +@pytest.mark.parametrize("device_type", [sgl.DeviceType.vulkan]) +def test_gfx_alpha_coverage(device_type): + ctx = PipelineTestContext(device_type) + gfx = GfxContext(ctx) + area = ctx.output_texture.width * ctx.output_texture.height + + # Clear and then draw semi transparent red quad, and should end up + # with 1/8 of the pixels red due to alpha coverage. 
+ gfx.draw_graphics_pipeline( + clear=True, + color=sgl.float4(1, 0, 0, 0.5), + vert_scale=sgl.float2(0.5), + rasterizer={"cull_mode": sgl.CullMode.back}, + blend=sgl.BlendDesc( + { + "alpha_to_coverage_enable": True, + "targets": [ + { + "enable_blend": True, + "color": {"src_factor": sgl.BlendFactor.src_alpha}, + } + ], + } + ), + ) + + pixels = ctx.output_texture.to_numpy() + is_pixel_red = np.all(pixels[:, :, :3] == [0.5, 0, 0], axis=2) + assert np.sum(is_pixel_red) == int(area / 8) + + +class RayContext: + def __init__(self, ctx: PipelineTestContext) -> None: + self.ctx = ctx + + vertices = np.array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0], dtype=np.float32) + indices = np.array([0, 1, 2, 1, 3, 2], dtype=np.uint32) + + vertex_buffer = ctx.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="vertex_buffer", + data=vertices, + ) + + index_buffer = ctx.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="index_buffer", + data=indices, + ) + + transform_buffer = ctx.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="transform_buffer", + data=sgl.float3x4.identity().to_numpy(), + ) + + blas_geometry_desc = sgl.RayTracingGeometryDesc() + blas_geometry_desc.type = sgl.RayTracingGeometryType.triangles + blas_geometry_desc.flags = sgl.RayTracingGeometryFlags.opaque + blas_geometry_desc.triangles.transform3x4 = transform_buffer.device_address + blas_geometry_desc.triangles.index_format = sgl.Format.r32_uint + blas_geometry_desc.triangles.vertex_format = sgl.Format.rgb32_float + blas_geometry_desc.triangles.index_count = indices.size + blas_geometry_desc.triangles.vertex_count = vertices.size // 3 + blas_geometry_desc.triangles.index_data = index_buffer.device_address + blas_geometry_desc.triangles.vertex_data = vertex_buffer.device_address + blas_geometry_desc.triangles.vertex_stride = vertices.itemsize * 3 + + blas_build_inputs = sgl.AccelerationStructureBuildInputs() + blas_build_inputs.kind = 
sgl.AccelerationStructureKind.bottom_level + blas_build_inputs.flags = sgl.AccelerationStructureBuildFlags.none + blas_build_inputs.geometry_descs = [blas_geometry_desc] + + blas_prebuild_info = ctx.device.get_acceleration_structure_prebuild_info( + blas_build_inputs + ) + + blas_scratch_buffer = ctx.device.create_buffer( + size=blas_prebuild_info.scratch_data_size, + usage=sgl.ResourceUsage.unordered_access, + debug_name="blas_scratch_buffer", + ) + + blas_buffer = ctx.device.create_buffer( + size=blas_prebuild_info.result_data_max_size, + usage=sgl.ResourceUsage.acceleration_structure, + debug_name="blas_buffer", + ) + + blas = ctx.device.create_acceleration_structure( + kind=sgl.AccelerationStructureKind.bottom_level, + buffer=blas_buffer, + size=blas_buffer.size, + ) + + command_buffer = ctx.device.create_command_buffer() + with command_buffer.encode_ray_tracing_commands() as encoder: + encoder.build_acceleration_structure( + inputs=blas_build_inputs, + dst=blas, + scratch_data=blas_scratch_buffer.device_address, + ) + command_buffer.submit() + + self.blas = blas + + def create_instances(self, instance_transforms: np.ndarray): + + instances: list[sgl.RayTracingInstanceDesc] = [] + for i, transform in enumerate(instance_transforms): + instance_desc = sgl.RayTracingInstanceDesc() + instance_desc.transform = transform + instance_desc.instance_id = i + instance_desc.instance_mask = 0xFF + instance_desc.instance_contribution_to_hit_group_index = 0 + instance_desc.flags = sgl.RayTracingInstanceFlags.none + instance_desc.acceleration_structure = self.blas.device_address + instances.append(instance_desc) + + instance_buffer = self.ctx.device.create_buffer( + usage=sgl.ResourceUsage.shader_resource, + debug_name="instance_buffer", + data=np.stack([i.to_numpy() for i in instances]), + ) + + tlas_build_inputs = sgl.AccelerationStructureBuildInputs() + tlas_build_inputs.kind = sgl.AccelerationStructureKind.top_level + tlas_build_inputs.flags = 
sgl.AccelerationStructureBuildFlags.none + tlas_build_inputs.desc_count = len(instances) + tlas_build_inputs.instance_descs = instance_buffer.device_address + + tlas_prebuild_info = self.ctx.device.get_acceleration_structure_prebuild_info( + tlas_build_inputs + ) + + tlas_scratch_buffer = self.ctx.device.create_buffer( + size=tlas_prebuild_info.scratch_data_size, + usage=sgl.ResourceUsage.unordered_access, + debug_name="tlas_scratch_buffer", + ) + + tlas_buffer = self.ctx.device.create_buffer( + size=tlas_prebuild_info.result_data_max_size, + usage=sgl.ResourceUsage.acceleration_structure, + debug_name="tlas_buffer", + ) + + tlas = self.ctx.device.create_acceleration_structure( + kind=sgl.AccelerationStructureKind.top_level, + buffer=tlas_buffer, + size=tlas_buffer.size, + ) + + command_buffer = self.ctx.device.create_command_buffer() + with command_buffer.encode_ray_tracing_commands() as encoder: + encoder.build_acceleration_structure( + inputs=tlas_build_inputs, + dst=tlas, + scratch_data=tlas_scratch_buffer.device_address, + ) + command_buffer.submit() + + return tlas + + def dispatch_ray_grid(self, tlas: sgl.AccelerationStructure, mode: str): + if mode == "compute": + self.dispatch_ray_grid_compute(tlas) + elif mode == "ray": + self.dispatch_ray_grid_rtp(tlas) + else: + raise ValueError(f"Unknown mode {mode}") + + def dispatch_ray_grid_compute(self, tlas: sgl.AccelerationStructure): + program = self.ctx.device.load_program("test_pipeline_utils.slang", ["raygrid"]) + kernel = self.ctx.device.create_compute_kernel(program) + kernel.dispatch( + thread_count=[ + self.ctx.output_texture.width, + self.ctx.output_texture.height, + 1, + ], + render_texture=self.ctx.output_texture, + tlas=tlas, + pos=sgl.int2(0, 0), + size=sgl.int2( + self.ctx.output_texture.width, self.ctx.output_texture.height + ), + dist=float(2), + ) + + def dispatch_ray_grid_rtp(self, tlas: sgl.AccelerationStructure): + program = self.ctx.device.load_program( + "test_pipeline_utils.slang", 
["rt_ray_gen", "rt_miss", "rt_closest_hit"] + ) + pipeline = self.ctx.device.create_ray_tracing_pipeline( + program=program, + hit_groups=[ + sgl.HitGroupDesc( + hit_group_name="hit_group", closest_hit_entry_point="rt_closest_hit" + ) + ], + max_recursion=1, + max_ray_payload_size=16, + ) + + shader_table = self.ctx.device.create_shader_table( + program=program, + ray_gen_entry_points=["rt_ray_gen"], + miss_entry_points=["rt_miss"], + hit_group_names=["hit_group"], + ) + + command_buffer = self.ctx.device.create_command_buffer() + with command_buffer.encode_ray_tracing_commands() as encoder: + shader_object = encoder.bind_pipeline(pipeline) + cursor = sgl.ShaderCursor(shader_object) + cursor.rt_tlas = tlas + cursor.rt_render_texture = self.ctx.output_texture + encoder.dispatch_rays( + 0, + shader_table, + [self.ctx.output_texture.width, self.ctx.output_texture.height, 1], + ) + command_buffer.submit() + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +@pytest.mark.parametrize("mode", ["compute", "ray"]) +def test_raytrace_simple(device_type, mode): + ctx = PipelineTestContext( + device_type, + ) + rtx = RayContext(ctx) + + # Setup instance transform causes the [0-1] quad to cover the top left + # quarter of the screen. This is basically pixels 0-63, so we scale it up + # a bit to handle rounding issues. The quad is at z=1 so should be visible. + tf = sgl.math.mul( + sgl.math.matrix_from_translation(sgl.float3(-0.05, -0.05, 1)), + sgl.math.matrix_from_scaling(sgl.float3(63.1, 63.1, 1)), + ) + tf = sgl.float3x4(tf) + tlas = rtx.create_instances([tf]) + + # Load and run the ray tracing kernel that fires a grid of rays + # The grid covers the whole texture, and rays have length of 2 so + # should hit the quad and turn the pixels red. 
+ rtx.dispatch_ray_grid(tlas, mode) + + # Check the 64x64 pixels are now red + pixels = ctx.output_texture.to_numpy() + is_pixel_red = np.all(pixels[:, :, :3] == [1, 0, 0], axis=2) + num_red = np.sum(is_pixel_red) + assert num_red == 4096 + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +@pytest.mark.parametrize("mode", ["compute", "ray"]) +def test_raytrace_two_instance(device_type, mode): + ctx = PipelineTestContext(device_type) + rtx = RayContext(ctx) + + # Ray trace against 2 instances, in top left and bottom right. + transforms = [] + transforms.append( + sgl.math.mul( + sgl.math.matrix_from_translation(sgl.float3(-0.05, -0.05, 1)), + sgl.math.matrix_from_scaling(sgl.float3(63.1, 63.1, 1)), + ) + ) + transforms.append( + sgl.math.mul( + sgl.math.matrix_from_translation(sgl.float3(64 - 0.05, 64 - 0.05, 1)), + sgl.math.matrix_from_scaling(sgl.float3(63.1, 63.1, 1)), + ) + ) + + tlas = rtx.create_instances([sgl.float3x4(x) for x in transforms]) + rtx.dispatch_ray_grid(tlas, mode) + + # Expect 2 64x64 squares, with red from 1st instance and green from 2nd. + pixels = ctx.output_texture.to_numpy() + is_pixel_red = np.all(pixels[:, :, :3] == [1, 0, 0], axis=2) + is_pixel_green = np.all(pixels[:, :, :3] == [0, 1, 0], axis=2) + assert np.sum(is_pixel_red) == 4096 + assert np.sum(is_pixel_green) == 4096 + + +@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) +@pytest.mark.parametrize("mode", ["compute", "ray"]) +def test_raytrace_closest_instance(device_type, mode): + ctx = PipelineTestContext(device_type) + rtx = RayContext(ctx) + + # Ray trace against 2 instances, slightly overlapping, + # with centre one closer. 
+ transforms = [] + transforms.append( + sgl.math.mul( + sgl.math.matrix_from_translation(sgl.float3(-0.05, -0.05, 1)), + sgl.math.matrix_from_scaling(sgl.float3(63.1, 63.1, 1)), + ) + ) + transforms.append( + sgl.math.mul( + sgl.math.matrix_from_translation(sgl.float3(32 - 0.05, 32 - 0.05, 0.5)), + sgl.math.matrix_from_scaling(sgl.float3(63.1, 63.1, 1)), + ) + ) + + tlas = rtx.create_instances([sgl.float3x4(x) for x in transforms]) + rtx.dispatch_ray_grid(tlas, mode) + + # Expect full green square, and only 3/4 of red square. + pixels = ctx.output_texture.to_numpy() + is_pixel_red = np.all(pixels[:, :, :3] == [1, 0, 0], axis=2) + is_pixel_green = np.all(pixels[:, :, :3] == [0, 1, 0], axis=2) + assert np.sum(is_pixel_red) == 3072 + assert np.sum(is_pixel_green) == 4096 + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"]) diff --git a/src/sgl/device/tests/test_pipeline_utils.slang b/src/sgl/device/tests/test_pipeline_utils.slang new file mode 100644 index 00000000..b42a911f --- /dev/null +++ b/src/sgl/device/tests/test_pipeline_utils.slang @@ -0,0 +1,148 @@ + +[shader("compute")] +[numthreads(16, 16, 1)] +void clear(uint2 tid: SV_DispatchThreadID, RWTexture2D render_texture) +{ + uint2 dim; + render_texture.GetDimensions(dim.x, dim.y); + if (any(tid.xy >= dim)) + return; + render_texture[tid.xy] = float4(0); +} + +[shader("compute")] +[numthreads(16, 16, 1)] +void count(uint2 tid: SV_DispatchThreadID, Texture2D render_texture, RWByteAddressBuffer count_buffer) +{ + uint2 dim; + render_texture.GetDimensions(dim.x, dim.y); + if (any(tid.xy >= dim)) + return; + float4 val = render_texture[tid.xy]; + count_buffer.InterlockedAdd(0, val.x > 0 ? 1 : 0); + count_buffer.InterlockedAdd(4, val.y > 0 ? 1 : 0); + count_buffer.InterlockedAdd(8, val.z > 0 ? 1 : 0); + count_buffer.InterlockedAdd(12, val.w > 0 ? 
1 : 0); +} + +[shader("compute")] +[numthreads(16, 16, 1)] +void setcolor( + uint2 tid: SV_DispatchThreadID, + RWTexture2D render_texture, + uniform int2 pos, + uniform int2 size, + uniform float4 color +) +{ + if (any(tid.xy >= size)) + return; + render_texture[tid.xy + pos] = color; +} + + +struct V2F { + float4 pos : SV_Position; +}; + +uniform float2 vert_offset; +uniform float2 vert_scale; +uniform float vert_z; + +[shader("vertex")] +V2F vertex_main(float3 pos: POSITION) +{ + V2F o; + o.pos = float4(pos.xy * vert_scale.xy + vert_offset.xy, vert_z, 1); + return o; +} + +uniform float4 frag_color; + +[shader("fragment")] +float4 fragment_main(V2F v) + : SV_Target +{ + return frag_color; +} + + +[shader("compute")] +[numthreads(16, 16, 1)] +void raygrid( + uint2 tid: SV_DispatchThreadID, + RWTexture2D render_texture, + RaytracingAccelerationStructure tlas, + uniform int2 pos, + uniform int2 size, + uniform float dist +) +{ + if (any(tid.xy >= size)) + return; + + RayDesc ray; + ray.Origin = float3(tid.xy, 0); + ray.Direction = float3(0, 0, 1); + ray.TMin = 0; + ray.TMax = 2; + + RayQuery q; + q.TraceRayInline(tlas, 0, 0xff, ray); + q.Proceed(); + if (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) { + float4 color = float4(0, 0, 0, 1); + color[q.CommittedInstanceID() % 3] = 1; + render_texture[tid.xy] = color; + } else { + render_texture[tid.xy] = float4(0, 0, 0, 1); + } +} + +struct Payload { + float4 color; +} + +[shader("miss")] +void rt_miss(inout Payload payload) +{ + payload.color = float4(0, 0, 0, 1); +} + +[shader("closesthit")] +void rt_closest_hit(inout Payload payload, BuiltInTriangleIntersectionAttributes attribs) +{ + float4 col = float4(0, 0, 0, 1); + col[InstanceID() % 3] = 1; + payload.color = col; +} + +uniform RWTexture2D rt_render_texture; +uniform RaytracingAccelerationStructure rt_tlas; + +[shader("raygeneration")] +void rt_ray_gen() +{ + uint2 pixel = DispatchRaysIndex().xy; + + RayDesc ray; + ray.Origin = float3(pixel.xy, 0); + 
ray.Direction = float3(0, 0, 1); + ray.TMin = 0; + ray.TMax = 2; + + Payload payload = {}; + + TraceRay( + rt_tlas, + 0, + 0xff, + 0 /* RayContributionToHitGroupIndex */, + 0 /* MultiplierForGeometryContributionHitGroupIndex */, + 0 /* MissShaderIndex */, + ray, + payload + ); + + rt_render_texture[pixel] = payload.color; +}