Skip to content

Commit

Permalink
gpu: More efficient light texture updates.
Browse files Browse the repository at this point in the history
Instead of issuing one `Queue::write_texture()` per cube, batch the data
into a single `Buffer` and copy from it. This is significantly faster,
at least on the CPU side; the time spent on heavy updates has been cut
from ~5 ms to ~1 ms.

Future work: use `StagingBelt` instead of using `write_buffer()`.
To do this optimally, `StagingBelt` will need to be modified to allow
us to access its buffers and issue a `copy_buffer_to_texture` ourselves,
instead of it issuing a `copy_buffer_to_buffer`.
  • Loading branch information
kpreid committed Oct 26, 2023
1 parent d3bb23d commit f4264c9
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions all-is-cubes-gpu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ futures-channel = { workspace = true, features = ["sink"] }
futures-core = { workspace = true }
futures-util = { workspace = true, features = ["sink"] }
instant = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
once_cell = { workspace = true }
rand = { workspace = true }
Expand Down
92 changes: 83 additions & 9 deletions all-is-cubes-gpu/src/in_wgpu/space.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::sync::{Arc, Mutex, Weak};
use all_is_cubes::camera::{Camera, Flaws};
use all_is_cubes::chunking::ChunkPos;
use all_is_cubes::content::palette;
use all_is_cubes::euclid::vec3;
use all_is_cubes::listen::{Listen as _, Listener};
use all_is_cubes::math::{
Cube, Face6, FaceMap, FreeCoordinate, GridAab, GridCoordinate, GridPoint, GridVector, Rgb,
Expand All @@ -18,7 +19,9 @@ use all_is_cubes_mesh::dynamic::{ChunkedSpaceMesh, RenderDataUpdate};
use all_is_cubes_mesh::{DepthOrdering, IndexSlice};

use crate::in_wgpu::frame_texture::FramebufferTextures;
use crate::in_wgpu::glue::{size_vector_to_extent, to_wgpu_index_format, write_texture_by_aab};
use crate::in_wgpu::glue::{
point_to_origin, size_vector_to_extent, to_wgpu_index_format, write_texture_by_aab,
};
use crate::in_wgpu::pipelines::Pipelines;
use crate::in_wgpu::vertex::{WgpuInstanceData, WgpuLinesVertex};
use crate::in_wgpu::{
Expand Down Expand Up @@ -186,13 +189,9 @@ impl<I: time::Instant> SpaceRenderer<I> {
let mut light_update_count = 0;
if let Some(set) = &mut todo.light {
// TODO: work in larger, ahem, chunks
for cube in set.drain() {
light_update_count += self.light_texture.update(
queue,
space,
GridAab::from_lower_size(cube, [1, 1, 1]),
);
}
light_update_count +=
self.light_texture
.update_scatter(device, queue, space, set.drain());
} else {
light_update_count += self.light_texture.update_all(queue, space);
todo.light = Some(HashSet::new());
Expand Down Expand Up @@ -665,9 +664,14 @@ pub(in crate::in_wgpu) struct SpaceLightTexture {
texture_view: wgpu::TextureView,
/// The region of cube coordinates for which there are valid texels.
texture_bounds: GridAab,
/// Temporary storage for updated light texels to be copied into the texture.
copy_buffer: wgpu::Buffer,
}

impl SpaceLightTexture {
/// Number of texels that `copy_buffer` can hold; also the maximum number of cubes
/// handled per batch by `update_scatter()`.
const COPY_BUFFER_TEXELS: usize = 1024;
/// Number of `u8` components making up one light texel.
const COMPONENTS: usize = 4;

/// Construct a new `SpaceLightTexture` for the specified size of [`Space`],
/// with no data.
pub fn new(label_prefix: &str, device: &wgpu::Device, bounds: GridAab) -> Self {
Expand All @@ -694,12 +698,18 @@ impl SpaceLightTexture {
texture_view: texture.create_view(&wgpu::TextureViewDescriptor::default()),
texture,
texture_bounds,
copy_buffer: device.create_buffer(&wgpu::BufferDescriptor {
label: Some(&format!("{label_prefix} space light copy buffer")),
size: u64::try_from(Self::COPY_BUFFER_TEXELS * Self::COMPONENTS).unwrap(),
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
}),
}
}

/// Copy the specified region of light data.
pub fn update(&mut self, queue: &wgpu::Queue, space: &Space, region: GridAab) -> usize {
let mut data: Vec<[u8; 4]> = Vec::with_capacity(region.volume());
let mut data: Vec<[u8; Self::COMPONENTS]> = Vec::with_capacity(region.volume());
// TODO: Enable circular operation and eliminate the need for the offset of the
// coordinates (texture_bounds.lower_bounds() and light_offset in the shader)
// by doing a coordinate wrap-around -- the shader and the Space will agree
Expand Down Expand Up @@ -728,6 +738,70 @@ impl SpaceLightTexture {
self.texture_bounds.volume()
}

/// Upload the light data of a scattered set of individual cubes.
///
/// The cubes are processed in batches of at most [`Self::COPY_BUFFER_TEXELS`].
/// For each batch, the texels are gathered and written into `copy_buffer`, one
/// single-texel buffer-to-texture copy command is recorded per cube, and the batch
/// is submitted to `queue` as its own command buffer.
///
/// Returns the number of cubes whose texels were copied.
pub fn update_scatter(
    &mut self,
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    space: &Space,
    cubes: impl IntoIterator<Item = Cube>,
) -> usize {
    let mut total_count = 0;

    // Split the input into groups that each fit within the copy buffer.
    let batches = itertools::Itertools::chunks(cubes.into_iter(), Self::COPY_BUFFER_TEXELS);
    for batch in &batches {
        // CPU-side staging area for this batch's texels; slots past `filled` stay unused.
        let mut texels: [[u8; Self::COMPONENTS]; Self::COPY_BUFFER_TEXELS] =
            [[0; Self::COMPONENTS]; Self::COPY_BUFFER_TEXELS];
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("space light scatter-copy"),
        });
        let mut filled = 0;

        for (index, cube) in batch.enumerate() {
            texels[index] = space.get_lighting(cube).as_texel();

            // TODO: When compute shaders are available, use a compute shader to do these
            // scattered writes instead of issuing individual commands.
            let source = wgpu::ImageCopyBuffer {
                buffer: &self.copy_buffer,
                layout: wgpu::ImageDataLayout {
                    // This cube's texel occupies slot `index` of the copy buffer.
                    offset: (index * Self::COMPONENTS) as u64,
                    bytes_per_row: None,
                    rows_per_image: None,
                },
            };
            let destination = wgpu::ImageCopyTexture {
                texture: &self.texture,
                mip_level: 0,
                origin: point_to_origin(cube.lower_bounds() + self.light_lookup_offset()),
                aspect: wgpu::TextureAspect::All,
            };
            let one_texel = size_vector_to_extent(vec3(1, 1, 1));
            encoder.copy_buffer_to_texture(source, destination, one_texel);

            filled = index + 1;
        }
        total_count += filled;

        // Upload only the slots actually filled by this batch. The write is queued
        // before the encoder is submitted below.
        //
        // TODO: use `StagingBelt` to write buffer instead.
        // To do this optimally, `StagingBelt` will need to be modified to allow
        // us to access its buffers and issue a `copy_buffer_to_texture` ourselves,
        // instead of it issuing a `copy_buffer_to_buffer`.
        queue.write_buffer(
            &self.copy_buffer,
            0,
            bytemuck::cast_slice::<[u8; Self::COMPONENTS], u8>(&texels[..filled]),
        );

        queue.submit([encoder.finish()]);
    }

    total_count
}

/// Returns the vector to add to a cube's coordinates to obtain its texel coordinates
/// in the light texture: the negation of the lower corner of `texture_bounds`.
fn light_lookup_offset(&self) -> GridVector {
    let lowest_corner = self.texture_bounds.lower_bounds();
    -lowest_corner.to_vector()
}
Expand Down

0 comments on commit f4264c9

Please sign in to comment.