Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
}

impl ShaderRuntime {
pub async fn run_per_pixel_adjust<T: BufferStruct>(&self, shaders: &Shaders<'_>, textures: List<Raster<GPU>>, args: Option<&T>) -> List<Raster<GPU>> {
pub async fn run_per_pixel_adjust<T: BufferStruct>(&self, shaders: &Shaders<'_>, textures: &[List<Raster<GPU>>], args: Option<&T>) -> List<Raster<GPU>> {
assert_eq!(shaders.input_images, textures.len());
let mut cache = self.per_pixel_adjust.pipeline_cache.lock().await;
let pipeline = cache
.entry(shaders.fragment_shader_name.to_owned())
Expand All @@ -54,11 +55,13 @@
pub struct Shaders<'a> {
pub wgsl_shader: &'a str,
pub fragment_shader_name: &'a str,
pub input_images: usize,
pub has_uniform: bool,
}

pub struct PerPixelAdjustGraphicsPipeline {
name: String,
input_images: usize,
has_uniform: bool,
pipeline: wgpu::RenderPipeline,
}
Expand All @@ -76,46 +79,37 @@
source: ShaderSource::Wgsl(Cow::Borrowed(info.wgsl_shader)),
});

let entries: &[_] = if info.has_uniform {
&[
BindGroupLayoutEntry {
binding: 0,
visibility: ShaderStages::FRAGMENT,
ty: BindingType::Buffer {
ty: BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
BindGroupLayoutEntry {
binding: 1,
visibility: ShaderStages::FRAGMENT,
ty: BindingType::Texture {
sample_type: TextureSampleType::Float { filterable: false },
view_dimension: TextureViewDimension::D2,
multisampled: false,
},
count: None,
let mut binding_alloc = Counter::default();
let mut entries = Vec::new();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Consider pre-allocating the entries vector with the known capacity to avoid potential re-allocations as elements are added during pipeline creation.

Suggested change
let mut entries = Vec::new();
let mut entries = Vec::with_capacity(info.input_images + info.has_uniform as usize);

if info.has_uniform {
entries.push(BindGroupLayoutEntry {
binding: binding_alloc.alloc(),
visibility: ShaderStages::FRAGMENT,
ty: BindingType::Buffer {
ty: BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
]
} else {
&[BindGroupLayoutEntry {
binding: 0,
count: None,
});
}
for _ in 0..info.input_images {
entries.push(BindGroupLayoutEntry {
binding: binding_alloc.alloc(),
visibility: ShaderStages::FRAGMENT,
ty: BindingType::Texture {
sample_type: TextureSampleType::Float { filterable: false },
view_dimension: TextureViewDimension::D2,
multisampled: false,
},
count: None,
}]
};
});
}
let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor {
label: Some(&format!("PerPixelAdjust {name} PipelineLayout")),
bind_group_layouts: &[Some(&device.create_bind_group_layout(&BindGroupLayoutDescriptor {
label: Some(&format!("PerPixelAdjust {name} BindGroupLayout 0")),
entries,
entries: &entries,
}))],
..Default::default()
});
Expand Down Expand Up @@ -157,61 +151,73 @@
pipeline,
name,
has_uniform: info.has_uniform,
input_images: info.input_images,
}
}

pub fn dispatch(&self, context: &WgpuContext, textures: List<Raster<GPU>>, arg_buffer: Option<Buffer>) -> List<Raster<GPU>> {
pub fn dispatch(&self, context: &WgpuContext, in_textures: &[List<Raster<GPU>>], arg_buffer: Option<Buffer>) -> List<Raster<GPU>> {
assert_eq!(self.has_uniform, arg_buffer.is_some());
assert_eq!(self.input_images, in_textures.len());
let device = &context.device;
let name = self.name.as_str();

// Assumption: when we have multiple input images to our node, each input's List of images can have a different
// length. Only process the minimum between all input images, same as `impl Blend<Color> for List<Raster<CPU>>`.
let dispatch_cnt = match in_textures.iter().map(|t| t.len()).min() {
None => {
return List::new();
}
Some(e) => e,
};

let mut cmd = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some(&format!("{name} cmd encoder")),
});
let out = (0..textures.len())
.map(|index| {
let element = textures.element(index).unwrap();
let tex_in = &element.texture;
let view_in = tex_in.create_view(&TextureViewDescriptor::default());
let format = tex_in.format();

let entries: &[_] = if let Some(arg_buffer) = arg_buffer.as_ref() {
&[
BindGroupEntry {
binding: 0,
resource: BindingResource::Buffer(BufferBinding {
buffer: arg_buffer,
offset: 0,
size: None,
}),
},
BindGroupEntry {
binding: 1,
resource: BindingResource::TextureView(&view_in),
},
]
} else {
&[BindGroupEntry {
binding: 0,
let out = (0..dispatch_cnt)
.map(|dispatch_id| {
let mut binding_alloc = Counter::default();
let mut entries = Vec::new();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

In the dispatch loop, entries is allocated for every image in the list. Pre-allocating with with_capacity based on the number of input images and the uniform buffer presence will improve performance, especially when processing long lists of images.

Suggested change
let mut entries = Vec::new();
let mut entries = Vec::with_capacity(self.input_images + self.has_uniform as usize);

if let Some(arg_buffer) = arg_buffer.as_ref() {
entries.push(BindGroupEntry {
binding: binding_alloc.alloc(),
resource: BindingResource::Buffer(BufferBinding {
buffer: arg_buffer,
offset: 0,
size: None,
}),

Check warning on line 187 in node-graph/libraries/wgpu-executor/src/shader_runtime/per_pixel_adjust_runtime.rs

View workflow job for this annotation

GitHub Actions / rust-fmt

Diff in /home/runner/work/Graphite/Graphite/node-graph/libraries/wgpu-executor/src/shader_runtime/per_pixel_adjust_runtime.rs
});
}
let in_texture_views = in_textures.iter().map(|texture| {
let element = texture.element(dispatch_id).unwrap();
element.texture.create_view(&TextureViewDescriptor::default())
}).collect::<Vec<_>>();
for view_in in &in_texture_views {
entries.push(BindGroupEntry {
binding: binding_alloc.alloc(),
resource: BindingResource::TextureView(&view_in),
}]
};
});
}

let bind_group = device.create_bind_group(&BindGroupDescriptor {
label: Some(&format!("{name} bind group")),
// `get_bind_group_layout` allocates unnecessary memory, we could create it manually to not do that
layout: &self.pipeline.get_bind_group_layout(0),
entries,
entries: &entries,
});

// Assumption: The output texture has the same size and format as the first input texture. Like the
// blend node, that writes the output directly back into the first texture.
let outref_list = &in_textures[0];
let outref_tex = &outref_list.element(dispatch_id).unwrap().texture;
let tex_out = device.create_texture(&TextureDescriptor {
label: Some(&format!("{name} texture out")),
size: tex_in.size(),
size: outref_tex.size(),
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format,
format: outref_tex.format(),
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT,
view_formats: &[format],
view_formats: &[outref_tex.format()],
});

let view_out = tex_out.create_view(&TextureViewDescriptor::default());
Expand All @@ -233,11 +239,22 @@
rp.set_bind_group(0, Some(&bind_group), &[]);
rp.draw(0..3, 0..1);

let attributes = textures.clone_item_attributes(index);
let attributes = outref_list.clone_item_attributes(dispatch_id);
Item::from_parts(Raster::new(GPU { texture: tex_out }), attributes)
})
.collect::<List<_>>();
context.queue.submit([cmd.finish()]);
out
}
}

#[derive(Clone, Debug, Default)]
pub struct Counter(pub u32);

impl Counter {
pub fn alloc(&mut self) -> u32 {
let out = self.0;
self.0 += 1;
out
}
}
22 changes: 11 additions & 11 deletions node-graph/node-macro/src/shader_nodes/per_pixel_adjust.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,16 +248,15 @@ impl PerPixelAdjustCodegen<'_> {
is_data_field: false,
});

// find exactly one gpu_image field, runtime doesn't support more than 1 atm
let gpu_image_field = {
let mut iter = fields.iter().filter(|f| matches!(f.ty, ParsedFieldType::Regular(RegularParsedField { gpu_image: true, .. })));
match (iter.next(), iter.next()) {
(Some(v), None) => Ok(v),
(Some(_), Some(more)) => Err(syn::Error::new_spanned(&more.pat_ident, "No more than one parameter must be annotated with `#[gpu_image]`")),
(None, _) => Err(syn::Error::new_spanned(&self.parsed.fn_name, "At least one parameter must be annotated with `#[gpu_image]`")),
}?
};
let gpu_image = &gpu_image_field.pat_ident.ident;
// find gpu_image fields
let gpu_images = fields
.iter()
.filter_map(|f| match f.ty {
ParsedFieldType::Regular(RegularParsedField { gpu_image: true, .. }) => Some(&f.pat_ident.ident),
_ => None,
})
.collect::<Vec<_>>();
let input_images = gpu_images.len();

// uniform buffer struct construction
let has_uniform = self.has_uniform;
Expand Down Expand Up @@ -287,7 +286,8 @@ impl PerPixelAdjustCodegen<'_> {
wgsl_shader: crate::WGSL_SHADER,
fragment_shader_name: super::#entry_point_name,
has_uniform: #has_uniform,
}, #gpu_image, #uniform_buffer).await
input_images: #input_images,
}, &[#(#gpu_images),*], #uniform_buffer).await
}
};

Expand Down
2 changes: 1 addition & 1 deletion node-graph/nodes/raster/src/blending_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ pub fn apply_blend_mode(foreground: Color, background: Color, blend_mode: BlendM
}
}

#[node_macro::node(category("Raster"), cfg(feature = "std"))]
#[node_macro::node(category("Raster"), shader_node(PerPixelAdjust))]
fn mix<T: Blend<Color> + Send>(
_: impl Ctx,
#[implementations(
Expand Down
Loading