Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/vulkan/op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,8 +1263,9 @@ def register_arange():
# Feature registration for the edge-dialect aten.constant_pad_nd.default op:
# returns the OpFeatures describing the accepted input storage, the accepted
# input dtypes, and that resizing is supported.
# NOTE(review): this is a diff view, so the two inputs_storage lines are
# presumably the removed and the added side of the change — only one of them
# belongs in the real file; confirm against the repository.
@update_features(exir_ops.edge.aten.constant_pad_nd.default)
def register_constant_pad_nd():
return OpFeatures(
inputs_storage=utils.CHANNELS_PACKED_TEXTURE,
inputs_storage=utils.ANY_STORAGE,
inputs_dtypes=utils.FP_INT_BOOL_T,
supports_resize=True,
)


Expand Down
54 changes: 54 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/pad_buffer.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

${define_required_extensions("buffer", DTYPE)}

// NOTE(review): main() uses the [[unroll]] attribute, but unlike
// pad_texture.glsl this file does not itself require
// GL_EXT_control_flow_attributes. Presumably indexing.glslh or the macro
// above enables it — confirm.

#define PRECISION ${PRECISION}

// Scalar element type used for the buffers of the DTYPE variant being
// generated; main() casts fill_value to this type.
#define T ${buffer_scalar_type(DTYPE)}

layout(std430) buffer;

#include "indexing.glslh"

// Tensor bindings: t_out is declared with "w" access, t_in with "r" access.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}

// Uniforms, in binding order: metadata for the output and input tensors, the
// per-dimension offsets that main() subtracts from the output tensor index,
// and the value main() writes for elements that fall outside the input.
${layout_declare_ubo(B, "BufferMetadata", "out_meta")}
${layout_declare_ubo(B, "BufferMetadata", "in_meta")}
${layout_declare_ubo(B, "ivec4", "pad_per_dim")}
${layout_declare_ubo(B, "float", "fill_value")}

// Local workgroup size is supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  // Each invocation produces exactly one element of the output buffer;
  // invocations past the end of the output have nothing to do.
  const uint dst_bufi = gl_GlobalInvocationID.x;
  if (dst_bufi >= numel(out_meta)) {
    return;
  }

  // Start from the output element's tensor index and shift it back by the pad
  // offsets to find the matching input element. The arithmetic is unsigned, so
  // an output index smaller than its pad offset wraps around to a very large
  // value, which the out_of_bounds check below is expected to reject.
  const uvec4 pad_offsets = uvec4(pad_per_dim);
  TensorIndex src_tidx = linear_idx_to_tensor_idx(out_meta, dst_bufi);
  [[unroll]] for (int dim = 0; dim < 4; ++dim) {
    src_tidx.data[0][dim] -= pad_offsets[dim];
  }

  if (!out_of_bounds(src_tidx, in_meta)) {
    // Inside the input: copy the corresponding element across.
    t_out[dst_bufi] = t_in[tensor_idx_to_linear_idx(in_meta, src_tidx)];
  } else {
    // Inside the padding region: emit the constant fill value.
    t_out[dst_bufi] = T(fill_value);
  }
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
pad_channel:
pad_buffer:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: C_packed
STORAGE: texture3d
generate_variant_forall:
DTYPE:
- VALUE: float
- VALUE: half
- VALUE: int32
- VALUE: uint8
shader_variants:
- NAME: pad_channel
- NAME: pad_buffer
80 changes: 0 additions & 80 deletions backends/vulkan/runtime/graph/ops/glsl/pad_channel.glsl

This file was deleted.

50 changes: 0 additions & 50 deletions backends/vulkan/runtime/graph/ops/glsl/pad_height_width.glsl

This file was deleted.

81 changes: 81 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/pad_texture.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

${define_required_extensions("texture3d", DTYPE)}

#define PRECISION ${PRECISION}

// VEC4_T is the texel type used when loading from / storing to the textures;
// T is the type of a single component of such a texel.
#define VEC4_T ${texel_load_type(DTYPE, "texture3d")}
#define T ${texel_load_component_type(DTYPE, "texture3d")}

${define_active_storage_type("texture3d")}

// Required for the [[unroll]] attribute used in main().
#extension GL_EXT_control_flow_attributes : require

layout(std430) buffer;

#include "common.glslh"
#include "indexing.glslh"

// Tensor bindings: t_out is declared with "w" access, t_in with "r" access.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}

// Uniforms, in binding order: metadata for the output and input textures, the
// three pad offsets main() subtracts from tensor dims 0, 1 and 2, and the
// value main() writes for elements that fall outside the input.
${layout_declare_ubo(B, "TextureMetadata", "outp")}
${layout_declare_ubo(B, "TextureMetadata", "inp")}
${layout_declare_ubo(B, "int", "pad_left", "int", "pad_top", "int", "pad_front")}
${layout_declare_ubo(B, "float", "fill_value")}

// Local workgroup size is supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  // One invocation per output texel; positions outside the output texture are
  // ignored.
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);
  if (out_of_bounds(texel_pos, outp)) {
    return;
  }

  // Element-space tensor index of this texel. Along the packed dim it refers
  // to the first component held by the texel; the other dims are plain element
  // indices.
  TensorIndex4D dst_tidx = texture_pos_to_tensor4d_idx_simple(outp, texel_pos);

  // A texel at the tail of the packed dim may carry fewer than 4 real
  // elements. Components beyond that count keep their initial value of 0.
  const int num_valid =
      min(outp.sizes[outp.packed_dim] - dst_tidx.data[outp.packed_dim], 4);

  // The pad offsets are the same for every component, so apply them once.
  // The indices are signed: an output index smaller than its pad offset turns
  // negative here and is caught by the range test inside the loop.
  TensorIndex4D shifted_tidx = dst_tidx;
  shifted_tidx.data[0] -= pad_left;
  shifted_tidx.data[1] -= pad_top;
  shifted_tidx.data[2] -= pad_front;

  VEC4_T result = VEC4_T(0);

  // Resolve each valid component on its own: step along the packed dim to get
  // that component's input element index, then either emit fill_value (padding
  // region) or fetch the element from the input texture.
  [[unroll]] for (int lane = 0; lane < num_valid; ++lane) {
    TensorIndex4D src_tidx = shifted_tidx;
    src_tidx.data[outp.packed_dim] += lane;

    const bool in_padding =
        src_tidx.data[0] < 0 || src_tidx.data[0] >= inp.sizes[0] ||
        src_tidx.data[1] < 0 || src_tidx.data[1] >= inp.sizes[1] ||
        src_tidx.data[2] < 0 || src_tidx.data[2] >= inp.sizes[2];

    if (in_padding) {
      result[lane] = T(fill_value);
    } else {
      TextureElementIndex src_elem =
          tensor4d_idx_to_texture_element_idx_simple(inp, src_tidx);
      VEC4_T src_texel = texelFetch(t_in, src_elem.pos, 0);
      result[lane] = T(src_texel[src_elem.comp]);
    }
  }

  imageStore(t_out, texel_pos, result);
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
pad_height_width:
pad_texture:
parameter_names_with_default_values:
NDIM: 3
DTYPE: float
PACKING: C_packed
STORAGE: texture3d
generate_variant_forall:
DTYPE:
- VALUE: float
- VALUE: half
- VALUE: int32
- VALUE: uint8
shader_variants:
- NAME: pad_height_width
- NAME: pad_texture3d
43 changes: 25 additions & 18 deletions backends/vulkan/runtime/graph/ops/impl/Pad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,36 +59,43 @@ void add_constant_pad_nd_node(
ComputeGraph& graph,
const ValueRef& in,
const ValueRef& pad,
const ValueRef& fill_value,
const ValueRef& fill_value_ref,
const ValueRef& out) {
const float fill_value_val = graph.extract_scalar<float>(fill_value);
const float fill_value_val = graph.extract_scalar<float>(fill_value_ref);
const IntListPtr pad_vec = graph.get_int_list(pad);

std::string kernel_name = "";
const PadParam pad_param = creat_pad_param(*pad_vec);

if (pad_vec->size() <= 4) {
kernel_name = "pad_height_width";
kernel_name.reserve(kShaderNameReserve);
add_dtype_suffix(kernel_name, graph.dtype_of(out));
std::string kernel_name = "pad";
kernel_name.reserve(kShaderNameReserve);
add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
add_dtype_suffix(kernel_name, graph.dtype_of(out));

vkapi::ParamsBindList param_ubos;
if (graph.is_buffer_storage(out)) {
// BufferMetadata stores sizes/strides in WHCN order (flip_and_unsqueeze
// reverses from NCHW). Map pad offsets to match: W=0, H=1, C=2.
utils::ivec4 pad_per_dim{pad_param.left, pad_param.top, pad_param.front, 0};
param_ubos = {
graph.buffer_meta_ubo(out),
graph.buffer_meta_ubo(in),
graph.create_params_buffer(pad_per_dim),
graph.create_params_buffer(fill_value_val)};
} else {
kernel_name = "pad_channel";
kernel_name.reserve(kShaderNameReserve);
add_dtype_suffix(kernel_name, graph.dtype_of(out));
param_ubos = {
graph.meta_ubo(out),
graph.meta_ubo(in),
graph.create_params_buffer(pad_param),
graph.create_params_buffer(fill_value_val)};
}

graph.execute_nodes().emplace_back(new DynamicDispatchNode(
graph,
VK_KERNEL_FROM_STR(kernel_name),
default_pick_global_wg_size,
default_pick_local_wg_size,
// Inputs and Outputs
{{out, vkapi::kWrite}, {in, vkapi::kRead}},
// Shader params buffers
{graph.sizes_ubo(out),
graph.sizes_ubo(in),
graph.create_params_buffer(pad_param),
graph.create_params_buffer(fill_value_val)},
// Parameter buffers
param_ubos,
// Push Constants
{},
// Specialization Constants
Expand All @@ -100,7 +107,7 @@ void add_constant_pad_nd_node(
}

// Operator entry point: forwards the first four entries of args, in order
// (in, pad, fill value, out), to add_constant_pad_nd_node.
// NOTE(review): this is a diff view, so the two call lines below are
// presumably the removed and the added side of the change — only one of them
// belongs in the real file; confirm against the repository.
void constant_pad_nd(ComputeGraph& graph, const std::vector<ValueRef>& args) {
return add_constant_pad_nd_node(graph, args[0], args[1], args[2], args[3]);
add_constant_pad_nd_node(graph, args[0], args[1], args[2], args[3]);
}

REGISTER_OPERATORS {
Expand Down
8 changes: 8 additions & 0 deletions backends/vulkan/test/op_tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -1868,6 +1868,14 @@ def get_constant_pad_nd_inputs():
([L, M, M1, M2], [3, 3, 3, 3, 3, 3], 12.2),
]
)
test_suite.layouts = [
"utils::kWidthPacked",
"utils::kChannelsPacked",
]
test_suite.storage_types = [
"utils::kTexture3D",
"utils::kBuffer",
]
return test_suite


Expand Down
Loading