diff --git a/configure b/configure
index 18fe7271aa..57316db04d 100755
--- a/configure
+++ b/configure
@@ -4154,7 +4154,7 @@ ass_filter_deps="libass"
 avgblur_opencl_filter_deps="opencl"
 avgblur_vulkan_filter_deps="vulkan spirv_compiler"
 azmq_filter_deps="libzmq"
-blackdetect_vulkan_filter_deps="vulkan spirv_library"
+blackdetect_vulkan_filter_deps="vulkan spirv_compiler"
 blackframe_filter_deps="gpl"
 blend_vulkan_filter_deps="vulkan spirv_compiler"
 boxblur_filter_deps="gpl"
diff --git a/libavfilter/vf_blackdetect_vulkan.c b/libavfilter/vf_blackdetect_vulkan.c
index 279b057148..bd3a92c858 100644
--- a/libavfilter/vf_blackdetect_vulkan.c
+++ b/libavfilter/vf_blackdetect_vulkan.c
@@ -19,7 +19,6 @@
  */
 
 #include
-#include "libavutil/vulkan_spirv.h"
 #include "libavutil/opt.h"
 #include "libavutil/timestamp.h"
 #include "vulkan_filter.h"
@@ -27,6 +26,9 @@
 #include "filters.h"
 #include "video.h"
 
+extern const unsigned char ff_blackdetect_comp_spv_data[];
+extern const unsigned int ff_blackdetect_comp_spv_len;
+
 typedef struct BlackDetectVulkanContext {
     FFVulkanContext vkctx;
 
@@ -56,14 +58,8 @@ typedef struct BlackDetectBuf {
 static av_cold int init_filter(AVFilterContext *ctx)
 {
     int err;
-    uint8_t *spv_data;
-    size_t spv_len;
-    void *spv_opaque = NULL;
     BlackDetectVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanShader *shd;
-    FFVkSPIRVCompiler *spv;
-    FFVulkanDescriptorSetBinding *desc;
     const int plane = s->alpha ? 3 : 0;
 
     const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format);
@@ -72,12 +68,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
         return AVERROR(ENOTSUP);
     }
 
-    spv = ff_vk_spirv_init();
-    if (!spv) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
-        return AVERROR_EXTERNAL;
-    }
-
     s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
     if (!s->qf) {
         av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
@@ -86,73 +76,42 @@ static av_cold int init_filter(AVFilterContext *ctx)
     }
 
     RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
-    RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect",
-                          VK_SHADER_STAGE_COMPUTE_BIT,
-                          (const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1,
-                          32, 32, 1,
-                          0));
-    shd = &s->shd;
 
-    GLSLC(0, layout(push_constant, std430) uniform pushConstants {            );
-    GLSLC(1,     float threshold;                                             );
-    GLSLC(0, };                                                               );
+    /* Specialization constants: id 0 = plane, id 1 = slices (see blackdetect.comp.glsl) */
+    SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
+    SPEC_LIST_ADD(sl, 0, 32, plane);
+    SPEC_LIST_ADD(sl, 1, 32, SLICES);
 
-    ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData),
+    ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
+                      (int []) { 32, 32, 1 }, 0);
+
+    ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BlackDetectPushData),
                                 VK_SHADER_STAGE_COMPUTE_BIT);
 
-    desc = (FFVulkanDescriptorSetBinding []) {
-        {
-            .name       = "input_img",
+    const FFVulkanDescriptorSetBinding desc[] = {
+        { /* input_img */
             .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
-            .mem_quali  = "readonly",
-            .dimensions = 2,
-            .elems      = av_pix_fmt_count_planes(s->vkctx.input_format),
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-        }, {
-            .name        = "sum_buffer",
+            .elems      = av_pix_fmt_count_planes(s->vkctx.input_format),
+        },
+        { /* sum_buffer */
             .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
-            .buf_content = "uint slice_sum[];",
         }
     };
+    RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
 
-    RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
-
-    GLSLC(0, shared uint wg_sum;                                              );
-    GLSLC(0,                                                                  );
-    GLSLC(0, void main()                                                      );
-    GLSLC(0, {                                                                );
-    GLSLC(1,     wg_sum = 0u;                                                 );
-    GLSLC(1,     barrier();                                                   );
-    GLSLC(0,                                                                  );
-    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);           );
-    GLSLF(1,     if (!IS_WITHIN(pos, imageSize(input_img[%d])))               ,plane);
-    GLSLC(2,         return;                                                  );
-    GLSLF(1,     float value = imageLoad(input_img[%d], pos).x;               ,plane);
-    GLSLC(1,     uvec4 isblack = subgroupBallot(value <= threshold);          );
-    GLSLC(1,     if (subgroupElect())                                         );
-    GLSLC(2,         atomicAdd(wg_sum, subgroupBallotBitCount(isblack));      );
-    GLSLC(1,     barrier();                                                   );
-    GLSLC(1,     if (gl_LocalInvocationIndex == 0u)                           );
-    GLSLF(2,         atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum);   ,SLICES);
-    GLSLC(0, }                                                                );
-
-    RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main",
-                            &spv_opaque));
-    RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main"));
+    RET(ff_vk_shader_link(vkctx, &s->shd,
+                          ff_blackdetect_comp_spv_data,
+                          ff_blackdetect_comp_spv_len, "main"));
 
     RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
 
     s->black_start = AV_NOPTS_VALUE;
     s->initialized = 1;
 
-fail:
-    if (spv_opaque)
-        spv->free_shader(spv, &spv_opaque);
-    if (spv)
-        spv->uninit(&spv);
+fail:
 
     return err;
 }
 
diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile
index cd303e535e..2cfe9cfa93 100644
--- a/libavfilter/vulkan/Makefile
+++ b/libavfilter/vulkan/Makefile
@@ -2,6 +2,7 @@ clean::
 	$(RM) $(CLEANSUFFIXES:%=libavfilter/vulkan/%)
 
 OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o
+OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vulkan/blackdetect.comp.spv.o
 OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vulkan/blend.comp.spv.o
 OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o
 OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vulkan/chromaber.comp.spv.o
diff --git a/libavfilter/vulkan/blackdetect.comp.glsl b/libavfilter/vulkan/blackdetect.comp.glsl
new file mode 100644
index 0000000000..d0a2a078e0
--- /dev/null
+++ b/libavfilter/vulkan/blackdetect.comp.glsl
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2025 (c) Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+
+#extension GL_EXT_shader_image_load_formatted : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_nonuniform_qualifier : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_EXT_null_initializer : require
+
+layout (constant_id = 0) const uint plane = 0;
+layout (constant_id = 1) const uint slices = 0;
+
+layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;
+
+layout (set = 0, binding = 0) uniform readonly image2D input_img[];
+layout (set = 0, binding = 1, scalar) buffer sum_buffer {
+    uint slice_sum[];
+};
+
+layout (push_constant, scalar) uniform pushConstants {
+    float threshold;
+};
+
+shared uint wg_sum = { };
+
+void main()
+{
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+
+    /* Out-of-bounds invocations must still reach the barrier, but must not
+     * be counted: the threshold is positive, so their placeholder value of
+     * 0.0 would otherwise be counted as black. */
+    bool in_bounds = all(lessThan(pos, imageSize(input_img[plane])));
+    float value = 0.0f;
+    if (in_bounds)
+        value = imageLoad(input_img[plane], pos).x;
+
+    uvec4 isblack = subgroupBallot(in_bounds && value <= threshold);
+    if (subgroupElect())
+        atomicAdd(wg_sum, subgroupBallotBitCount(isblack));
+
+    barrier();
+    if (gl_LocalInvocationIndex == 0)
+        atomicAdd(slice_sum[gl_WorkGroupID.x % slices], wg_sum);
+}