vf_blackdetect_vulkan: port to compile-time SPIR-V generation

This commit is contained in:
Lynne
2026-05-21 21:29:48 +09:00
parent 2e25da3121
commit a189413832
4 changed files with 86 additions and 63 deletions

View File

@@ -19,7 +19,6 @@
*/
#include <float.h>
#include "libavutil/vulkan_spirv.h"
#include "libavutil/opt.h"
#include "libavutil/timestamp.h"
#include "vulkan_filter.h"
@@ -27,6 +26,9 @@
#include "filters.h"
#include "video.h"
extern const unsigned char ff_blackdetect_comp_spv_data[];
extern const unsigned int ff_blackdetect_comp_spv_len;
typedef struct BlackDetectVulkanContext {
FFVulkanContext vkctx;
@@ -56,14 +58,8 @@ typedef struct BlackDetectBuf {
static av_cold int init_filter(AVFilterContext *ctx)
{
int err;
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
BlackDetectVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
FFVulkanShader *shd;
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc;
const int plane = s->alpha ? 3 : 0;
const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format);
@@ -72,12 +68,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
return AVERROR(ENOTSUP);
}
spv = ff_vk_spirv_init();
if (!spv) {
av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
}
s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
if (!s->qf) {
av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
@@ -86,73 +76,41 @@ static av_cold int init_filter(AVFilterContext *ctx)
}
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1,
32, 32, 1,
0));
shd = &s->shd;
GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
GLSLC(1, float threshold; );
GLSLC(0, }; );
SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
SPEC_LIST_ADD(sl, 0, 32, plane);
SPEC_LIST_ADD(sl, 1, 32, SLICES);
ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData),
ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(int []) { 32, 32, 1 }, 0);
ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BlackDetectPushData),
VK_SHADER_STAGE_COMPUTE_BIT);
desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
const FFVulkanDescriptorSetBinding desc[] = {
{ /* input_img */
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
.mem_quali = "readonly",
.dimensions = 2,
.elems = av_pix_fmt_count_planes(s->vkctx.input_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
}, {
.name = "sum_buffer",
.elems = av_pix_fmt_count_planes(s->vkctx.input_format),
},
{ /* sum_buffer */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.buf_content = "uint slice_sum[];",
}
};
ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0);
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
GLSLC(0, shared uint wg_sum; );
GLSLC(0, );
GLSLC(0, void main() );
GLSLC(0, { );
GLSLC(1, wg_sum = 0u; );
GLSLC(1, barrier(); );
GLSLC(0, );
GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane);
GLSLC(2, return; );
GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane);
GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); );
GLSLC(1, if (subgroupElect()) );
GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); );
GLSLC(1, barrier(); );
GLSLC(1, if (gl_LocalInvocationIndex == 0u) );
GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES);
GLSLC(0, } );
RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main"));
RET(ff_vk_shader_link(vkctx, &s->shd,
ff_blackdetect_comp_spv_data,
ff_blackdetect_comp_spv_len, "main"));
RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
s->black_start = AV_NOPTS_VALUE;
s->initialized = 1;
fail:
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
if (spv)
spv->uninit(&spv);
fail:
return err;
}

View File

@@ -2,6 +2,7 @@ clean::
$(RM) $(CLEANSUFFIXES:%=libavfilter/vulkan/%)
OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o
OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vulkan/blackdetect.comp.spv.o
OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vulkan/blend.comp.spv.o
OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o
OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vulkan/chromaber.comp.spv.o

View File

@@ -0,0 +1,64 @@
/*
* Copyright 2025 (c) Niklas Haas
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Black pixel counter.
 *
 * Each invocation looks at one texel of input_img[plane]; texels whose first
 * component is <= threshold are counted, summed per workgroup in wg_sum and
 * then accumulated into slice_sum[].
 */
#pragma shader_stage(compute)
#extension GL_EXT_shader_image_load_formatted : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_nonuniform_qualifier : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_EXT_null_initializer : require
/* Specialization constants; the 0 defaults are placeholders that are
 * presumably always overridden at pipeline creation -- TODO confirm on the
 * host side. */
/* Index into input_img[] of the plane to inspect. */
layout (constant_id = 0) const uint plane = 0;
/* Modulus applied to gl_WorkGroupID.x when picking a slice_sum[] entry in
 * main(); must be non-zero for that modulo to be defined. */
layout (constant_id = 1) const uint slices = 0;
/* Workgroup dimensions are taken from specialization constants 253..255
 * rather than being fixed in the shader source. */
layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;
/* Input planes; no format qualifier is given, which is what
 * GL_EXT_shader_image_load_formatted permits for imageLoad(). */
layout (set = 0, binding = 0) uniform readonly image2D input_img[];
/* Output counters, one uint per slice. */
layout (set = 0, binding = 1, scalar) buffer sum_buffer {
uint slice_sum[];
};
layout (push_constant, scalar) uniform pushConstants {
/* A texel value <= threshold is counted as black. */
float threshold;
};
/* Per-workgroup black texel count. The empty initializer
 * (GL_EXT_null_initializer) zero-initializes it; main() relies on this and
 * never clears it explicitly. */
shared uint wg_sum = { };
/*
 * Counts the black texels covered by this workgroup and adds the total to
 * the slice_sum[] entry selected by the workgroup's X index.
 */
void main()
{
    ivec2 coord  = ivec2(gl_GlobalInvocationID.xy);
    ivec2 extent = imageSize(input_img[plane]);

    /* Invocations that fall outside the image cannot simply return: they
     * still have to take part in the ballot and arrive at the barrier below.
     * They are excluded from the vote explicitly instead, because with a
     * positive threshold the 0.0 placeholder would be counted as black. */
    bool inside = coord.x < extent.x && coord.y < extent.y;

    float texel = 0.0f;
    if (inside)
        texel = imageLoad(input_img[plane], coord).x;

    /* One vote per invocation; a single elected invocation per subgroup
     * adds the subgroup's vote count to the workgroup total. */
    bool black = inside && texel <= threshold;
    uvec4 votes = subgroupBallot(black);
    if (subgroupElect())
        atomicAdd(wg_sum, subgroupBallotBitCount(votes));

    /* Every subgroup must have contributed before wg_sum is read. */
    barrier();

    /* A single invocation publishes the workgroup total. */
    if (gl_LocalInvocationIndex == 0u) {
        uint slot = gl_WorkGroupID.x % slices;
        atomicAdd(slice_sum[slot], wg_sum);
    }
}