mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-06-30 19:58:17 +00:00
vf_blackdetect_vulkan: port to compile-time SPIR-V generation
This commit is contained in:
@@ -19,7 +19,6 @@
|
||||
*/
|
||||
|
||||
#include <float.h>
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/timestamp.h"
|
||||
#include "vulkan_filter.h"
|
||||
@@ -27,6 +26,9 @@
|
||||
#include "filters.h"
|
||||
#include "video.h"
|
||||
|
||||
extern const unsigned char ff_blackdetect_comp_spv_data[];
|
||||
extern const unsigned int ff_blackdetect_comp_spv_len;
|
||||
|
||||
typedef struct BlackDetectVulkanContext {
|
||||
FFVulkanContext vkctx;
|
||||
|
||||
@@ -56,14 +58,8 @@ typedef struct BlackDetectBuf {
|
||||
static av_cold int init_filter(AVFilterContext *ctx)
|
||||
{
|
||||
int err;
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
BlackDetectVulkanContext *s = ctx->priv;
|
||||
FFVulkanContext *vkctx = &s->vkctx;
|
||||
FFVulkanShader *shd;
|
||||
FFVkSPIRVCompiler *spv;
|
||||
FFVulkanDescriptorSetBinding *desc;
|
||||
const int plane = s->alpha ? 3 : 0;
|
||||
|
||||
const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format);
|
||||
@@ -72,12 +68,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
|
||||
return AVERROR(ENOTSUP);
|
||||
}
|
||||
|
||||
spv = ff_vk_spirv_init();
|
||||
if (!spv) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
|
||||
if (!s->qf) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
|
||||
@@ -86,73 +76,41 @@ static av_cold int init_filter(AVFilterContext *ctx)
|
||||
}
|
||||
|
||||
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
|
||||
RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1,
|
||||
32, 32, 1,
|
||||
0));
|
||||
shd = &s->shd;
|
||||
|
||||
GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
|
||||
GLSLC(1, float threshold; );
|
||||
GLSLC(0, }; );
|
||||
SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
|
||||
SPEC_LIST_ADD(sl, 0, 32, plane);
|
||||
SPEC_LIST_ADD(sl, 1, 32, SLICES);
|
||||
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData),
|
||||
ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
|
||||
(int []) { 32, 32, 1 }, 0);
|
||||
|
||||
ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BlackDetectPushData),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
desc = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "input_img",
|
||||
const FFVulkanDescriptorSetBinding desc[] = {
|
||||
{ /* input_img */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
|
||||
.mem_quali = "readonly",
|
||||
.dimensions = 2,
|
||||
.elems = av_pix_fmt_count_planes(s->vkctx.input_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
}, {
|
||||
.name = "sum_buffer",
|
||||
.elems = av_pix_fmt_count_planes(s->vkctx.input_format),
|
||||
},
|
||||
{ /* sum_buffer */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.buf_content = "uint slice_sum[];",
|
||||
}
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0);
|
||||
|
||||
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
|
||||
|
||||
GLSLC(0, shared uint wg_sum; );
|
||||
GLSLC(0, );
|
||||
GLSLC(0, void main() );
|
||||
GLSLC(0, { );
|
||||
GLSLC(1, wg_sum = 0u; );
|
||||
GLSLC(1, barrier(); );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
|
||||
GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane);
|
||||
GLSLC(2, return; );
|
||||
GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane);
|
||||
GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); );
|
||||
GLSLC(1, if (subgroupElect()) );
|
||||
GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); );
|
||||
GLSLC(1, barrier(); );
|
||||
GLSLC(1, if (gl_LocalInvocationIndex == 0u) );
|
||||
GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES);
|
||||
GLSLC(0, } );
|
||||
|
||||
RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main"));
|
||||
RET(ff_vk_shader_link(vkctx, &s->shd,
|
||||
ff_blackdetect_comp_spv_data,
|
||||
ff_blackdetect_comp_spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
|
||||
|
||||
s->black_start = AV_NOPTS_VALUE;
|
||||
s->initialized = 1;
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
if (spv)
|
||||
spv->uninit(&spv);
|
||||
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ clean::
|
||||
$(RM) $(CLEANSUFFIXES:%=libavfilter/vulkan/%)
|
||||
|
||||
OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o
|
||||
OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vulkan/blackdetect.comp.spv.o
|
||||
OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vulkan/blend.comp.spv.o
|
||||
OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o
|
||||
OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vulkan/chromaber.comp.spv.o
|
||||
|
||||
64
libavfilter/vulkan/blackdetect.comp.glsl
Normal file
64
libavfilter/vulkan/blackdetect.comp.glsl
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright 2025 (c) Niklas Haas
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_nonuniform_qualifier : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_EXT_null_initializer : require
|
||||
|
||||
layout (constant_id = 0) const uint plane = 0;
|
||||
layout (constant_id = 1) const uint slices = 0;
|
||||
|
||||
layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;
|
||||
|
||||
layout (set = 0, binding = 0) uniform readonly image2D input_img[];
|
||||
layout (set = 0, binding = 1, scalar) buffer sum_buffer {
|
||||
uint slice_sum[];
|
||||
};
|
||||
|
||||
layout (push_constant, scalar) uniform pushConstants {
|
||||
float threshold;
|
||||
};
|
||||
|
||||
shared uint wg_sum = { };
|
||||
|
||||
/* Compute entry point: counts the texels of input_img[plane] whose first
 * component is <= threshold and accumulates that count into slice_sum. */
void main()
|
||||
{
|
||||
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
/* Out-of-bounds invocations must still reach the barrier, but must not
|
||||
* be counted: the threshold is positive, so the placeholder value of 0.0 would
|
||||
* otherwise be counted as black. */
|
||||
bool in_bounds = all(lessThan(pos, imageSize(input_img[plane])));
|
||||
float value = 0.0f;
|
||||
if (in_bounds)
|
||||
value = imageLoad(input_img[plane], pos).x;
|
||||
|
||||
|
||||
/* Ballot across the subgroup; only in-bounds texels at or below the
 * threshold vote true. */
uvec4 isblack = subgroupBallot(in_bounds && value <= threshold);
|
||||
/* A single elected invocation per subgroup adds the number of set ballot
 * bits to the workgroup-shared counter. */
if (subgroupElect())
|
||||
atomicAdd(wg_sum, subgroupBallotBitCount(isblack));
|
||||
|
||||
|
||||
/* Synchronize the workgroup before wg_sum is read below. */
barrier();
|
||||
/* Local invocation 0 adds the workgroup total to the slice_sum entry chosen
 * by the workgroup's x index modulo slices.
 * NOTE(review): slices is declared with a default of 0, so this modulo
 * relies on the host overriding specialization constant 1 - confirm. */
if (gl_LocalInvocationIndex == 0)
|
||||
atomicAdd(slice_sum[gl_WorkGroupID.x % slices], wg_sum);
|
||||
}
|
||||
Reference in New Issue
Block a user