Files
FFmpeg/libswscale/uops_backend.c
Niklas Haas 6217350269 swscale/uops_backend: add SWS_UOP_READ_PALETTE reference implementation
This does not actually generate any code yet as the macro is still empty,
but that will change once I add support for generated palette reads to
the format handling code. This logic merely needs to be in place first
to avoid introducing broken intermediate states where palette uops are
generated but not implemented by the reference backend.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
2026-06-20 14:08:49 +00:00

199 lines
6.5 KiB
C

/**
* Copyright (C) 2026 Niklas Haas
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/avassert.h"
#include "uops_tmpl.h"
/**
 * We want to disable FP contraction because this is a reference backend that
 * establishes a bit-exact reference result.
 *
 * Each compiler family spells this request differently, hence the chain
 * below: clang is given the ISO C FP_CONTRACT pragma, GCC (4.8 or newer) the
 * equivalent "optimize" option, and MSVC its own fp_contract pragma.
 * Compilers matching none of the branches are left at their default.
 */
#ifdef __clang__
#pragma STDC FP_CONTRACT OFF
#elif AV_GCC_VERSION_AT_LEAST(4, 8)
#pragma GCC optimize ("fp-contract=off")
#elif defined(_MSC_VER)
#pragma fp_contract (off)
#endif
/*
 * On GCC 4.4 or newer, additionally request the "finite-math-only"
 * optimization option for the code that follows.
 * NOTE(review): presumably so the templated float kernels can be optimized
 * under the assumption that NaN/Inf never occur -- confirm the intent.
 */
#if AV_GCC_VERSION_AT_LEAST(4, 4)
#pragma GCC optimize ("finite-math-only")
#endif
/*
 * Instantiate the uop template once per supported element type.
 * "uops_tmpl.c" is included repeatedly, each time with a different
 * IS_FLOAT / BIT_DEPTH pair defined; both macros are undefined again
 * afterwards so that they do not leak into the rest of this file.
 */
/* Integer types */
#define IS_FLOAT 0
/* 8-bit integer instantiation */
# define BIT_DEPTH 8
# include "uops_tmpl.c"
# undef BIT_DEPTH
/* 16-bit integer instantiation */
# define BIT_DEPTH 16
# include "uops_tmpl.c"
# undef BIT_DEPTH
/* 32-bit integer instantiation */
# define BIT_DEPTH 32
# include "uops_tmpl.c"
# undef BIT_DEPTH
#undef IS_FLOAT
/* Floating point types */
#define IS_FLOAT 1
/* 32-bit floating point instantiation */
# define BIT_DEPTH 32
# include "uops_tmpl.c"
# undef BIT_DEPTH
#undef IS_FLOAT
/**
 * List of every uop kind implemented by the C/template backend, expressed as
 * one SWS_FOR(TYPE, <UOP>, REF_ENTRY) invocation per kind. It is expanded
 * once per element type when building uop_table below.
 *
 * Expanded as new uop types are implemented in the C/template backend: to add
 * one, append another SWS_FOR() line. Every line ends in a backslash; the
 * trailing "end of macro" comment absorbs the last continuation, so new lines
 * can be added or reordered without special-casing the final entry.
 *
 * SWS_FOR and REF_ENTRY are defined outside this file.
 * NOTE(review): presumably they come from uops_tmpl.h -- confirm.
 */
#define REF_ALL_UOPS(TYPE) \
SWS_FOR(TYPE, READ_PLANAR, REF_ENTRY) \
SWS_FOR(TYPE, READ_PLANAR_FV, REF_ENTRY) \
SWS_FOR(TYPE, READ_PLANAR_FH, REF_ENTRY) \
SWS_FOR(TYPE, READ_PACKED, REF_ENTRY) \
SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY) \
SWS_FOR(TYPE, READ_BIT, REF_ENTRY) \
SWS_FOR(TYPE, READ_PALETTE, REF_ENTRY) \
SWS_FOR(TYPE, PERMUTE, REF_ENTRY) \
SWS_FOR(TYPE, COPY, REF_ENTRY) \
SWS_FOR(TYPE, WRITE_PLANAR, REF_ENTRY) \
SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY) \
SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY) \
SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY) \
SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY) \
SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY) \
SWS_FOR(TYPE, EXPAND_PAIR, REF_ENTRY) \
SWS_FOR(TYPE, EXPAND_QUAD, REF_ENTRY) \
SWS_FOR(TYPE, TO_U8, REF_ENTRY) \
SWS_FOR(TYPE, TO_U16, REF_ENTRY) \
SWS_FOR(TYPE, TO_U32, REF_ENTRY) \
SWS_FOR(TYPE, TO_F32, REF_ENTRY) \
SWS_FOR(TYPE, SCALE, REF_ENTRY) \
SWS_FOR(TYPE, ADD, REF_ENTRY) \
SWS_FOR(TYPE, MIN, REF_ENTRY) \
SWS_FOR(TYPE, MAX, REF_ENTRY) \
SWS_FOR(TYPE, UNPACK, REF_ENTRY) \
SWS_FOR(TYPE, PACK, REF_ENTRY) \
SWS_FOR(TYPE, LSHIFT, REF_ENTRY) \
SWS_FOR(TYPE, RSHIFT, REF_ENTRY) \
SWS_FOR(TYPE, CLEAR, REF_ENTRY) \
SWS_FOR(TYPE, LINEAR, REF_ENTRY) \
SWS_FOR(TYPE, DITHER, REF_ENTRY) \
/* end of macro */
/**
 * Table of all reference uop implementations provided by this backend.
 *
 * The entry list is the concatenation of REF_ALL_UOPS() for the U8, U16, U32
 * and F32 element types, followed by a NULL terminator. This is the only
 * table handed to ff_sws_uop_lookup() by compile().
 */
static const SwsUOpTable uop_table = {
/* Every implementation in this table operates on SWS_BLOCK_SIZE-sized blocks */
.block_size = SWS_BLOCK_SIZE,
.entries = {
/*
 * No separators are written between the expansions here.
 * NOTE(review): presumably each REF_ENTRY expansion supplies its own
 * trailing comma -- confirm against the macro definition.
 */
REF_ALL_UOPS(U8)
REF_ALL_UOPS(U16)
REF_ALL_UOPS(U32)
REF_ALL_UOPS(F32)
NULL /* list terminator */
},
};
/**
 * Run a compiled uop chain over a rectangle of blocks.
 *
 * @param exec     execution parameters: per-plane input/output base
 *                 pointers, per-row pointer bumps, input strides and an
 *                 optional per-row extra input bump table (in_bump_y)
 * @param priv     the SwsOpChain stored in SwsCompiledOp.priv by compile()
 * @param bx_start first block column to process
 * @param y_start  first row to process
 * @param bx_end   one past the last block column to process
 * @param y_end    one past the last row to process
 */
static void process(const SwsOpExec *exec, const void *priv,
                    const int bx_start, const int y_start,
                    int bx_end, int y_end)
{
    const SwsOpChain *chain = priv;
    /*
     * NOTE(review): `impl` is not referenced directly below; presumably the
     * CONTINUE() macro picks it up by name -- do not rename or remove.
     */
    const SwsOpImpl *impl = chain->impl;

    /* Scratch storage for the four components, large enough for any
     * intermediate value produced along the chain */
    block_t x, y, z, w;

    /* CONTINUE() expects a local pointer named `iter` */
    SwsOpIter iterdata;
    SwsOpIter *iter = &iterdata;

    iter->exec = exec;
    for (int plane = 0; plane < 4; plane++) {
        iter->in[plane]  = (uintptr_t) exec->in[plane];
        iter->out[plane] = (uintptr_t) exec->out[plane];
    }

    iter->y = y_start;
    while (iter->y < y_end) {
        /* Dispatch the chain once per block of the current row */
        for (int bx = bx_start; bx < bx_end; bx++) {
            iter->x = bx * SWS_BLOCK_SIZE;
            CONTINUE(&x, &y, &z, &w);
        }

        /* Optional number of additional input lines to advance after
         * this row; zero when no table is provided */
        int extra_lines = 0;
        if (exec->in_bump_y)
            extra_lines = exec->in_bump_y[iter->y];

        /* Advance all plane pointers to the start of the next row */
        for (int plane = 0; plane < 4; plane++) {
            iter->in[plane]  += exec->in_bump[plane] +
                                extra_lines * exec->in_stride[plane];
            iter->out[plane] += exec->out_bump[plane];
        }

        iter->y++;
    }
}
/**
 * Compile an operation list into a chain of reference uop implementations.
 *
 * The op list is first translated into a uop list; each uop is then looked
 * up in uop_table and appended to a freshly allocated chain. On success,
 * `out` is filled in with process() as the entry point and the chain as its
 * private data (released through ff_sws_op_chain_free_cb).
 *
 * @param ctx context, used for translation, lookup and logging
 * @param ops operation list to compile
 * @param out receives the compiled operation on success
 * @return 0 on success, a negative error code on failure (AVERROR(ENOMEM) on
 *         allocation failure, otherwise whatever translation/lookup returned)
 */
static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
{
    SwsUOpList *uops = NULL;
    SwsOpChain *chain;
    int err;

    chain = ff_sws_op_chain_alloc();
    if (!chain)
        return AVERROR(ENOMEM);

    uops = ff_sws_uop_list_alloc();
    if (!uops) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    err = ff_sws_ops_translate(ctx, ops, 0, uops);
    if (err < 0)
        goto fail;

    av_assert0(uops->num_ops > 0);

    /* Resolve every uop against the single reference table */
    for (int n = 0; n < uops->num_ops; n++) {
        const SwsUOpTable *table = &uop_table;

        err = ff_sws_uop_lookup(ctx, &table, 1, &uops->ops[n],
                                SWS_BLOCK_SIZE, chain);
        if (err < 0)
            goto fail;
    }

    /* Ownership of the chain passes to `out` from here on */
    *out = (SwsCompiledOp) {
        .slice_align = 1,
        .block_size  = SWS_BLOCK_SIZE,
        .cpu_flags   = chain->cpu_flags,
        .priv        = chain,
        .free        = ff_sws_op_chain_free_cb,
        .func        = process,
    };
    memcpy(out->over_read,  chain->over_read,  sizeof(out->over_read));
    memcpy(out->over_write, chain->over_write, sizeof(out->over_write));

    av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n");
    {
        int n = 0;
        while (n < uops->num_ops) {
            char label[SWS_UOP_NAME_MAX];

            ff_sws_uop_name(&uops->ops[n], label);
            av_log(ctx, AV_LOG_DEBUG, " %s\n", label);
            n++;
        }
    }

    /* The uop list is only needed during compilation */
    ff_sws_uop_list_free(&uops);
    return 0;

fail:
    ff_sws_uop_list_free(&uops);
    ff_sws_op_chain_free(chain);
    return err;
}
/**
 * Backend descriptor for the C reference implementation.
 *
 * Unlike the rest of this file it is not static, so it is visible to other
 * translation units.
 * NOTE(review): presumably referenced from a list of available backends
 * elsewhere -- confirm.
 */
const SwsOpBackend backend_c = {
.name = "c",
.flags = SWS_BACKEND_C,
.compile = compile,
/* No hardware pixel format is associated with this backend */
.hw_format = AV_PIX_FMT_NONE,
};