/* motionmetric.c
Copyright (c) 2003-2025 HandBrake Team
This file is part of the HandBrake source code
Homepage: .
It may be used under the terms of the GNU General Public License v2.
For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
*/
#include "handbrake/handbrake.h"
#if defined (__aarch64__) && !defined(__APPLE__)
#include
#endif
struct hb_motion_metric_private_s
{
unsigned *gamma_lut;
int depth;
int bps;
int max_value;
uint8_t *approx_buf_a;
uint8_t *approx_buf_b;
float (*motion_metric)(hb_motion_metric_private_t *pv,
int width, int height,
int stride_a, int stride_b,
const uint8_t *buf_a, const uint8_t *buf_b);
};
// Create gamma lookup table.
// Note that we are creating a scaled integer lookup table that will
// not cause overflows in sse_block16() below. This results in
// small values being truncated to 0 which is ok for this usage.
static void build_gamma_lut(hb_motion_metric_private_t *pv)
{
for (int i = 0; i <= pv->max_value; i++)
{
pv->gamma_lut[i] = 4095 * pow(((float)i / (float)(pv->max_value -1)), 2.2f);;
}
}
#define APPROX(a, b, c, d) (((((uint32_t)a + b + 1) >> 1) + (((uint32_t)c + d + 1) >> 1) + 1) >> 1)
#define APPROX_FRAME_DATA(nbits) \
static void approximate_frame_data##_##nbits(const uint##nbits##_t *source, uint##nbits##_t *dest, \
int source_stride, int dest_stride, int width, int height) \
{ \
int stride2 = source_stride * 2; \
int stride3 = source_stride * 3; \
int jj4; \
int top_left, top_right, bottom_left, bottom_right; \
for (int ii = 0; ii < height; ii++) \
{ \
for (int jj = 0; jj < width; jj++) \
{ \
jj4 = jj * 4; \
top_left = APPROX(source[jj4], source[jj4 + source_stride], \
source[jj4 + 1], source[jj4 + source_stride + 1]); \
top_right = APPROX(source[jj4 + 2], source[jj4 + source_stride + 2], \
source[jj4 + 3], source[jj4 + source_stride + 3]); \
bottom_left = APPROX(source[jj4 + stride2], source[jj4 + stride3], \
source[jj4 + stride2 + 1], source[jj4 + stride3 + 1]); \
bottom_right = APPROX(source[jj4 + stride2 + 2], source[jj4 + stride3 + 2], \
source[jj4 + stride2 + 3], source[jj4 + stride3 + 3]); \
dest[jj] = APPROX(top_left, top_right, bottom_left, bottom_right); \
} \
source += source_stride * 4; \
dest += dest_stride; \
} \
} \
APPROX_FRAME_DATA(8)
APPROX_FRAME_DATA(16)
// Compute the sum of squared errors for a 16x16 block
// Gamma adjusts pixel values so that less visible differences
// count less.
#if defined (__aarch64__) && !defined(__APPLE__)
#define DEF_MOTION_METRIC(nbits) \
static float motion_metric##_##nbits(hb_motion_metric_private_t *pv, \
int width, int height, \
int stride_a, int stride_b, \
const uint8_t *a, const uint8_t *b) \
{ \
int bw, bh; \
uint##nbits##_t *buf_a, *buf_b; \
\
buf_a = (uint##nbits##_t *)a; \
buf_b = (uint##nbits##_t *)b; \
bw = width / 16; \
bh = height / 16; \
\
uint64_t sum = 0; \
for (int y = 0; y < bh; y++) \
{ \
for (int x = 0; x < bw; x++) \
{ \
const uint##nbits##_t *ra = buf_a + y * 16 * stride_a + x * 16; \
const uint##nbits##_t *rb = buf_b + y * 16 * stride_b + x * 16; \
for (int yy = 0; yy < 16; yy++) \
{ \
uint32_t arrga[16]; \
uint32_t arrgb[16]; \
for (int xx = 0; xx < 16; xx++) \
{ \
arrga[xx] = pv->gamma_lut[ra[xx]]; \
arrgb[xx] = pv->gamma_lut[rb[xx]]; \
} \
uint32x4_t vga0 = vld1q_u32(arrga); \
uint32x4_t vga1 = vld1q_u32(arrga + 4); \
uint32x4_t vga2 = vld1q_u32(arrga + 8); \
uint32x4_t vga3 = vld1q_u32(arrga + 12); \
uint32x4_t vgb0 = vld1q_u32(arrgb); \
uint32x4_t vgb1 = vld1q_u32(arrgb + 4); \
uint32x4_t vgb2 = vld1q_u32(arrgb + 8); \
uint32x4_t vgb3 = vld1q_u32(arrgb + 12); \
uint32x4_t vdf0 = vsubq_u32(vga0, vgb0); \
uint32x4_t vdf1 = vsubq_u32(vga1, vgb1); \
uint32x4_t vdf2 = vsubq_u32(vga2, vgb2); \
uint32x4_t vdf3 = vsubq_u32(vga3, vgb3); \
uint32x4_t vsq0 = vmulq_u32(vdf0, vdf0); \
uint32x4_t vsq1 = vmulq_u32(vdf1, vdf1); \
uint32x4_t vsq2 = vmulq_u32(vdf2, vdf2); \
uint32x4_t vsq3 = vmulq_u32(vdf3, vdf3); \
sum += vaddvq_u32(vsq0); \
sum += vaddvq_u32(vsq1); \
sum += vaddvq_u32(vsq2); \
sum += vaddvq_u32(vsq3); \
ra += stride_a; \
rb += stride_b; \
} \
} \
} \
return (float)sum / (width * height); \
} \
#else
#define DEF_SSE_BLOCK16(nbits) \
static inline unsigned sse_block16##_##nbits(unsigned *gamma_lut, \
const uint##nbits##_t *a, const uint##nbits##_t *b, \
int stride_a, int stride_b) \
{ \
unsigned sum = 0; \
for (int y = 0; y < 16; y++) \
{ \
for (int x = 0; x < 16; x++) \
{ \
int diff = gamma_lut[a[x]] - gamma_lut[b[x]]; \
sum += diff * diff; \
} \
a += stride_a; \
b += stride_b; \
} \
return sum; \
} \
DEF_SSE_BLOCK16(8)
DEF_SSE_BLOCK16(16)
// Sum of squared errors. Computes and sums the SSEs for all
// 16x16 blocks in the images. Only checks the Y component.
#define DEF_MOTION_METRIC(nbits) \
static float motion_metric##_##nbits(hb_motion_metric_private_t *pv, \
int width, int height, \
int stride_a, int stride_b, \
const uint8_t *a, const uint8_t *b) \
{ \
\
int bw, bh; \
uint##nbits##_t *buf_a, *buf_b; \
\
buf_a = (uint##nbits##_t *)a; \
buf_b = (uint##nbits##_t *)b; \
bw = width / 16; \
bh = height / 16; \
stride_a /= pv->bps; \
stride_b /= pv->bps; \
\
uint64_t sum = 0; \
for (int y = 0; y < bh; y++) \
{ \
for (int x = 0; x < bw; x++) \
{ \
sum += sse_block16##_##nbits(pv->gamma_lut, \
buf_a + y * 16 * stride_a + x * 16, \
buf_b + y * 16 * stride_b + x * 16, \
stride_a, stride_b); \
} \
} \
return (float)sum / (width * height); \
} \
#endif
DEF_MOTION_METRIC(8)
DEF_MOTION_METRIC(16)
#define DEF_MOTION_METRIC_FAST(nbits) \
static float motion_metric_fast##_##nbits(hb_motion_metric_private_t *pv, \
int width, int height, \
int stride_a, int stride_b, \
const uint8_t *a, const uint8_t *b) \
{ \
uint##nbits##_t *buf_a, *buf_b; \
int stride_buf_a, stride_buf_b; \
width /= 4; \
height /= 4; \
stride_buf_a = width; \
stride_buf_b = width; \
buf_a = (uint##nbits##_t *)pv->approx_buf_a; \
buf_b = (uint##nbits##_t *)pv->approx_buf_b; \
\
approximate_frame_data##_##nbits((const uint##nbits##_t *)a, buf_a, \
stride_a / pv->bps, stride_buf_a, width, height); \
approximate_frame_data##_##nbits((const uint##nbits##_t *)b, buf_b, \
stride_b / pv->bps, stride_buf_b, width, height); \
\
return motion_metric##_##nbits(pv, width, height, \
stride_buf_a, stride_buf_b, \
(const uint8_t *)buf_a, (const uint8_t *)buf_b); \
} \
DEF_MOTION_METRIC_FAST(8)
DEF_MOTION_METRIC_FAST(16)
static int hb_motion_metric_init(hb_motion_metric_object_t *metric,
hb_filter_init_t *init)
{
metric->private_data = calloc(sizeof(struct hb_motion_metric_private_s), 1);
if (metric->private_data == NULL)
{
hb_error("motion_metric: calloc failed");
return -1;
}
hb_motion_metric_private_t *pv = metric->private_data;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(init->pix_fmt);
pv->depth = desc->comp[0].depth;
pv->bps = pv->depth > 8 ? 2 : 1;
pv->max_value = (1 << pv->depth) - 1;
pv->gamma_lut = malloc(sizeof(unsigned) * (pv->max_value + 1));
if (pv->gamma_lut == NULL)
{
hb_error("motion_metric: malloc failed");
return -1;
}
build_gamma_lut(pv);
int fast = 0;
if (init->geometry.width >= 1920 || init->geometry.height >= 1080)
{
fast = 1;
int approx_height = init->geometry.height / 4;
int approx_width = init->geometry.width / 4;
int size = approx_height * approx_width * sizeof(uint8_t) * pv->bps;
pv->approx_buf_a = malloc(size);
pv->approx_buf_b = malloc(size);
if (pv->approx_buf_a == NULL || pv->approx_buf_b == NULL)
{
hb_error("motion_metric: malloc failed");
return -1;
}
}
switch (pv->depth)
{
case 8:
pv->motion_metric = fast ? motion_metric_fast_8 : motion_metric_8;
break;
default:
pv->motion_metric = fast ? motion_metric_fast_16 : motion_metric_16;
}
return 0;
}
static float hb_motion_metric_work(hb_motion_metric_object_t *metric,
hb_buffer_t *buf_a,
hb_buffer_t *buf_b)
{
hb_motion_metric_private_t *pv = metric->private_data;
return pv->motion_metric(metric->private_data,
buf_a->f.width, buf_a->f.height,
buf_a->plane[0].stride, buf_b->plane[0].stride,
buf_a->plane[0].data, buf_b->plane[0].data);
}
static void hb_motion_metric_close(hb_motion_metric_object_t *metric)
{
hb_motion_metric_private_t *pv = metric->private_data;
if (pv == NULL)
{
return;
}
free(pv->gamma_lut);
free(pv->approx_buf_a);
free(pv->approx_buf_b);
free(pv);
}
hb_motion_metric_object_t hb_motion_metric =
{
.name = "Motion metric",
.init = hb_motion_metric_init,
.work = hb_motion_metric_work,
.close = hb_motion_metric_close,
};