/* comb_detect_template.c

   Copyright (c) 2003-2025 HandBrake Team
   This file is part of the HandBrake source code
   Homepage: <http://handbrake.fr/>.
   It may be used under the terms of the GNU General Public License v2.
   For full terms see the file COPYING or visit
   http://www.gnu.org/licenses/gpl-2.0.html
 */

#if BIT_DEPTH > 8
#   define pixel  uint16_t
#   define FUNC(name) name##_##16
#else
#   define pixel  uint8_t
#   define FUNC(name) name##_##8
#endif

#if defined (__aarch64__) && !defined(__APPLE__)
// NEON intrinsics for the vectorized detection paths below.
#include <arm_neon.h>
#endif

static inline void FUNC(draw_mask_box)(hb_filter_private_t *pv)
{
    const int x = pv->mask_box_x;
    const int y = pv->mask_box_y;
    const int box_width  = pv->block_width;
    const int box_height = pv->block_height;
    int stride;
    uint8_t *mskp;

    if (pv->mode & MODE_FILTER)
    {
        mskp   = pv->mask_filtered->plane[0].data;
        stride = pv->mask_filtered->plane[0].stride;
    }
    else
    {
        mskp   = pv->mask->plane[0].data;
        stride = pv->mask->plane[0].stride;
    }

    for (int block_x = 0; block_x < box_width; block_x++)
    {
        mskp[ y               * stride + x + block_x] = 128;
        mskp[(y + box_height) * stride + x + block_x] = 128;
    }

    for (int block_y = 0; block_y < box_height; block_y++)
    {
        mskp[stride * (y + block_y) + x            ] = 128;
        mskp[stride * (y + block_y) + x + box_width] = 128;
    }
}

static inline void FUNC(apply_mask_line)(pixel *srcp,
                                         const uint8_t *mskp,
                                         const int width,
                                         const int max,
                                         const int half)
{
    for (int x = 0; x < width; x++)
    {
        if (mskp[x] == 1)
        {
            srcp[x] = max;
        }
        else if (mskp[x] == 128)
        {
            srcp[x] = half;
        }
    }
}

static void FUNC(apply_mask)(hb_filter_private_t *pv, hb_buffer_t *b)
{
    // Draw boxes
    FUNC(draw_mask_box)(pv);

    const hb_buffer_t *m;
    const int max  = pv->max_value;
    const int half = pv->half_value;

    if (pv->mode & MODE_FILTER)
    {
        m = pv->mask_filtered;
    }
    else
    {
        m = pv->mask;
    }

    for (int pp = 0; pp < 3; pp++)
    {
        pixel *dstp = (pixel *)b->plane[pp].data;
        const int dstp_stride = b->plane[pp].stride / pv->bps;
        const int width  = m->plane[pp].width;
        const int height = m->plane[pp].height;

        if (!(pv->mode & MODE_COMPOSITE))
        {
            if (pp == 0)
            {
                memset(dstp, 0, b->plane[pp].size);
            }
            else
            {
                if (pv->depth == 8)
                {
                    memset(dstp, half, b->plane[pp].size);
                }
                else
                {
                    for (int i = 0; i < b->plane[pp].size / pv->bps; i++)
                    {
                        dstp[i] = half;
                    }
                }
            }
        }
        if (pp == 0)
        {
            const uint8_t *mskp   = m->plane[0].data;
            const int mskp_stride = m->plane[0].stride;

            for (int yy = 0; yy < height; yy++)
            {
                FUNC(apply_mask_line)(dstp, mskp, width, max, half);
                dstp += dstp_stride;
                mskp += mskp_stride;
            }
        }
    }
}

#if defined (__aarch64__) && !defined(__APPLE__)
static void FUNC(detect_gamma_combed_segment)(hb_filter_private_t *pv,
                                              int segment_start, int segment_stop)
{
    // A mishmash of various comb detection tricks
    // picked up from neuron2's Decomb plugin for
    // AviSynth and tritical's IsCombedT and
    // IsCombedTIVTC plugins.

    // Comb scoring algorithm
    const float mthresh  = pv->gamma_motion_threshold;
    const float athresh  = pv->gamma_spatial_threshold;
    const float athresh6 = pv->gamma_spatial_threshold6;

    // One pass for Y
    const int stride_prev = pv->ref[0]->plane[0].stride / pv->bps;
    const int stride_cur  = pv->ref[1]->plane[0].stride / pv->bps;
    const int stride_next = pv->ref[2]->plane[0].stride / pv->bps;
    const int width       = pv->ref[0]->plane[0].width;
    const int height      = pv->ref[0]->plane[0].height;
    const int mask_stride = pv->mask->plane[0].stride;

    // Comb detection has to start at y = 2 and end at
    // y = height - 2, because it needs to examine
    // 2 pixels above and 2 below the current pixel.
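    // Clamp example (illustration, not from the original source): with
    // height = 480 and four equal segments of 120 rows, the first segment
    // [0, 120) is clamped to [2, 120) and the last [360, 480) to [360, 478),
    // so the 5-row window read below never falls outside the plane.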
    if (segment_start < 2)
    {
        segment_start = 2;
    }
    if (segment_stop > height - 2)
    {
        segment_stop = height - 2;
    }

    // These are just to make the buffer locations easier to read.
    const int up_1_prev   = -1 * stride_prev;
    const int down_1_prev =      stride_prev;
    const int up_2        = -2 * stride_cur;
    const int up_1        = -1 * stride_cur;
    const int down_1      =      stride_cur;
    const int down_2      =  2 * stride_cur;
    const int up_1_next   = -1 * stride_next;
    const int down_1_next =      stride_next;

    float32x4_t v_athresh     = vdupq_n_f32(athresh);
    float32x4_t v_athresh_neg = vdupq_n_f32(-athresh);
    float32x4_t v_mthresh     = vdupq_n_f32(mthresh);
    float32x4_t v_athresh6    = vdupq_n_f32(athresh6);
    float32x4_t v_four        = vdupq_n_f32(4.0f);
    float32x4_t v_three       = vdupq_n_f32(3.0f);
    uint32x4_t  v_one         = vdupq_n_u32(1);
    uint32x4_t  v_exhaustive_check = vdupq_n_u32(pv->force_exaustive_check);

    for (int y = segment_start; y < segment_stop; y++)
    {
        // We need to examine a column of 5 pixels
        // in the prev, cur, and next frames.
        const pixel *prev = &((const pixel *)pv->ref[0]->plane[0].data)[y * stride_prev];
        const pixel *cur  = &((const pixel *)pv->ref[1]->plane[0].data)[y * stride_cur];
        const pixel *next = &((const pixel *)pv->ref[2]->plane[0].data)[y * stride_next];
        uint8_t *mask = &pv->mask->plane[0].data[y * mask_stride];

        memset(mask, 0, mask_stride);

        uint32_t mask32[4];

        for (int x = 0; x < width; x += 4)
        {
            uint32x4_t mask_vec = vdupq_n_u32(0);

            float32x4_t cur_vec = {pv->gamma_lut[cur[0]], pv->gamma_lut[cur[1]],
                                   pv->gamma_lut[cur[2]], pv->gamma_lut[cur[3]]};
            float32x4_t cur_up1_vec = {pv->gamma_lut[cur[up_1 + 0]], pv->gamma_lut[cur[up_1 + 1]],
                                       pv->gamma_lut[cur[up_1 + 2]], pv->gamma_lut[cur[up_1 + 3]]};
            float32x4_t cur_down1_vec = {pv->gamma_lut[cur[down_1 + 0]], pv->gamma_lut[cur[down_1 + 1]],
                                         pv->gamma_lut[cur[down_1 + 2]], pv->gamma_lut[cur[down_1 + 3]]};

            float32x4_t up_diff1   = vsubq_f32(cur_vec, cur_up1_vec);
            float32x4_t down_diff1 = vsubq_f32(cur_vec, cur_down1_vec);

            uint32x4_t cond1 = vcgtq_f32(up_diff1, v_athresh);
            uint32x4_t cond2 = vcgtq_f32(down_diff1, v_athresh);
            uint32x4_t cond3 = vcltq_f32(up_diff1, v_athresh_neg);
            uint32x4_t cond4 = vcltq_f32(down_diff1, v_athresh_neg);

            uint32x4_t condition1 = vandq_u32(cond1, cond2);
            uint32x4_t condition2 = vandq_u32(cond3, cond4);
            uint32x4_t condition  = vorrq_u32(condition1, condition2);

            if (vmaxvq_u32(condition) > 0)
            {
                uint32x4_t motion  = vdupq_n_u32(0);
                uint32x4_t motion1 = vdupq_n_u32(0);
                if (mthresh > 0)
                {
                    float32x4_t prev_vec = {pv->gamma_lut[prev[0]], pv->gamma_lut[prev[1]],
                                            pv->gamma_lut[prev[2]], pv->gamma_lut[prev[3]]};
                    float32x4_t next_vec = {pv->gamma_lut[next[0]], pv->gamma_lut[next[1]],
                                            pv->gamma_lut[next[2]], pv->gamma_lut[next[3]]};
                    float32x4_t next_up_1_vec = {pv->gamma_lut[next[up_1_next + 0]], pv->gamma_lut[next[up_1_next + 1]],
                                                 pv->gamma_lut[next[up_1_next + 2]], pv->gamma_lut[next[up_1_next + 3]]};
                    float32x4_t next_down_1_vec = {pv->gamma_lut[next[down_1_next + 0]], pv->gamma_lut[next[down_1_next + 1]],
                                                   pv->gamma_lut[next[down_1_next + 2]], pv->gamma_lut[next[down_1_next + 3]]};

                    float32x4_t abs_diff1 = vabsq_f32(vsubq_f32(prev_vec, cur_vec));
                    float32x4_t abs_diff2 = vabsq_f32(vsubq_f32(cur_up1_vec, next_up_1_vec));
                    float32x4_t abs_diff3 = vabsq_f32(vsubq_f32(cur_down1_vec, next_down_1_vec));

                    uint32x4_t motion_cond1 = vcgtq_f32(abs_diff1, v_mthresh);
                    uint32x4_t motion_cond2 = vcgtq_f32(abs_diff2, v_mthresh);
                    uint32x4_t motion_cond3 = vcgtq_f32(abs_diff3, v_mthresh);

                    motion = vandq_u32(vandq_u32(motion_cond1, motion_cond2), motion_cond3);

                    float32x4_t prev_up_1_vec =
                        {pv->gamma_lut[prev[up_1_prev + 0]], pv->gamma_lut[prev[up_1_prev + 1]],
                         pv->gamma_lut[prev[up_1_prev + 2]], pv->gamma_lut[prev[up_1_prev + 3]]};
                    float32x4_t prev_down_1_vec =
                        {pv->gamma_lut[prev[down_1_prev + 0]], pv->gamma_lut[prev[down_1_prev + 1]],
                         pv->gamma_lut[prev[down_1_prev + 2]], pv->gamma_lut[prev[down_1_prev + 3]]};

                    float32x4_t abs_diff4 = vabsq_f32(vsubq_f32(next_vec, cur_vec));
                    float32x4_t abs_diff5 = vabsq_f32(vsubq_f32(prev_up_1_vec, cur_up1_vec));
                    float32x4_t abs_diff6 = vabsq_f32(vsubq_f32(prev_down_1_vec, cur_down1_vec));

                    uint32x4_t motion_cond4 = vcgtq_f32(abs_diff4, v_mthresh);
                    uint32x4_t motion_cond5 = vcgtq_f32(abs_diff5, v_mthresh);
                    uint32x4_t motion_cond6 = vcgtq_f32(abs_diff6, v_mthresh);

                    motion1 = vandq_u32(vandq_u32(motion_cond4, motion_cond5), motion_cond6);
                    motion  = vorrq_u32(motion, motion1);
                }
                else
                {
                    motion = vdupq_n_u32(1);
                }

                uint32x4_t motion_check = vorrq_u32(motion, v_exhaustive_check);
                motion_check = vcgtq_u32(motion_check, mask_vec);

                float32x4_t cur_up2_vec = {pv->gamma_lut[cur[up_2 + 0]], pv->gamma_lut[cur[up_2 + 1]],
                                           pv->gamma_lut[cur[up_2 + 2]], pv->gamma_lut[cur[up_2 + 3]]};
                float32x4_t cur_down2_vec = {pv->gamma_lut[cur[down_2 + 0]], pv->gamma_lut[cur[down_2 + 1]],
                                             pv->gamma_lut[cur[down_2 + 2]], pv->gamma_lut[cur[down_2 + 3]]};

                float32x4_t combing1 = vabsq_f32(
                    vsubq_f32(vaddq_f32(vaddq_f32(cur_up2_vec, vmulq_f32(cur_vec, v_four)), cur_down2_vec),
                              vmulq_f32(vaddq_f32(cur_up1_vec, cur_down1_vec), v_three)));

                uint32x4_t combing_cond = vcgtq_f32(combing1, v_athresh6);

                mask_vec = vandq_u32(combing_cond, motion_check);
                mask_vec = vandq_u32(mask_vec, condition);
                mask_vec = vandq_u32(mask_vec, v_one);

                vst1q_u32(mask32, mask_vec);
                mask[0] = mask32[0];
                mask[1] = mask32[1];
                mask[2] = mask32[2];
                mask[3] = mask32[3];
            }
            cur  += 4;
            prev += 4;
            next += 4;
            mask += 4;
        }
    }
}
#else
static void FUNC(detect_gamma_combed_segment)(hb_filter_private_t *pv,
                                              int segment_start, int segment_stop)
{
    // A mishmash of various comb detection tricks
    // picked up from neuron2's Decomb plugin for
    // AviSynth and tritical's IsCombedT and
    // IsCombedTIVTC plugins.

    // Comb scoring algorithm
    const float mthresh  = pv->gamma_motion_threshold;
    const float athresh  = pv->gamma_spatial_threshold;
    const float athresh6 = pv->gamma_spatial_threshold6;

    // One pass for Y
    const int stride_prev = pv->ref[0]->plane[0].stride / pv->bps;
    const int stride_cur  = pv->ref[1]->plane[0].stride / pv->bps;
    const int stride_next = pv->ref[2]->plane[0].stride / pv->bps;
    const int width       = pv->ref[0]->plane[0].width;
    const int height      = pv->ref[0]->plane[0].height;
    const int mask_stride = pv->mask->plane[0].stride;

    // Comb detection has to start at y = 2 and end at
    // y = height - 2, because it needs to examine
    // 2 pixels above and 2 below the current pixel.
    if (segment_start < 2)
    {
        segment_start = 2;
    }
    if (segment_stop > height - 2)
    {
        segment_stop = height - 2;
    }

    // These are just to make the buffer locations easier to read.
    const int up_1_prev   = -1 * stride_prev;
    const int down_1_prev =      stride_prev;
    const int up_2        = -2 * stride_cur;
    const int up_1        = -1 * stride_cur;
    const int down_1      =      stride_cur;
    const int down_2      =  2 * stride_cur;
    const int up_1_next   = -1 * stride_next;
    const int down_1_next =      stride_next;

    for (int y = segment_start; y < segment_stop; y++)
    {
        // We need to examine a column of 5 pixels
        // in the prev, cur, and next frames.
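        // Columns actually read at each x (illustrative diagram, not from
        // the original source; rows are relative to y -- cur sees all five
        // rows, prev/next only three):
        //
        //            prev   cur   next
        //     y-2            x
        //     y-1     x      x     x
        //     y       x      x     x
        //     y+1     x      x     x
        //     y+2            x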
        const pixel *prev = &((const pixel *)pv->ref[0]->plane[0].data)[y * stride_prev];
        const pixel *cur  = &((const pixel *)pv->ref[1]->plane[0].data)[y * stride_cur];
        const pixel *next = &((const pixel *)pv->ref[2]->plane[0].data)[y * stride_next];
        uint8_t *mask = &pv->mask->plane[0].data[y * mask_stride];

        memset(mask, 0, mask_stride);

        for (int x = 0; x < width; x++)
        {
            const float up_diff   = pv->gamma_lut[cur[0]] - pv->gamma_lut[cur[up_1]];
            const float down_diff = pv->gamma_lut[cur[0]] - pv->gamma_lut[cur[down_1]];

            if ((up_diff >  athresh && down_diff >  athresh) ||
                (up_diff < -athresh && down_diff < -athresh))
            {
                // The pixel above and below are different,
                // and they change in the same "direction" too.
                int motion = 0;
                if (mthresh > 0)
                {
                    // Make sure there's sufficient motion between frame t-1 and frame t+1.
                    if (fabs(pv->gamma_lut[prev[0]]     - pv->gamma_lut[cur[0]]           ) > mthresh &&
                        fabs(pv->gamma_lut[cur[up_1]]   - pv->gamma_lut[next[up_1_next]]  ) > mthresh &&
                        fabs(pv->gamma_lut[cur[down_1]] - pv->gamma_lut[next[down_1_next]]) > mthresh)
                    {
                        motion++;
                    }
                    if (fabs(pv->gamma_lut[next[0]]           - pv->gamma_lut[cur[0]]     ) > mthresh &&
                        fabs(pv->gamma_lut[prev[up_1_prev]]   - pv->gamma_lut[cur[up_1]]  ) > mthresh &&
                        fabs(pv->gamma_lut[prev[down_1_prev]] - pv->gamma_lut[cur[down_1]]) > mthresh)
                    {
                        motion++;
                    }
                }
                else
                {
                    // User doesn't want to check for motion,
                    // so move on to the spatial check.
                    motion = 1;
                }

                if (motion || pv->force_exaustive_check)
                {
                    // Tritical's noise-resistant combing scorer.
                    // The check is done on a bob+blur convolution.
                    float combing = fabs(pv->gamma_lut[cur[up_2]] +
                                         (4 * pv->gamma_lut[cur[0]]) +
                                         pv->gamma_lut[cur[down_2]] -
                                         (3 * (pv->gamma_lut[cur[up_1]] +
                                               pv->gamma_lut[cur[down_1]])));

                    // If the frame is sufficiently combed,
                    // then mark it down on the mask as 1.
                    if (combing > athresh6)
                    {
                        mask[0] = 1;
                    }
                }
            }
            cur++;
            prev++;
            next++;
            mask++;
        }
    }
}
#endif

#if defined (__aarch64__) && !defined(__APPLE__)
#if BIT_DEPTH > 8
static void FUNC(detect_combed_segment)(hb_filter_private_t *pv,
                                        int segment_start, int segment_stop)
{
    // A mishmash of various comb detection tricks
    // picked up from neuron2's Decomb plugin for
    // AviSynth and tritical's IsCombedT and
    // IsCombedTIVTC plugins.

    // Comb scoring algorithm
    const int spatial_metric  = pv->spatial_metric;
    const int mthresh         = pv->motion_threshold;
    const int athresh         = pv->spatial_threshold;
    const int athresh_squared = pv->spatial_threshold_squared;
    const int athresh6        = pv->spatial_threshold6;

    // One pass for Y
    const int stride_prev = pv->ref[0]->plane[0].stride / pv->bps;
    const int stride_cur  = pv->ref[1]->plane[0].stride / pv->bps;
    const int stride_next = pv->ref[2]->plane[0].stride / pv->bps;
    const int width       = pv->ref[0]->plane[0].width;
    const int height      = pv->ref[0]->plane[0].height;
    const int mask_stride = pv->mask->plane[0].stride;

    // Comb detection has to start at y = 2 and end at
    // y = height - 2, because it needs to examine
    // 2 pixels above and 2 below the current pixel.
    if (segment_start < 2)
    {
        segment_start = 2;
    }
    if (segment_stop > height - 2)
    {
        segment_stop = height - 2;
    }

    // These are just to make the buffer locations easier to read.
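    // Example (illustration, not from the original source): the offsets are
    // row strides in pixels, so cur[up_2] addresses the same column two rows
    // above cur[0], and cur[down_1] one row below. prev/next get their own
    // offsets because the three reference buffers may be padded differently.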
    const int up_1_prev   = -1 * stride_prev;
    const int down_1_prev =      stride_prev;
    const int up_2        = -2 * stride_cur;
    const int up_1        = -1 * stride_cur;
    const int down_1      =      stride_cur;
    const int down_2      =  2 * stride_cur;
    const int up_1_next   = -1 * stride_next;
    const int down_1_next =      stride_next;

    int32x4_t v_athresh         = vdupq_n_s32(athresh);
    int32x4_t v_athresh_neg     = vdupq_n_s32(-athresh);
    int32x4_t v_mthresh         = vdupq_n_s32(mthresh);
    int32x4_t v_athresh6        = vdupq_n_s32(athresh6);
    int32x4_t v_athresh_squared = vdupq_n_s32(athresh_squared);
    int32x4_t v_four            = vdupq_n_s32(4);
    int32x4_t v_three           = vdupq_n_s32(3);
    uint32x4_t v_one            = vdupq_n_u32(1);
    int32x4_t v_c32detect_min   = vdupq_n_s32(pv->comb32detect_min);
    int32x4_t v_c32detect_max   = vdupq_n_s32(pv->comb32detect_max);
    uint32x4_t v_exhaustive_check = vdupq_n_u32(pv->force_exaustive_check);

    for (int y = segment_start; y < segment_stop; y++)
    {
        // We need to examine a column of 5 pixels
        // in the prev, cur, and next frames.
        const pixel *prev = &((const pixel *)pv->ref[0]->plane[0].data)[y * stride_prev];
        const pixel *cur  = &((const pixel *)pv->ref[1]->plane[0].data)[y * stride_cur];
        const pixel *next = &((const pixel *)pv->ref[2]->plane[0].data)[y * stride_next];
        uint8_t *mask = &pv->mask->plane[0].data[y * mask_stride];

        memset(mask, 0, mask_stride);

        uint32_t mask32[4];

        for (int x = 0; x < width; x += 4)
        {
            uint32x4_t mask_vec = vdupq_n_u32(0);

            int32x4_t cur_vec       = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur)));
            int32x4_t cur_up1_vec   = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + up_1)));
            int32x4_t cur_down1_vec = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + down_1)));

            int32x4_t up_diff_vec   = vsubq_s32(cur_vec, cur_up1_vec);
            int32x4_t down_diff_vec = vsubq_s32(cur_vec, cur_down1_vec);

            uint32x4_t cond1 = vcgtq_s32(up_diff_vec, v_athresh);
            uint32x4_t cond2 = vcgtq_s32(down_diff_vec, v_athresh);
            uint32x4_t cond3 = vcltq_s32(up_diff_vec, v_athresh_neg);
            uint32x4_t cond4 = vcltq_s32(down_diff_vec, v_athresh_neg);

            uint32x4_t condition1 = vandq_u32(cond1, cond2);
            uint32x4_t condition2 = vandq_u32(cond3, cond4);
            uint32x4_t condition  = vorrq_u32(condition1, condition2);

            if (vmaxvq_u32(condition) > 0)
            {
                uint32x4_t motion  = vdupq_n_u32(0);
                uint32x4_t motion1 = vdupq_n_u32(0);
                if (mthresh > 0)
                {
                    int32x4_t prev_vec       = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(prev)));
                    int32x4_t next_up1_vec   = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(next + up_1_next)));
                    int32x4_t next_down1_vec = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(next + down_1_next)));

                    int32x4_t abs_diff1 = vabsq_s32(vsubq_s32(prev_vec, cur_vec));
                    int32x4_t abs_diff2 = vabsq_s32(vsubq_s32(cur_up1_vec, next_up1_vec));
                    int32x4_t abs_diff3 = vabsq_s32(vsubq_s32(cur_down1_vec, next_down1_vec));

                    uint32x4_t motion_cond1 = vcgtq_s32(abs_diff1, v_mthresh);
                    uint32x4_t motion_cond2 = vcgtq_s32(abs_diff2, v_mthresh);
                    uint32x4_t motion_cond3 = vcgtq_s32(abs_diff3, v_mthresh);

                    motion = vandq_u32(vandq_u32(motion_cond1, motion_cond2), motion_cond3);

                    int32x4_t next_vec       = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(next)));
                    int32x4_t prev_up1_vec   = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(prev + up_1_prev)));
                    int32x4_t prev_down1_vec = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(prev + down_1_prev)));

                    int32x4_t abs_diff4 = vabsq_s32(vsubq_s32(next_vec, cur_vec));
                    int32x4_t abs_diff5 = vabsq_s32(vsubq_s32(prev_up1_vec, cur_up1_vec));
                    int32x4_t abs_diff6 = vabsq_s32(vsubq_s32(prev_down1_vec, cur_down1_vec));

                    uint32x4_t motion_cond4 = vcgtq_s32(abs_diff4, v_mthresh);
                    uint32x4_t motion_cond5 = vcgtq_s32(abs_diff5, v_mthresh);
                    uint32x4_t
                    motion_cond6 = vcgtq_s32(abs_diff6, v_mthresh);

                    motion1 = vandq_u32(vandq_u32(motion_cond4, motion_cond5), motion_cond6);
                    motion  = vorrq_u32(motion, motion1);
                }
                else
                {
                    motion = vdupq_n_u32(1);
                }

                uint32x4_t motion_check = vorrq_u32(motion, v_exhaustive_check);
                motion_check = vcgtq_u32(motion_check, mask_vec);

                int32x4_t cur_up2_vec   = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + up_2)));
                int32x4_t cur_down2_vec = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + down_2)));

                switch (spatial_metric)
                {
                    case 0:
                    {
                        uint32x4_t cond_c32_detect_min = vcltq_s32(vabsq_s32(vsubq_s32(cur_vec, cur_down2_vec)), v_c32detect_min);
                        uint32x4_t cond_c32_detect_max = vcgtq_s32(vabsq_s32(vsubq_s32(cur_vec, cur_down1_vec)), v_c32detect_max);
                        uint32x4_t s_metric_0_vec = vandq_u32(cond_c32_detect_min, cond_c32_detect_max);

                        mask_vec = vandq_u32(s_metric_0_vec, motion_check);
                        mask_vec = vandq_u32(mask_vec, condition);
                        mask_vec = vandq_u32(mask_vec, v_one);

                        vst1q_u32(mask32, mask_vec);
                        mask[0] = mask32[0];
                        mask[1] = mask32[1];
                        mask[2] = mask32[2];
                        mask[3] = mask32[3];
                        break;
                    }
                    case 1:
                    {
                        int32x4_t s_metric_1_diff1 = vsubq_s32(cur_up1_vec, cur_vec);
                        int32x4_t s_metric_1_diff2 = vsubq_s32(cur_down1_vec, cur_vec);
                        int32x4_t s_metric_1_mul   = vmulq_s32(s_metric_1_diff1, s_metric_1_diff2);
                        uint32x4_t s_metric_1_vec  = vcgtq_s32(s_metric_1_mul, v_athresh_squared);

                        mask_vec = vandq_u32(s_metric_1_vec, motion_check);
                        mask_vec = vandq_u32(mask_vec, condition);
                        mask_vec = vandq_u32(mask_vec, v_one);

                        vst1q_u32(mask32, mask_vec);
                        mask[0] = mask32[0];
                        mask[1] = mask32[1];
                        mask[2] = mask32[2];
                        mask[3] = mask32[3];
                        break;
                    }
                    case 2:
                    {
                        int32x4_t combing1 = vabsq_s32(
                            vsubq_s32(vaddq_s32(vaddq_s32(cur_up2_vec, vmulq_s32(cur_vec, v_four)), cur_down2_vec),
                                      vmulq_s32(vaddq_s32(cur_up1_vec, cur_down1_vec), v_three)));
                        uint32x4_t s_metric_2_vec = vcgtq_s32(combing1, v_athresh6);

                        mask_vec = vandq_u32(s_metric_2_vec, motion_check);
                        mask_vec = vandq_u32(mask_vec, condition);
                        mask_vec = vandq_u32(mask_vec, v_one);

                        vst1q_u32(mask32, mask_vec);
                        mask[0] = mask32[0];
                        mask[1] = mask32[1];
                        mask[2] = mask32[2];
                        mask[3] = mask32[3];
                        break;
                    }
                }
            }
            cur  += 4;
            prev += 4;
            next += 4;
            mask += 4;
        }
    }
}
#else
static void FUNC(detect_combed_segment)(hb_filter_private_t *pv,
                                        int segment_start, int segment_stop)
{
    // A mishmash of various comb detection tricks
    // picked up from neuron2's Decomb plugin for
    // AviSynth and tritical's IsCombedT and
    // IsCombedTIVTC plugins.

    // Comb scoring algorithm
    const int spatial_metric  = pv->spatial_metric;
    const int mthresh         = pv->motion_threshold;
    const int athresh         = pv->spatial_threshold;
    const int athresh_squared = pv->spatial_threshold_squared;
    const int athresh6        = pv->spatial_threshold6;

    // One pass for Y
    const int stride_prev = pv->ref[0]->plane[0].stride / pv->bps;
    const int stride_cur  = pv->ref[1]->plane[0].stride / pv->bps;
    const int stride_next = pv->ref[2]->plane[0].stride / pv->bps;
    const int width       = pv->ref[0]->plane[0].width;
    const int height      = pv->ref[0]->plane[0].height;
    const int mask_stride = pv->mask->plane[0].stride;

    // Comb detection has to start at y = 2 and end at
    // y = height - 2, because it needs to examine
    // 2 pixels above and 2 below the current pixel.
    if (segment_start < 2)
    {
        segment_start = 2;
    }
    if (segment_stop > height - 2)
    {
        segment_stop = height - 2;
    }

    // These are just to make the buffer locations easier to read.
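    // Note (explanatory, not in the original source): this 8-bit variant
    // widens pixels to 16-bit lanes before doing any arithmetic. The
    // metric-2 expression can reach |255 + 4*255 + 255 - 0| = 1530, which
    // overflows 8 bits but fits comfortably in int16_t, so eight pixels are
    // processed per iteration in int16x8_t registers.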
    const int up_1_prev   = -1 * stride_prev;
    const int down_1_prev =      stride_prev;
    const int up_2        = -2 * stride_cur;
    const int up_1        = -1 * stride_cur;
    const int down_1      =      stride_cur;
    const int down_2      =  2 * stride_cur;
    const int up_1_next   = -1 * stride_next;
    const int down_1_next =      stride_next;

    int16x8_t v_athresh         = vdupq_n_s16(athresh);
    int16x8_t v_athresh_neg     = vdupq_n_s16(-athresh);
    int16x8_t v_mthresh         = vdupq_n_s16(mthresh);
    int16x8_t v_athresh6        = vdupq_n_s16(athresh6);
    int16x8_t v_athresh_squared = vdupq_n_s16(athresh_squared);
    int16x8_t v_four            = vdupq_n_s16(4);
    int16x8_t v_three           = vdupq_n_s16(3);
    uint16x8_t v_one            = vdupq_n_u16(1);
    int16x8_t v_c32detect_min   = vdupq_n_s16(pv->comb32detect_min);
    int16x8_t v_c32detect_max   = vdupq_n_s16(pv->comb32detect_max);
    uint16x8_t v_exhaustive_check = vdupq_n_u16(pv->force_exaustive_check);

    for (int y = segment_start; y < segment_stop; y++)
    {
        // We need to examine a column of 5 pixels
        // in the prev, cur, and next frames.
        const pixel *prev = &((const pixel *)pv->ref[0]->plane[0].data)[y * stride_prev];
        const pixel *cur  = &((const pixel *)pv->ref[1]->plane[0].data)[y * stride_cur];
        const pixel *next = &((const pixel *)pv->ref[2]->plane[0].data)[y * stride_next];
        uint8_t *mask = &pv->mask->plane[0].data[y * mask_stride];

        memset(mask, 0, mask_stride);

        for (int x = 0; x < width; x += 8)
        {
            uint16x8_t mask_vec = vdupq_n_u16(0);

            int16x8_t cur_vec       = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)cur)));
            int16x8_t cur_up1_vec   = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)cur + up_1)));
            int16x8_t cur_down1_vec = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)cur + down_1)));

            int16x8_t up_diff_vec   = vsubq_s16(cur_vec, cur_up1_vec);
            int16x8_t down_diff_vec = vsubq_s16(cur_vec, cur_down1_vec);

            uint16x8_t cond1 = vcgtq_s16(up_diff_vec, v_athresh);
            uint16x8_t cond2 = vcgtq_s16(down_diff_vec, v_athresh);
            uint16x8_t cond3 = vcltq_s16(up_diff_vec, v_athresh_neg);
            uint16x8_t cond4 = vcltq_s16(down_diff_vec, v_athresh_neg);

            uint16x8_t condition1 = vandq_u16(cond1, cond2);
            uint16x8_t condition2 = vandq_u16(cond3, cond4);
            uint16x8_t condition  = vorrq_u16(condition1, condition2);

            if (vmaxvq_u16(condition) > 0)
            {
                uint16x8_t motion  = vdupq_n_u16(0);
                uint16x8_t motion1 = vdupq_n_u16(0);
                if (mthresh > 0)
                {
                    int16x8_t prev_vec       = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)prev)));
                    int16x8_t next_up1_vec   = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)next + up_1_next)));
                    int16x8_t next_down1_vec = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)next + down_1_next)));

                    int16x8_t abs_diff1 = vabsq_s16(vsubq_s16(prev_vec, cur_vec));
                    int16x8_t abs_diff2 = vabsq_s16(vsubq_s16(cur_up1_vec, next_up1_vec));
                    int16x8_t abs_diff3 = vabsq_s16(vsubq_s16(cur_down1_vec, next_down1_vec));

                    uint16x8_t motion_cond1 = vcgtq_s16(abs_diff1, v_mthresh);
                    uint16x8_t motion_cond2 = vcgtq_s16(abs_diff2, v_mthresh);
                    uint16x8_t motion_cond3 = vcgtq_s16(abs_diff3, v_mthresh);

                    motion = vandq_u16(vandq_u16(motion_cond1, motion_cond2), motion_cond3);

                    int16x8_t next_vec       = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)next)));
                    int16x8_t prev_up1_vec   = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)prev + up_1_prev)));
                    int16x8_t prev_down1_vec = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)prev + down_1_prev)));

                    int16x8_t abs_diff4 = vabsq_s16(vsubq_s16(next_vec, cur_vec));
                    int16x8_t abs_diff5 = vabsq_s16(vsubq_s16(prev_up1_vec, cur_up1_vec));
                    int16x8_t abs_diff6 = vabsq_s16(vsubq_s16(prev_down1_vec, cur_down1_vec));

                    uint16x8_t motion_cond4 = vcgtq_s16(abs_diff4, v_mthresh);
                    uint16x8_t motion_cond5
                        = vcgtq_s16(abs_diff5, v_mthresh);
                    uint16x8_t motion_cond6 = vcgtq_s16(abs_diff6, v_mthresh);

                    motion1 = vandq_u16(vandq_u16(motion_cond4, motion_cond5), motion_cond6);
                    motion  = vorrq_u16(motion, motion1);
                }
                else
                {
                    motion = vdupq_n_u16(1);
                }

                uint16x8_t motion_check = vorrq_u16(motion, v_exhaustive_check);
                motion_check = vcgtq_u16(motion_check, mask_vec);

                int16x8_t cur_up2_vec   = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)cur + up_2)));
                int16x8_t cur_down2_vec = vreinterpretq_s16_u16(vmovl_u8(vld1_u8((uint8_t*)cur + down_2)));

                switch (spatial_metric)
                {
                    case 0:
                    {
                        uint16x8_t cond_c32_detect_min = vcltq_s16(vabsq_s16(vsubq_s16(cur_vec, cur_down2_vec)), v_c32detect_min);
                        uint16x8_t cond_c32_detect_max = vcgtq_s16(vabsq_s16(vsubq_s16(cur_vec, cur_down1_vec)), v_c32detect_max);
                        uint16x8_t s_metric_0_vec = vandq_u16(cond_c32_detect_min, cond_c32_detect_max);

                        mask_vec = vandq_u16(s_metric_0_vec, motion_check);
                        mask_vec = vandq_u16(mask_vec, condition);
                        mask_vec = vandq_u16(mask_vec, v_one);

                        vst1_u8(&mask[x], vmovn_u16(mask_vec));
                        break;
                    }
                    case 1:
                    {
                        int16x8_t s_metric_1_diff1 = vsubq_s16(cur_up1_vec, cur_vec);
                        int16x8_t s_metric_1_diff2 = vsubq_s16(cur_down1_vec, cur_vec);
                        int16x8_t s_metric_1_mul   = vmulq_s16(s_metric_1_diff1, s_metric_1_diff2);
                        uint16x8_t s_metric_1_vec  = vcgtq_s16(s_metric_1_mul, v_athresh_squared);

                        mask_vec = vandq_u16(s_metric_1_vec, motion_check);
                        mask_vec = vandq_u16(mask_vec, condition);
                        mask_vec = vandq_u16(mask_vec, v_one);

                        vst1_u8(&mask[x], vmovn_u16(mask_vec));
                        break;
                    }
                    case 2:
                    {
                        int16x8_t combing1 = vabsq_s16(
                            vsubq_s16(vaddq_s16(vaddq_s16(cur_up2_vec, vmulq_s16(cur_vec, v_four)), cur_down2_vec),
                                      vmulq_s16(vaddq_s16(cur_up1_vec, cur_down1_vec), v_three)));
                        uint16x8_t s_metric_2_vec = vcgtq_s16(combing1, v_athresh6);

                        mask_vec = vandq_u16(s_metric_2_vec, motion_check);
                        mask_vec = vandq_u16(mask_vec, condition);
                        mask_vec = vandq_u16(mask_vec, v_one);

                        vst1_u8(&mask[x], vmovn_u16(mask_vec));
                        break;
                    }
                }
            }
            // The mask stores above are indexed by x, so only the three
            // row pointers advance here.
            cur  += 8;
            prev += 8;
            next += 8;
        }
    }
}
#endif
#else
static void FUNC(detect_combed_segment)(hb_filter_private_t *pv,
                                        int segment_start, int segment_stop)
{
    // A mishmash of various comb detection tricks
    // picked up from neuron2's Decomb plugin for
    // AviSynth and tritical's IsCombedT and
    // IsCombedTIVTC plugins.

    // Comb scoring algorithm
    const int spatial_metric  = pv->spatial_metric;
    const int mthresh         = pv->motion_threshold;
    const int athresh         = pv->spatial_threshold;
    const int athresh_squared = pv->spatial_threshold_squared;
    const int athresh6        = pv->spatial_threshold6;

    // One pass for Y
    const int stride_prev = pv->ref[0]->plane[0].stride / pv->bps;
    const int stride_cur  = pv->ref[1]->plane[0].stride / pv->bps;
    const int stride_next = pv->ref[2]->plane[0].stride / pv->bps;
    const int width       = pv->ref[0]->plane[0].width;
    const int height      = pv->ref[0]->plane[0].height;
    const int mask_stride = pv->mask->plane[0].stride;

    // Comb detection has to start at y = 2 and end at
    // y = height - 2, because it needs to examine
    // 2 pixels above and 2 below the current pixel.
    if (segment_start < 2)
    {
        segment_start = 2;
    }
    if (segment_stop > height - 2)
    {
        segment_stop = height - 2;
    }

    // These are just to make the buffer locations easier to read.
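    // Worked example for the spatial metrics used below (illustration, not
    // from the original source): for a perfectly combed column alternating
    // a, b, a, b, a at rows y-2 .. y+2,
    //   metric 1: (cur[up_1] - cur[0]) * (cur[down_1] - cur[0]) = (b - a)^2,
    //             compared against athresh_squared;
    //   metric 2: |a + 4a + a - 3(b + b)| = 6 * |a - b|, i.e. a vertical
    //             [1 -3 4 -3 1] kernel whose gain of 6 on combed input is
    //             presumably why the threshold carries the "6" suffix.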
    const int up_1_prev   = -1 * stride_prev;
    const int down_1_prev =      stride_prev;
    const int up_2        = -2 * stride_cur;
    const int up_1        = -1 * stride_cur;
    const int down_1      =      stride_cur;
    const int down_2      =  2 * stride_cur;
    const int up_1_next   = -1 * stride_next;
    const int down_1_next =      stride_next;

    for (int y = segment_start; y < segment_stop; y++)
    {
        // We need to examine a column of 5 pixels
        // in the prev, cur, and next frames.
        const pixel *prev = &((const pixel *)pv->ref[0]->plane[0].data)[y * stride_prev];
        const pixel *cur  = &((const pixel *)pv->ref[1]->plane[0].data)[y * stride_cur];
        const pixel *next = &((const pixel *)pv->ref[2]->plane[0].data)[y * stride_next];
        uint8_t *mask = &pv->mask->plane[0].data[y * mask_stride];

        memset(mask, 0, mask_stride);

        for (int x = 0; x < width; x++)
        {
            const int up_diff   = cur[0] - cur[up_1];
            const int down_diff = cur[0] - cur[down_1];

            if ((up_diff >  athresh && down_diff >  athresh) ||
                (up_diff < -athresh && down_diff < -athresh))
            {
                // The pixel above and below are different,
                // and they change in the same "direction" too.
                int motion = 0;
                if (mthresh > 0)
                {
                    // Make sure there's sufficient motion between frame t-1 and frame t+1.
                    if (abs(prev[0]     - cur[0]            ) > mthresh &&
                        abs(cur[up_1]   - next[up_1_next]   ) > mthresh &&
                        abs(cur[down_1] - next[down_1_next] ) > mthresh)
                    {
                        motion++;
                    }
                    if (abs(next[0]            - cur[0]     ) > mthresh &&
                        abs(prev[up_1_prev]    - cur[up_1]  ) > mthresh &&
                        abs(prev[down_1_prev]  - cur[down_1]) > mthresh)
                    {
                        motion++;
                    }
                }
                else
                {
                    // User doesn't want to check for motion,
                    // so move on to the spatial check.
                    motion = 1;
                }

                // If motion, or we can't measure motion yet...
                if (motion || pv->force_exaustive_check)
                {
                    // That means it's time for the spatial check.
                    // We've got several options here.
                    if (spatial_metric == 0)
                    {
                        // Simple 32detect style comb detection.
                        if ((abs(cur[0] - cur[down_2]) < pv->comb32detect_min) &&
                            (abs(cur[0] - cur[down_1]) > pv->comb32detect_max))
                        {
                            mask[0] = 1;
                        }
                    }
                    else if (spatial_metric == 1)
                    {
                        // This, for comparison, is what IsCombed uses.
                        // It's better, but still noise sensitive.
                        const int combing = (cur[up_1]   - cur[0]) *
                                            (cur[down_1] - cur[0]);

                        if (combing > athresh_squared)
                        {
                            mask[0] = 1;
                        }
                    }
                    else if (spatial_metric == 2)
                    {
                        // Tritical's noise-resistant combing scorer.
                        // The check is done on a bob+blur convolution.
                        const int combing = abs(cur[up_2] + (4 * cur[0]) + cur[down_2] -
                                                (3 * (cur[up_1] + cur[down_1])));

                        // If the frame is sufficiently combed,
                        // then mark it down on the mask as 1.
                        if (combing > athresh6)
                        {
                            mask[0] = 1;
                        }
                    }
                }
            }
            cur++;
            prev++;
            next++;
            mask++;
        }
    }
}
#endif

#undef pixel
#undef FUNC
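// Instantiation sketch (an assumption for illustration only; the exact
// include path and ordering live in the including translation unit, not in
// this template). The usual pattern for such templates is to define
// BIT_DEPTH before each inclusion so both pixel-width variants get emitted:
//
//   #define BIT_DEPTH 8
//   #include "comb_detect_template.c"   // emits the *_8 functions
//   #undef  BIT_DEPTH
//
//   #define BIT_DEPTH 16
//   #include "comb_detect_template.c"   // emits the *_16 functions
//   #undef  BIT_DEPTH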