/* decomb_template.c Copyright (c) 2003-2025 HandBrake Team This file is part of the HandBrake source code Homepage: . It may be used under the terms of the GNU General Public License v2. For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html */ #if BIT_DEPTH > 8 # define pixel uint16_t # define FUNC(name) name##_##16 #else # define pixel uint8_t # define FUNC(name) name##_##8 #endif #if defined (__aarch64__) && !defined(__APPLE__) #include #endif #include "handbrake/eedi2.h" static void FUNC(init_crop_table)(void **crop_table_out, const int max_value) { const int central_part_size = max_value + 1; const int table_size = central_part_size + 2048; pixel *crop_table = calloc(table_size, sizeof(pixel)); for (int i = 0; i < central_part_size; i++) { crop_table[i + 1024] = i; } for (int i = central_part_size + 1024; i < table_size; i++) { crop_table[i] = max_value; } *crop_table_out = crop_table; } static inline int FUNC(cubic_interpolate_pixel)(const pixel *crop_table, int y0, int y1, int y2, int y3) { // From http://www.neuron2.net/library/cubicinterp.html int result = (y0 * -3) + (y1 * 23) + (y2 * 23) + (y3 * -3); result = crop_table[(result / 40) + 1024]; return result; } static inline void FUNC(cubic_interpolate_line)(pixel *dst, const pixel *crop_table, const pixel *cur, const int width, const int height, const int stride, const int y) { for (int x = 0; x < width; x++) { int a, b, c, d; a = b = c = d = 0; if (y >= 3) { // Normal top a = cur[-3*stride]; b = cur[-stride]; } else if (y == 2 || y == 1) { // There's only one sample above this pixel, use it twice. a = cur[-stride]; b = cur[-stride]; } else if (y == 0) { // No samples above, triple up on the one below. a = cur[+stride]; b = cur[+stride]; } if (y <= (height - 4)) { // Normal bottom c = cur[+stride]; d = cur[3*stride]; } else if (y == (height - 3) || y == (height - 2)) { // There's only one sample below, use it twice. c = cur[+stride]; d = cur[+stride]; } else if (y == height - 1) { // No samples below, triple up on the one above. c = cur[-stride]; d = cur[-stride]; } dst[0] = FUNC(cubic_interpolate_pixel)(crop_table, a, b, c, d); dst++; cur++; } } #if defined (__aarch64__) && !defined(__APPLE__) #if BIT_DEPTH > 8 static void FUNC(blend_filter_line)(const filter_param_t *filter, const pixel *crop_table, pixel *dst, const pixel *cur, const int width, const int height, const int stride, const int y) { int up1, up2, down1, down2; if (y > 1 && y < (height - 2)) { up1 = -1 * stride; up2 = -2 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 0) { up1 = up2 = 0; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 1) { up1 = up2 = -1 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == (height - 2)) { up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 1 * stride; } else if (y == (height - 1)) { up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 0; } else { hb_error("Invalid value y %d height %d", y, height); return; } int32x4_t tap0 = vdupq_n_s32(filter->tap[0]); int32x4_t tap1 = vdupq_n_s32(filter->tap[1]); int32x4_t tap2 = vdupq_n_s32(filter->tap[2]); int32x4_t tap3 = vdupq_n_s32(filter->tap[3]); int32x4_t tap4 = vdupq_n_s32(filter->tap[4]); int32x4_t filter_norm_vec = vdupq_n_s32(-filter->normalize); int32x4_t offset = vdupq_n_s32(1024); for (int x = 0; x < width; x += 4) { uint32_t cr_table_vec[4]; int32x4_t up2_pixels = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + x + up2))); int32x4_t up1_pixels = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + x + up1))); int32x4_t current_pixels = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + x ))); int32x4_t down1_pixels = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + x + down1))); int32x4_t down2_pixels = vreinterpretq_s32_u32(vmovl_u16(vld1_u16(cur + x + down2))); int32x4_t result = vmulq_s32(up2_pixels, tap0); result = vmlaq_s32(result, up1_pixels, tap1); result = vmlaq_s32(result, current_pixels, tap2); result = vmlaq_s32(result, down1_pixels, tap3); result = vmlaq_s32(result, down2_pixels, tap4); result = vshrq_n_s32(result, 3); uint32x4_t result_u32 = vreinterpretq_u32_s32(vaddq_s32(result, offset)); vst1q_u32(cr_table_vec, result_u32); dst[x+0] = crop_table[cr_table_vec[0]]; dst[x+1] = crop_table[cr_table_vec[1]]; dst[x+2] = crop_table[cr_table_vec[2]]; dst[x+3] = crop_table[cr_table_vec[3]]; } } #else static void FUNC(blend_filter_line)(const filter_param_t *filter, const pixel *crop_table, pixel *dst, const pixel *cur, const int width, const int height, const int stride, const int y) { int up1, up2, down1, down2; if (y > 1 && y < (height - 2)) { up1 = -1 * stride; up2 = -2 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 0) { up1 = up2 = 0; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 1) { up1 = up2 = -1 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == (height - 2)) { up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 1 * stride; } else if (y == (height - 1)) { up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 0; } else { hb_error("Invalid value y %d height %d", y, height); return; } int16x8_t tap0 = vdupq_n_s16(filter->tap[0]); int16x8_t tap1 = vdupq_n_s16(filter->tap[1]); int16x8_t tap2 = vdupq_n_s16(filter->tap[2]); int16x8_t tap3 = vdupq_n_s16(filter->tap[3]); int16x8_t tap4 = vdupq_n_s16(filter->tap[4]); int16x8_t filter_norm_vec = vdupq_n_s16(-filter->normalize); int16x8_t offset = vdupq_n_s16(1024); for (int x = 0; x < width; x += 8) { uint16_t cr_table_vec[8]; int16x8_t up2_pixels = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(cur + x + up2))); int16x8_t up1_pixels = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(cur + x + up1))); int16x8_t current_pixels = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(cur + x ))); int16x8_t down1_pixels = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(cur + x + down1))); int16x8_t down2_pixels = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(cur + x + down2))); int16x8_t result = vmulq_s16(up2_pixels, tap0); result = vmlaq_s16(result, up1_pixels, tap1); result = vmlaq_s16(result, current_pixels, tap2); result = vmlaq_s16(result, down1_pixels, tap3); result = vmlaq_s16(result, down2_pixels, tap4); result = vshrq_n_s16(result, 3); uint16x8_t result_u16 = vreinterpretq_u16_s16(vaddq_s16(result, offset)); vst1q_u16(cr_table_vec, result_u16); dst[x+0] = crop_table[cr_table_vec[0]]; dst[x+1] = crop_table[cr_table_vec[1]]; dst[x+2] = crop_table[cr_table_vec[2]]; dst[x+3] = crop_table[cr_table_vec[3]]; dst[x+4] = crop_table[cr_table_vec[4]]; dst[x+5] = crop_table[cr_table_vec[5]]; dst[x+6] = crop_table[cr_table_vec[6]]; dst[x+7] = crop_table[cr_table_vec[7]]; } } #endif #else static inline int FUNC(blend_filter_pixel)(const filter_param_t *filter, const pixel *crop_table, const int up2, const int up1, const int current, const int down1, const int down2) { // Low-pass 5-tap filter int result = 0; result += up2 * filter->tap[0]; result += up1 * filter->tap[1]; result += current * filter->tap[2]; result += down1 * filter->tap[3]; result += down2 * filter->tap[4]; result >>= filter->normalize; result = crop_table[result + 1024]; return result; } static void FUNC(blend_filter_line)(const filter_param_t *filter, const pixel *crop_table, pixel *dst, const pixel *cur, const int width, const int height, const int stride, const int y) { int up1, up2, down1, down2; if (y > 1 && y < (height - 2)) { up1 = -1 * stride; up2 = -2 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 0) { // First line, so A and B don't exist. up1 = up2 = 0; down1 = 1 * stride; down2 = 2 * stride; } else if (y == 1) { // Second line, no A. up1 = up2 = -1 * stride; down1 = 1 * stride; down2 = 2 * stride; } else if (y == (height - 2)) { // Second to last line, no E. up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 1 * stride; } else if (y == (height -1)) { // Last line, no D or E. up1 = -1 * stride; up2 = -2 * stride; down1 = down2 = 0; } else { hb_error("Invalid value y %d height %d", y, height); return; } for (int x = 0; x < width; x++) { // Low-pass 5-tap filter dst[0] = FUNC(blend_filter_pixel)(filter, crop_table, cur[up2], cur[up1], cur[0], cur[down1], cur[down2]); dst++; cur++; } } #endif /// This function calls all the eedi2 filters in sequence for a given plane. /// It outputs the final interpolated image to pv->eedi_full[DST2PF]. static void FUNC(eedi2_interpolate_plane)(hb_filter_private_t *pv, int plane) { // We need all these pointers. No, seriously. // I swear. It's not a joke. They're used. // All nine of them. pixel *mskp = (pixel *)pv->eedi_half[MSKPF]->plane[plane].data; pixel *srcp = (pixel *)pv->eedi_half[SRCPF]->plane[plane].data; pixel *tmpp = (pixel *)pv->eedi_half[TMPPF]->plane[plane].data; pixel *dstp = (pixel *)pv->eedi_half[DSTPF]->plane[plane].data; pixel *dst2p = (pixel *)pv->eedi_full[DST2PF]->plane[plane].data; pixel *tmp2p2 = (pixel *)pv->eedi_full[TMP2PF2]->plane[plane].data; pixel *msk2p = (pixel *)pv->eedi_full[MSK2PF]->plane[plane].data; pixel *tmp2p = (pixel *)pv->eedi_full[TMP2PF]->plane[plane].data; pixel *dst2mp = (pixel *)pv->eedi_full[DST2MPF]->plane[plane].data; int *cx2 = pv->cx2; int *cy2 = pv->cy2; int *cxy = pv->cxy; int *tmpc = pv->tmpc; const int pitch = pv->eedi_full[0]->plane[plane].stride / pv->bps; const int height = pv->eedi_full[0]->plane[plane].height; const int width = pv->eedi_full[0]->plane[plane].width; const int half_height = pv->eedi_half[0]->plane[plane].height; // edge mask FUNC(eedi2_build_edge_mask)(mskp, pitch, srcp, pitch, pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold, half_height, width, pv->depth); FUNC(eedi2_erode_edge_mask)(mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width, pv->depth); FUNC(eedi2_dilate_edge_mask)(tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width, pv->depth); FUNC(eedi2_erode_edge_mask)(mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width, pv->depth); FUNC(eedi2_remove_small_gaps)(tmpp, pitch, mskp, pitch, half_height, width, pv->depth); // direction mask FUNC(eedi2_calc_directions)(plane, mskp, pitch, srcp, pitch, tmpp, pitch, pv->maximum_search_distance, pv->noise_threshold, half_height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_filter_dir_map)(mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_expand_dir_map)(mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_filter_map)(mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width, pv->depth); // upscale 2x vertically FUNC(eedi2_upscale_by_2)(srcp, dst2p, half_height, pitch); FUNC(eedi2_upscale_by_2)(dstp, tmp2p2, half_height, pitch); FUNC(eedi2_upscale_by_2)(mskp, msk2p, half_height, pitch); // upscale the direction mask FUNC(eedi2_mark_directions_2x)(msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_filter_dir_map_2x)(msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_expand_dir_map_2x)(msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_fill_gaps_2x)(msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width, pv->depth); FUNC(eedi2_fill_gaps_2x)(msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width, pv->depth); // interpolate a full-size plane FUNC(eedi2_interpolate_lattice)( plane, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff, pv->noise_threshold, height, width, pv->depth, pv->eedi_limlut); if (pv->post_processing == 1 || pv->post_processing == 3) { // make sure the edge directions are consistent FUNC(eedi2_bit_blit)( tmp2p2, pitch, tmp2p, pitch, width, height); FUNC(eedi2_filter_dir_map_2x)(msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_expand_dir_map_2x)(msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); FUNC(eedi2_post_process)(tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width, pv->depth, pv->eedi_limlut); } if (pv->post_processing == 2 || pv->post_processing == 3) { // filter junctions and corners FUNC(eedi2_gaussian_blur1)( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width ); FUNC(eedi2_calc_derivatives)(srcp, pitch, half_height, width, cx2, cy2, cxy, pv->depth); FUNC(eedi2_gaussian_blur_sqrt2)( cx2, tmpc, cx2, pitch, half_height, width); FUNC(eedi2_gaussian_blur_sqrt2)( cy2, tmpc, cy2, pitch, half_height, width); FUNC(eedi2_gaussian_blur_sqrt2)( cxy, tmpc, cxy, pitch, half_height, width); FUNC(eedi2_post_process_corner)(cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff, pv->depth); } } static void FUNC(eedi2_filter_work)(void *thread_args_v) { eedi2_thread_arg_t *thread_args = thread_args_v; hb_filter_private_t *pv = thread_args->pv; int plane = thread_args->arg.segment; //Process plane FUNC(eedi2_interpolate_plane)(pv, plane); } /// Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF] /// and then runs eedi2_filter_thread for each plane. static void FUNC(eedi2_planer)(hb_filter_private_t *pv) { // Copy the first field from the source to a half-height frame. for (int pp = 0; pp < 3; pp++) { const int src_pitch = pv->ref[1]->plane[pp].stride / pv->bps; const int dst_pitch = pv->eedi_half[SRCPF]->plane[pp].stride / pv->bps; const int height = pv->ref[1]->plane[pp].height; const int start_line = !pv->tff; FUNC(eedi2_fill_half_height_buffer_plane)(&((pixel *)pv->ref[1]->plane[pp].data)[src_pitch * start_line], (pixel *)pv->eedi_half[SRCPF]->plane[pp].data, src_pitch, dst_pitch, height); } // Now that all data is ready for our threads, fire them off // and wait for their completion. taskset_cycle(&pv->eedi2_taskset); } /// EDDI: Edge Directed Deinterlacing Interpolation /// Checks 4 different slopes to see if there is more similarity along a diagonal /// than there was vertically. If a diagonal is more similar, then it indicates /// an edge, so interpolate along that instead of a vertical line, using either /// linear or cubic interpolation depending on mode. #if BIT_DEPTH > 8 #define YADIF_CHECK(j) \ { \ int score = ABS(cur[stride_cur_p - 1 + j] - cur[stride_cur_n - 1 - j]) + \ ABS(cur[stride_cur_p + j] - cur[stride_cur_n - j]) + \ ABS(cur[stride_cur_p + 1 + j] - cur[stride_cur_n + 1 - j]); \ if (score < spatial_score) \ { \ spatial_score = score; \ if ((pv->mode & MODE_DECOMB_CUBIC) && !vertical_edge) \ { \ switch (j) \ { \ case -1: \ spatial_pred = cubic_interpolate_pixel_16(crop_table, \ cur[-3 * stride_cur - 3], \ cur[-stride_cur - 1], \ cur[+stride_cur + 1], \ cur[3 * stride_cur + 3]); \ break; \ case -2: \ spatial_pred = cubic_interpolate_pixel_16(crop_table, \ ((cur[-3 * stride_cur - 4] + cur[-stride_cur - 4]) / 2), \ cur[-stride_cur - 2], \ cur[+stride_cur + 2], \ ((cur[3 * stride_cur + 4] + cur[stride_cur + 4]) / 2)); \ break; \ case 1: \ spatial_pred = cubic_interpolate_pixel_16(crop_table, \ cur[-3 * stride_cur +3], \ cur[-stride_cur + 1], \ cur[+stride_cur - 1], \ cur[3 * stride_cur - 3] ); \ break; \ case 2: \ spatial_pred = cubic_interpolate_pixel_16(crop_table, \ ((cur[-3 * stride_cur + 4] + cur[-stride_cur + 4]) / 2), \ cur[-stride_cur + 2], \ cur[+stride_cur - 2], \ ((cur[3 * stride_cur - 4] + cur[stride_cur - 4]) / 2)); \ break; \ } \ } \ else \ { \ spatial_pred = (cur[stride_cur_p + j] + cur[stride_cur_n - j]) >> 1; \ } #else #define YADIF_CHECK(j) \ { \ int score = ABS(cur[stride_cur_p - 1 + j] - cur[stride_cur_n - 1 - j]) + \ ABS(cur[stride_cur_p + j] - cur[stride_cur_n - j]) + \ ABS(cur[stride_cur_p + 1 + j] - cur[stride_cur_n + 1 - j]); \ if (score < spatial_score) \ { \ spatial_score = score; \ if ((pv->mode & MODE_DECOMB_CUBIC) && !vertical_edge) \ { \ switch (j) \ { \ case -1: \ spatial_pred = cubic_interpolate_pixel_8(crop_table, \ cur[-3 * stride_cur - 3], \ cur[-stride_cur - 1], \ cur[+stride_cur + 1], \ cur[3 * stride_cur + 3]); \ break; \ case -2: \ spatial_pred = cubic_interpolate_pixel_8(crop_table, \ ((cur[-3 * stride_cur - 4] + cur[-stride_cur - 4]) / 2), \ cur[-stride_cur - 2], \ cur[+stride_cur + 2], \ ((cur[3 * stride_cur + 4] + cur[stride_cur + 4]) / 2)); \ break; \ case 1: \ spatial_pred = cubic_interpolate_pixel_8(crop_table, \ cur[-3 * stride_cur +3], \ cur[-stride_cur + 1], \ cur[+stride_cur - 1], \ cur[3 * stride_cur - 3] ); \ break; \ case 2: \ spatial_pred = cubic_interpolate_pixel_8(crop_table, \ ((cur[-3 * stride_cur + 4] + cur[-stride_cur + 4]) / 2), \ cur[-stride_cur + 2], \ cur[+stride_cur - 2], \ ((cur[3 * stride_cur - 4] + cur[stride_cur - 4]) / 2)); \ break; \ } \ } \ else \ { \ spatial_pred = (cur[stride_cur_p + j] + cur[stride_cur_n - j]) >> 1; \ } #endif static void FUNC(yadif_filter_line)(const hb_filter_private_t *pv, pixel *dst, const pixel *prev, const pixel *cur, const pixel *next, const int stride_dst, const int stride_prev, const int stride_cur, const int stride_next, const int plane, const int width, const int height, const int parity, const int y) { const pixel *crop_table = (const pixel *)pv->crop_table; // While prev and next point to the previous and next frames, // prev2 and next2 will shift depending on the parity, usually 1. // They are the previous and next fields, the fields temporally adjacent // to the other field in the current frame--the one not being filtered. const pixel *prev2 = parity ? prev : cur; const int stride_prev2 = parity ? stride_prev : stride_cur; const pixel *next2 = parity ? cur : next; const int stride_next2 = parity ? stride_cur : stride_next; // Invert the stride for the first and last line const int stride_prev_p = y ? -stride_prev : stride_prev; const int stride_prev_n = y + 1 < height ? stride_prev : -stride_prev; const int stride_cur_p = y ? -stride_cur : stride_cur; const int stride_cur_n = y + 1 < height ? stride_cur : -stride_cur; const int stride_next_p = y ? -stride_next : stride_next; const int stride_next_n = y + 1 < height ? stride_next : -stride_next; const int eedi2_mode = (pv->mode & MODE_DECOMB_EEDI2); // We can replace spatial_pred with this interpolation const pixel *eedi2_guess = eedi2_mode ? &((pixel *)pv->eedi_full[DST2PF]->plane[plane].data)[y * stride_dst] : NULL; // Decomb's cubic interpolation can only function when there are // three samples above and below, so regress to yadif's traditional // two-tap interpolation when filtering at the top and bottom edges. const int vertical_edge = (y < 3) || (y > (height - 4)) ? 1 : 0; // YADIF_CHECK requires a margin to avoid invalid memory access. // In MODE_DECOMB_CUBIC, margin needed is 2 + ABS(param). // Else, the margin needed is 1 + ABS(param). const int margin = pv->mode & MODE_DECOMB_CUBIC ? 3 : 2; for (int x = 0; x < width; x++) { // Pixel above const int c = cur[stride_cur_p]; // Temporal average: the current location in the adjacent fields const int d = (prev2[0] + next2[0]) >> 1; // Pixel below const int e = cur[stride_cur_n]; // How the current pixel changes between the adjacent fields const int temporal_diff0 = ABS(prev2[0] - next2[0]); // The average of how much the pixels above and below change from the frame before to now. const int temporal_diff1 = (ABS(prev[stride_prev_p] - c) + ABS(prev[stride_prev_n] - e)) >> 1; // The average of how much the pixels above and below change from now to the next frame. const int temporal_diff2 = (ABS(next[stride_next_p] - c) + ABS(next[stride_next_n] - e)) >> 1; // For the actual difference, use the largest of the previous average diffs. int diff = MAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); int spatial_pred; if (eedi2_mode) { // Who needs yadif's spatial predictions when we can have EEDI2's? spatial_pred = eedi2_guess[0]; eedi2_guess++; } else // Yadif spatial interpolation { // Spatial pred is either a bilinear or cubic vertical interpolation. if ((pv->mode & MODE_DECOMB_CUBIC) && !vertical_edge) { spatial_pred = FUNC(cubic_interpolate_pixel)(crop_table, cur[-3 * stride_cur], cur[-stride_cur], cur[+stride_cur], cur[3 * stride_cur]); } else { spatial_pred = (c + e) >> 1; } if (x > margin && x < width - (margin + 1)) { // SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. int spatial_score = ABS(cur[stride_cur_p-1] - cur[stride_cur_n-1]) + ABS(c - e) + ABS(cur[stride_cur_p+1] - cur[stride_cur_n+1]) - 1; YADIF_CHECK(-1) YADIF_CHECK(-2) }} }} YADIF_CHECK( 1) YADIF_CHECK( 2) }} }} } } // Temporally adjust the spatial prediction by // comparing against lines in the adjacent fields. if (!vertical_edge) { const int b = (prev2[-2 * stride_prev2] + next2[-2 * stride_next2]) >> 1; const int f = (prev2[+2 * stride_prev2] + next2[+2 * stride_next2]) >> 1; // Find the median value const int max = MAX3(d-e, d-c, MIN(b-c, f-e)); const int min = MIN3(d-e, d-c, MAX(b-c, f-e)); diff = MAX3(diff, min, -max); } if (spatial_pred > d + diff) { spatial_pred = d + diff; } else if (spatial_pred < d - diff) { spatial_pred = d - diff; } dst[0] = spatial_pred; dst++; cur++; prev++; next++; prev2++; next2++; } } #undef YADIF_CHECK static void FUNC(yadif_decomb_filter_work)(void *thread_args_v) { yadif_thread_arg_t *thread_args = thread_args_v; hb_filter_private_t *pv = thread_args->pv; const int segment = thread_args->arg.segment; yadif_arguments_t *yadif_work = &pv->yadif_arguments[segment]; // Process all three planes, but only this segment of it. const int mode = pv->yadif_arguments[segment].mode; const int tff = yadif_work->tff; const int parity = yadif_work->parity; const pixel *crop_table = (const pixel *)pv->crop_table; hb_buffer_t *dst = yadif_work->dst; for (int pp = 0; pp < 3; pp++) { const int width = dst->plane[pp].width; const int height = dst->plane[pp].height; const int size = dst->plane[pp].width * pv->bps; const int stride_dst = dst->plane[pp].stride / pv->bps; const int stride_prev = pv->ref[0]->plane[pp].stride / pv->bps; const int stride_cur = pv->ref[1]->plane[pp].stride / pv->bps; const int stride_next = pv->ref[2]->plane[pp].stride / pv->bps; const int segment_start = thread_args->segment_start[pp]; const int segment_stop = segment_start + thread_args->segment_height[pp]; // Filter parity lines int start = parity ? (segment_start + 1) & ~1 : segment_start | 1; pixel *dst2 = &((pixel *)dst->plane[pp].data)[start * stride_dst]; const pixel *prev = &((const pixel *)pv->ref[0]->plane[pp].data)[start * stride_prev]; const pixel *cur = &((const pixel *)pv->ref[1]->plane[pp].data)[start * stride_cur]; const pixel *next = &((const pixel *)pv->ref[2]->plane[pp].data)[start * stride_next]; if (mode == MODE_DECOMB_BLEND) { filter_param_t filter; filter.tap[0] = -1; filter.tap[1] = 2; filter.tap[2] = 6; filter.tap[3] = 2; filter.tap[4] = -1; filter.normalize = 3; // These will be useful if we ever do temporal blending. for (int yy = start; yy < segment_stop; yy += 2) { // This line gets blend filtered, not yadif filtered. FUNC(blend_filter_line)(&filter, crop_table, dst2, cur, width, height, stride_cur, yy); dst2 += stride_dst * 2; cur += stride_cur * 2; } } else if (mode == MODE_DECOMB_CUBIC) { for (int yy = start; yy < segment_stop; yy += 2) { // Just apply vertical cubic interpolation FUNC(cubic_interpolate_line)(dst2, crop_table, cur, width, height, stride_cur, yy); dst2 += stride_dst * 2; cur += stride_cur * 2; } } else if (mode & MODE_DECOMB_YADIF) { for (int yy = start; yy < segment_stop; yy += 2) { FUNC(yadif_filter_line)(pv, dst2, prev, cur, next, stride_dst, stride_prev, stride_cur, stride_next, pp, width, height, parity ^ tff, yy); dst2 += stride_dst * 2; prev += stride_prev * 2; cur += stride_cur * 2; next += stride_next * 2; } } // Copy unfiltered lines start = !parity ? (segment_start + 1) & ~1 : segment_start | 1; dst2 = &((pixel *)dst->plane[pp].data)[start * stride_dst]; cur = &((const pixel *)pv->ref[1]->plane[pp].data)[start * stride_cur]; for (int yy = start; yy < segment_stop; yy += 2) { memcpy(dst2, cur, size); dst2 += stride_dst * 2; cur += stride_cur * 2; } } } static void FUNC(filter)(hb_filter_private_t *pv, hb_buffer_t *dst, const int parity, const int tff) { int is_combed = HB_COMB_HEAVY; int mode = 0; if (pv->mode & MODE_DECOMB_SELECTIVE) { is_combed = pv->ref[1]->s.combed; } // Pick a mode based on the comb detect state and selected decomb modes if ((pv->mode & MODE_DECOMB_BLEND) && is_combed == HB_COMB_LIGHT) { mode = MODE_DECOMB_BLEND; } else if (is_combed != HB_COMB_NONE) { mode = pv->mode & ~MODE_DECOMB_SELECTIVE; } if (mode == MODE_DECOMB_BLEND) { pv->blended++; } else if (mode != 0) { pv->deinterlaced++; } else { pv->unfiltered++; } pv->frames++; if (mode & MODE_DECOMB_EEDI2) { // Generate an EEDI2 interpolation FUNC(eedi2_planer)(pv); } if (mode != 0) { if ((mode & MODE_DECOMB_EEDI2) && !(mode & MODE_DECOMB_YADIF)) { // Just pass through the EEDI2 interpolation for (int pp = 0; pp < 3; pp++) { const pixel *ref = (const pixel *)pv->eedi_full[DST2PF]->plane[pp].data; const int ref_stride = pv->eedi_full[DST2PF]->plane[pp].stride / pv->bps; pixel *dest = (pixel *)dst->plane[pp].data; const int size = dst->plane[pp].width * pv->bps; const int height = dst->plane[pp].height; const int stride = dst->plane[pp].stride / pv->bps; for (int yy = 0; yy < height; yy++) { memcpy(dest, ref, size); dest += stride; ref += ref_stride; } } } else { for (int segment = 0; segment < pv->cpu_count; segment++) { // Setup the work for this plane. pv->yadif_arguments[segment].parity = parity; pv->yadif_arguments[segment].tff = tff; pv->yadif_arguments[segment].dst = dst; pv->yadif_arguments[segment].mode = mode; } // Allow the taskset threads to make one pass over the data. taskset_cycle(&pv->yadif_taskset); // Entire frame is now deinterlaced. } } else { // Just passing through pv->yadif_arguments[0].mode = mode; // 0 hb_buffer_copy(dst, pv->ref[1]); } } #undef pixel #undef FUNC