Index: webrtc/modules/video_processing/util/denoiser_filter_neon.cc |
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc |
index 2920305f71bd23a70927e3639dace7fc26d20baa..195b985b98fe953ade1e9fc72bbe40d94980e060 100644 |
--- a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc |
+++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc |
@@ -75,20 +75,6 @@ void DenoiserFilterNEON::CopyMem16x16(const uint8_t* src, |
} |
} |
-void DenoiserFilterNEON::CopyMem8x8(const uint8_t* src, |
- int src_stride, |
- uint8_t* dst, |
- int dst_stride) { |
- uint8x8_t vtmp; |
- |
- for (int r = 0; r < 8; r++) { |
- vtmp = vld1_u8(src); |
- vst1_u8(dst, vtmp); |
- src += src_stride; |
- dst += dst_stride; |
- } |
-} |
- |
uint32_t DenoiserFilterNEON::Variance16x8(const uint8_t* a, |
int a_stride, |
const uint8_t* b, |
@@ -106,8 +92,7 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, |
const uint8_t* sig, |
int sig_stride, |
uint8_t motion_magnitude, |
- int increase_denoising, |
- bool denoise_always) { |
+ int increase_denoising) { |
// If motion_magnitude is small, making the denoiser more aggressive by |
// increasing the adjustment for each level, level1 adjustment is |
// increased, the deltas stay the same. |
@@ -190,92 +175,13 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, |
} |
// Too much adjustments => copy block. |
- { |
- int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
- vget_low_s64(v_sum_diff_total)); |
- int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
- if (denoise_always) |
- sum_diff_thresh = INT_MAX; |
- else if (increase_denoising) |
- sum_diff_thresh = kSumDiffThresholdHigh; |
- else |
- sum_diff_thresh = kSumDiffThreshold; |
- if (sum_diff > sum_diff_thresh) { |
- // Before returning to copy the block (i.e., apply no denoising), |
- // checK if we can still apply some (weaker) temporal filtering to |
- // this block, that would otherwise not be denoised at all. Simplest |
- // is to apply an additional adjustment to running_avg_y to bring it |
- // closer to sig. The adjustment is capped by a maximum delta, and |
- // chosen such that in most cases the resulting sum_diff will be |
- // within the accceptable range given by sum_diff_thresh. |
- |
- // The delta is set by the excess of absolute pixel diff over the |
- // threshold. |
- int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; |
- // Only apply the adjustment for max delta up to 3. |
- if (delta < 4) { |
- const uint8x16_t k_delta = vmovq_n_u8(delta); |
- sig -= sig_stride * 16; |
- mc_running_avg_y -= mc_running_avg_y_stride * 16; |
- running_avg_y -= running_avg_y_stride * 16; |
- for (int r = 0; r < 16; ++r) { |
- uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); |
- const uint8x16_t v_sig = vld1q_u8(sig); |
- const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); |
- |
- // Calculate absolute difference and sign masks. |
- const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); |
- const uint8x16_t v_diff_pos_mask = |
- vcltq_u8(v_sig, v_mc_running_avg_y); |
- const uint8x16_t v_diff_neg_mask = |
- vcgtq_u8(v_sig, v_mc_running_avg_y); |
- // Clamp absolute difference to delta to get the adjustment. |
- const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); |
- |
- const uint8x16_t v_pos_adjustment = |
- vandq_u8(v_diff_pos_mask, v_abs_adjustment); |
- const uint8x16_t v_neg_adjustment = |
- vandq_u8(v_diff_neg_mask, v_abs_adjustment); |
- |
- v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); |
- v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); |
- |
- // Store results. |
- vst1q_u8(running_avg_y, v_running_avg_y); |
- |
- { |
- const int8x16_t v_sum_diff = |
- vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), |
- vreinterpretq_s8_u8(v_pos_adjustment)); |
- |
- const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); |
- const int32x4_t fedc_ba98_7654_3210 = |
- vpaddlq_s16(fe_dc_ba_98_76_54_32_10); |
- const int64x2_t fedcba98_76543210 = |
- vpaddlq_s32(fedc_ba98_7654_3210); |
- |
- v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); |
- } |
- // Update pointers for next iteration. |
- sig += sig_stride; |
- mc_running_avg_y += mc_running_avg_y_stride; |
- running_avg_y += running_avg_y_stride; |
- } |
- { |
- // Update the sum of all pixel differences of this MB. |
- x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
- vget_low_s64(v_sum_diff_total)); |
- sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
- |
- if (sum_diff > sum_diff_thresh) { |
- return COPY_BLOCK; |
- } |
- } |
- } else { |
- return COPY_BLOCK; |
- } |
- } |
- } |
+ int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), |
+ vget_low_s64(v_sum_diff_total)); |
+ int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); |
+ sum_diff_thresh = |
+ increase_denoising ? kSumDiffThresholdHigh : kSumDiffThreshold; |
+ if (sum_diff > sum_diff_thresh) |
+ return COPY_BLOCK; |
// Tell above level that block was filtered. |
running_avg_y -= running_avg_y_stride * 16; |