Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Unified Diff: webrtc/modules/video_processing/util/denoiser_filter_neon.cc

Issue 1871853003: External VNR speed improvement and more. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: More clean-up. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/video_processing/util/denoiser_filter_neon.cc
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
index 2920305f71bd23a70927e3639dace7fc26d20baa..195b985b98fe953ade1e9fc72bbe40d94980e060 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
+++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
@@ -75,20 +75,6 @@ void DenoiserFilterNEON::CopyMem16x16(const uint8_t* src,
}
}
-void DenoiserFilterNEON::CopyMem8x8(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride) {
- uint8x8_t vtmp;
-
- for (int r = 0; r < 8; r++) {
- vtmp = vld1_u8(src);
- vst1_u8(dst, vtmp);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
uint32_t DenoiserFilterNEON::Variance16x8(const uint8_t* a,
int a_stride,
const uint8_t* b,
@@ -106,8 +92,7 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y,
const uint8_t* sig,
int sig_stride,
uint8_t motion_magnitude,
- int increase_denoising,
- bool denoise_always) {
+ int increase_denoising) {
// If motion_magnitude is small, making the denoiser more aggressive by
// increasing the adjustment for each level, level1 adjustment is
// increased, the deltas stay the same.
@@ -190,92 +175,13 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y,
}
// Too much adjustments => copy block.
- {
- int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
- vget_low_s64(v_sum_diff_total));
- int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
- if (denoise_always)
- sum_diff_thresh = INT_MAX;
- else if (increase_denoising)
- sum_diff_thresh = kSumDiffThresholdHigh;
- else
- sum_diff_thresh = kSumDiffThreshold;
- if (sum_diff > sum_diff_thresh) {
- // Before returning to copy the block (i.e., apply no denoising),
- // checK if we can still apply some (weaker) temporal filtering to
- // this block, that would otherwise not be denoised at all. Simplest
- // is to apply an additional adjustment to running_avg_y to bring it
- // closer to sig. The adjustment is capped by a maximum delta, and
- // chosen such that in most cases the resulting sum_diff will be
- // within the accceptable range given by sum_diff_thresh.
-
- // The delta is set by the excess of absolute pixel diff over the
- // threshold.
- int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1;
- // Only apply the adjustment for max delta up to 3.
- if (delta < 4) {
- const uint8x16_t k_delta = vmovq_n_u8(delta);
- sig -= sig_stride * 16;
- mc_running_avg_y -= mc_running_avg_y_stride * 16;
- running_avg_y -= running_avg_y_stride * 16;
- for (int r = 0; r < 16; ++r) {
- uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y);
- const uint8x16_t v_sig = vld1q_u8(sig);
- const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);
-
- // Calculate absolute difference and sign masks.
- const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y);
- const uint8x16_t v_diff_pos_mask =
- vcltq_u8(v_sig, v_mc_running_avg_y);
- const uint8x16_t v_diff_neg_mask =
- vcgtq_u8(v_sig, v_mc_running_avg_y);
- // Clamp absolute difference to delta to get the adjustment.
- const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta));
-
- const uint8x16_t v_pos_adjustment =
- vandq_u8(v_diff_pos_mask, v_abs_adjustment);
- const uint8x16_t v_neg_adjustment =
- vandq_u8(v_diff_neg_mask, v_abs_adjustment);
-
- v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment);
- v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment);
-
- // Store results.
- vst1q_u8(running_avg_y, v_running_avg_y);
-
- {
- const int8x16_t v_sum_diff =
- vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment),
- vreinterpretq_s8_u8(v_pos_adjustment));
-
- const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff);
- const int32x4_t fedc_ba98_7654_3210 =
- vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
- const int64x2_t fedcba98_76543210 =
- vpaddlq_s32(fedc_ba98_7654_3210);
-
- v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210);
- }
- // Update pointers for next iteration.
- sig += sig_stride;
- mc_running_avg_y += mc_running_avg_y_stride;
- running_avg_y += running_avg_y_stride;
- }
- {
- // Update the sum of all pixel differences of this MB.
- x = vqadd_s64(vget_high_s64(v_sum_diff_total),
- vget_low_s64(v_sum_diff_total));
- sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
-
- if (sum_diff > sum_diff_thresh) {
- return COPY_BLOCK;
- }
- }
- } else {
- return COPY_BLOCK;
- }
- }
- }
+ int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
+ vget_low_s64(v_sum_diff_total));
+ int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
+ sum_diff_thresh =
+ increase_denoising ? kSumDiffThresholdHigh : kSumDiffThreshold;
+ if (sum_diff > sum_diff_thresh)
+ return COPY_BLOCK;
// Tell above level that block was filtered.
running_avg_y -= running_avg_y_stride * 16;

Powered by Google App Engine
This is Rietveld 408576698