| Index: webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
|
| diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
|
| index 614b6c94859b58546e100657f60c8f98e2bb1426..0545a97398ec656956b418d081048957c98b3bb7 100644
|
| --- a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
|
| +++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
|
| @@ -9,7 +9,6 @@
|
| */
|
|
|
| #include <emmintrin.h>
|
| -
|
| #include "webrtc/modules/video_processing/util/denoiser_filter_sse2.h"
|
|
|
| namespace webrtc {
|
| @@ -110,18 +109,6 @@ void DenoiserFilterSSE2::CopyMem16x16(const uint8_t* src,
|
| }
|
| }
|
|
|
| -// TODO(jackychen): Optimize this function using SSE2.
|
| -void DenoiserFilterSSE2::CopyMem8x8(const uint8_t* src,
|
| - int src_stride,
|
| - uint8_t* dst,
|
| - int dst_stride) {
|
| - for (int i = 0; i < 8; i++) {
|
| - memcpy(dst, src, 8);
|
| - src += src_stride;
|
| - dst += dst_stride;
|
| - }
|
| -}
|
| -
|
| uint32_t DenoiserFilterSSE2::Variance16x8(const uint8_t* src,
|
| int src_stride,
|
| const uint8_t* ref,
|
| @@ -139,8 +126,8 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y,
|
| const uint8_t* sig,
|
| int sig_stride,
|
| uint8_t motion_magnitude,
|
| - int increase_denoising,
|
| - bool denoise_always) {
|
| + int increase_denoising) {
|
| + DenoiserDecision decision = FILTER_BLOCK;
|
| unsigned int sum_diff_thresh = 0;
|
| int shift_inc =
|
| (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) ? 1
|
| @@ -210,76 +197,13 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y,
|
| running_avg_y += avg_y_stride;
|
| }
|
|
|
| - {
|
| - // Compute the sum of all pixel differences of this MB.
|
| - unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff);
|
| - if (denoise_always)
|
| - sum_diff_thresh = INT_MAX;
|
| - else if (increase_denoising)
|
| - sum_diff_thresh = kSumDiffThresholdHigh;
|
| - else
|
| - sum_diff_thresh = kSumDiffThreshold;
|
| - if (abs_sum_diff > sum_diff_thresh) {
|
| - // Before returning to copy the block (i.e., apply no denoising),
|
| - // check if we can still apply some (weaker) temporal filtering to
|
| - // this block, that would otherwise not be denoised at all. Simplest
|
| - // is to apply an additional adjustment to running_avg_y to bring it
|
| - // closer to sig. The adjustment is capped by a maximum delta, and
|
| - // chosen such that in most cases the resulting sum_diff will be
|
| - // within the acceptable range given by sum_diff_thresh.
|
| -
|
| - // The delta is set by the excess of absolute pixel diff over the
|
| - // threshold.
|
| - int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
|
| - // Only apply the adjustment for max delta up to 3.
|
| - if (delta < 4) {
|
| - const __m128i k_delta = _mm_set1_epi8(delta);
|
| - sig -= sig_stride * 16;
|
| - mc_running_avg_y -= mc_avg_y_stride * 16;
|
| - running_avg_y -= avg_y_stride * 16;
|
| - for (int r = 0; r < 16; ++r) {
|
| - __m128i v_running_avg_y =
|
| - _mm_loadu_si128(reinterpret_cast<__m128i*>(&running_avg_y[0]));
|
| - // Calculate differences.
|
| - const __m128i v_sig =
|
| - _mm_loadu_si128(reinterpret_cast<const __m128i*>(&sig[0]));
|
| - const __m128i v_mc_running_avg_y =
|
| - _mm_loadu_si128(reinterpret_cast<__m128i*>(&mc_running_avg_y[0]));
|
| - const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
|
| - const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
|
| - // Obtain the sign. FF if diff is negative.
|
| - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
|
| - // Clamp absolute difference to delta to get the adjustment.
|
| - const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
|
| - // Restore the sign and get positive and negative adjustments.
|
| - __m128i padj, nadj;
|
| - padj = _mm_andnot_si128(diff_sign, adj);
|
| - nadj = _mm_and_si128(diff_sign, adj);
|
| - // Calculate filtered value.
|
| - v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj);
|
| - v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj);
|
| - _mm_storeu_si128(reinterpret_cast<__m128i*>(running_avg_y),
|
| - v_running_avg_y);
|
| -
|
| - // Accumulate the adjustments.
|
| - acc_diff = _mm_subs_epi8(acc_diff, padj);
|
| - acc_diff = _mm_adds_epi8(acc_diff, nadj);
|
| -
|
| - // Update pointers for next iteration.
|
| - sig += sig_stride;
|
| - mc_running_avg_y += mc_avg_y_stride;
|
| - running_avg_y += avg_y_stride;
|
| - }
|
| - abs_sum_diff = AbsSumDiff16x1(acc_diff);
|
| - if (abs_sum_diff > sum_diff_thresh) {
|
| - return COPY_BLOCK;
|
| - }
|
| - } else {
|
| - return COPY_BLOCK;
|
| - }
|
| - }
|
| - }
|
| - return FILTER_BLOCK;
|
| + // Compute the sum of all pixel differences of this MB.
|
| + unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff);
|
| + sum_diff_thresh =
|
| + increase_denoising ? kSumDiffThresholdHigh : kSumDiffThreshold;
|
| + if (abs_sum_diff > sum_diff_thresh)
|
| + decision = COPY_BLOCK;
|
| + return decision;
|
| }
|
|
|
| } // namespace webrtc
|
|
|