OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/residual_echo_detector.h" | 11 #include "webrtc/modules/audio_processing/residual_echo_detector.h" |
12 | 12 |
13 #include <algorithm> | 13 #include <algorithm> |
14 #include <numeric> | 14 #include <numeric> |
15 | 15 |
16 #include "webrtc/modules/audio_processing/audio_buffer.h" | 16 #include "webrtc/modules/audio_processing/audio_buffer.h" |
17 #include "webrtc/system_wrappers/include/metrics.h" | 17 #include "webrtc/system_wrappers/include/metrics.h" |
18 | 18 |
19 namespace { | 19 namespace { |
20 | 20 |
21 float Power(rtc::ArrayView<const float> input) { | 21 float Power(rtc::ArrayView<const float> input) { |
22 return std::inner_product(input.begin(), input.end(), input.begin(), 0.f); | 22 return std::inner_product(input.begin(), input.end(), input.begin(), 0.f); |
23 } | 23 } |
24 | 24 |
25 constexpr size_t kLookbackFrames = 650; | 25 constexpr size_t kLookbackFrames = 650; |
26 // TODO(ivoc): Verify the size of this buffer. | 26 // TODO(ivoc): Verify the size of this buffer. |
27 constexpr size_t kRenderBufferSize = 30; | 27 constexpr size_t kRenderBufferSize = 30; |
28 constexpr float kAlpha = 0.001f; | 28 constexpr float kAlpha = 0.001f; |
| 29 // 10 seconds of data, updated every 10 ms. |
| 30 constexpr size_t kAggregationBufferSize = 10 * 100; |
29 | 31 |
30 } // namespace | 32 } // namespace |
31 | 33 |
32 namespace webrtc { | 34 namespace webrtc { |
33 | 35 |
34 ResidualEchoDetector::ResidualEchoDetector() | 36 ResidualEchoDetector::ResidualEchoDetector() |
35 : render_buffer_(kRenderBufferSize), | 37 : render_buffer_(kRenderBufferSize), |
36 render_power_(kLookbackFrames), | 38 render_power_(kLookbackFrames), |
37 render_power_mean_(kLookbackFrames), | 39 render_power_mean_(kLookbackFrames), |
38 render_power_std_dev_(kLookbackFrames), | 40 render_power_std_dev_(kLookbackFrames), |
39 covariances_(kLookbackFrames){}; | 41 covariances_(kLookbackFrames), |
| 42 recent_likelihood_max_(kAggregationBufferSize) {} |
40 | 43 |
41 ResidualEchoDetector::~ResidualEchoDetector() = default; | 44 ResidualEchoDetector::~ResidualEchoDetector() = default; |
42 | 45 |
43 void ResidualEchoDetector::AnalyzeRenderAudio( | 46 void ResidualEchoDetector::AnalyzeRenderAudio( |
44 rtc::ArrayView<const float> render_audio) { | 47 rtc::ArrayView<const float> render_audio) { |
45 if (render_buffer_.Size() == 0) { | 48 if (render_buffer_.Size() == 0) { |
46 frames_since_zero_buffer_size_ = 0; | 49 frames_since_zero_buffer_size_ = 0; |
47 } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { | 50 } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { |
48 // This can happen in a few cases: at the start of a call, due to a glitch | 51 // This can happen in a few cases: at the start of a call, due to a glitch |
49 // or due to clock drift. The excess capture value will be ignored. | 52 // or due to clock drift. The excess capture value will be ignored. |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 render_power_std_dev_[read_index]); | 103 render_power_std_dev_[read_index]); |
101 echo_likelihood_ = std::max( | 104 echo_likelihood_ = std::max( |
102 echo_likelihood_, covariances_[delay].normalized_cross_correlation()); | 105 echo_likelihood_, covariances_[delay].normalized_cross_correlation()); |
103 } | 106 } |
104 reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; | 107 reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; |
105 echo_likelihood_ *= reliability_; | 108 echo_likelihood_ *= reliability_; |
106 int echo_percentage = static_cast<int>(echo_likelihood_ * 100); | 109 int echo_percentage = static_cast<int>(echo_likelihood_ * 100); |
107 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", | 110 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", |
108 echo_percentage, 0, 100, 100 /* number of bins */); | 111 echo_percentage, 0, 100, 100 /* number of bins */); |
109 | 112 |
| 113 // Update the buffer of recent likelihood values. |
| 114 recent_likelihood_max_.Update(echo_likelihood_); |
| 115 |
110 // Update the next insertion index. | 116 // Update the next insertion index. |
111 ++next_insertion_index_; | 117 ++next_insertion_index_; |
112 next_insertion_index_ %= kLookbackFrames; | 118 next_insertion_index_ %= kLookbackFrames; |
113 } | 119 } |
114 | 120 |
115 void ResidualEchoDetector::Initialize() { | 121 void ResidualEchoDetector::Initialize() { |
116 render_buffer_.Clear(); | 122 render_buffer_.Clear(); |
117 std::fill(render_power_.begin(), render_power_.end(), 0.f); | 123 std::fill(render_power_.begin(), render_power_.end(), 0.f); |
118 std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); | 124 std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); |
119 std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); | 125 std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); |
120 render_statistics_.Clear(); | 126 render_statistics_.Clear(); |
121 capture_statistics_.Clear(); | 127 capture_statistics_.Clear(); |
| 128 recent_likelihood_max_.Clear(); |
122 for (auto& cov : covariances_) { | 129 for (auto& cov : covariances_) { |
123 cov.Clear(); | 130 cov.Clear(); |
124 } | 131 } |
125 echo_likelihood_ = 0.f; | 132 echo_likelihood_ = 0.f; |
126 next_insertion_index_ = 0; | 133 next_insertion_index_ = 0; |
127 reliability_ = 0.f; | 134 reliability_ = 0.f; |
128 } | 135 } |
129 | 136 |
130 void ResidualEchoDetector::PackRenderAudioBuffer( | 137 void ResidualEchoDetector::PackRenderAudioBuffer( |
131 AudioBuffer* audio, | 138 AudioBuffer* audio, |
132 std::vector<float>* packed_buffer) { | 139 std::vector<float>* packed_buffer) { |
133 RTC_DCHECK_GE(160, audio->num_frames_per_band()); | 140 RTC_DCHECK_GE(160, audio->num_frames_per_band()); |
134 | 141 |
135 packed_buffer->clear(); | 142 packed_buffer->clear(); |
136 packed_buffer->insert(packed_buffer->end(), | 143 packed_buffer->insert(packed_buffer->end(), |
137 audio->split_bands_const_f(0)[kBand0To8kHz], | 144 audio->split_bands_const_f(0)[kBand0To8kHz], |
138 (audio->split_bands_const_f(0)[kBand0To8kHz] + | 145 (audio->split_bands_const_f(0)[kBand0To8kHz] + |
139 audio->num_frames_per_band())); | 146 audio->num_frames_per_band())); |
140 } | 147 } |
141 | 148 |
142 } // namespace webrtc | 149 } // namespace webrtc |
OLD | NEW |