Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_mixer/frame_combiner.h" | 11 #include "webrtc/modules/audio_mixer/frame_combiner.h" |
| 12 | 12 |
| 13 #include <algorithm> | 13 #include <algorithm> |
| 14 #include <array> | 14 #include <array> |
| 15 #include <functional> | 15 #include <functional> |
| 16 #include <memory> | 16 #include <memory> |
| 17 | 17 |
| 18 #include "webrtc/audio/utility/audio_frame_operations.h" | 18 #include "webrtc/audio/utility/audio_frame_operations.h" |
| 19 #include "webrtc/base/logging.h" | 19 #include "webrtc/base/logging.h" |
| 20 #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" | 20 #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" |
| 21 #include "webrtc/modules/audio_mixer/audio_mixer_impl.h" | 21 #include "webrtc/modules/audio_mixer/audio_mixer_impl.h" |
| 22 | 22 |
| 23 namespace webrtc { | 23 namespace webrtc { |
| 24 namespace { | 24 namespace { |
| 25 | 25 |
| 26 // Stereo, 48 kHz, 10 ms. | 26 // Stereo, 48 kHz, 10 ms. |
| 27 constexpr int kMaximalFrameSize = 2 * 48 * 10; | 27 constexpr int kMaximalFrameSize = 2 * 48 * 10; |
| 28 | 28 |
| 29 void CombineZeroFrames(AudioFrame* audio_frame_for_mixing) { | 29 void CombineZeroFrames(bool use_limiter, |
| 30 AudioProcessing* limiter, | |
| 31 AudioFrame* audio_frame_for_mixing) { | |
| 30 audio_frame_for_mixing->elapsed_time_ms_ = -1; | 32 audio_frame_for_mixing->elapsed_time_ms_ = -1; |
| 31 AudioFrameOperations::Mute(audio_frame_for_mixing); | 33 AudioFrameOperations::Mute(audio_frame_for_mixing); |
| 34 // The limiter should still process a zero frame to avoid jumps in | |
| 35 // its gain curve. | |
| 36 if (use_limiter) { | |
| 37 RTC_DCHECK(limiter); | |
| 38 // The limiter smoothly increases frames with half gain to full volume. | |
| 39 // Here there's no need to apply half gain, since the frame is zero anyway. | |
|
ivoc
2017/03/27 15:44:55
Here there's no need...
... since the frame is zero...
aleloi
2017/03/28 14:49:05
Done.
ivoc
2017/03/28 16:11:06
Is it?
| |
| 40 limiter->ProcessStream(audio_frame_for_mixing); | |
| 41 } | |
| 32 } | 42 } |
| 33 | 43 |
| 34 void CombineOneFrame(const AudioFrame* input_frame, | 44 void CombineOneFrame(const AudioFrame* input_frame, |
| 45 bool use_limiter, | |
| 46 AudioProcessing* limiter, | |
| 35 AudioFrame* audio_frame_for_mixing) { | 47 AudioFrame* audio_frame_for_mixing) { |
| 36 audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; | 48 audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; |
| 37 audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; | 49 audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; |
| 38 std::copy(input_frame->data_, | 50 std::copy(input_frame->data_, |
| 39 input_frame->data_ + | 51 input_frame->data_ + |
| 40 input_frame->num_channels_ * input_frame->samples_per_channel_, | 52 input_frame->num_channels_ * input_frame->samples_per_channel_, |
| 41 audio_frame_for_mixing->data_); | 53 audio_frame_for_mixing->data_); |
| 54 if (use_limiter) { | |
| 55 AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); | |
| 56 RTC_DCHECK(limiter); | |
| 57 limiter->ProcessStream(audio_frame_for_mixing); | |
| 58 } | |
| 59 } | |
| 60 | |
| 61 // Lower-level helper function called from Combine(...) when there | |
| 62 // are several input frames. | |
| 63 // | |
| 64 // TODO(aleloi): change interface to ArrayView<int16_t> output_frame | |
| 65 // once we have gotten rid of the APM limiter. | |
| 66 // | |
| 67 // Only the 'data' field of output_frame should be modified. The | |
| 68 // rest are used for potentially sending the output to the APM | |
| 69 // limiter. | |
| 70 void CombineMultipleFrames( | |
| 71 const std::vector<rtc::ArrayView<const int16_t>>& input_frames, | |
| 72 bool use_limiter, | |
| 73 AudioProcessing* limiter, | |
| 74 AudioFrame* audio_frame_for_mixing) { | |
| 75 RTC_DCHECK(!input_frames.empty()); | |
| 76 RTC_DCHECK(audio_frame_for_mixing); | |
| 77 | |
| 78 const size_t frame_length = input_frames.front().size(); | |
| 79 for (const auto& frame : input_frames) { | |
| 80 RTC_DCHECK_EQ(frame_length, frame.size()); | |
| 81 } | |
| 82 | |
| 83 // Algorithm: int16 frames are added to a sufficiently large | |
| 84 // statically allocated int32 buffer. For > 2 participants this is | |
| 85 // more efficient than addition in place in the int16 audio | |
| 86 // frame. The audio quality loss due to halving the samples is | |
| 87 // smaller than the loss caused by saturating 16-bit addition in place. | |
| 88 RTC_DCHECK_GE(kMaximalFrameSize, frame_length); | |
| 89 std::array<int32_t, kMaximalFrameSize> add_buffer; | |
| 90 | |
| 91 add_buffer.fill(0); | |
| 92 | |
| 93 for (const auto& frame : input_frames) { | |
| 94 std::transform(frame.begin(), frame.end(), add_buffer.begin(), | |
| 95 add_buffer.begin(), std::plus<int32_t>()); | |
| 96 } | |
| 97 | |
| 98 if (use_limiter) { | |
| 99 // Halve all samples to avoid saturation before limiting. | |
| 100 std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, | |
| 101 audio_frame_for_mixing->data_, [](int32_t a) { | |
| 102 return rtc::saturated_cast<int16_t>(a / 2); | |
| 103 }); | |
| 104 | |
| 105 // Smoothly limit the audio. | |
| 106 RTC_DCHECK(limiter); | |
| 107 const int error = limiter->ProcessStream(audio_frame_for_mixing); | |
| 108 if (error != limiter->kNoError) { | |
| 109 LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; | |
| 110 RTC_NOTREACHED(); | |
| 111 } | |
| 112 | |
| 113 // And now we can safely restore the level. This procedure results in | |
| 114 // some loss of resolution, deemed acceptable. | |
| 115 // | |
| 116 // It's possible to apply the gain in the AGC (with a target level of 0 dBFS | |
| 117 // and compression gain of 6 dB). However, in the transition frame when this | |
| 118 // is enabled (moving from one to two audio sources) it has the potential to | |
| 119 // create discontinuities in the mixed frame. | |
| 120 // | |
| 121 // Instead we double the frame (with addition since left-shifting a | |
| 122 // negative value is undefined). | |
| 123 AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); | |
| 124 } else { | |
| 125 std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, | |
| 126 audio_frame_for_mixing->data_, | |
| 127 [](int32_t a) { return rtc::saturated_cast<int16_t>(a); }); | |
| 128 } | |
| 42 } | 129 } |
| 43 | 130 |
| 44 std::unique_ptr<AudioProcessing> CreateLimiter() { | 131 std::unique_ptr<AudioProcessing> CreateLimiter() { |
| 45 Config config; | 132 Config config; |
| 46 config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); | 133 config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); |
| 47 std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config)); | 134 std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config)); |
| 48 RTC_DCHECK(limiter); | 135 RTC_DCHECK(limiter); |
| 49 | 136 |
| 50 const auto check_no_error = [](int x) { | 137 const auto check_no_error = [](int x) { |
| 51 RTC_DCHECK_EQ(x, AudioProcessing::kNoError); | 138 RTC_DCHECK_EQ(x, AudioProcessing::kNoError); |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 67 | 154 |
| 68 FrameCombiner::FrameCombiner(bool use_apm_limiter) | 155 FrameCombiner::FrameCombiner(bool use_apm_limiter) |
| 69 : use_apm_limiter_(use_apm_limiter), | 156 : use_apm_limiter_(use_apm_limiter), |
| 70 limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {} | 157 limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {} |
| 71 | 158 |
| 72 FrameCombiner::~FrameCombiner() = default; | 159 FrameCombiner::~FrameCombiner() = default; |
| 73 | 160 |
| 74 void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list, | 161 void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list, |
| 75 size_t number_of_channels, | 162 size_t number_of_channels, |
| 76 int sample_rate, | 163 int sample_rate, |
| 164 int number_of_streams, | |
| 77 AudioFrame* audio_frame_for_mixing) const { | 165 AudioFrame* audio_frame_for_mixing) const { |
| 78 RTC_DCHECK(audio_frame_for_mixing); | 166 RTC_DCHECK(audio_frame_for_mixing); |
| 79 const size_t samples_per_channel = static_cast<size_t>( | 167 const size_t samples_per_channel = static_cast<size_t>( |
| 80 (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); | 168 (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); |
| 81 | 169 |
| 82 for (const auto* frame : mix_list) { | 170 for (const auto* frame : mix_list) { |
| 83 RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); | 171 RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); |
| 84 RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); | 172 RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); |
| 85 } | 173 } |
| 86 | 174 |
| 87 // Frames could be both stereo and mono. | 175 // Frames could be both stereo and mono. |
| 88 for (auto* frame : mix_list) { | 176 for (auto* frame : mix_list) { |
| 89 RemixFrame(number_of_channels, frame); | 177 RemixFrame(number_of_channels, frame); |
| 90 } | 178 } |
| 91 | 179 |
| 92 // TODO(aleloi): Issue bugs.webrtc.org/3390. | 180 // TODO(aleloi): Issue bugs.webrtc.org/3390. |
| 93 // Audio frame timestamp. The 'timestamp_' field is set to dummy | 181 // Audio frame timestamp. The 'timestamp_' field is set to dummy |
| 94 // value '0', because it is only supported in the one channel case and | 182 // value '0', because it is only supported in the one channel case and |
| 95 // is then updated in the helper functions. | 183 // is then updated in the helper functions. |
| 96 audio_frame_for_mixing->UpdateFrame( | 184 audio_frame_for_mixing->UpdateFrame( |
| 97 -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, | 185 -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, |
| 98 AudioFrame::kVadUnknown, number_of_channels); | 186 AudioFrame::kVadUnknown, number_of_channels); |
| 99 | 187 |
| 188 const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1; | |
| 189 | |
| 100 if (mix_list.empty()) { | 190 if (mix_list.empty()) { |
| 101 CombineZeroFrames(audio_frame_for_mixing); | 191 CombineZeroFrames(use_limiter_this_round, limiter_.get(), |
| 192 audio_frame_for_mixing); | |
| 102 } else if (mix_list.size() == 1) { | 193 } else if (mix_list.size() == 1) { |
| 103 CombineOneFrame(mix_list.front(), audio_frame_for_mixing); | 194 CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(), |
| 195 audio_frame_for_mixing); | |
| 104 } else { | 196 } else { |
| 105 std::vector<rtc::ArrayView<const int16_t>> input_frames; | 197 std::vector<rtc::ArrayView<const int16_t>> input_frames; |
| 106 for (size_t i = 0; i < mix_list.size(); ++i) { | 198 for (size_t i = 0; i < mix_list.size(); ++i) { |
| 107 input_frames.push_back(rtc::ArrayView<const int16_t>( | 199 input_frames.push_back(rtc::ArrayView<const int16_t>( |
| 108 mix_list[i]->data_, samples_per_channel * number_of_channels)); | 200 mix_list[i]->data_, samples_per_channel * number_of_channels)); |
| 109 } | 201 } |
| 110 CombineMultipleFrames(input_frames, audio_frame_for_mixing); | 202 CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), |
| 111 } | 203 audio_frame_for_mixing); |
| 112 } | |
| 113 | |
| 114 void FrameCombiner::CombineMultipleFrames( | |
| 115 const std::vector<rtc::ArrayView<const int16_t>>& input_frames, | |
| 116 AudioFrame* audio_frame_for_mixing) const { | |
| 117 RTC_DCHECK(!input_frames.empty()); | |
| 118 RTC_DCHECK(audio_frame_for_mixing); | |
| 119 | |
| 120 const size_t frame_length = input_frames.front().size(); | |
| 121 for (const auto& frame : input_frames) { | |
| 122 RTC_DCHECK_EQ(frame_length, frame.size()); | |
| 123 } | |
| 124 | |
| 125 // Algorithm: int16 frames are added to a sufficiently large | |
| 126 // statically allocated int32 buffer. For > 2 participants this is | |
| 127 // more efficient than addition in place in the int16 audio | |
| 128 // frame. The audio quality loss due to halving the samples is | |
| 129 // smaller than the loss caused by saturating 16-bit addition in place. | |
| 130 RTC_DCHECK_GE(kMaximalFrameSize, frame_length); | |
| 131 std::array<int32_t, kMaximalFrameSize> add_buffer; | |
| 132 | |
| 133 add_buffer.fill(0); | |
| 134 | |
| 135 for (const auto& frame : input_frames) { | |
| 136 std::transform(frame.begin(), frame.end(), add_buffer.begin(), | |
| 137 add_buffer.begin(), std::plus<int32_t>()); | |
| 138 } | |
| 139 | |
| 140 if (use_apm_limiter_) { | |
| 141 // Halve all samples to avoid saturation before limiting. | |
| 142 std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, | |
| 143 audio_frame_for_mixing->data_, [](int32_t a) { | |
| 144 return rtc::saturated_cast<int16_t>(a / 2); | |
| 145 }); | |
| 146 | |
| 147 // Smoothly limit the audio. | |
| 148 RTC_DCHECK(limiter_); | |
| 149 const int error = limiter_->ProcessStream(audio_frame_for_mixing); | |
| 150 if (error != limiter_->kNoError) { | |
| 151 LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; | |
| 152 RTC_NOTREACHED(); | |
| 153 } | |
| 154 | |
| 155 // And now we can safely restore the level. This procedure results in | |
| 156 // some loss of resolution, deemed acceptable. | |
| 157 // | |
| 158 // It's possible to apply the gain in the AGC (with a target level of 0 dBFS | |
| 159 // and compression gain of 6 dB). However, in the transition frame when this | |
| 160 // is enabled (moving from one to two audio sources) it has the potential to | |
| 161 // create discontinuities in the mixed frame. | |
| 162 // | |
| 163 // Instead we double the frame (with addition since left-shifting a | |
| 164 // negative value is undefined). | |
| 165 AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); | |
| 166 } else { | |
| 167 std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, | |
| 168 audio_frame_for_mixing->data_, | |
| 169 [](int32_t a) { return rtc::saturated_cast<int16_t>(a); }); | |
| 170 } | 204 } |
| 171 } | 205 } |
| 172 } // namespace webrtc | 206 } // namespace webrtc |
| OLD | NEW |