Index: webrtc/modules/audio_coding/codecs/isac/unittest.cc |
diff --git a/webrtc/modules/audio_coding/codecs/isac/unittest.cc b/webrtc/modules/audio_coding/codecs/isac/unittest.cc |
index a80fd08bcfcb33b8d8eaaa88d6d609386802ed06..d05ffa6e48f646767d709052ef4a2ea9dda3eee5 100644 |
--- a/webrtc/modules/audio_coding/codecs/isac/unittest.cc |
+++ b/webrtc/modules/audio_coding/codecs/isac/unittest.cc |
@@ -24,10 +24,11 @@ namespace webrtc { |
namespace { |
+const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz |
+ |
std::vector<int16_t> LoadSpeechData() { |
webrtc::test::InputAudioFile input_file( |
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm")); |
- static const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz |
std::vector<int16_t> speech_data(kIsacNumberOfSamples); |
input_file.Read(kIsacNumberOfSamples, speech_data.data()); |
return speech_data; |
@@ -41,32 +42,45 @@ IsacBandwidthInfo GetBwInfo(typename T::instance_type* inst) { |
return bi; |
} |
+// Encodes one packet. Returns the packet duration in milliseconds. |
template <typename T> |
-rtc::Buffer EncodePacket(typename T::instance_type* inst, |
- const IsacBandwidthInfo* bi, |
- const int16_t* speech_data, |
- int framesize_ms) { |
- rtc::Buffer output(1000); |
- for (int i = 0;; ++i) { |
+int EncodePacket(typename T::instance_type* inst, |
+ const IsacBandwidthInfo* bi, |
+ const int16_t* speech_data, |
+ rtc::Buffer* output) { |
+ output->SetSize(1000); |
+ for (int duration_ms = 10;; duration_ms += 10) { |
if (bi) |
T::SetBandwidthInfo(inst, bi); |
- int encoded_bytes = T::Encode(inst, speech_data, output.data()); |
- if (i + 1 == framesize_ms / 10) { |
+ int encoded_bytes = T::Encode(inst, speech_data, output->data()); |
+ if (encoded_bytes > 0 || duration_ms >= 60) { |
EXPECT_GT(encoded_bytes, 0); |
- EXPECT_LE(static_cast<size_t>(encoded_bytes), output.size()); |
- output.SetSize(encoded_bytes); |
- return output; |
+ EXPECT_LE(static_cast<size_t>(encoded_bytes), output->size()); |
+ output->SetSize(encoded_bytes); |
+ return duration_ms; |
} |
- EXPECT_EQ(0, encoded_bytes); |
} |
} |
+template <typename T> |
+std::vector<int16_t> DecodePacket(typename T::instance_type* inst, |
+ const rtc::Buffer& encoded) { |
+ std::vector<int16_t> decoded(kIsacNumberOfSamples); |
+ int16_t speech_type; |
+ int nsamples = T::DecodeInternal(inst, encoded.data(), encoded.size(), |
+ &decoded.front(), &speech_type); |
+ EXPECT_GT(nsamples, 0); |
+ EXPECT_LE(static_cast<size_t>(nsamples), decoded.size()); |
+ decoded.resize(nsamples); |
+ return decoded; |
+} |
+ |
class BoundedCapacityChannel final { |
public: |
- BoundedCapacityChannel(int rate_bits_per_second) |
+ BoundedCapacityChannel(int sample_rate_hz, int rate_bits_per_second) |
: current_time_rtp_(0), |
channel_rate_bytes_per_sample_(rate_bits_per_second / |
- (8.0 * kSamplesPerSecond)) {} |
+ (8.0 * sample_rate_hz)) {} |
// Simulate sending the given number of bytes at the given RTP time. Returns |
// the new current RTP time after the sending is done. |
@@ -81,47 +95,6 @@ class BoundedCapacityChannel final { |
// The somewhat strange unit for channel rate, bytes per sample, is because |
// RTP time is measured in samples: |
const double channel_rate_bytes_per_sample_; |
- static const int kSamplesPerSecond = 16000; |
-}; |
- |
-template <typename T, bool adaptive> |
-struct TestParam {}; |
- |
-template <> |
-struct TestParam<IsacFloat, true> { |
- static const int time_to_settle = 200; |
- static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
- return rate_bits_per_second; |
- } |
-}; |
- |
-template <> |
-struct TestParam<IsacFix, true> { |
- static const int time_to_settle = 350; |
- static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
- // For some reason, IsacFix fails to adapt to the channel's actual |
- // bandwidth. Instead, it settles on a few hundred packets at 10kbit/s, |
- // then a few hundred at 5kbit/s, then a few hundred at 10kbit/s, and so |
- // on. The 200 packets starting at 350 are in the middle of the first |
- // 10kbit/s run. |
- return 10000; |
- } |
-}; |
- |
-template <> |
-struct TestParam<IsacFloat, false> { |
- static const int time_to_settle = 0; |
- static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
- return 32000; |
- } |
-}; |
- |
-template <> |
-struct TestParam<IsacFix, false> { |
- static const int time_to_settle = 0; |
- static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
- return 16000; |
- } |
}; |
// Test that the iSAC encoder produces identical output whether or not we use a |
@@ -129,143 +102,153 @@ struct TestParam<IsacFix, false> { |
// communicate BW estimation info explicitly. |
template <typename T, bool adaptive> |
void TestGetSetBandwidthInfo(const int16_t* speech_data, |
- int rate_bits_per_second) { |
- using Param = TestParam<T, adaptive>; |
- const int framesize_ms = adaptive ? 60 : 30; |
+ int rate_bits_per_second, |
+ int sample_rate_hz, |
+ int frame_size_ms) { |
+ const int bit_rate = 32000; |
// Conjoined encoder/decoder pair: |
typename T::instance_type* encdec; |
ASSERT_EQ(0, T::Create(&encdec)); |
ASSERT_EQ(0, T::EncoderInit(encdec, adaptive ? 0 : 1)); |
ASSERT_EQ(0, T::DecoderInit(encdec)); |
+ ASSERT_EQ(0, T::SetEncSampRate(encdec, sample_rate_hz)); |
+ if (adaptive) |
+ ASSERT_EQ(0, T::ControlBwe(encdec, bit_rate, frame_size_ms, false)); |
+ else |
+ ASSERT_EQ(0, T::Control(encdec, bit_rate, frame_size_ms)); |
// Disjoint encoder/decoder pair: |
typename T::instance_type* enc; |
ASSERT_EQ(0, T::Create(&enc)); |
ASSERT_EQ(0, T::EncoderInit(enc, adaptive ? 0 : 1)); |
+ ASSERT_EQ(0, T::SetEncSampRate(enc, sample_rate_hz)); |
+ if (adaptive) |
+ ASSERT_EQ(0, T::ControlBwe(enc, bit_rate, frame_size_ms, false)); |
+ else |
+ ASSERT_EQ(0, T::Control(enc, bit_rate, frame_size_ms)); |
typename T::instance_type* dec; |
ASSERT_EQ(0, T::Create(&dec)); |
ASSERT_EQ(0, T::DecoderInit(dec)); |
+ T::SetInitialBweBottleneck(dec, bit_rate); |
+ T::SetEncSampRateInDecoder(dec, sample_rate_hz); |
// 0. Get initial BW info from decoder. |
auto bi = GetBwInfo<T>(dec); |
- BoundedCapacityChannel channel1(rate_bits_per_second), |
- channel2(rate_bits_per_second); |
- std::vector<size_t> packet_sizes; |
- for (int i = 0; i < Param::time_to_settle + 200; ++i) { |
+ BoundedCapacityChannel channel1(sample_rate_hz, rate_bits_per_second), |
+ channel2(sample_rate_hz, rate_bits_per_second); |
+ |
+ int elapsed_time_ms = 0; |
+ for (int i = 0; elapsed_time_ms < 10000; ++i) { |
std::ostringstream ss; |
ss << " i = " << i; |
SCOPED_TRACE(ss.str()); |
- // 1. Encode 6 * 10 ms (adaptive) or 3 * 10 ms (nonadaptive). The separate |
- // encoder is given the BW info before each encode call. |
- auto bitstream1 = |
- EncodePacket<T>(encdec, nullptr, speech_data, framesize_ms); |
- auto bitstream2 = EncodePacket<T>(enc, &bi, speech_data, framesize_ms); |
+ // 1. Encode 3 * 10 ms or 6 * 10 ms. The separate encoder is given the BW |
+ // info before each encode call. |
+ rtc::Buffer bitstream1, bitstream2; |
+ int duration1_ms = |
+ EncodePacket<T>(encdec, nullptr, speech_data, &bitstream1); |
+ int duration2_ms = EncodePacket<T>(enc, &bi, speech_data, &bitstream2); |
+ EXPECT_EQ(duration1_ms, duration2_ms); |
+ if (adaptive) |
+ EXPECT_TRUE(duration1_ms == 30 || duration1_ms == 60); |
+ else |
+ EXPECT_EQ(frame_size_ms, duration1_ms); |
+ ASSERT_EQ(bitstream1.size(), bitstream2.size()); |
EXPECT_EQ(bitstream1, bitstream2); |
- if (i > Param::time_to_settle) |
- packet_sizes.push_back(bitstream1.size()); |
- |
- // 2. Deliver the encoded data to the decoders (but don't actually ask them |
- // to decode it; that's not necessary). Then get new BW info from the |
- // separate decoder. |
- const int samples_per_packet = 16 * framesize_ms; |
- const int send_time = i * samples_per_packet; |
+ |
+ // 2. Deliver the encoded data to the decoders. |
+ const int send_time = elapsed_time_ms * (sample_rate_hz / 1000); |
EXPECT_EQ(0, T::UpdateBwEstimate( |
encdec, bitstream1.data(), bitstream1.size(), i, send_time, |
channel1.Send(send_time, bitstream1.size()))); |
EXPECT_EQ(0, T::UpdateBwEstimate( |
dec, bitstream2.data(), bitstream2.size(), i, send_time, |
channel2.Send(send_time, bitstream2.size()))); |
+ |
+ // 3. Decode, and get new BW info from the separate decoder. |
+ ASSERT_EQ(0, T::SetDecSampRate(encdec, sample_rate_hz)); |
+ ASSERT_EQ(0, T::SetDecSampRate(dec, sample_rate_hz)); |
+ auto decoded1 = DecodePacket<T>(encdec, bitstream1); |
+ auto decoded2 = DecodePacket<T>(dec, bitstream2); |
+ EXPECT_EQ(decoded1, decoded2); |
bi = GetBwInfo<T>(dec); |
+ |
+ elapsed_time_ms += duration1_ms; |
} |
EXPECT_EQ(0, T::Free(encdec)); |
EXPECT_EQ(0, T::Free(enc)); |
EXPECT_EQ(0, T::Free(dec)); |
- |
- // The average send bitrate is close to the channel's capacity. |
- double avg_size = |
- std::accumulate(packet_sizes.begin(), packet_sizes.end(), 0) / |
- static_cast<double>(packet_sizes.size()); |
- double avg_rate_bits_per_second = 8.0 * avg_size / (framesize_ms * 1e-3); |
- double expected_rate_bits_per_second = |
- Param::ExpectedRateBitsPerSecond(rate_bits_per_second); |
- EXPECT_GT(avg_rate_bits_per_second / expected_rate_bits_per_second, 0.95); |
- EXPECT_LT(avg_rate_bits_per_second / expected_rate_bits_per_second, 1.06); |
- |
- // The largest packet isn't that large, and the smallest not that small. |
- size_t min_size = *std::min_element(packet_sizes.begin(), packet_sizes.end()); |
- size_t max_size = *std::max_element(packet_sizes.begin(), packet_sizes.end()); |
- double size_range = max_size - min_size; |
- EXPECT_LE(size_range / avg_size, 0.16); |
} |
-} // namespace |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat12kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 12000); |
-} |
+enum class IsacType { Fix, Float }; |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat15kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 15000); |
+std::ostream& operator<<(std::ostream& os, IsacType t) { |
+ os << (t == IsacType::Fix ? "fix" : "float"); |
+ return os; |
} |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat19kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 19000); |
-} |
+struct IsacTestParam { |
+ IsacType isac_type; |
+ bool adaptive; |
+ int channel_rate_bits_per_second; |
+ int sample_rate_hz; |
+ int frame_size_ms; |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat22kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 22000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix12kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 12000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix15kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 15000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix19kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 19000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix22kAdaptive) { |
- TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 22000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat12k) { |
- TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 12000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat15k) { |
- TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 15000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat19k) { |
- TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 19000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFloat22k) { |
- TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 22000); |
-} |
- |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix12k) { |
- TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 12000); |
-} |
+ friend std::ostream& operator<<(std::ostream& os, const IsacTestParam& itp) { |
+ os << '{' << itp.isac_type << ',' |
+ << (itp.adaptive ? "adaptive" : "nonadaptive") << ',' |
+ << itp.channel_rate_bits_per_second << ',' << itp.sample_rate_hz << ',' |
+ << itp.frame_size_ms << '}'; |
+ return os; |
+ } |
+}; |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix15k) { |
- TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 15000); |
-} |
+class IsacCommonTest : public testing::TestWithParam<IsacTestParam> {}; |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix19k) { |
- TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 19000); |
-} |
+} // namespace |
-TEST(IsacCommonTest, GetSetBandwidthInfoFix22k) { |
- TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 22000); |
-} |
+TEST_P(IsacCommonTest, GetSetBandwidthInfo) { |
+ auto p = GetParam(); |
+ auto test_fun = [p] { |
+ if (p.isac_type == IsacType::Fix) { |
+ if (p.adaptive) |
+ return TestGetSetBandwidthInfo<IsacFix, true>; |
+ else |
+ return TestGetSetBandwidthInfo<IsacFix, false>; |
+ } else { |
+ if (p.adaptive) |
+ return TestGetSetBandwidthInfo<IsacFloat, true>; |
+ else |
+ return TestGetSetBandwidthInfo<IsacFloat, false>; |
+ } |
+ }(); |
+ test_fun(LoadSpeechData().data(), p.channel_rate_bits_per_second, |
+ p.sample_rate_hz, p.frame_size_ms); |
+} |
+ |
+std::vector<IsacTestParam> TestCases() { |
+ static const IsacType types[] = {IsacType::Fix, IsacType::Float}; |
+ static const bool adaptives[] = {true, false}; |
+ static const int channel_rates[] = {12000, 15000, 19000, 22000}; |
+ static const int sample_rates[] = {16000, 32000}; |
+ static const int frame_sizes[] = {30, 60}; |
+ std::vector<IsacTestParam> cases; |
+ for (IsacType type : types) |
+ for (bool adaptive : adaptives) |
+ for (int channel_rate : channel_rates) |
+ for (int sample_rate : sample_rates) |
+ if (!(type == IsacType::Fix && sample_rate == 32000)) |
+ for (int frame_size : frame_sizes) |
+ if (!(sample_rate == 32000 && frame_size == 60)) |
+ cases.push_back( |
+ {type, adaptive, channel_rate, sample_rate, frame_size}); |
+ return cases; |
+} |
+ |
+INSTANTIATE_TEST_CASE_P(, IsacCommonTest, testing::ValuesIn(TestCases())); |
} // namespace webrtc |