webrtc/modules/audio_coding/codecs/opus/opus_interface.c - Issue 1415173005: Prevent Opus DTX from generating intermittent noise during silence

Side by Side Diff: webrtc/modules/audio_coding/codecs/opus/opus_interface.c

Issue 1415173005: Prevent Opus DTX from generating intermittent noise during silence (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: new memory treatment and a test Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_coding/codecs/opus/opus_inst.h ('K') | « webrtc/modules/audio_coding/codecs/opus/opus_inst.h ('k') | webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc » ('j') | webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 11 matching lines...) Expand all Loading...
22 * side, we must allow for packets of that size. NetEq is currently limited	22 * side, we must allow for packets of that size. NetEq is currently limited

23 * to 60 ms on the receive side. */	23 * to 60 ms on the receive side. */

24 kWebRtcOpusMaxDecodeFrameSizeMs = 120,	24 kWebRtcOpusMaxDecodeFrameSizeMs = 120,

25	25

26 /* Maximum sample count per channel is 48 kHz * maximum frame size in	26 /* Maximum sample count per channel is 48 kHz * maximum frame size in

27 * milliseconds. */	27 * milliseconds. */

28 kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,	28 kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,

29	29

30 /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */	30 /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */

31 kWebRtcOpusDefaultFrameSize = 960,	31 kWebRtcOpusDefaultFrameSize = 960,

	32

	33 // Maximum number of consecutive zeros, beyond or equal to which DTX can fail.

	34 kZeroBreakCount = 157,

32 };	35 };

33	36

34 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,	37 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,

35 int32_t channels,	38 int32_t channels,

36 int32_t application) {	39 int32_t application) {

37 OpusEncInst* state;	40 if (inst == NULL)
	kwiberg-webrtc 2015/10/29 16:35:21:
	  if (!inst)
	minyue-webrtc 2015/10/30 14:06:46:
	  > if (!inst)
	  Done.
38 if (inst != NULL) {	41 return -1;

39 state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));

40 if (state) {

41 int opus_app;

42 switch (application) {

43 case 0: {

44 opus_app = OPUS_APPLICATION_VOIP;

45 break;

46 }

47 case 1: {

48 opus_app = OPUS_APPLICATION_AUDIO;

49 break;

50 }

51 default: {

52 free(state);

53 return -1;

54 }

55 }

56	42

57 int error;	43 OpusEncInst* state = (OpusEncInst*)calloc(1, sizeof(OpusEncInst));
	kwiberg-webrtc 2015/10/29 16:35:21:
	  You don't need to cast. C allows implicit conversion from void* to any pointer type.
	minyue-webrtc 2015/10/30 14:06:46:
	  > You don't need to cast. C allows implicit conversion from void* to any pointer
	  > type.
	  Done.
58 state->encoder = opus_encoder_create(48000, channels, opus_app,	44 if (!state)

59 &error);	45 return -1;

60 state->in_dtx_mode = 0;	46

61 if (error == OPUS_OK && state->encoder != NULL) {	47 // Allocate zero counters.

62 *inst = state;	48 state->zero_counts = (size_t*)calloc(channels, sizeof(size_t));
	kwiberg-webrtc 2015/10/29 16:35:21:
	  No need to cast.
	minyue-webrtc 2015/10/30 14:06:46:
	  > No need to cast.
	  Done.
63 return 0;	49 if (!state->zero_counts) {

64 }	50 free(state);

65 free(state);	51 return -1;
	kwiberg-webrtc 2015/10/29 16:35:21:
	  Don't try to handle malloc failures. Just assert if you feel you have to do anything.
	minyue-webrtc 2015/10/30 14:06:46:
	  > Don't try to handle malloc failures. Just assert if you feel you have to do
	  > anything.
	  Done.
	52 }

	53

	54 int opus_app;

	55 switch (application) {

	56 case 0: {

	57 opus_app = OPUS_APPLICATION_VOIP;

	58 break;

	59 }

	60 case 1: {

	61 opus_app = OPUS_APPLICATION_AUDIO;

	62 break;

	63 }

	64 default: {

	65 WebRtcOpus_EncoderFree(state);
	minyue-webrtc 2015/10/30 14:06:46:
	  I found that the switch() {} can be placed before state gets allocated, to avoid line 65.
	66 return -1;

66 }	67 }

67 }	68 }

68 return -1;	69

	70 int error;

	71 state->encoder = opus_encoder_create(48000, channels, opus_app,

	72 &error);

	73 if (error != OPUS_OK || state->encoder == NULL) {
	kwiberg-webrtc 2015/10/29 16:35:21:
	  !state->encoder
	minyue-webrtc 2015/10/30 14:06:46:
	  > !state->encoder
	  Done.
	74 WebRtcOpus_EncoderFree(state);

	75 return -1;

	76 }

	77

	78 state->in_dtx_mode = 0;

	79 state->channels = channels;

	80

	81 *inst = state;

	82 return 0;

69 }	83 }

70	84

71 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {	85 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {

72 if (inst) {	86 if (inst) {

73 opus_encoder_destroy(inst->encoder);	87 opus_encoder_destroy(inst->encoder);

	88 free(inst->zero_counts);

74 free(inst);	89 free(inst);

75 return 0;	90 return 0;

76 } else {	91 } else {

77 return -1;	92 return -1;

78 }	93 }

79 }	94 }

80	95

81 int WebRtcOpus_Encode(OpusEncInst* inst,	96 int WebRtcOpus_Encode(OpusEncInst* inst,

82 const int16_t* audio_in,	97 const int16_t* audio_in,

83 size_t samples,	98 size_t samples,

84 size_t length_encoded_buffer,	99 size_t length_encoded_buffer,

85 uint8_t* encoded) {	100 uint8_t* encoded) {

86 int res;	101 int res;

	102 int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];

87	103

88 if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {	104 if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {

89 return -1;	105 return -1;

90 }	106 }

91	107

	108 const int channels = inst->channels;

	109 int16_t* pointer = buffer;

	110 // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount|

	111 // samples.

	112 memcpy(pointer, audio_in, samples * channels * sizeof(int16_t));

	113 if (inst->in_dtx_mode) {

	114 for (size_t i = 0; i < samples; ++i) {

	115 for (int c = 0; c < channels; ++c, ++pointer) {

	116 if (*pointer == 0) {

	117 ++inst->zero_counts[c];

	118 if (inst->zero_counts[c] == kZeroBreakCount) {

	119 *pointer = 1;

	120 inst->zero_counts[c] = 0;

	121 }

	122 } else {

	123 inst->zero_counts[c] = 0;

	124 }

	125 }

	126 }

	127 }
	kwiberg-webrtc 2015/10/29 16:35:21:
	  It would've been easier to read this loop if you'd used
	  buffer[i * channels + c] instead of pointer.

	  Also, you pay the cost of copying whether you need it or not. If long
	  runs of zeros aren't common, this can get expensive. You could copy on
	  demand instead. Before the loop, do

	    const int16_t* input = audio_in;
	    int16_t* writable_input = NULL;

	  Then read from input[i * channels + c], and at the one place where you
	  write, do

	    if (!writable_input) {
	      memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
	      input = writable_input = buffer;
	    }
	    writable_input[i * channels + c] = 1;

	minyue-webrtc 2015/10/30 14:06:46:
	  > It would've been easier to read this loop if you'd used
	  > buffer[i * channels + c] instead of pointer.
	  >
	  > Also, you pay the cost of copying whether you need it or not. If long
	  > runs of zeros aren't common, this can get expensive. You could copy on
	  > demand instead. Before the loop, do
	  >
	  >   const int16_t* input = audio_in;
	  >   int16_t* writable_input = NULL;
	  >
	  > Then read from input[i * channels + c], and at the one place where you
	  > write, do
	  >
	  >   if (!writable_input) {
	  >     memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
	  >     input = writable_input = buffer;
	  >   }
	  >   writable_input[i * channels + c] = 1;

	  Yes, I agree that "input[i * channels + c]" is easier to read, and I have
	  changed it. But will the compiler make "i * channels + c" as efficient as
	  "++"?

	  Your idea of avoiding memcpy is nice and I have made a change. It is
	  slightly different from yours, just for maybe a little bit better
	  readability.

	kwiberg-webrtc 2015/11/01 02:01:55:
	  > Yes, I agree that "input[i * channels + c]" is easier to read, and I have
	  > changed it. But will the compiler make "i * channels + c" as efficient as
	  > "++"?

	  Yes. I tried these two:

	    void f1(int a, int b) {
	      for (int i = 0; i < a; ++i) {
	        for (int j = 0; j < b; ++j) {
	          g(i * b + j);
	          h(i * b + j);
	        }
	      }
	    }

	    void f2(int a, int b) {
	      int ij = 0;
	      for (int i = 0; i < a; ++i) {
	        for (int j = 0; j < b; ++j, ++ij) {
	          g(ij);
	          h(ij);
	        }
	      }
	    }

	  clang -O2 didn't generate identical code for them, but close enough as to
	  make no difference. Modern compilers know to make this optimization on
	  their own.
	128

92 res = opus_encode(inst->encoder,	129 res = opus_encode(inst->encoder,

93 (const opus_int16*)audio_in,	130 buffer,

94 (int)samples,	131 (int)samples,

95 encoded,	132 encoded,

96 (opus_int32)length_encoded_buffer);	133 (opus_int32)length_encoded_buffer);

97	134

98 if (res == 1) {	135 if (res == 1) {

99 // Indicates DTX since the packet has nothing but a header. In principle,	136 // Indicates DTX since the packet has nothing but a header. In principle,

100 // there is no need to send this packet. However, we do transmit the first	137 // there is no need to send this packet. However, we do transmit the first

101 // occurrence to let the decoder know that the encoder enters DTX mode.	138 // occurrence to let the decoder know that the encoder enters DTX mode.

102 if (inst->in_dtx_mode) {	139 if (inst->in_dtx_mode) {

103 return 0;	140 return 0;

(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
452 return 0;	489 return 0;

453 }	490 }

454	491

455 for (n = 0; n < channels; n++) {	492 for (n = 0; n < channels; n++) {

456 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))	493 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))

457 return 1;	494 return 1;

458 }	495 }

459	496

460 return 0;	497 return 0;

461 }	498 }

OLD	NEW