OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
12 #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 12 #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
13 | 13 |
14 #include <algorithm> | 14 #include <algorithm> |
15 #include <vector> | 15 #include <vector> |
16 | 16 |
17 #include "webrtc/typedefs.h" | 17 #include "webrtc/typedefs.h" |
18 | 18 |
19 namespace webrtc { | 19 namespace webrtc { |
20 | 20 |
21 // This is the interface class for encoders in AudioCoding module. Each codec | 21 // This is the interface class for encoders in AudioCoding module. Each codec |
22 // type must have an implementation of this class. | 22 // type must have an implementation of this class. |
23 class AudioEncoder { | 23 class AudioEncoder { |
24 public: | 24 public: |
25 struct EncodedInfoLeaf { | 25 struct EncodedInfoLeaf { |
26 EncodedInfoLeaf() | 26 size_t encoded_bytes = 0; |
27 : encoded_bytes(0), | 27 uint32_t encoded_timestamp = 0; |
28 encoded_timestamp(0), | 28 int payload_type = 0; |
29 payload_type(0), | 29 bool send_even_if_empty = false; |
30 send_even_if_empty(false), | 30 bool speech = true; |
31 speech(true) {} | |
32 | |
33 size_t encoded_bytes; | |
34 uint32_t encoded_timestamp; | |
35 int payload_type; | |
36 bool send_even_if_empty; | |
37 bool speech; | |
38 }; | 31 }; |
39 | 32 |
40 // This is the main struct for auxiliary encoding information. Each encoded | 33 // This is the main struct for auxiliary encoding information. Each encoded |
41 // packet should be accompanied by one EncodedInfo struct, containing the | 34 // packet should be accompanied by one EncodedInfo struct, containing the |
42 // total number of |encoded_bytes|, the |encoded_timestamp| and the | 35 // total number of |encoded_bytes|, the |encoded_timestamp| and the |
43 // |payload_type|. If the packet contains redundant encodings, the |redundant| | 36 // |payload_type|. If the packet contains redundant encodings, the |redundant| |
44 // vector will be populated with EncodedInfoLeaf structs. Each struct in the | 37 // vector will be populated with EncodedInfoLeaf structs. Each struct in the |
45 // vector represents one encoding; the order of structs in the vector is the | 38 // vector represents one encoding; the order of structs in the vector is the |
46 // same as the order in which the actual payloads are written to the byte | 39 // same as the order in which the actual payloads are written to the byte |
47 // stream. When EncodedInfoLeaf structs are present in the vector, the main | 40 // stream. When EncodedInfoLeaf structs are present in the vector, the main |
48 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the | 41 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the |
49 // vector. | 42 // vector. |
50 struct EncodedInfo : public EncodedInfoLeaf { | 43 struct EncodedInfo : public EncodedInfoLeaf { |
51 EncodedInfo(); | 44 EncodedInfo(); |
52 ~EncodedInfo(); | 45 ~EncodedInfo(); |
53 | 46 |
54 std::vector<EncodedInfoLeaf> redundant; | 47 std::vector<EncodedInfoLeaf> redundant; |
55 }; | 48 }; |
56 | 49 |
57 virtual ~AudioEncoder() {} | 50 virtual ~AudioEncoder() = default; |
58 | 51 |
59 // Accepts one 10 ms block of input audio (i.e., sample_rate_hz() / 100 * | 52 // Returns the maximum number of bytes that can be produced by the encoder |
60 // num_channels() samples). Multi-channel audio must be sample-interleaved. | |
61 // The encoder produces zero or more bytes of output in |encoded| and | |
62 // returns additional encoding information. | |
63 // The caller is responsible for making sure that |max_encoded_bytes| is | |
64 // not smaller than the number of bytes actually produced by the encoder. | |
65 EncodedInfo Encode(uint32_t rtp_timestamp, | |
66 const int16_t* audio, | |
67 size_t num_samples_per_channel, | |
68 size_t max_encoded_bytes, | |
69 uint8_t* encoded); | |
70 | |
71 // Return the input sample rate in Hz and the number of input channels. | |
72 // These are constants set at instantiation time. | |
73 virtual int SampleRateHz() const = 0; | |
74 virtual int NumChannels() const = 0; | |
75 | |
76 // Return the maximum number of bytes that can be produced by the encoder | |
77 // at each Encode() call. The caller can use the return value to determine | 53 // at each Encode() call. The caller can use the return value to determine |
78 // the size of the buffer that needs to be allocated. This value is allowed | 54 // the size of the buffer that needs to be allocated. This value is allowed |
79 // to depend on encoder parameters like bitrate, frame size etc., so if | 55 // to depend on encoder parameters like bitrate, frame size etc., so if |
80 // any of these change, the caller of Encode() is responsible for checking | 56 // any of these change, the caller of Encode() is responsible for checking |
81 // that the buffer is large enough by calling MaxEncodedBytes() again. | 57 // that the buffer is large enough by calling MaxEncodedBytes() again. |
82 virtual size_t MaxEncodedBytes() const = 0; | 58 virtual size_t MaxEncodedBytes() const = 0; |
83 | 59 |
84 // Returns the rate with which the RTP timestamps are updated. By default, | 60 // Returns the input sample rate in Hz and the number of input channels. |
85 // this is the same as sample_rate_hz(). | 61 // These are constants set at instantiation time. |
| 62 virtual int SampleRateHz() const = 0; |
| 63 virtual int NumChannels() const = 0; |
| 64 |
| 65 // Returns the rate at which the RTP timestamps are updated. The default |
| 66 // implementation returns SampleRateHz(). |
86 virtual int RtpTimestampRateHz() const; | 67 virtual int RtpTimestampRateHz() const; |
87 | 68 |
88 // Returns the number of 10 ms frames the encoder will put in the next | 69 // Returns the number of 10 ms frames the encoder will put in the next |
89 // packet. This value may only change when Encode() outputs a packet; i.e., | 70 // packet. This value may only change when Encode() outputs a packet; i.e., |
90 // the encoder may vary the number of 10 ms frames from packet to packet, but | 71 // the encoder may vary the number of 10 ms frames from packet to packet, but |
91 // it must decide the length of the next packet no later than when outputting | 72 // it must decide the length of the next packet no later than when outputting |
92 // the preceding packet. | 73 // the preceding packet. |
93 virtual size_t Num10MsFramesInNextPacket() const = 0; | 74 virtual size_t Num10MsFramesInNextPacket() const = 0; |
94 | 75 |
95 // Returns the maximum value that can be returned by | 76 // Returns the maximum value that can be returned by |
96 // Num10MsFramesInNextPacket(). | 77 // Num10MsFramesInNextPacket(). |
97 virtual size_t Max10MsFramesInAPacket() const = 0; | 78 virtual size_t Max10MsFramesInAPacket() const = 0; |
98 | 79 |
99 // Returns the current target bitrate in bits/s. The value -1 means that the | 80 // Returns the current target bitrate in bits/s. The value -1 means that the |
100 // codec adapts the target automatically, and a current target cannot be | 81 // codec adapts the target automatically, and a current target cannot be |
101 // provided. | 82 // provided. |
102 virtual int GetTargetBitrate() const = 0; | 83 virtual int GetTargetBitrate() const = 0; |
103 | 84 |
104 // Changes the target bitrate. The implementation is free to alter this value, | 85 // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 * |
105 // e.g., if the desired value is outside the valid range. | 86 // NumChannels() samples). Multi-channel audio must be sample-interleaved. |
106 virtual void SetTargetBitrate(int bits_per_second) {} | 87 // The encoder produces zero or more bytes of output in |encoded| and |
| 88 // returns additional encoding information. |
| 89 // The caller is responsible for making sure that |max_encoded_bytes| is |
| 90 // not smaller than the number of bytes actually produced by the encoder. |
| 91 // Encode() checks some preconditions, calls EncodeInternal() which does the |
| 92 // actual work, and then checks some postconditions. |
| 93 EncodedInfo Encode(uint32_t rtp_timestamp, |
| 94 const int16_t* audio, |
| 95 size_t num_samples_per_channel, |
| 96 size_t max_encoded_bytes, |
| 97 uint8_t* encoded); |
107 | 98 |
108 // Tells the implementation what the projected packet loss rate is. The rate | |
109 // is in the range [0.0, 1.0]. This rate is typically used to adjust channel | |
110 // coding efforts, such as FEC. | |
111 virtual void SetProjectedPacketLossRate(double fraction) {} | |
112 | |
113 // This is the encode function that the inherited classes must implement. It | |
114 // is called from Encode in the base class. | |
115 virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp, | 99 virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp, |
116 const int16_t* audio, | 100 const int16_t* audio, |
117 size_t max_encoded_bytes, | 101 size_t max_encoded_bytes, |
118 uint8_t* encoded) = 0; | 102 uint8_t* encoded) = 0; |
119 }; | |
120 | 103 |
121 class AudioEncoderMutable : public AudioEncoder { | 104 // Resets the encoder to its starting state, discarding any input that has |
122 public: | 105 // been fed to the encoder but not yet emitted in a packet. |
123 enum Application { kApplicationSpeech, kApplicationAudio }; | |
124 | |
125 // Discards unprocessed audio data. | |
126 virtual void Reset() = 0; | 106 virtual void Reset() = 0; |
127 | 107 |
128 // Enables codec-internal FEC, if the implementation supports it. | 108 // Enables or disables codec-internal FEC (forward error correction). Returns |
129 virtual bool SetFec(bool enable) = 0; | 109 // true if the codec was able to comply. The default implementation returns |
| 110 // true when asked to disable FEC and false when asked to enable it (meaning |
| 111 // that FEC isn't supported). |
| 112 virtual bool SetFec(bool enable); |
130 | 113 |
131 // Enables or disables codec-internal VAD/DTX, if the implementation supports | 114 // Enables or disables codec-internal VAD/DTX. Returns true if the codec was |
132 // it. | 115 // able to comply. The default implementation returns true when asked to |
133 virtual bool SetDtx(bool enable) = 0; | 116 // disable DTX and false when asked to enable it (meaning that DTX isn't |
| 117 // supported). |
| 118 virtual bool SetDtx(bool enable); |
134 | 119 |
135 // Sets the application mode. The implementation is free to disregard this | 120 // Sets the application mode. Returns true if the codec was able to comply. |
136 // setting. | 121 // The default implementation just returns false. |
137 virtual bool SetApplication(Application application) = 0; | 122 enum class Application { kSpeech, kAudio }; |
| 123 virtual bool SetApplication(Application application); |
138 | 124 |
139 // Sets an upper limit on the payload size produced by the encoder. The | 125 // Tells the encoder about the highest sample rate the decoder is expected to |
140 // implementation is free to disregard this setting. | 126 // use when decoding the bitstream. The encoder would typically use this |
141 virtual void SetMaxPayloadSize(int max_payload_size_bytes) = 0; | 127 // information to adjust the quality of the encoding. The default |
| 128 // implementation just returns true. |
| 129 // TODO(kwiberg): Change return value to void, since it doesn't matter |
| 130 // whether the encoder approved of the max playback rate or not. |
| 131 virtual bool SetMaxPlaybackRate(int frequency_hz); |
142 | 132 |
143 // Sets the maximum rate which the codec may not exceed for any packet. | 133 // Tells the encoder what the projected packet loss rate is. The rate is in |
144 virtual void SetMaxRate(int max_rate_bps) = 0; | 134 // the range [0.0, 1.0]. The encoder would typically use this information to |
| 135 // adjust channel coding efforts, such as FEC. The default implementation |
| 136 // does nothing. |
| 137 virtual void SetProjectedPacketLossRate(double fraction); |
145 | 138 |
146 // Informs the encoder about the maximum sample rate which the decoder will | 139 // Tells the encoder what average bitrate we'd like it to produce. The |
147 // use when decoding the bitstream. The implementation is free to disregard | 140 // encoder is free to adjust or disregard the given bitrate (the default |
148 // this hint. | 141 // implementation does the latter). |
149 virtual bool SetMaxPlaybackRate(int frequency_hz) = 0; | 142 virtual void SetTargetBitrate(int target_bps); |
| 143 |
| 144 // Sets the maximum bitrate which must not be exceeded for any packet. The |
| 145 // encoder is free to adjust or disregard this value (the default |
| 146 // implementation does the latter). |
| 147 virtual void SetMaxBitrate(int max_bps); |
| 148 |
| 149 // Sets an upper limit on the size of packet payloads produced by the |
| 150 // encoder. The encoder is free to adjust or disregard this value (the |
| 151 // default implementation does the latter). |
| 152 virtual void SetMaxPayloadSize(int max_payload_size_bytes); |
150 }; | 153 }; |
151 } // namespace webrtc | 154 } // namespace webrtc |
152 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 155 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
OLD | NEW |