Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
| 12 #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 12 #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
| 13 | 13 |
| 14 #include <algorithm> | 14 #include <algorithm> |
| 15 #include <vector> | 15 #include <vector> |
| 16 | 16 |
| 17 #include "webrtc/typedefs.h" | 17 #include "webrtc/typedefs.h" |
| 18 | 18 |
| 19 namespace webrtc { | 19 namespace webrtc { |
| 20 | 20 |
| 21 // This is the interface class for encoders in AudioCoding module. Each codec | 21 // This is the interface class for encoders in AudioCoding module. Each codec |
| 22 // type must have an implementation of this class. | 22 // type must have an implementation of this class. |
| 23 class AudioEncoder { | 23 class AudioEncoder { |
| 24 public: | 24 public: |
| 25 struct EncodedInfoLeaf { | 25 struct EncodedInfoLeaf { |
| 26 EncodedInfoLeaf() | 26 size_t encoded_bytes = 0; |
| 27 : encoded_bytes(0), | 27 uint32_t encoded_timestamp = 0; |
| 28 encoded_timestamp(0), | 28 int payload_type = 0; |
| 29 payload_type(0), | 29 bool send_even_if_empty = false; |
| 30 send_even_if_empty(false), | 30 bool speech = true; |
| 31 speech(true) {} | |
| 32 | |
| 33 size_t encoded_bytes; | |
| 34 uint32_t encoded_timestamp; | |
| 35 int payload_type; | |
| 36 bool send_even_if_empty; | |
| 37 bool speech; | |
| 38 }; | 31 }; |
| 39 | 32 |
| 40 // This is the main struct for auxiliary encoding information. Each encoded | 33 // This is the main struct for auxiliary encoding information. Each encoded |
| 41 // packet should be accompanied by one EncodedInfo struct, containing the | 34 // packet should be accompanied by one EncodedInfo struct, containing the |
| 42 // total number of |encoded_bytes|, the |encoded_timestamp| and the | 35 // total number of |encoded_bytes|, the |encoded_timestamp| and the |
| 43 // |payload_type|. If the packet contains redundant encodings, the |redundant| | 36 // |payload_type|. If the packet contains redundant encodings, the |redundant| |
| 44 // vector will be populated with EncodedInfoLeaf structs. Each struct in the | 37 // vector will be populated with EncodedInfoLeaf structs. Each struct in the |
| 45 // vector represents one encoding; the order of structs in the vector is the | 38 // vector represents one encoding; the order of structs in the vector is the |
| 46 // same as the order in which the actual payloads are written to the byte | 39 // same as the order in which the actual payloads are written to the byte |
| 47 // stream. When EncodedInfoLeaf structs are present in the vector, the main | 40 // stream. When EncodedInfoLeaf structs are present in the vector, the main |
| 48 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the | 41 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the |
| 49 // vector. | 42 // vector. |
| 50 struct EncodedInfo : public EncodedInfoLeaf { | 43 struct EncodedInfo : public EncodedInfoLeaf { |
| 51 EncodedInfo(); | 44 EncodedInfo(); |
| 52 ~EncodedInfo(); | 45 ~EncodedInfo(); |
| 53 | 46 |
| 54 std::vector<EncodedInfoLeaf> redundant; | 47 std::vector<EncodedInfoLeaf> redundant; |
| 55 }; | 48 }; |
| 56 | 49 |
| 57 virtual ~AudioEncoder() {} | 50 virtual ~AudioEncoder() = default; |
| 58 | 51 |
| 59 // Accepts one 10 ms block of input audio (i.e., sample_rate_hz() / 100 * | 52 // Returns the maximum number of bytes that can be produced by the encoder |
| 60 // num_channels() samples). Multi-channel audio must be sample-interleaved. | |
| 61 // The encoder produces zero or more bytes of output in |encoded| and | |
| 62 // returns additional encoding information. | |
| 63 // The caller is responsible for making sure that |max_encoded_bytes| is | |
| 64 // not smaller than the number of bytes actually produced by the encoder. | |
| 65 EncodedInfo Encode(uint32_t rtp_timestamp, | |
| 66 const int16_t* audio, | |
| 67 size_t num_samples_per_channel, | |
| 68 size_t max_encoded_bytes, | |
| 69 uint8_t* encoded); | |
| 70 | |
| 71 // Return the input sample rate in Hz and the number of input channels. | |
| 72 // These are constants set at instantiation time. | |
| 73 virtual int SampleRateHz() const = 0; | |
| 74 virtual int NumChannels() const = 0; | |
| 75 | |
| 76 // Return the maximum number of bytes that can be produced by the encoder | |
| 77 // at each Encode() call. The caller can use the return value to determine | 53 // at each Encode() call. The caller can use the return value to determine |
| 78 // the size of the buffer that needs to be allocated. This value is allowed | 54 // the size of the buffer that needs to be allocated. This value is allowed |
| 79 // to depend on encoder parameters like bitrate, frame size etc., so if | 55 // to depend on encoder parameters like bitrate, frame size etc., so if |
| 80 // any of these change, the caller of Encode() is responsible for checking | 56 // any of these change, the caller of Encode() is responsible for checking |
| 81 // that the buffer is large enough by calling MaxEncodedBytes() again. | 57 // that the buffer is large enough by calling MaxEncodedBytes() again. |
| 82 virtual size_t MaxEncodedBytes() const = 0; | 58 virtual size_t MaxEncodedBytes() const = 0; |
| 83 | 59 |
| 84 // Returns the rate with which the RTP timestamps are updated. By default, | 60 // Returns the input sample rate in Hz and the number of input channels. |
| 85 // this is the same as sample_rate_hz(). | 61 // These are constants set at instantiation time. |
| | 62 virtual int SampleRateHz() const = 0; |
| | 63 virtual int NumChannels() const = 0; |
| | 64 |
| | 65 // Returns the rate at which the RTP timestamps are updated. The default |
| | 66 // implementation returns SampleRateHz(). |
| 86 virtual int RtpTimestampRateHz() const; | 67 virtual int RtpTimestampRateHz() const; |
| 87 | 68 |
| 88 // Returns the number of 10 ms frames the encoder will put in the next | 69 // Returns the number of 10 ms frames the encoder will put in the next |
| 89 // packet. This value may only change when Encode() outputs a packet; i.e., | 70 // packet. This value may only change when Encode() outputs a packet; i.e., |
| 90 // the encoder may vary the number of 10 ms frames from packet to packet, but | 71 // the encoder may vary the number of 10 ms frames from packet to packet, but |
| 91 // it must decide the length of the next packet no later than when outputting | 72 // it must decide the length of the next packet no later than when outputting |
| 92 // the preceding packet. | 73 // the preceding packet. |
| 93 virtual size_t Num10MsFramesInNextPacket() const = 0; | 74 virtual size_t Num10MsFramesInNextPacket() const = 0; |
| 94 | 75 |
| 95 // Returns the maximum value that can be returned by | 76 // Returns the maximum value that can be returned by |
| 96 // Num10MsFramesInNextPacket(). | 77 // Num10MsFramesInNextPacket(). |
| 97 virtual size_t Max10MsFramesInAPacket() const = 0; | 78 virtual size_t Max10MsFramesInAPacket() const = 0; |
| 98 | 79 |
| 99 // Returns the current target bitrate in bits/s. The value -1 means that the | 80 // Returns the current target bitrate in bits/s. The value -1 means that the |
| 100 // codec adapts the target automatically, and a current target cannot be | 81 // codec adapts the target automatically, and a current target cannot be |
| 101 // provided. | 82 // provided. |
| 102 virtual int GetTargetBitrate() const = 0; | 83 virtual int GetTargetBitrate() const = 0; |
| 103 | 84 |
| 104 // Changes the target bitrate. The implementation is free to alter this value, | 85 // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 * |
| 105 // e.g., if the desired value is outside the valid range. | 86 // NumChannels() samples). Multi-channel audio must be sample-interleaved. |
| 106 virtual void SetTargetBitrate(int bits_per_second) {} | 87 // The encoder produces zero or more bytes of output in |encoded| and |
| 107 | 88 // returns additional encoding information. |
| 108 // Tells the implementation what the projected packet loss rate is. The rate | 89 // The caller is responsible for making sure that |max_encoded_bytes| is |
| 109 // is in the range [0.0, 1.0]. This rate is typically used to adjust channel | 90 // not smaller than the number of bytes actually produced by the encoder. |
| 110 // coding efforts, such as FEC. | 91 // Encode() checks some preconditions, calls EncodeInternal() which does the |
| 111 virtual void SetProjectedPacketLossRate(double fraction) {} | 92 // actual work, and then checks some postconditions. |
| 112 | 93 EncodedInfo Encode(uint32_t rtp_timestamp, |
| 113 // This is the encode function that the inherited classes must implement. It | 94 const int16_t* audio, |
| 114 // is called from Encode in the base class. | 95 size_t num_samples_per_channel, |
| | 96 size_t max_encoded_bytes, |
| | 97 uint8_t* encoded); |
| 115 virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp, | 98 virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp, |
|
hlundin-webrtc
2015/09/07 20:00:01
I wouldn't mind a blank line here.
kwiberg-webrtc
2015/09/08 10:47:45
I did it this way on purpose to emphasize that the … [comment truncated in source]
| |
| 116 const int16_t* audio, | 99 const int16_t* audio, |
| 117 size_t max_encoded_bytes, | 100 size_t max_encoded_bytes, |
| 118 uint8_t* encoded) = 0; | 101 uint8_t* encoded) = 0; |
| 119 }; | |
| 120 | 102 |
| 121 class AudioEncoderMutable : public AudioEncoder { | 103 // Resets the encoder to its starting state, discarding any input that has |
| 122 public: | 104 // been fed to the encoder but not yet emitted in a packet. |
| 123 enum Application { kApplicationSpeech, kApplicationAudio }; | |
| 124 | |
| 125 // Discards unprocessed audio data. | |
| 126 virtual void Reset() = 0; | 105 virtual void Reset() = 0; |
| 127 | 106 |
| 128 // Enables codec-internal FEC, if the implementation supports it. | 107 // Enables or disables codec-internal FEC (forward error correction). Returns |
| 129 virtual bool SetFec(bool enable) = 0; | 108 // true if the codec was able to comply. The default implementation returns |
| | 109 // true when asked to disable FEC and false when asked to enable it (meaning |
| | 110 // that FEC isn't supported). |
| | 111 virtual bool SetFec(bool enable); |
| 130 | 112 |
| 131 // Enables or disables codec-internal VAD/DTX, if the implementation supports | 113 // Enables or disables codec-internal VAD/DTX. Returns true if the codec was |
| 132 // it. | 114 // able to comply. The default implementation returns true when asked to |
| 133 virtual bool SetDtx(bool enable) = 0; | 115 // disable DTX and false when asked to enable it (meaning that DTX isn't |
| | 116 // supported). |
| | 117 virtual bool SetDtx(bool enable); |
| 134 | 118 |
| 135 // Sets the application mode. The implementation is free to disregard this | 119 // Sets the application mode. Returns true if the codec was able to comply. |
| 136 // setting. | 120 // The default implementation just returns false. |
| 137 virtual bool SetApplication(Application application) = 0; | 121 enum class Application { kSpeech, kAudio }; |
| | 122 virtual bool SetApplication(Application application); |
| 138 | 123 |
| 139 // Sets an upper limit on the payload size produced by the encoder. The | 124 // Tells the encoder about the highest sample rate the decoder is expected to |
| 140 // implementation is free to disregard this setting. | 125 // use when decoding the bitstream. The encoder would typically use this |
| 141 virtual void SetMaxPayloadSize(int max_payload_size_bytes) = 0; | 126 // information to adjust the quality of the encoding. The default |
| | 127 // implementation just returns true. |
| | 128 // TODO(kwiberg): Change return value to void, since it doesn't matter |
| | 129 // whether the encoder approved of the max playback rate or not. |
| | 130 virtual bool SetMaxPlaybackRate(int frequency_hz); |
| 142 | 131 |
| 143 // Sets the maximum rate which the codec may not exceed for any packet. | 132 // Tells the encoder what the projected packet loss rate is. The rate is in |
| 144 virtual void SetMaxRate(int max_rate_bps) = 0; | 133 // the range [0.0, 1.0]. The encoder would typically use this information to |
| | 134 // adjust channel coding efforts, such as FEC. The default implementation |
| | 135 // does nothing. |
| | 136 virtual void SetProjectedPacketLossRate(double fraction); |
| 145 | 137 |
| 146 // Informs the encoder about the maximum sample rate which the decoder will | 138 // Tells the encoder what average bitrate we'd like it to produce. The |
| 147 // use when decoding the bitstream. The implementation is free to disregard | 139 // encoder is free to adjust or disregard the given bitrate (the default |
| 148 // this hint. | 140 // implementation does the latter). |
| 149 virtual bool SetMaxPlaybackRate(int frequency_hz) = 0; | 141 virtual void SetTargetBitrate(int target_bps); |
| | 142 |
| | 143 // Sets the maximum bitrate which must not be exceeded for any packet. The |
| | 144 // encoder is free to adjust or disregard this value (the default |
| | 145 // implementation does the latter). |
| | 146 virtual void SetMaxBitrate(int max_bps); |
| | 147 |
| | 148 // Sets an upper limit on the size of packet payloads produced by the |
| | 149 // encoder. The encoder is free to adjust or disregard this value (the |
| | 150 // default implementation does the latter). |
| | 151 virtual void SetMaxPayloadSize(int max_payload_size_bytes); |
| 150 }; | 152 }; |
| 151 } // namespace webrtc | 153 } // namespace webrtc |
| 152 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ | 154 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ |
| OLD | NEW |