Index: webrtc/modules/audio_coding/codecs/audio_decoder.h |
diff --git a/webrtc/modules/audio_coding/codecs/audio_decoder.h b/webrtc/modules/audio_coding/codecs/audio_decoder.h |
index 13581bc247705a740b04e1d9a1cac73e454b3fa3..9608c321dbc9aeeaec85ed8c93aeb20b856a3112 100644 |
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.h |
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h |
@@ -13,7 +13,13 @@ |
#include <stdlib.h> // NULL |
+#include <memory> |
+#include <vector> |
+ |
+#include "webrtc/base/array_view.h" |
+#include "webrtc/base/buffer.h" |
#include "webrtc/base/constructormagic.h" |
+#include "webrtc/base/optional.h" |
#include "webrtc/typedefs.h" |
namespace webrtc { |
@@ -33,6 +39,55 @@ class AudioDecoder { |
AudioDecoder() = default; |
virtual ~AudioDecoder() = default; |
+ class EncodedAudioFrame { |
+ public: |
+ struct DecodeResult { |
+ size_t num_decoded_samples; |
+ SpeechType speech_type; |
+ }; |
+ |
+ virtual ~EncodedAudioFrame() = default; |
+ |
+ // Returns the duration in samples-per-channel of this audio frame. |
+ // If no duration can be ascertained, returns zero. |
+ virtual size_t Duration() const = 0; |
+ |
+ // Decodes this frame of audio and writes the result in |decoded|. |
+ // |decoded| must be large enough to store as many samples as indicated by a |
+ // call to Duration() . On success, returns an rtc::Optional containing the |
+ // total number of samples across all channels, as well as whether the |
+ // decoder produced comfort noise or speech. On failure, returns an empty |
+ // rtc::Optional. Decode may be called at most once per frame object. |
+ virtual rtc::Optional<DecodeResult> Decode( |
+ rtc::ArrayView<int16_t> decoded) const = 0; |
+ }; |
+ |
+ struct ParseResult { |
+ ParseResult(); |
+ ParseResult(uint32_t timestamp, |
+ bool primary, |
+ std::unique_ptr<EncodedAudioFrame> frame); |
+ ParseResult(ParseResult&& b); |
+ ~ParseResult(); |
+ |
+ ParseResult& operator=(ParseResult&& b); |
+ |
+ // The timestamp of the frame is in samples per channel. |
+ uint32_t timestamp; |
+ bool primary; |
+ std::unique_ptr<EncodedAudioFrame> frame; |
+ }; |
+ |
+ // Let the decoder parse this payload and prepare zero or more decodable |
+ // frames. Each frame must be between 10 ms and 120 ms long. The caller must |
+ // ensure that the AudioDecoder object outlives any frame objects returned by |
+ // this call. The decoder is free to swap or move the data from the |payload| |
+ // buffer. |timestamp| is the input timestamp, in samples, corresponding to |
+ // the start of the payload. |
+ virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, |
+ uint32_t timestamp, |
+ bool is_primary); |
+ |
// Decodes |encode_len| bytes from |encoded| and writes the result in |
// |decoded|. The maximum bytes allowed to be written into |decoded| is |
// |max_decoded_bytes|. Returns the total number of samples across all |