webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h - Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h

Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@pow

Patch Set: Use f for float Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

13	13

14 #include <complex>	14 #include <complex>

15 #include <memory>	15 #include <memory>

	16 #include <vector>

16	17

17 namespace webrtc {	18 namespace webrtc {

18	19

19 namespace intelligibility {	20 namespace intelligibility {

20	21

21 // Internal helper for computing the power of a stream of arrays.	22 // Internal helper for computing the power of a stream of arrays.

22 // The result is an array of power per position: the i-th power is the power of	23 // The result is an array of power per position: the i-th power is the power of

23 // the stream of data on the i-th positions in the input arrays.	24 // the stream of data on the i-th positions in the input arrays.

	25 template <typename T>

24 class PowerEstimator {	26 class PowerEstimator {

25 public:	27 public:

26 // Construct an instance for the given input array length (|freqs|), with the	28 // Construct an instance for the given input array length (|freqs|), with the

27 // appropriate parameters. |decay| is the forgetting factor.	29 // appropriate parameters. |decay| is the forgetting factor.

28 PowerEstimator(size_t freqs, float decay);	30 PowerEstimator(size_t freqs, float decay);

29	31

30 // Add a new data point to the series.	32 // Add a new data point to the series.

31 void Step(const std::complex<float>* data);	33 void Step(const T* data);

32	34

33 // The current power array.	35 // The current power array.

34 const float* Power();	36 const std::vector<float>& power() { return power_; };

35	37

36 private:	38 private:

37 // TODO(ekmeyerson): Switch the following running means	39 // The current power array.

38 // and histories from std::unique_ptr to std::vector.	40 std::vector<float> power_;

39 std::unique_ptr<std::complex<float>[]> running_mean_sq_;

40	41

41 // The current magnitude array.

42 std::unique_ptr<float[]> magnitude_;

43 // The current power array.

44 std::unique_ptr<float[]> power_;

45

46 const size_t num_freqs_;

47 const float decay_;	42 const float decay_;

48 };	43 };

49	44

50 // Helper class for smoothing gain changes. On each application step, the	45 // Helper class for smoothing gain changes. On each application step, the

51 // currently used gains are changed towards a set of settable target gains,	46 // currently used gains are changed towards a set of settable target gains,

52 // constrained by a limit on the magnitude of the changes.	47 // constrained by a limit on the relative changes.

53 class GainApplier {	48 class GainApplier {

54 public:	49 public:

55 GainApplier(size_t freqs, float change_limit);	50 GainApplier(size_t freqs, float relative_change_limit);

56	51

57 // Copy |in_block| to |out_block|, multiplied by the current set of gains,	52 // Copy |in_block| to |out_block|, multiplied by the current set of gains,

58 // and step the current set of gains towards the target set.	53 // and step the current set of gains towards the target set.

59 void Apply(const std::complex<float>* in_block,	54 void Apply(const std::complex<float>* in_block,

60 std::complex<float>* out_block);	55 std::complex<float>* out_block);

61	56

62 // Return the current target gain set. Modify this array to set the targets.	57 // Return the current target gain set. Modify this array to set the targets.

63 float* target() const { return target_.get(); }	58 float* target() const { return target_.get(); }

64	59

65 private:	60 private:

66 const size_t num_freqs_;	61 const size_t num_freqs_;

67 const float change_limit_;	62 const float relative_change_limit_;

68 std::unique_ptr<float[]> target_;	63 std::unique_ptr<float[]> target_;

69 std::unique_ptr<float[]> current_;	64 std::unique_ptr<float[]> current_;

70 };	65 };

71	66

72 } // namespace intelligibility	67 } // namespace intelligibility

73	68

74 } // namespace webrtc	69 } // namespace webrtc

75	70

76 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	71 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

OLD	NEW