webrtc/common_audio/vad/vad_core.c - Issue 1227203003: Update audio code to use size_t more correctly, webrtc/common_audio/ portion.

Side by Side Diff: webrtc/common_audio/vad/vad_core.c

Issue 1227203003: Update audio code to use size_t more correctly, webrtc/common_audio/ portion. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Resync Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
115 // type of signal is most probable.	115 // type of signal is most probable.

116 //	116 //

117 // - self [i/o] : Pointer to VAD instance	117 // - self [i/o] : Pointer to VAD instance

118 // - features [i] : Feature vector of length |kNumChannels|	118 // - features [i] : Feature vector of length |kNumChannels|

119 // = log10(energy in frequency band)	119 // = log10(energy in frequency band)

120 // - total_power [i] : Total power in audio frame.	120 // - total_power [i] : Total power in audio frame.

121 // - frame_length [i] : Number of input samples	121 // - frame_length [i] : Number of input samples

122 //	122 //

123 // - returns : the VAD decision (0 - noise, 1 - speech).	123 // - returns : the VAD decision (0 - noise, 1 - speech).

124 static int16_t GmmProbability(VadInstT* self, int16_t* features,	124 static int16_t GmmProbability(VadInstT* self, int16_t* features,

125 int16_t total_power, int frame_length) {	125 int16_t total_power, size_t frame_length) {

126 int channel, k;	126 int channel, k;

127 int16_t feature_minimum;	127 int16_t feature_minimum;

128 int16_t h0, h1;	128 int16_t h0, h1;

129 int16_t log_likelihood_ratio;	129 int16_t log_likelihood_ratio;

130 int16_t vadflag = 0;	130 int16_t vadflag = 0;

131 int16_t shifts_h0, shifts_h1;	131 int16_t shifts_h0, shifts_h1;

132 int16_t tmp_s16, tmp1_s16, tmp2_s16;	132 int16_t tmp_s16, tmp1_s16, tmp2_s16;

133 int16_t diff;	133 int16_t diff;

134 int gaussian;	134 int gaussian;

135 int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;	135 int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;

(...skipping 453 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
589 break;	589 break;

590 }	590 }

591	591

592 return return_value;	592 return return_value;

593 }	593 }

594	594

595 // Calculate VAD decision by first extracting feature values and then calculate	595 // Calculate VAD decision by first extracting feature values and then calculate

596 // probability for both speech and background noise.	596 // probability for both speech and background noise.

597	597

598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,	598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,

599 int frame_length) {	599 size_t frame_length) {

600 int vad;	600 int vad;

601 int i;	601 size_t i;

602 int16_t speech_nb[240]; // 30 ms in 8 kHz.	602 int16_t speech_nb[240]; // 30 ms in 8 kHz.

603 // |tmp_mem| is a temporary memory used by resample function, length is	603 // |tmp_mem| is a temporary memory used by resample function, length is

604 // frame length in 10 ms (480 samples) + 256 extra.	604 // frame length in 10 ms (480 samples) + 256 extra.

605 int32_t tmp_mem[480 + 256] = { 0 };	605 int32_t tmp_mem[480 + 256] = { 0 };

606 const int kFrameLen10ms48khz = 480;	606 const size_t kFrameLen10ms48khz = 480;

607 const int kFrameLen10ms8khz = 80;	607 const size_t kFrameLen10ms8khz = 80;

608 int num_10ms_frames = frame_length / kFrameLen10ms48khz;	608 size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;

609	609

610 for (i = 0; i < num_10ms_frames; i++) {	610 for (i = 0; i < num_10ms_frames; i++) {

611 WebRtcSpl_Resample48khzTo8khz(speech_frame,	611 WebRtcSpl_Resample48khzTo8khz(speech_frame,

612 &speech_nb[i * kFrameLen10ms8khz],	612 &speech_nb[i * kFrameLen10ms8khz],

613 &inst->state_48_to_8,	613 &inst->state_48_to_8,

614 tmp_mem);	614 tmp_mem);

615 }	615 }

616	616

617 // Do VAD on an 8 kHz signal	617 // Do VAD on an 8 kHz signal

618 vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);	618 vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);

619	619

620 return vad;	620 return vad;

621 }	621 }

622	622

623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,	623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,

624 int frame_length)	624 size_t frame_length)

625 {	625 {

626 int len, vad;	626 size_t len;

	627 int vad;

627 int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB )	628 int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB )

628 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)	629 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)

629	630

630	631

631 // Downsample signal 32->16->8 before doing VAD	632 // Downsample signal 32->16->8 before doing VAD

632 WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),	633 WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),

633 frame_length);	634 frame_length);

634 len = frame_length / 2;	635 len = frame_length / 2;

635	636

636 WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);	637 WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);

637 len /= 2;	638 len /= 2;

638	639

639 // Do VAD on an 8 kHz signal	640 // Do VAD on an 8 kHz signal

640 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);	641 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);

641	642

642 return vad;	643 return vad;

643 }	644 }

644	645

645 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,	646 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,

646 int frame_length)	647 size_t frame_length)

647 {	648 {

648 int len, vad;	649 size_t len;

	650 int vad;

649 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)	651 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)

650	652

651 // Wideband: Downsample signal before doing VAD	653 // Wideband: Downsample signal before doing VAD

652 WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,	654 WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,

653 frame_length);	655 frame_length);

654	656

655 len = frame_length / 2;	657 len = frame_length / 2;

656 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);	658 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);

657	659

658 return vad;	660 return vad;

659 }	661 }

660	662

661 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,	663 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,

662 int frame_length)	664 size_t frame_length)

663 {	665 {

664 int16_t feature_vector[kNumChannels], total_power;	666 int16_t feature_vector[kNumChannels], total_power;

665	667

666 // Get power in the bands	668 // Get power in the bands

667 total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,	669 total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,

668 feature_vector);	670 feature_vector);

669	671

670 // Make a VAD	672 // Make a VAD

671 inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length);	673 inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length);

672	674

673 return inst->vad;	675 return inst->vad;

674 }	676 }

OLD	NEW

« no previous file with comments | « webrtc/common_audio/vad/vad_core.h ('k') | webrtc/common_audio/vad/vad_core_unittest.cc » ('j') | no next file with comments »