Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: webrtc/common_audio/vad/vad_core.c

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master
Patch Set: Resync Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/common_audio/vad/vad_core.h ('k') | webrtc/common_audio/vad/vad_core_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 // type of signal is most probable. 115 // type of signal is most probable.
116 // 116 //
117 // - self [i/o] : Pointer to VAD instance 117 // - self [i/o] : Pointer to VAD instance
118 // - features [i] : Feature vector of length |kNumChannels| 118 // - features [i] : Feature vector of length |kNumChannels|
119 // = log10(energy in frequency band) 119 // = log10(energy in frequency band)
120 // - total_power [i] : Total power in audio frame. 120 // - total_power [i] : Total power in audio frame.
121 // - frame_length [i] : Number of input samples 121 // - frame_length [i] : Number of input samples
122 // 122 //
123 // - returns : the VAD decision (0 - noise, 1 - speech). 123 // - returns : the VAD decision (0 - noise, 1 - speech).
124 static int16_t GmmProbability(VadInstT* self, int16_t* features, 124 static int16_t GmmProbability(VadInstT* self, int16_t* features,
125 int16_t total_power, int frame_length) { 125 int16_t total_power, size_t frame_length) {
126 int channel, k; 126 int channel, k;
127 int16_t feature_minimum; 127 int16_t feature_minimum;
128 int16_t h0, h1; 128 int16_t h0, h1;
129 int16_t log_likelihood_ratio; 129 int16_t log_likelihood_ratio;
130 int16_t vadflag = 0; 130 int16_t vadflag = 0;
131 int16_t shifts_h0, shifts_h1; 131 int16_t shifts_h0, shifts_h1;
132 int16_t tmp_s16, tmp1_s16, tmp2_s16; 132 int16_t tmp_s16, tmp1_s16, tmp2_s16;
133 int16_t diff; 133 int16_t diff;
134 int gaussian; 134 int gaussian;
135 int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; 135 int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
(...skipping 453 matching lines...) Expand 10 before | Expand all | Expand 10 after
589 break; 589 break;
590 } 590 }
591 591
592 return return_value; 592 return return_value;
593 } 593 }
594 594
595 // Calculate VAD decision by first extracting feature values and then calculate 595 // Calculate VAD decision by first extracting feature values and then calculate
596 // probability for both speech and background noise. 596 // probability for both speech and background noise.
597 597
598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, 598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
599 int frame_length) { 599 size_t frame_length) {
600 int vad; 600 int vad;
601 int i; 601 size_t i;
602 int16_t speech_nb[240]; // 30 ms in 8 kHz. 602 int16_t speech_nb[240]; // 30 ms in 8 kHz.
603 // |tmp_mem| is a temporary memory used by resample function, length is 603 // |tmp_mem| is a temporary memory used by resample function, length is
604 // frame length in 10 ms (480 samples) + 256 extra. 604 // frame length in 10 ms (480 samples) + 256 extra.
605 int32_t tmp_mem[480 + 256] = { 0 }; 605 int32_t tmp_mem[480 + 256] = { 0 };
606 const int kFrameLen10ms48khz = 480; 606 const size_t kFrameLen10ms48khz = 480;
607 const int kFrameLen10ms8khz = 80; 607 const size_t kFrameLen10ms8khz = 80;
608 int num_10ms_frames = frame_length / kFrameLen10ms48khz; 608 size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;
609 609
610 for (i = 0; i < num_10ms_frames; i++) { 610 for (i = 0; i < num_10ms_frames; i++) {
611 WebRtcSpl_Resample48khzTo8khz(speech_frame, 611 WebRtcSpl_Resample48khzTo8khz(speech_frame,
612 &speech_nb[i * kFrameLen10ms8khz], 612 &speech_nb[i * kFrameLen10ms8khz],
613 &inst->state_48_to_8, 613 &inst->state_48_to_8,
614 tmp_mem); 614 tmp_mem);
615 } 615 }
616 616
617 // Do VAD on an 8 kHz signal 617 // Do VAD on an 8 kHz signal
618 vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); 618 vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);
619 619
620 return vad; 620 return vad;
621 } 621 }
622 622
623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, 623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
624 int frame_length) 624 size_t frame_length)
625 { 625 {
626 int len, vad; 626 size_t len;
627 int vad;
627 int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB ) 628 int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB )
628 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) 629 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
629 630
630 631
631 // Downsample signal 32->16->8 before doing VAD 632 // Downsample signal 32->16->8 before doing VAD
632 WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), 633 WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),
633 frame_length); 634 frame_length);
634 len = frame_length / 2; 635 len = frame_length / 2;
635 636
636 WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); 637 WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);
637 len /= 2; 638 len /= 2;
638 639
639 // Do VAD on an 8 kHz signal 640 // Do VAD on an 8 kHz signal
640 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); 641 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
641 642
642 return vad; 643 return vad;
643 } 644 }
644 645
645 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, 646 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
646 int frame_length) 647 size_t frame_length)
647 { 648 {
648 int len, vad; 649 size_t len;
650 int vad;
649 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) 651 int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
650 652
651 // Wideband: Downsample signal before doing VAD 653 // Wideband: Downsample signal before doing VAD
652 WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, 654 WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
653 frame_length); 655 frame_length);
654 656
655 len = frame_length / 2; 657 len = frame_length / 2;
656 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); 658 vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
657 659
658 return vad; 660 return vad;
659 } 661 }
660 662
661 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, 663 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
662 int frame_length) 664 size_t frame_length)
663 { 665 {
664 int16_t feature_vector[kNumChannels], total_power; 666 int16_t feature_vector[kNumChannels], total_power;
665 667
666 // Get power in the bands 668 // Get power in the bands
667 total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, 669 total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,
668 feature_vector); 670 feature_vector);
669 671
670 // Make a VAD 672 // Make a VAD
671 inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); 673 inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length);
672 674
673 return inst->vad; 675 return inst->vad;
674 } 676 }
OLDNEW
« no previous file with comments | « webrtc/common_audio/vad/vad_core.h ('k') | webrtc/common_audio/vad/vad_core_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698