| OLD | NEW | 
|    1 /* |    1 /* | 
|    2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |    2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 
|    3  * |    3  * | 
|    4  *  Use of this source code is governed by a BSD-style license |    4  *  Use of this source code is governed by a BSD-style license | 
|    5  *  that can be found in the LICENSE file in the root of the source |    5  *  that can be found in the LICENSE file in the root of the source | 
|    6  *  tree. An additional intellectual property rights grant can be found |    6  *  tree. An additional intellectual property rights grant can be found | 
|    7  *  in the file PATENTS.  All contributing project authors may |    7  *  in the file PATENTS.  All contributing project authors may | 
|    8  *  be found in the AUTHORS file in the root of the source tree. |    8  *  be found in the AUTHORS file in the root of the source tree. | 
|    9  */ |    9  */ | 
|   10  |   10  | 
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  115 // type of signal is most probable. |  115 // type of signal is most probable. | 
|  116 // |  116 // | 
|  117 // - self           [i/o] : Pointer to VAD instance |  117 // - self           [i/o] : Pointer to VAD instance | 
|  118 // - features       [i]   : Feature vector of length |kNumChannels| |  118 // - features       [i]   : Feature vector of length |kNumChannels| | 
|  119 //                          = log10(energy in frequency band) |  119 //                          = log10(energy in frequency band) | 
|  120 // - total_power    [i]   : Total power in audio frame. |  120 // - total_power    [i]   : Total power in audio frame. | 
|  121 // - frame_length   [i]   : Number of input samples |  121 // - frame_length   [i]   : Number of input samples | 
|  122 // |  122 // | 
|  123 // - returns              : the VAD decision (0 - noise, 1 - speech). |  123 // - returns              : the VAD decision (0 - noise, 1 - speech). | 
|  124 static int16_t GmmProbability(VadInstT* self, int16_t* features, |  124 static int16_t GmmProbability(VadInstT* self, int16_t* features, | 
|  125                               int16_t total_power, int frame_length) { |  125                               int16_t total_power, size_t frame_length) { | 
|  126   int channel, k; |  126   int channel, k; | 
|  127   int16_t feature_minimum; |  127   int16_t feature_minimum; | 
|  128   int16_t h0, h1; |  128   int16_t h0, h1; | 
|  129   int16_t log_likelihood_ratio; |  129   int16_t log_likelihood_ratio; | 
|  130   int16_t vadflag = 0; |  130   int16_t vadflag = 0; | 
|  131   int16_t shifts_h0, shifts_h1; |  131   int16_t shifts_h0, shifts_h1; | 
|  132   int16_t tmp_s16, tmp1_s16, tmp2_s16; |  132   int16_t tmp_s16, tmp1_s16, tmp2_s16; | 
|  133   int16_t diff; |  133   int16_t diff; | 
|  134   int gaussian; |  134   int gaussian; | 
|  135   int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; |  135   int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; | 
| (...skipping 453 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  589       break; |  589       break; | 
|  590   } |  590   } | 
|  591  |  591  | 
|  592   return return_value; |  592   return return_value; | 
|  593 } |  593 } | 
|  594  |  594  | 
|  595 // Calculate the VAD decision by first extracting feature values and then |  595 // Calculate the VAD decision by first extracting feature values and then | 
|  596 // calculating the probability for both speech and background noise. |  596 // calculating the probability for both speech and background noise. | 
|  597  |  597  | 
|  598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, |  598 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, | 
|  599                            int frame_length) { |  599                            size_t frame_length) { | 
|  600   int vad; |  600   int vad; | 
|  601   int i; |  601   size_t i; | 
|  602   int16_t speech_nb[240];  // 30 ms in 8 kHz. |  602   int16_t speech_nb[240];  // 30 ms in 8 kHz. | 
|  603   // |tmp_mem| is a temporary memory used by resample function, length is |  603   // |tmp_mem| is a temporary memory used by resample function, length is | 
|  604   // frame length in 10 ms (480 samples) + 256 extra. |  604   // frame length in 10 ms (480 samples) + 256 extra. | 
|  605   int32_t tmp_mem[480 + 256] = { 0 }; |  605   int32_t tmp_mem[480 + 256] = { 0 }; | 
|  606   const int kFrameLen10ms48khz = 480; |  606   const size_t kFrameLen10ms48khz = 480; | 
|  607   const int kFrameLen10ms8khz = 80; |  607   const size_t kFrameLen10ms8khz = 80; | 
|  608   int num_10ms_frames = frame_length / kFrameLen10ms48khz; |  608   size_t num_10ms_frames = frame_length / kFrameLen10ms48khz; | 
|  609  |  609  | 
|  610   for (i = 0; i < num_10ms_frames; i++) { |  610   for (i = 0; i < num_10ms_frames; i++) { | 
|  611     WebRtcSpl_Resample48khzTo8khz(speech_frame, |  611     WebRtcSpl_Resample48khzTo8khz(speech_frame, | 
|  612                                   &speech_nb[i * kFrameLen10ms8khz], |  612                                   &speech_nb[i * kFrameLen10ms8khz], | 
|  613                                   &inst->state_48_to_8, |  613                                   &inst->state_48_to_8, | 
|  614                                   tmp_mem); |  614                                   tmp_mem); | 
|  615   } |  615   } | 
|  616  |  616  | 
|  617   // Do VAD on an 8 kHz signal |  617   // Do VAD on an 8 kHz signal | 
|  618   vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); |  618   vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); | 
|  619  |  619  | 
|  620   return vad; |  620   return vad; | 
|  621 } |  621 } | 
|  622  |  622  | 
|  623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, |  623 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, | 
|  624                            int frame_length) |  624                            size_t frame_length) | 
|  625 { |  625 { | 
|  626     int len, vad; |  626     size_t len; | 
 |  627     int vad; | 
|  627     int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB
     ) |  628     int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB
     ) | 
|  628     int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) |  629     int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) | 
|  629  |  630  | 
|  630  |  631  | 
|  631     // Downsample signal 32->16->8 before doing VAD |  632     // Downsample signal 32->16->8 before doing VAD | 
|  632     WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_s
     tates[2]), |  633     WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_s
     tates[2]), | 
|  633                            frame_length); |  634                            frame_length); | 
|  634     len = frame_length / 2; |  635     len = frame_length / 2; | 
|  635  |  636  | 
|  636     WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states,
      len); |  637     WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states,
      len); | 
|  637     len /= 2; |  638     len /= 2; | 
|  638  |  639  | 
|  639     // Do VAD on an 8 kHz signal |  640     // Do VAD on an 8 kHz signal | 
|  640     vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); |  641     vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); | 
|  641  |  642  | 
|  642     return vad; |  643     return vad; | 
|  643 } |  644 } | 
|  644  |  645  | 
|  645 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, |  646 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, | 
|  646                            int frame_length) |  647                            size_t frame_length) | 
|  647 { |  648 { | 
|  648     int len, vad; |  649     size_t len; | 
 |  650     int vad; | 
|  649     int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) |  651     int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) | 
|  650  |  652  | 
|  651     // Wideband: Downsample signal before doing VAD |  653     // Wideband: Downsample signal before doing VAD | 
|  652     WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_sta
     tes, |  654     WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_sta
     tes, | 
|  653                            frame_length); |  655                            frame_length); | 
|  654  |  656  | 
|  655     len = frame_length / 2; |  657     len = frame_length / 2; | 
|  656     vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); |  658     vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); | 
|  657  |  659  | 
|  658     return vad; |  660     return vad; | 
|  659 } |  661 } | 
|  660  |  662  | 
|  661 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, |  663 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, | 
|  662                           int frame_length) |  664                           size_t frame_length) | 
|  663 { |  665 { | 
|  664     int16_t feature_vector[kNumChannels], total_power; |  666     int16_t feature_vector[kNumChannels], total_power; | 
|  665  |  667  | 
|  666     // Get power in the bands |  668     // Get power in the bands | 
|  667     total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, |  669     total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, | 
|  668                                               feature_vector); |  670                                               feature_vector); | 
|  669  |  671  | 
|  670     // Make a VAD |  672     // Make a VAD | 
|  671     inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); |  673     inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); | 
|  672  |  674  | 
|  673     return inst->vad; |  675     return inst->vad; | 
|  674 } |  676 } | 
| OLD | NEW |