OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/ns/nsx_core.h" | 11 #include "webrtc/modules/audio_processing/ns/nsx_core.h" |
12 | 12 |
13 #include <arm_neon.h> | 13 #include <arm_neon.h> |
14 #include <assert.h> | 14 |
| 15 #include "webrtc/base/checks.h" |
15 | 16 |
16 // Constants to compensate for shifting signal log(2^shifts). | 17 // Constants to compensate for shifting signal log(2^shifts). |
17 const int16_t WebRtcNsx_kLogTable[9] = { | 18 const int16_t WebRtcNsx_kLogTable[9] = { |
18 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 | 19 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 |
19 }; | 20 }; |
20 | 21 |
21 const int16_t WebRtcNsx_kCounterDiv[201] = { | 22 const int16_t WebRtcNsx_kCounterDiv[201] = { |
22 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, | 23 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, |
23 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, | 24 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, |
24 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, | 25 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
137 int16_t* q_noise) { | 138 int16_t* q_noise) { |
138 int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; | 139 int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; |
139 int16_t countProd, delta, zeros, frac; | 140 int16_t countProd, delta, zeros, frac; |
140 int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; | 141 int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; |
141 const int16_t log2_const = 22713; | 142 const int16_t log2_const = 22713; |
142 const int16_t width_factor = 21845; | 143 const int16_t width_factor = 21845; |
143 | 144 |
144 size_t i, s, offset; | 145 size_t i, s, offset; |
145 | 146 |
146 tabind = inst->stages - inst->normData; | 147 tabind = inst->stages - inst->normData; |
147 assert(tabind < 9); | 148 RTC_DCHECK_LT(tabind, 9); |
148 assert(tabind > -9); | 149 RTC_DCHECK_GT(tabind, -9); |
149 if (tabind < 0) { | 150 if (tabind < 0) { |
150 logval = -WebRtcNsx_kLogTable[-tabind]; | 151 logval = -WebRtcNsx_kLogTable[-tabind]; |
151 } else { | 152 } else { |
152 logval = WebRtcNsx_kLogTable[tabind]; | 153 logval = WebRtcNsx_kLogTable[tabind]; |
153 } | 154 } |
154 | 155 |
155 int16x8_t logval_16x8 = vdupq_n_s16(logval); | 156 int16x8_t logval_16x8 = vdupq_n_s16(logval); |
156 | 157 |
157 // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) | 158 // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) |
158 // magn is in Q(-stages), and the real lmagn values are: | 159 // magn is in Q(-stages), and the real lmagn values are: |
159 // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) | 160 // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) |
160 // lmagn in Q8 | 161 // lmagn in Q8 |
161 for (i = 0; i < inst->magnLen; i++) { | 162 for (i = 0; i < inst->magnLen; i++) { |
162 if (magn[i]) { | 163 if (magn[i]) { |
163 zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); | 164 zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); |
164 frac = (int16_t)((((uint32_t)magn[i] << zeros) | 165 frac = (int16_t)((((uint32_t)magn[i] << zeros) |
165 & 0x7FFFFFFF) >> 23); | 166 & 0x7FFFFFFF) >> 23); |
166 assert(frac < 256); | 167 RTC_DCHECK_LT(frac, 256); |
167 // log2(magn(i)) | 168 // log2(magn(i)) |
168 log2 = (int16_t)(((31 - zeros) << 8) | 169 log2 = (int16_t)(((31 - zeros) << 8) |
169 + WebRtcNsx_kLogTableFrac[frac]); | 170 + WebRtcNsx_kLogTableFrac[frac]); |
170 // log2(magn(i))*log(2) | 171 // log2(magn(i))*log(2) |
171 lmagn[i] = (int16_t)((log2 * log2_const) >> 15); | 172 lmagn[i] = (int16_t)((log2 * log2_const) >> 15); |
172 // + log(2^stages) | 173 // + log(2^stages) |
173 lmagn[i] += logval; | 174 lmagn[i] += logval; |
174 } else { | 175 } else { |
175 lmagn[i] = logval; | 176 lmagn[i] = logval; |
176 } | 177 } |
177 } | 178 } |
178 | 179 |
179 int16x4_t Q3_16x4 = vdup_n_s16(3); | 180 int16x4_t Q3_16x4 = vdup_n_s16(3); |
180 int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); | 181 int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); |
181 int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); | 182 int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); |
182 | 183 |
183 int16_t factor = FACTOR_Q7; | 184 int16_t factor = FACTOR_Q7; |
184 if (inst->blockIndex < END_STARTUP_LONG) | 185 if (inst->blockIndex < END_STARTUP_LONG) |
185 factor = FACTOR_Q7_STARTUP; | 186 factor = FACTOR_Q7_STARTUP; |
186 | 187 |
187 // Loop over simultaneous estimates | 188 // Loop over simultaneous estimates |
188 for (s = 0; s < SIMULT; s++) { | 189 for (s = 0; s < SIMULT; s++) { |
189 offset = s * inst->magnLen; | 190 offset = s * inst->magnLen; |
190 | 191 |
191 // Get counter values from state | 192 // Get counter values from state |
192 counter = inst->noiseEstCounter[s]; | 193 counter = inst->noiseEstCounter[s]; |
193 assert(counter < 201); | 194 RTC_DCHECK_LT(counter, 201); |
194 countDiv = WebRtcNsx_kCounterDiv[counter]; | 195 countDiv = WebRtcNsx_kCounterDiv[counter]; |
195 countProd = (int16_t)(counter * countDiv); | 196 countProd = (int16_t)(counter * countDiv); |
196 | 197 |
197 // quant_est(...) | 198 // quant_est(...) |
198 int16_t deltaBuff[8]; | 199 int16_t deltaBuff[8]; |
199 int16x4_t tmp16x4_0; | 200 int16x4_t tmp16x4_0; |
200 int16x4_t tmp16x4_1; | 201 int16x4_t tmp16x4_1; |
201 int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); | 202 int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); |
202 int16x8_t countProd_16x8 = vdupq_n_s16(countProd); | 203 int16x8_t countProd_16x8 = vdupq_n_s16(countProd); |
203 int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv); | 204 int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv); |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
347 | 348 |
348 for (i = 0; i < inst->magnLen; i++) { | 349 for (i = 0; i < inst->magnLen; i++) { |
349 noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) | 350 noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) |
350 } | 351 } |
351 (*q_noise) = (int16_t)inst->qNoise; | 352 (*q_noise) = (int16_t)inst->qNoise; |
352 } | 353 } |
353 | 354 |
354 // Filter the data in the frequency domain, and create spectrum. | 355 // Filter the data in the frequency domain, and create spectrum. |
355 void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, | 356 void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, |
356 int16_t* freq_buf) { | 357 int16_t* freq_buf) { |
357 assert(inst->magnLen % 8 == 1); | 358 RTC_DCHECK_EQ(1, inst->magnLen % 8); |
358 assert(inst->anaLen2 % 16 == 0); | 359 RTC_DCHECK_EQ(0, inst->anaLen2 % 16); |
359 | 360 |
360 // (1) Filtering. | 361 // (1) Filtering. |
361 | 362 |
362 // Fixed point C code for the next block is as follows: | 363 // Fixed point C code for the next block is as follows: |
363 // for (i = 0; i < inst->magnLen; i++) { | 364 // for (i = 0; i < inst->magnLen; i++) { |
364 // inst->real[i] = (int16_t)((inst->real[i] * | 365 // inst->real[i] = (int16_t)((inst->real[i] * |
365 // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) | 366 // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) |
366 // inst->imag[i] = (int16_t)((inst->imag[i] * | 367 // inst->imag[i] = (int16_t)((inst->imag[i] * |
367 // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) | 368 // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) |
368 // } | 369 // } |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
438 } | 439 } |
439 freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; | 440 freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; |
440 freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; | 441 freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; |
441 } | 442 } |
442 | 443 |
443 // For the noise suppress process, synthesis, read out fully processed segment, | 444 // For the noise suppress process, synthesis, read out fully processed segment, |
444 // and update synthesis buffer. | 445 // and update synthesis buffer. |
445 void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, | 446 void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, |
446 int16_t* out_frame, | 447 int16_t* out_frame, |
447 int16_t gain_factor) { | 448 int16_t gain_factor) { |
448 assert(inst->anaLen % 16 == 0); | 449 RTC_DCHECK_EQ(0, inst->anaLen % 16); |
449 assert(inst->blockLen10ms % 16 == 0); | 450 RTC_DCHECK_EQ(0, inst->blockLen10ms % 16); |
450 | 451 |
451 int16_t* preal_start = inst->real; | 452 int16_t* preal_start = inst->real; |
452 const int16_t* pwindow = inst->window; | 453 const int16_t* pwindow = inst->window; |
453 int16_t* preal_end = preal_start + inst->anaLen; | 454 int16_t* preal_end = preal_start + inst->anaLen; |
454 int16_t* psynthesis_buffer = inst->synthesisBuffer; | 455 int16_t* psynthesis_buffer = inst->synthesisBuffer; |
455 | 456 |
456 while (preal_start < preal_end) { | 457 while (preal_start < preal_end) { |
457 // Loop unroll. | 458 // Loop unroll. |
458 int16x8_t window_0 = vld1q_s16(pwindow); | 459 int16x8_t window_0 = vld1q_s16(pwindow); |
459 int16x8_t real_0 = vld1q_s16(preal_start); | 460 int16x8_t real_0 = vld1q_s16(preal_start); |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
530 int16x8_t zero = vdupq_n_s16(0); | 531 int16x8_t zero = vdupq_n_s16(0); |
531 for (;p_start < p_end; p_start += 8) { | 532 for (;p_start < p_end; p_start += 8) { |
532 vst1q_s16(p_start, zero); | 533 vst1q_s16(p_start, zero); |
533 } | 534 } |
534 } | 535 } |
535 | 536 |
536 // Update analysis buffer for lower band, and window data before FFT. | 537 // Update analysis buffer for lower band, and window data before FFT. |
537 void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, | 538 void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, |
538 int16_t* out, | 539 int16_t* out, |
539 int16_t* new_speech) { | 540 int16_t* new_speech) { |
540 assert(inst->blockLen10ms % 16 == 0); | 541 RTC_DCHECK_EQ(0, inst->blockLen10ms % 16); |
541 assert(inst->anaLen % 16 == 0); | 542 RTC_DCHECK_EQ(0, inst->anaLen % 16); |
542 | 543 |
543 // For lower band update analysis buffer. | 544 // For lower band update analysis buffer. |
544 // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, | 545 // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, |
545 // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); | 546 // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); |
546 int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms; | 547 int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms; |
547 int16_t* p_end_src = inst->analysisBuffer + inst->anaLen; | 548 int16_t* p_end_src = inst->analysisBuffer + inst->anaLen; |
548 int16_t* p_start_dst = inst->analysisBuffer; | 549 int16_t* p_start_dst = inst->analysisBuffer; |
549 while (p_start_src < p_end_src) { | 550 while (p_start_src < p_end_src) { |
550 int16x8_t frame = vld1q_s16(p_start_src); | 551 int16x8_t frame = vld1q_s16(p_start_src); |
551 vst1q_s16(p_start_dst, frame); | 552 vst1q_s16(p_start_dst, frame); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
596 p_start_out += 8; | 597 p_start_out += 8; |
597 } | 598 } |
598 int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); | 599 int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); |
599 int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), | 600 int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), |
600 vget_high_s16(buffer)); | 601 vget_high_s16(buffer)); |
601 | 602 |
602 int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); | 603 int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); |
603 int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); | 604 int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); |
604 vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); | 605 vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); |
605 } | 606 } |
OLD | NEW |