OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h" | 11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h" |
12 | 12 |
13 #include <arm_neon.h> | 13 #include <arm_neon.h> |
14 #include <assert.h> | |
15 | 14 |
| 15 #include "webrtc/base/checks.h" |
16 #include "webrtc/common_audio/signal_processing/include/real_fft.h" | 16 #include "webrtc/common_audio/signal_processing/include/real_fft.h" |
17 | 17 |
18 // TODO(kma): Re-write the corresponding assembly file, the offset | 18 // TODO(kma): Re-write the corresponding assembly file, the offset |
19 // generating script and makefile, to replace these C functions. | 19 // generating script and makefile, to replace these C functions. |
20 | 20 |
21 static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { | 21 static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { |
22 #if defined(WEBRTC_ARCH_ARM64) | 22 #if defined(WEBRTC_ARCH_ARM64) |
23 *(ptr) = vaddvq_u32(v); | 23 *(ptr) = vaddvq_u32(v); |
24 #else | 24 #else |
25 uint32x2_t tmp_v; | 25 uint32x2_t tmp_v; |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
97 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], | 97 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], |
98 far_spectrum[PART_LEN]); | 98 far_spectrum[PART_LEN]); |
99 *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; | 99 *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; |
100 *far_energy += (uint32_t)far_spectrum[PART_LEN]; | 100 *far_energy += (uint32_t)far_spectrum[PART_LEN]; |
101 *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; | 101 *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; |
102 } | 102 } |
103 | 103 |
104 void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, | 104 void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, |
105 const uint16_t* far_spectrum, | 105 const uint16_t* far_spectrum, |
106 int32_t* echo_est) { | 106 int32_t* echo_est) { |
107 assert((uintptr_t)echo_est % 32 == 0); | 107 RTC_DCHECK_EQ(0u, (uintptr_t)echo_est % 32); |
108 assert((uintptr_t)(aecm->channelStored) % 16 == 0); | 108 RTC_DCHECK_EQ(0u, (uintptr_t)aecm->channelStored % 16); |
109 assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); | 109 RTC_DCHECK_EQ(0u, (uintptr_t)aecm->channelAdapt16 % 16); |
110 | 110 |
111 // This is the C equivalent of the optimized code that follows. | 111 // This is the C equivalent of the optimized code that follows. |
112 // During startup we store the channel every block. | 112 // During startup we store the channel every block. |
113 // memcpy(aecm->channelStored, | 113 // memcpy(aecm->channelStored, |
114 // aecm->channelAdapt16, | 114 // aecm->channelAdapt16, |
115 // sizeof(int16_t) * PART_LEN1); | 115 // sizeof(int16_t) * PART_LEN1); |
116 // Recalculate echo estimate | 116 // Recalculate echo estimate |
117 // for (i = 0; i < PART_LEN; i += 4) { | 117 // for (i = 0; i < PART_LEN; i += 4) { |
118 // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], | 118 // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], |
119 // far_spectrum[i]); | 119 // far_spectrum[i]); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
154 start_adapt_p += 8; | 154 start_adapt_p += 8; |
155 start_stored_p += 8; | 155 start_stored_p += 8; |
156 echo_est_p += 8; | 156 echo_est_p += 8; |
157 } | 157 } |
158 aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; | 158 aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; |
159 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], | 159 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], |
160 far_spectrum[PART_LEN]); | 160 far_spectrum[PART_LEN]); |
161 } | 161 } |
162 | 162 |
163 void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { | 163 void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { |
164 assert((uintptr_t)(aecm->channelStored) % 16 == 0); | 164 RTC_DCHECK_EQ(0u, (uintptr_t)aecm->channelStored % 16); |
165 assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); | 165 RTC_DCHECK_EQ(0u, (uintptr_t)aecm->channelAdapt16 % 16); |
166 assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0); | 166 RTC_DCHECK_EQ(0u, (uintptr_t)aecm->channelAdapt32 % 32); |
167 | 167 |
168 // The C equivalent of the optimized code that follows. | 168 // The C equivalent of the optimized code that follows. |
169 // for (i = 0; i < PART_LEN1; i++) { | 169 // for (i = 0; i < PART_LEN1; i++) { |
170 // aecm->channelAdapt16[i] = aecm->channelStored[i]; | 170 // aecm->channelAdapt16[i] = aecm->channelStored[i]; |
171 // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( | 171 // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( |
172 // (int32_t)aecm->channelStored[i], 16); | 172 // (int32_t)aecm->channelStored[i], 16); |
173 // } | 173 // } |
174 | 174 |
175 int16_t* start_stored_p = aecm->channelStored; | 175 int16_t* start_stored_p = aecm->channelStored; |
176 int16_t* start_adapt16_p = aecm->channelAdapt16; | 176 int16_t* start_adapt16_p = aecm->channelAdapt16; |
(...skipping 13 matching lines...) Expand all Loading... |
190 vst1q_s32(start_adapt32_p, adapt32_v_low); | 190 vst1q_s32(start_adapt32_p, adapt32_v_low); |
191 vst1q_s32(start_adapt32_p + 4, adapt32_v_high); | 191 vst1q_s32(start_adapt32_p + 4, adapt32_v_high); |
192 | 192 |
193 start_stored_p += 8; | 193 start_stored_p += 8; |
194 start_adapt16_p += 8; | 194 start_adapt16_p += 8; |
195 start_adapt32_p += 8; | 195 start_adapt32_p += 8; |
196 } | 196 } |
197 aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; | 197 aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; |
198 aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; | 198 aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; |
199 } | 199 } |
OLD | NEW |