| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 11 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 12 | 12 |
| 13 #include <arm_neon.h> | 13 #include <arm_neon.h> |
| 14 | 14 |
| 15 static inline void DotProductWithScaleNeon(int32_t* cross_correlation, | 15 static inline void DotProductWithScaleNeon(int32_t* cross_correlation, |
| 16 const int16_t* vector1, | 16 const int16_t* vector1, |
| 17 const int16_t* vector2, | 17 const int16_t* vector2, |
| 18 int length, | 18 size_t length, |
| 19 int scaling) { | 19 int scaling) { |
| 20 int i = 0; | 20 size_t i = 0; |
| 21 int len1 = length >> 3; | 21 size_t len1 = length >> 3; |
| 22 int len2 = length & 7; | 22 size_t len2 = length & 7; |
| 23 int64x2_t sum0 = vdupq_n_s64(0); | 23 int64x2_t sum0 = vdupq_n_s64(0); |
| 24 int64x2_t sum1 = vdupq_n_s64(0); | 24 int64x2_t sum1 = vdupq_n_s64(0); |
| 25 | 25 |
| 26 if (length < 0) { | |
| 27 *cross_correlation = 0; | |
| 28 return; | |
| 29 } | |
| 30 | |
| 31 for (i = len1; i > 0; i -= 1) { | 26 for (i = len1; i > 0; i -= 1) { |
| 32 int16x8_t seq1_16x8 = vld1q_s16(vector1); | 27 int16x8_t seq1_16x8 = vld1q_s16(vector1); |
| 33 int16x8_t seq2_16x8 = vld1q_s16(vector2); | 28 int16x8_t seq2_16x8 = vld1q_s16(vector2); |
| 34 #if defined(WEBRTC_ARCH_ARM64) | 29 #if defined(WEBRTC_ARCH_ARM64) |
| 35 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), | 30 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), |
| 36 vget_low_s16(seq2_16x8)); | 31 vget_low_s16(seq2_16x8)); |
| 37 int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8); | 32 int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8); |
| 38 #else | 33 #else |
| 39 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), | 34 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), |
| 40 vget_low_s16(seq2_16x8)); | 35 vget_low_s16(seq2_16x8)); |
| (...skipping 24 matching lines...) |
| 65 sum2 = vadd_s64(sum2, vdup_n_s64(sum_res)); | 60 sum2 = vadd_s64(sum2, vdup_n_s64(sum_res)); |
| 66 sum2 = vshl_s64(sum2, shift); | 61 sum2 = vshl_s64(sum2, shift); |
| 67 vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0); | 62 vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0); |
| 68 #endif | 63 #endif |
| 69 } | 64 } |
| 70 | 65 |
| 71 /* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */ | 66 /* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */ |
| 72 void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, | 67 void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, |
| 73 const int16_t* seq1, | 68 const int16_t* seq1, |
| 74 const int16_t* seq2, | 69 const int16_t* seq2, |
| 75 int16_t dim_seq, | 70 size_t dim_seq, |
| 76 int16_t dim_cross_correlation, | 71 size_t dim_cross_correlation, |
| 77 int right_shifts, | 72 int right_shifts, |
| 78 int step_seq2) { | 73 int step_seq2) { |
| 79 int i = 0; | 74 size_t i = 0; |
| 80 | 75 |
| 81 for (i = 0; i < dim_cross_correlation; i++) { | 76 for (i = 0; i < dim_cross_correlation; i++) { |
| 82 const int16_t* seq1_ptr = seq1; | 77 const int16_t* seq1_ptr = seq1; |
| 83 const int16_t* seq2_ptr = seq2 + (step_seq2 * i); | 78 const int16_t* seq2_ptr = seq2 + (step_seq2 * i); |
| 84 | 79 |
| 85 DotProductWithScaleNeon(cross_correlation, | 80 DotProductWithScaleNeon(cross_correlation, |
| 86 seq1_ptr, | 81 seq1_ptr, |
| 87 seq2_ptr, | 82 seq2_ptr, |
| 88 dim_seq, | 83 dim_seq, |
| 89 right_shifts); | 84 right_shifts); |
| 90 cross_correlation++; | 85 cross_correlation++; |
| 91 } | 86 } |
| 92 } | 87 } |
| OLD | NEW |