webrtc/common_audio/fir_filter_neon.cc - Issue 2535643002: Replace some asserts with DCHECKs

Side by Side Diff: webrtc/common_audio/fir_filter_neon.cc

Issue 2535643002: Replace some asserts with DCHECKs (Closed)

Patch Set: "Don't use the enum hack" (created 4 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/common_audio/fir_filter.cc ('k') | webrtc/common_audio/fir_filter_sse.cc » ('j') | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/common_audio/fir_filter_neon.h"	11 #include "webrtc/common_audio/fir_filter_neon.h"

12	12

13 #include <arm_neon.h>	13 #include <arm_neon.h>

14 #include <assert.h>

15 #include <string.h>	14 #include <string.h>

16	15

	16 #include "webrtc/base/checks.h"

17 #include "webrtc/system_wrappers/include/aligned_malloc.h"	17 #include "webrtc/system_wrappers/include/aligned_malloc.h"

18	18

19 namespace webrtc {	19 namespace webrtc {

20	20

21 FIRFilterNEON::FIRFilterNEON(const float* coefficients,	21 FIRFilterNEON::FIRFilterNEON(const float* coefficients,

22 size_t coefficients_length,	22 size_t coefficients_length,

23 size_t max_input_length)	23 size_t max_input_length)

24 : // Closest higher multiple of four.	24 : // Closest higher multiple of four.

25 coefficients_length_((coefficients_length + 3) & ~0x03),	25 coefficients_length_((coefficients_length + 3) & ~0x03),

26 state_length_(coefficients_length_ - 1),	26 state_length_(coefficients_length_ - 1),

27 coefficients_(static_cast<float*>(	27 coefficients_(static_cast<float*>(

28 AlignedMalloc(sizeof(float) * coefficients_length_, 16))),	28 AlignedMalloc(sizeof(float) * coefficients_length_, 16))),

29 state_(static_cast<float*>(	29 state_(static_cast<float*>(

30 AlignedMalloc(sizeof(float) * (max_input_length + state_length_),	30 AlignedMalloc(sizeof(float) * (max_input_length + state_length_),

31 16))) {	31 16))) {

32 // Add zeros at the end of the coefficients.	32 // Add zeros at the end of the coefficients.

33 size_t padding = coefficients_length_ - coefficients_length;	33 size_t padding = coefficients_length_ - coefficients_length;

34 memset(coefficients_.get(), 0.f, padding * sizeof(coefficients_[0]));	34 memset(coefficients_.get(), 0.f, padding * sizeof(coefficients_[0]));

35 // The coefficients are reversed to compensate for the order in which the	35 // The coefficients are reversed to compensate for the order in which the

36 // input samples are acquired (most recent last).	36 // input samples are acquired (most recent last).

37 for (size_t i = 0; i < coefficients_length; ++i) {	37 for (size_t i = 0; i < coefficients_length; ++i) {

38 coefficients_[i + padding] = coefficients[coefficients_length - i - 1];	38 coefficients_[i + padding] = coefficients[coefficients_length - i - 1];

39 }	39 }

40 memset(state_.get(),	40 memset(state_.get(),

41 0.f,	41 0.f,

42 (max_input_length + state_length_) * sizeof(state_[0]));	42 (max_input_length + state_length_) * sizeof(state_[0]));

43 }	43 }

44	44

45 void FIRFilterNEON::Filter(const float* in, size_t length, float* out) {	45 void FIRFilterNEON::Filter(const float* in, size_t length, float* out) {

46 assert(length > 0);	46 RTC_DCHECK_GT(length, 0);

47	47

48 memcpy(&state_[state_length_], in, length * sizeof(*in));	48 memcpy(&state_[state_length_], in, length * sizeof(*in));

49	49

50 // Convolves the input signal |in| with the filter kernel |coefficients_|	50 // Convolves the input signal |in| with the filter kernel |coefficients_|

51 // taking into account the previous state.	51 // taking into account the previous state.

52 for (size_t i = 0; i < length; ++i) {	52 for (size_t i = 0; i < length; ++i) {

53 float* in_ptr = &state_[i];	53 float* in_ptr = &state_[i];

54 float* coef_ptr = coefficients_.get();	54 float* coef_ptr = coefficients_.get();

55	55

56 float32x4_t m_sum = vmovq_n_f32(0);	56 float32x4_t m_sum = vmovq_n_f32(0);

57 float32x4_t m_in;	57 float32x4_t m_in;

58	58

59 for (size_t j = 0; j < coefficients_length_; j += 4) {	59 for (size_t j = 0; j < coefficients_length_; j += 4) {

60 m_in = vld1q_f32(in_ptr + j);	60 m_in = vld1q_f32(in_ptr + j);

61 m_sum = vmlaq_f32(m_sum, m_in, vld1q_f32(coef_ptr + j));	61 m_sum = vmlaq_f32(m_sum, m_in, vld1q_f32(coef_ptr + j));

62 }	62 }

63	63

64 float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum));	64 float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum));

65 out[i] = vget_lane_f32(vpadd_f32(m_half, m_half), 0);	65 out[i] = vget_lane_f32(vpadd_f32(m_half, m_half), 0);

66 }	66 }

67	67

68 // Update current state.	68 // Update current state.

69 memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0]));	69 memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0]));

70 }	70 }

71	71

72 } // namespace webrtc	72 } // namespace webrtc

OLD	NEW