Skip to content

Commit 7c91c4e

Browse files
Audio: aec: optimize acoustic echo cancellation processing
This commit optimizes the performance of the Acoustic Echo Cancellation (AEC) implementation. The changes focus on restructuring loops and memory-copy operations to reduce the number of processing cycles used. Signed-off-by: shastry <malladi.sastry@intel.com>
1 parent 3681e09 commit 7c91c4e

2 files changed

Lines changed: 274 additions & 87 deletions

File tree

src/audio/google/google_rtc_audio_processing.c

Lines changed: 207 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -791,8 +791,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
791791
size_t dst_buf_size;
792792

793793
size_t num_of_bytes_to_process;
794-
size_t channel;
795-
size_t buffer_offset;
796794

797795
struct sof_source *ref_stream, *src_stream;
798796
struct sof_sink *dst_stream;
@@ -822,23 +820,58 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
822820
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
823821
* 16int: linearize buffer, skip channels if > Max
824822
*/
825-
buffer_offset = 0;
826-
for (int i = 0; i < cd->num_frames; i++) {
827-
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
823+
/* Reduce cycle waste by streamlining the inner loop,
824+
* converting from array indexing to pointer arithmetic,
825+
* and putting data copy verification outside the loop.
826+
*/
827+
const int16_t *ref_data_end = ref + cd->num_frames * cd->num_aec_reference_channels;
828+
829+
/* Check that ref is within the valid range of the ref_buf buffer */
830+
if (!ref || ref < (int16_t *)ref_buf_start || ref >= (int16_t *)ref_buf_end) {
831+
/* ref does not point to valid int16_t data,
832+
* return -EINVAL immediately to indicate an invalid argument was passed
833+
*/
834+
return -EINVAL;
835+
}
836+
828837
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
829-
cd->aec_reference_buffer_ptrs[channel][i] =
830-
convert_int16_to_float(ref[channel]);
838+
float **ref_ptr = cd->aec_reference_buffer_ptrs;
839+
int s_chan;
840+
int i;
841+
842+
/* Loop over frames and channels, converting data from int16 to float */
843+
for (i = 0; i < cd->num_frames; ++i) {
844+
for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
845+
(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
846+
ref_ptr++;
847+
}
848+
}
849+
831850
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
832-
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
833-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
851+
int16_t *ref_buf = cd->aec_reference_buffer;
852+
size_t sizeofrefbuffer = sizeof(cd->aec_reference_buffer);
834853

835-
}
854+
/* Use memcpy to copy the data from ref buffer to ref_buf buffer until it reaches
855+
* ref_data_end
856+
* This assumes that the data in the ref buffer is contiguous
857+
*/
858+
size_t num_bytes = (ref_data_end - ref) * sizeof(*ref);
836859

837-
ref += cd->num_aec_reference_channels;
838-
if ((void *)ref >= (void *)ref_buf_end)
839-
ref = (void *)ref_buf_start;
860+
if (num_bytes > sizeofrefbuffer) {
861+
/* Handle the error: the source data is too large to fit in the
862+
* destination buffer
863+
*/
864+
return -EINVAL;
840865
}
841866

867+
memcpy(ref_buf, ref, num_bytes);
868+
869+
/* Update the ref and ref_buf pointers */
870+
ref = ref_data_end;
871+
ref_buf += (ref_data_end - ref);
872+
873+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
874+
842875
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
843876
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
844877
(const float **)
@@ -856,23 +889,64 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
856889
assert(!ret);
857890
src_buf_end = src_buf_start + src_buf_size;
858891

859-
buffer_offset = 0;
860-
for (int i = 0; i < cd->num_frames; i++) {
861-
for (channel = 0; channel < cd->num_capture_channels; channel++)
892+
/* The second optimization eliminates the inner loop
893+
* and replaces it with pointer arithmetic for speedier access.
894+
* To reduce cycle waste, the data copy check is moved outside of the loop.
895+
*/
896+
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
897+
898+
/* Check if the calculated end of the source buffer exceeds the actual end of the buffer */
899+
src_end = (int16_t *)cir_buf_wrap((void *)src_end,
900+
(void *)src_buf_start, (void *)src_buf_end);
901+
862902
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
863-
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
903+
/* Declare a pointer to the process buffer */
904+
float **proc_ptr = cd->process_buffer_ptrs;
905+
906+
/* Check for null pointers and buffer overflows */
907+
if (!src || !proc_ptr || src >= (const int16_t *)src_end)
908+
/* If there's an error, return -EINVAL immediately to indicate an
909+
* invalid argument was passed
910+
*/
911+
return -EINVAL;
912+
913+
/* If there's no error, continue processing */
914+
while (src != (const int16_t *)src_end) {
915+
/* If the source pointer has reached or exceeded the end of the source
916+
* buffer, wrap it back to the start
917+
*/
918+
src = (int16_t *)cir_buf_wrap((void *)src,
919+
(void *)src_buf_start, (void *)src_buf_end);
920+
/* Convert the source data from int16_t to float and store it in the
921+
* process buffer
922+
*/
923+
*proc_ptr++ = convert_int16_to_float(src++);
924+
}
925+
864926
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
865-
cd->process_buffer[buffer_offset++] = src[channel];
866-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
927+
/* Declare a pointer to the process buffer */
928+
int16_t *proc_buf = cd->process_buffer;
867929

868-
/* move pointer to next frame
869-
* number of incoming channels may be < cd->num_capture_channels
930+
/* Check for null pointers and buffer overflows */
931+
if (!src || !proc_buf || src >= (int16_t *)src_end)
932+
/* If there's an error, return -EINVAL immediately to indicate an
933+
* invalid argument was passed
870934
*/
871-
src += cd->config.output_fmt.channels_count;
872-
if ((void *)src >= (void *)src_buf_end)
873-
src = (void *)src_buf_start;
935+
return -EINVAL;
936+
937+
/* If there's no error, continue processing */
938+
while (src != (int16_t *)src_end) {
939+
/* If the source pointer has reached or exceeded the end of the source
940+
* buffer, wrap it back to the start
941+
*/
942+
src = (int16_t *)cir_buf_wrap((void *)src,
943+
(void *)src_buf_start, (void *)src_buf_end);
944+
/* Copy the source data to the process buffer */
945+
*proc_buf++ = *src++;
874946
}
875947

948+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
949+
876950
source_release_data(src_stream, num_of_bytes_to_process);
877951

878952
/* call the library, use same in/out buffers */
@@ -893,26 +967,47 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
893967
dst_buf_end = dst_buf_start + dst_buf_size;
894968

895969
/* process all channels in output stream */
896-
buffer_offset = 0;
897-
for (int i = 0; i < cd->num_frames; i++) {
898-
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
899-
/* set data in processed channels, zeroize not processed */
900-
if (channel < cd->num_capture_channels)
970+
/* Calculate the end of the destination buffer based on the number of frames and
971+
* channels
972+
*/
973+
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
974+
975+
/* Check if the calculated end of the destination buffer exceeds the actual end
976+
* of the buffer
977+
*/
978+
dst_end = (int16_t *)cir_buf_wrap((void *)dst_end,
979+
(void *)dst_buf_start, (void *)dst_buf_end);
980+
901981
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
902-
dst[channel] = convert_float_to_int16(
903-
cd->process_buffer_ptrs[channel][i]);
982+
float **proc_ptr = cd->process_buffer_ptrs;
983+
984+
/* Check for null pointers and buffer overflows */
985+
if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
986+
/* If there's an error, return -EINVAL immediately to indicate an
987+
* invalid argument was passed
988+
*/
989+
return -EINVAL;
990+
991+
/* Convert data from float to int16_t and store it in the destination buffer */
992+
for (; dst != dst_end; ++dst, ++proc_ptr)
993+
*dst = convert_float_to_int16(*proc_ptr);
994+
904995
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
905-
dst[channel] = cd->process_buffer[buffer_offset++];
906-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
907-
else
908-
dst[channel] = 0;
909-
}
996+
int16_t *process_buffer = cd->process_buffer;
910997

911-
dst += cd->config.output_fmt.channels_count;
912-
if ((void *)dst >= (void *)dst_buf_end)
913-
dst = (void *)dst_buf_start;
914-
}
998+
/* Check for null pointers and buffer overflows */
999+
if (!dst || !process_buffer || dst >= dst_end ||
1000+
process_buffer >= process_buffer + cd->num_frames)
1001+
/* If there's an error, return -EINVAL immediately to indicate an
1002+
* invalid argument was passed
1003+
*/
1004+
return -EINVAL;
9151005

1006+
/* Copy the data from the process buffer to the destination buffer */
1007+
for (; dst != dst_end; ++dst, ++process_buffer)
1008+
*dst = *process_buffer;
1009+
1010+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
9161011
sink_commit_buffer(dst_stream, num_of_bytes_to_process);
9171012

9181013
return 0;
@@ -928,6 +1023,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9281023
int16_t *src, *dst, *ref;
9291024
uint32_t num_aec_reference_frames;
9301025
uint32_t num_aec_reference_bytes;
1026+
int ref_channels;
1027+
int aec_ref_product;
9311028
int num_samples_remaining;
9321029
int num_frames_remaining;
9331030
int channel;
@@ -950,25 +1047,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9501047
ref_stream = ref_streamb->data;
9511048
ref = audio_stream_get_rptr(ref_stream);
9521049

1050+
/* Pre-calculate the number of channels in the reference stream for efficiency */
1051+
ref_channels = audio_stream_get_channels(ref_stream);
1052+
1053+
/* Pre-calculate the product of the number of AEC reference channels and the AEC
1054+
* reference frame index
1055+
*/
1056+
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1057+
9531058
num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
9541059
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
9551060

956-
num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
1061+
num_samples_remaining = num_aec_reference_frames * ref_channels;
9571062
while (num_samples_remaining) {
9581063
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
9591064
n = MIN(num_samples_remaining, nmax);
9601065
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
961-
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1066+
j = aec_ref_product;
9621067
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
9631068
cd->aec_reference_buffer[j++] = ref[channel];
964-
965-
ref += audio_stream_get_channels(ref_stream);
1069+
ref += ref_channels;
9661070
++cd->aec_reference_frame_index;
967-
9681071
if (cd->aec_reference_frame_index == cd->num_frames) {
9691072
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
970-
cd->aec_reference_buffer);
1073+
cd->aec_reference_buffer);
9711074
cd->aec_reference_frame_index = 0;
1075+
/* Reset the product as the frame index is reset */
1076+
aec_ref_product = 0;
9721077
}
9731078
}
9741079
num_samples_remaining -= n;
@@ -984,6 +1089,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9841089
src = audio_stream_get_rptr(mic_stream);
9851090
dst = audio_stream_get_wptr(out_stream);
9861091

1092+
/* Move out of loop */
1093+
int mic_stream_channels = audio_stream_get_channels(mic_stream);
9871094
frames = input_buffers[cd->raw_microphone_source].size;
9881095
num_frames_remaining = frames;
9891096

@@ -993,34 +1100,66 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9931100
nmax = audio_stream_frames_without_wrap(out_stream, dst);
9941101
n = MIN(n, nmax);
9951102
for (i = 0; i < n; i++) {
996-
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
997-
cd->num_capture_channels]),
998-
cd->num_frames * cd->num_capture_channels *
999-
sizeof(cd->raw_mic_buffer[0]), src,
1000-
sizeof(int16_t) * cd->num_capture_channels);
1001-
++cd->raw_mic_buffer_frame_index;
1002-
1003-
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1004-
sizeof(cd->output_buffer[0]),
1005-
&(cd->output_buffer[cd->output_buffer_frame_index *
1006-
cd->num_capture_channels]),
1007-
sizeof(int16_t) * cd->num_capture_channels);
1008-
++cd->output_buffer_frame_index;
1009-
1010-
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1011-
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1012-
cd->raw_mic_buffer,
1013-
cd->output_buffer);
1014-
cd->output_buffer_frame_index = 0;
1015-
cd->raw_mic_buffer_frame_index = 0;
1103+
/* If we haven't filled the buffer yet, copy the data */
1104+
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
1105+
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
1106+
size_t buffer_size = sizeof(cd->raw_mic_buffer);
1107+
size_t frame_index = cd->raw_mic_buffer_frame_index;
1108+
size_t buffer_used = frame_index * sizeof(int16_t);
1109+
size_t buffer_remaining = buffer_size - buffer_used;
1110+
1111+
if (num_bytes <= buffer_remaining) {
1112+
int16_t *buffer_start = cd->raw_mic_buffer;
1113+
size_t offset = frame_index * cd->num_capture_channels;
1114+
1115+
buffer_start += offset;
1116+
memcpy(buffer_start, src, num_bytes);
1117+
++cd->raw_mic_buffer_frame_index;
1118+
} else {
1119+
/* The source data is too big to fit in the
1120+
* destination buffer.
1121+
*/
1122+
return -EINVAL;
1123+
}
10161124
}
10171125

1018-
src += audio_stream_get_channels(mic_stream);
1019-
dst += audio_stream_get_channels(out_stream);
1126+
if (cd->output_buffer_frame_index < cd->num_frames) {
1127+
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
1128+
size_t buffer_size = sizeof(cd->output_buffer);
1129+
size_t frame_index = cd->output_buffer_frame_index;
1130+
size_t buffer_used = frame_index * sizeof(int16_t);
1131+
size_t buffer_remaining = buffer_size - buffer_used;
1132+
1133+
if (num_bytes <= buffer_remaining) {
1134+
int16_t *output_start = cd->output_buffer;
1135+
size_t offset = frame_index * cd->num_capture_channels;
1136+
1137+
output_start += offset;
1138+
memcpy(dst, output_start, num_bytes);
1139+
++cd->output_buffer_frame_index;
1140+
} else {
1141+
/* The source data is too big to fit in the
1142+
* destination buffer.
1143+
*/
1144+
return -EINVAL;
1145+
}
1146+
}
1147+
1148+
src += mic_stream_channels;
1149+
dst += mic_stream_channels;
10201150
}
10211151
num_frames_remaining -= n;
10221152
src = audio_stream_wrap(mic_stream, src);
10231153
dst = audio_stream_wrap(out_stream, dst);
1154+
1155+
/* If we've filled the buffer, process the data */
1156+
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1157+
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1158+
cd->raw_mic_buffer,
1159+
cd->output_buffer);
1160+
cd->output_buffer_frame_index = 0;
1161+
cd->raw_mic_buffer_frame_index = 0;
1162+
}
10241163
}
10251164

10261165
module_update_buffer_position(&input_buffers[cd->raw_microphone_source],

0 commit comments

Comments
 (0)