Skip to content

Commit c069197

Browse files
ShriramShastrykv2019i
authored andcommitted
Math: Library: Add the hifi4 exponential function
The 32-bit HiFi4 exponential library function has an accuracy of 1e-4 and a unit in the last place (ULP) error of 5.60032793. Its output ranges from 0.0067379470 to 148.4131591026 (Q9.23) for inputs from -5 to +5 (Q4.28). Signed-off-by: ShriramShastry <malladi.sastry@intel.com>
1 parent 75e8f4b commit c069197

7 files changed

Lines changed: 262 additions & 20 deletions

File tree

src/include/sof/math/exp_fcn.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@
1010

1111
#include <stdint.h>
1212

13+
/*
 * Select exactly one sofm_exp_int32() implementation at build time:
 * - SOFM_EXPONENTIAL_HIFI4 when the Xtensa compiler targets a core whose
 *   configuration reports HiFi4 support,
 * - EXPONENTIAL_GENERIC (portable C) in every other case, including an
 *   Xtensa build without HiFi4, so that the function is always provided.
 */
#if defined(__XCC__)
/* HiFi */
#include <xtensa/config/core-isa.h>
#if XCHAL_HAVE_HIFI4 == 1
#define SOFM_EXPONENTIAL_HIFI4 1
#else
/* XCC, but no HiFi4: fall back to the generic implementation */
#define EXPONENTIAL_GENERIC 1
#endif
#else
/* !XCC */
#define EXPONENTIAL_GENERIC 1
#endif
24+
1325
int32_t sofm_exp_int32(int32_t x);
1426

1527
#endif

src/math/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ if(CONFIG_SQRT_FIXED)
1414
add_local_sources(sof sqrt_int16.c)
1515
endif()
1616

17-
if(CONFIG_EXP_FIXED)
18-
add_local_sources(sof exp_fcn.c)
17+
if(CONFIG_MATH_EXP)
18+
add_local_sources(sof exp_fcn.c exp_fcn_hifi4.c)
1919
endif()
2020

2121
if(CONFIG_MATH_DECIBELS)

src/math/Kconfig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ config SQRT_FIXED
3838
to calculate square root.square function having positive number
3939
y as input and return the positive number x multiplied by itself (squared)
4040

41-
config EXP_FIXED
41+
config MATH_EXP
4242
bool "Exponential functions"
4343
default n
4444
help
4545
By selecting this, the 32-bit sofm_exp_int32() function can be used to calculate
4646
exponential values. With a maximum ulp of 5, an exponential function with
47-
an input range of -5 to +5 y gives positive numbers between 0.00673794699908547 and
47+
an input range of -5 to +5 gives positive numbers between 0.00673794699908547 and
4848
148.413159102577. The precision of this function is 1e-4.
4949

5050
config NATURAL_LOGARITHM_FIXED

src/math/exp_fcn.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,21 @@
66
*
77
*/
88

9-
/* Include Files */
109
#include <sof/math/exp_fcn.h>
1110
#include <sof/math/numbers.h>
1211
#include <sof/common.h>
1312
#include <rtos/bit.h>
1413
#include <stdbool.h>
1514
#include <stdint.h>
1615

17-
#define BIT_MASK_Q62P2 0x4000000000000000LL
18-
#define CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884
19-
#define BIT_MASK_LOW_Q27P5 0x8000000
20-
#define QUOTIENT_SCALE BIT(30)
21-
#define TERMS_Q23P9 8388608
22-
#define LSHIFT_BITS 8192
16+
#if defined(EXPONENTIAL_GENERIC)
17+
18+
#define SOFM_BIT_MASK_Q62P2 0x4000000000000000LL
19+
#define SOFM_CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884
20+
#define SOFM_BIT_MASK_LOW_Q27P5 0x8000000
21+
#define SOFM_QUOTIENT_SCALE BIT(30)
22+
#define SOFM_TERMS_Q23P9 8388608
23+
#define SOFM_LSHIFT_BITS 8192
2324

2425
/* inv multiplication lookup table */
2526
/* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */
@@ -152,7 +153,7 @@ static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
152153
uint64_t u64_rlo;
153154

154155
mul_s64(a, b, &u64_rhi, &u64_rlo);
155-
const bool roundup = (u64_rlo & BIT_MASK_LOW_Q27P5) != 0;
156+
const bool roundup = (u64_rlo & SOFM_BIT_MASK_LOW_Q27P5) != 0;
156157

157158
u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0);
158159
return u64_rlo;
@@ -179,8 +180,8 @@ int32_t sofm_exp_int32(int32_t x)
179180
uint64_t ou0Lo;
180181
int64_t qt;
181182
int32_t b_n;
182-
int32_t ts = TERMS_Q23P9; /* Q23.9 */
183-
int64_t dividend = (x + LSHIFT_BITS) >> 14; /* x in Q50.14 */
183+
int32_t ts = SOFM_TERMS_Q23P9; /* Q23.9 */
184+
int64_t dividend = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */
184185
static const int32_t i_emin = -1342177280; /* Q4.28 */
185186
static const int32_t o_emin = 56601; /* Q9.23 */
186187
static const int32_t i_emax = 1342177280; /* Q4.28 */
@@ -195,22 +196,23 @@ int32_t sofm_exp_int32(int32_t x)
195196
return o_emax; /* 148.4131494760513306 in Q9.23 */
196197

197198
/* pre-computation of 1st & 2nd terms */
198-
mul_s64(dividend, BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
199+
mul_s64(dividend, SOFM_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
199200
qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */
200-
ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) >> 18));
201+
ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) >> 18));
201202
dividend = lomul_s64_sr_sat_near(dividend, x);
202203
for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) {
203204
mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo);
204205
qt = (ou0Hi << 45) | (ou0Lo >> 19);
205206

206207
/* sum of the remaining terms */
207-
ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) ? 1 : 0));
208+
ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) ? 1 : 0));
208209
dividend = lomul_s64_sr_sat_near(dividend, x);
209210

210211
qt = ABS(qt);
211-
/* For inputs between -5 and 5, (qt < CONVERG_ERROR) is always true */
212-
if (qt < CONVERG_ERROR)
212+
/* For inputs between -5 and 5, (qt < SOFM_CONVERG_ERROR) is always true */
213+
if (qt < SOFM_CONVERG_ERROR)
213214
break;
214215
}
215216
return ts;
216217
}
218+
#endif

src/math/exp_fcn_hifi4.c

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
/*
3+
*Copyright(c) 2023 Intel Corporation. All rights reserved.
4+
*
5+
* Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6+
*
7+
*/
8+
9+
#include <sof/math/exp_fcn.h>
#include <sof/common.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
13+
14+
#if defined(SOFM_EXPONENTIAL_HIFI4)
15+
#include <xtensa/tie/xt_hifi4.h>
16+
17+
/* Bit 27 of the low product word: rounding bit for the 28-bit right shift */
#define SOFM_BIT_MASK_LOW_Q27P5 0x0000000008000000
/* 2^62, multiplier used when the first series terms are pre-computed */
#define SOFM_BIT_MASK_Q62P2 0x4000000000000000
#define SOFM_CONVERG_ERROR 0x1A36E2EB4000LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
/* 2^34, half of the 35 bits dropped when a term is added to the sum */
#define SOFM_QUOTIENT_SCALE 0x400000000
/* 2^23, initial value of the accumulated sum */
#define SOFM_TERMS_Q23P9 0x800000
/*
 * 2^13, rounding offset added to the input before its 14-bit right shift.
 * Written as a plain signed constant rather than BIT(13): BIT() is commonly
 * defined as an unsigned shift (1UL << n), which would turn the addition
 * with a negative int32_t input into unsigned arithmetic.
 */
#define SOFM_LSHIFT_BITS 0x2000
23+
24+
/*
25+
* Arguments : int64_t in_0
26+
* int64_t in_1
27+
* uint64_t *ptroutbitshi
28+
* uint64_t *ptroutbitslo
29+
* Return Type : void
30+
* Description:Perform element-wise multiplication on in_0 and in_1
31+
* while keeping the required product word length and fractional
32+
* length in mind. mul_s64 function divide the 64-bit quantities
33+
* into two 32-bit words,multiply the low words to produce the
34+
* lowest and second-lowest words in the result, then both pairs
35+
* of low and high words from different numbers to produce the
36+
* second and third lowest words in the result, and finally both
37+
* high words to produce the two highest words in the outcome.
38+
* Add them all up, taking carry into consideration.
39+
*
40+
* The 64 x 64 bit multiplication of operands in_0 and in_1 is
41+
* shown in the image below. The 64-bit operand in_0,in_1 is
42+
* represented by the notation in0_H, in1_H for the top 32 bits
43+
* and in0_L, in1_L for the bottom 32 bits.
44+
*
45+
* in0_H : in0_L
46+
* x in1_H : in1_L
47+
* ---------------------
48+
* P0 in0_L x in1_L
49+
* P1 in0_H x in1_L 64 bit inner multiplication
50+
* P2 in0_L x in1_H 64 bit inner multiplication
51+
* P3 in0_H x in1_H
52+
* --------------------
53+
* [64 x 64 bit multiplication] sum of inner products
54+
* All combinations are multiplied by one another and then added.
55+
* Each inner product is moved into its proper power location. Given the names
56+
* of the inner products, redoing the addition where 000 represents 32 zero
57+
* bits. The inner products can be added together in 64 bit addition. The sum
58+
* of two 64-bit numbers yields a 65-bit output.
59+
* (P0H:P0L)
60+
* P1H(P1L:000)
61+
* P2H(P2L:000)
62+
* P3H:P3L(000:000)
63+
* .......(aaa:P0L)
64+
* By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0.
65+
* The partial result is then added to P2L:000.
66+
* We call it CRY1 because it has the potential to carry again.
67+
* (CRY0 + CRY1)P0H:P0L
68+
* ( P1H)P1L:000
69+
* ( P2H)P2L:000
70+
* (P3H: P3L)000:000
71+
* --------------------
72+
* (ccc:bbb)aaa:P0L
73+
* P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1. This addition will
74+
* not result in an overflow.
75+
*
76+
*/
77+
static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *ptroutbitshi, ae_int64 *ptroutbitslo)
{
	ae_int64 hi_hi, cross_sum, lo_lo;
	ae_int64 upper_word, carry_bits, cross_upper, cross_lower;

	/* Reinterpret each 64-bit operand as a pair of 32-bit halves */
	ae_int32x2 a_halves = AE_MOVINT32X2_FROMINT64(in_0);
	ae_int32x2 b_halves = AE_MOVINT32X2_FROMINT64(in_1);

	/* Extension parts of the 72-bit intermediates, all starting at zero */
	ae_ep ext_low = AE_MOVEA(0);
	ae_ep ext_cross = AE_MOVEA(0);
	ae_ep ext_none = AE_MOVEA(0);

	/* P3: product of the two high halves */
	hi_hi = AE_MUL32_HH(a_halves, b_halves);

	/*
	 * P1 + P2: AE_MULZAAD32USEP.HL.LH is a dual multiply-accumulate of the
	 * unsigned low and signed high 32-bit parts with a 72-bit result. The
	 * lower 64 bits land in cross_sum, the upper eight bits in ext_cross.
	 */
	AE_MULZAAD32USEP_HL_LH(ext_cross, cross_sum, b_halves, a_halves);

	/* P0: product of the two low halves */
	lo_lo = AE_MUL32U_LL(a_halves, b_halves);

	/*
	 * Split the cross-product sum at bit 32: the part above goes into the
	 * high output word, the part below is aligned with the upper half of P0.
	 */
	cross_upper = AE_SRAI72(ext_cross, cross_sum, 32);
	cross_lower = AE_SLAI64(cross_sum, 32);

	/*
	 * 72-bit addition of (ext_low:lo_lo) and (ext_none:cross_lower); the
	 * sum is left in (ext_low:lo_lo), so ext_low receives any carry out of
	 * the low 64 bits.
	 */
	AE_ADD72(ext_low, lo_lo, ext_none, cross_lower);

	/* Bring the carry held above bit 63 down to the bottom of a 64-bit word */
	carry_bits = AE_SRAI72(ext_low, lo_lo, 32);
	carry_bits = AE_SRLI64(carry_bits, 32);

	/* High word = P3 + carry + upper part of the cross products */
	upper_word = AE_ADD64(hi_hi, carry_bits);
	upper_word = AE_ADD64(upper_word, cross_upper);

	*ptroutbitslo = lo_lo;
	*ptroutbitshi = upper_word;
}
117+
118+
/*
119+
* Arguments : int64_t a
120+
* int64_t b
121+
* Return Type : int64_t
122+
*/
123+
static int64_t mul_s64_sr_sat_near(int64_t a, int64_t b)
124+
{
125+
ae_int64 result;
126+
ae_int64 u64_chi;
127+
ae_int64 u64_clo;
128+
ae_int64 temp;
129+
130+
mul_s64(a, b, &u64_chi, &u64_clo);
131+
132+
ae_int64 roundup = AE_AND64(u64_clo, SOFM_SOFM_BIT_MASK_LOW_Q27P5);
133+
134+
roundup = AE_SRLI64(roundup, 27);
135+
temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28));
136+
137+
return AE_ADD64(temp, roundup);
138+
}
139+
140+
/*
 * Reciprocal factorial table used by sofm_exp_int32().
 * Entry i holds approximately 2^63 / (i + 2)!, i.e. 1/2! ... 1/20!.
 */
static ae_int64 onebyfact_Q63[19] = {
	4611686018427387904LL,	/* 1/2!  */
	1537228672809129301LL,	/* 1/3!  */
	384307168202282325LL,	/* 1/4!  */
	76861433640456465LL,	/* 1/5!  */
	12810238940076077LL,	/* 1/6!  */
	1830034134296582LL,	/* 1/7!  */
	228754266787072LL,	/* 1/8!  */
	25417140754119LL,	/* 1/9!  */
	2541714075411LL,	/* 1/10! */
	231064915946LL,		/* 1/11! */
	19255409662LL,		/* 1/12! */
	1481185358LL,		/* 1/13! */
	105798954LL,		/* 1/14! */
	7053264LL,		/* 1/15! */
	440829LL,		/* 1/16! */
	25931LL,		/* 1/17! */
	1441LL,			/* 1/18! */
	76LL,			/* 1/19! */
	4LL			/* 1/20! */
};
161+
162+
/* f(x) = a^x, x is variable and a is base
163+
*
164+
* Arguments : int32_t x(Q4.28)
165+
* input range : -5 to 5
166+
*
167+
* Return Type : int32_t ts(Q9.23)
168+
* output range 0.0067465305 to 148.4131488800
169+
*+------------------+-----------------+--------+--------+
170+
*| x | ts (returntype) | x | ts |
171+
*+----+-----+-------+----+----+-------+--------+--------+
172+
*|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
173+
*+----+-----+-------+----+----+-------+--------+--------+
174+
*| 32 | 28 | 1 | 32 | 23 | 0 | 4.28 | 9.23 |
175+
*+------------------+-----------------+--------+--------+
176+
*/
177+
int32_t sofm_exp_int32(int32_t x)
178+
{
179+
ae_int64 outhi;
180+
ae_int64 outlo;
181+
ae_int64 qt;
182+
ae_int64 onebyfact;
183+
ae_int64 temp;
184+
185+
ae_int64 *ponebyfact_Q63 = &onebyfact_Q63[0];
186+
ae_int64 ts = SOFM_TERMS_Q23P9;
187+
ae_int64 mp = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
188+
xtbool flag;
189+
190+
int64_t qt_temp;
191+
int64_t b_n;
192+
193+
mul_s64(mp, SOFM_BIT_MASK_Q62P2, &outhi, &outlo);
194+
qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18));
195+
196+
temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
197+
198+
ts = AE_ADD64(ts, temp);
199+
200+
mp = mul_s64_sr_sat_near(mp, (int64_t)x);
201+
202+
for (b_n = 0; b_n < 64;) {
203+
AE_L64_IP(onebyfact, ponebyfact_Q63, 8);
204+
205+
mul_s64(mp, onebyfact, &outhi, &outlo);
206+
qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19));
207+
208+
temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
209+
ts = AE_ADD64(ts, temp);
210+
211+
mp = mul_s64_sr_sat_near(mp, (int64_t)x);
212+
213+
const ae_int64 sign = AE_NEG64(qt);
214+
215+
flag = AE_LT64(qt, 0);
216+
AE_MOVT64(qt, sign, flag);
217+
218+
if (!(qt < (ae_int64)SOFM_CONVERG_ERROR))
219+
b_n++;
220+
else
221+
b_n = 64;
222+
}
223+
224+
return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
225+
}
226+
#endif

test/cmocka/src/math/arithmetic/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ cmocka_test(base2_logarithm
1313
cmocka_test(exponential
1414
exponential.c
1515
${PROJECT_SOURCE_DIR}/src/math/exp_fcn.c
16+
${PROJECT_SOURCE_DIR}/src/math/exp_fcn_hifi4.c
1617
)
1718
cmocka_test(square_root
1819
square_root.c

zephyr/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,9 @@ zephyr_library_sources_ifdef(CONFIG_SQRT_FIXED
700700
${SOF_MATH_PATH}/sqrt_int16.c
701701
)
702702

703-
zephyr_library_sources_ifdef(CONFIG_EXP_FIXED
703+
zephyr_library_sources_ifdef(CONFIG_MATH_EXP
704704
${SOF_MATH_PATH}/exp_fcn.c
705+
${SOF_MATH_PATH}/exp_fcn_hifi4.c
705706
)
706707

707708
zephyr_library_sources_ifdef(CONFIG_COMP_UP_DOWN_MIXER

0 commit comments

Comments
 (0)