Optimized EstCodeLpcCoef() for iSAC using intrinsics on the Android NEON platform.
Cycles of the whole iSAC codec were reduced by 7.9%, as measured by an offline file test using the time() function. Bit exact. ** Code style cleanup is not considered in this CL. ** Review URL: https://webrtc-codereview.appspot.com/1069004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3643 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
5d3ced5df0
commit
23da8622c0
@ -27,6 +27,40 @@
|
|||||||
#include "settings.h"
|
#include "settings.h"
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enumerations for arguments to functions WebRtcIsacfix_MatrixProduct1()
|
||||||
|
* and WebRtcIsacfix_MatrixProduct2().
|
||||||
|
*/
|
||||||
|
|
||||||
|
enum matrix_index_factor {
|
||||||
|
kTIndexFactor1 = 1,
|
||||||
|
kTIndexFactor2 = 2,
|
||||||
|
kTIndexFactor3 = SUBFRAMES,
|
||||||
|
kTIndexFactor4 = LPC_SHAPE_ORDER
|
||||||
|
};
|
||||||
|
|
||||||
|
enum matrix_index_step {
|
||||||
|
kTIndexStep1 = 1,
|
||||||
|
kTIndexStep2 = SUBFRAMES,
|
||||||
|
kTIndexStep3 = LPC_SHAPE_ORDER
|
||||||
|
};
|
||||||
|
|
||||||
|
enum matrixprod_loop_count {
|
||||||
|
kTLoopCount1 = SUBFRAMES,
|
||||||
|
kTLoopCount2 = 2,
|
||||||
|
kTLoopCount3 = LPC_SHAPE_ORDER
|
||||||
|
};
|
||||||
|
|
||||||
|
enum matrix1_shift_value {
|
||||||
|
kTMatrix1_shift0 = 0,
|
||||||
|
kTMatrix1_shift1 = 1,
|
||||||
|
kTMatrix1_shift5 = 5
|
||||||
|
};
|
||||||
|
|
||||||
|
enum matrixprod_init_case {
|
||||||
|
kTInitCase0 = 0,
|
||||||
|
kTInitCase1 = 1
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
This function implements the fixed-point function corresponding to lrint.
|
This function implements the fixed-point function corresponding to lrint.
|
||||||
@ -775,6 +809,115 @@ static void Lar2polyFix(WebRtc_Word32 *larsQ17,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function WebRtcIsacfix_MatrixProduct1C() does one form of matrix multiplication.
|
||||||
|
It first shifts input data of one matrix, determines the right indexes for the
|
||||||
|
two matrixes, multiply them, and write the results into an output buffer.
|
||||||
|
|
||||||
|
Note that two factors (or, multipliers) determine the initialization values of
|
||||||
|
the variable |matrix1_index| in the code. The relationship is
|
||||||
|
|matrix1_index| = |matrix1_index_factor1| * |matrix1_index_factor2|, where
|
||||||
|
|matrix1_index_factor1| is given by the argument while |matrix1_index_factor2|
|
||||||
|
is determined by the value of argument |matrix1_index_init_case|;
|
||||||
|
|matrix1_index_factor2| is the value of the outmost loop counter j (when
|
||||||
|
|matrix1_index_init_case| is 0), or the value of the middle loop counter k (when
|
||||||
|
|matrix1_index_init_case| is non-zero).
|
||||||
|
|
||||||
|
|matrix0_index| is determined the same way.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
matrix0[]: matrix0 data in Q15 domain.
|
||||||
|
matrix1[]: matrix1 data.
|
||||||
|
matrix_product[]: output data (matrix product).
|
||||||
|
matrix1_index_factor1: The first of two factors determining the
|
||||||
|
initialization value of matrix1_index.
|
||||||
|
matrix0_index_factor1: The first of two factors determining the
|
||||||
|
initialization value of matrix0_index.
|
||||||
|
matrix1_index_init_case: Case number for selecting the second of two
|
||||||
|
factors determining the initialization value
|
||||||
|
of matrix1_index and matrix0_index.
|
||||||
|
matrix1_index_step: Incremental step for matrix1_index.
|
||||||
|
matrix0_index_step: Incremental step for matrix0_index.
|
||||||
|
inner_loop_count: Maximum count of the inner loop.
|
||||||
|
mid_loop_count: Maximum count of the intermediate loop.
|
||||||
|
shift: Left shift value for matrix1.
|
||||||
|
*/
|
||||||
|
void WebRtcIsacfix_MatrixProduct1C(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix1_index_factor1,
|
||||||
|
const int matrix0_index_factor1,
|
||||||
|
const int matrix1_index_init_case,
|
||||||
|
const int matrix1_index_step,
|
||||||
|
const int matrix0_index_step,
|
||||||
|
const int inner_loop_count,
|
||||||
|
const int mid_loop_count,
|
||||||
|
const int shift) {
|
||||||
|
int j = 0, k = 0, n = 0;
|
||||||
|
int matrix0_index = 0, matrix1_index = 0, matrix_prod_index = 0;
|
||||||
|
int* matrix0_index_factor2 = &k;
|
||||||
|
int* matrix1_index_factor2 = &j;
|
||||||
|
if (matrix1_index_init_case != 0) {
|
||||||
|
matrix0_index_factor2 = &j;
|
||||||
|
matrix1_index_factor2 = &k;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
matrix_prod_index = mid_loop_count * j;
|
||||||
|
for (k = 0; k < mid_loop_count; k++) {
|
||||||
|
int32_t sum32 = 0;
|
||||||
|
matrix0_index = matrix0_index_factor1 * (*matrix0_index_factor2);
|
||||||
|
matrix1_index = matrix1_index_factor1 * (*matrix1_index_factor2);
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
|
||||||
|
matrix1[matrix1_index] << shift));
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
}
|
||||||
|
matrix_product[matrix_prod_index] = sum32;
|
||||||
|
matrix_prod_index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function WebRtcIsacfix_MatrixProduct2C() returns the product of two matrixes,
|
||||||
|
one of which has two columns. It first has to determine the correct index of
|
||||||
|
the first matrix before doing the actual element multiplication.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
matrix0[]: A matrix in Q15 domain.
|
||||||
|
matrix1[]: A matrix in Q21 domain.
|
||||||
|
matrix_product[]: Output data in Q17 domain.
|
||||||
|
matrix0_index_factor: A factor determining the initialization value
|
||||||
|
of matrix0_index.
|
||||||
|
matrix0_index_step: Incremental step for matrix0_index.
|
||||||
|
*/
|
||||||
|
void WebRtcIsacfix_MatrixProduct2C(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix0_index_factor,
|
||||||
|
const int matrix0_index_step) {
|
||||||
|
int j = 0, n = 0;
|
||||||
|
int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
int32_t sum32 = 0, sum32_2 = 0;
|
||||||
|
matrix1_index = 0;
|
||||||
|
matrix0_index = matrix0_index_factor * j;
|
||||||
|
for (n = SUBFRAMES; n > 0; n--) {
|
||||||
|
sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
|
||||||
|
matrix1[matrix1_index]));
|
||||||
|
sum32_2 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
|
||||||
|
matrix1[matrix1_index + 1]));
|
||||||
|
matrix1_index += 2;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
matrix_product[matrix_prod_index] = sum32 >> 3;
|
||||||
|
matrix_product[matrix_prod_index + 1] = sum32_2 >> 3;
|
||||||
|
matrix_prod_index += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int WebRtcIsacfix_DecodeLpc(WebRtc_Word32 *gain_lo_hiQ17,
|
int WebRtcIsacfix_DecodeLpc(WebRtc_Word32 *gain_lo_hiQ17,
|
||||||
WebRtc_Word16 *LPCCoef_loQ15,
|
WebRtc_Word16 *LPCCoef_loQ15,
|
||||||
WebRtc_Word16 *LPCCoef_hiQ15,
|
WebRtc_Word16 *LPCCoef_hiQ15,
|
||||||
@ -801,7 +944,7 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
|
|||||||
{
|
{
|
||||||
int j, k, n;
|
int j, k, n;
|
||||||
int err;
|
int err;
|
||||||
WebRtc_Word16 pos, pos2, posg, poss, offsg, offss, offs2;
|
WebRtc_Word16 pos, pos2, posg, poss;
|
||||||
WebRtc_Word16 gainpos;
|
WebRtc_Word16 gainpos;
|
||||||
WebRtc_Word16 model;
|
WebRtc_Word16 model;
|
||||||
WebRtc_Word16 index_QQ[KLT_ORDER_SHAPE];
|
WebRtc_Word16 index_QQ[KLT_ORDER_SHAPE];
|
||||||
@ -842,31 +985,17 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
|
|||||||
/* inverse KLT */
|
/* inverse KLT */
|
||||||
|
|
||||||
/* left transform */ // Transpose matrix!
|
/* left transform */ // Transpose matrix!
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1GainQ15[model], tmpcoeffs_gQ17,
|
||||||
offss = 0;
|
tmpcoeffs2_gQ21, kTIndexFactor2, kTIndexFactor2,
|
||||||
posg = 0;
|
kTInitCase0, kTIndexStep1, kTIndexStep1,
|
||||||
|
kTLoopCount2, kTLoopCount2, kTMatrix1_shift5);
|
||||||
|
|
||||||
poss = 0;
|
poss = 0;
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
for (j=0; j<SUBFRAMES; j++) {
|
||||||
offs2 = 0;
|
|
||||||
for (k=0; k<2; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = offsg;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<2; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[model][pos2], tmpcoeffs_gQ17[pos]<<5)); // (Q15*Q17)>>(16-5) = Q21
|
|
||||||
pos++;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs2_gQ21[posg] = sumQQ; //Q21
|
|
||||||
posg++;
|
|
||||||
offs2 += 2;
|
|
||||||
}
|
|
||||||
offs2 = 0;
|
|
||||||
|
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
||||||
sumQQ = 0;
|
sumQQ = 0;
|
||||||
pos = offss;
|
pos = LPC_SHAPE_ORDER * j;
|
||||||
pos2 = offs2;
|
pos2 = LPC_SHAPE_ORDER * k;
|
||||||
for (n=0; n<LPC_SHAPE_ORDER; n++) {
|
for (n=0; n<LPC_SHAPE_ORDER; n++) {
|
||||||
sumQQ += WEBRTC_SPL_MUL_16_16_RSFT(tmpcoeffs_sQ10[pos], WebRtcIsacfix_kT1ShapeQ15[model][pos2], 7); // (Q10*Q15)>>7 = Q18
|
sumQQ += WEBRTC_SPL_MUL_16_16_RSFT(tmpcoeffs_sQ10[pos], WebRtcIsacfix_kT1ShapeQ15[model][pos2], 7); // (Q10*Q15)>>7 = Q18
|
||||||
pos++;
|
pos++;
|
||||||
@ -874,48 +1003,16 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
|
|||||||
}
|
}
|
||||||
tmpcoeffs2_sQ18[poss] = sumQQ; //Q18
|
tmpcoeffs2_sQ18[poss] = sumQQ; //Q18
|
||||||
poss++;
|
poss++;
|
||||||
offs2 += LPC_SHAPE_ORDER;
|
|
||||||
}
|
}
|
||||||
offsg += 2;
|
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* right transform */ // Transpose matrix
|
/* right transform */ // Transpose matrix
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
|
||||||
offss = 0;
|
tmpcoeffs_gQ17, kTIndexFactor1, kTIndexStep2);
|
||||||
posg = 0;
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[model],
|
||||||
poss = 0;
|
tmpcoeffs2_sQ18, tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor1,
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
kTInitCase1, kTIndexStep3, kTIndexStep2, kTLoopCount1, kTLoopCount3,
|
||||||
posg = offsg;
|
kTMatrix1_shift0);
|
||||||
for (k=0; k<2; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = j;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[model][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
|
|
||||||
pos += 2;
|
|
||||||
pos2 += SUBFRAMES;
|
|
||||||
|
|
||||||
}
|
|
||||||
tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
|
|
||||||
posg++;
|
|
||||||
}
|
|
||||||
poss = offss;
|
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = j;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[model][pos2], tmpcoeffs2_sQ18[pos])); // (Q15*Q18)>>16 = Q17
|
|
||||||
pos += LPC_SHAPE_ORDER;
|
|
||||||
pos2 += SUBFRAMES;
|
|
||||||
}
|
|
||||||
tmpcoeffs_sQ17[poss] = sumQQ;
|
|
||||||
poss++;
|
|
||||||
}
|
|
||||||
offsg += 2;
|
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* scaling, mean addition, and gain restoration */
|
/* scaling, mean addition, and gain restoration */
|
||||||
gainpos = 0;
|
gainpos = 0;
|
||||||
@ -968,7 +1065,7 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
|
|||||||
transcode_obj *transcodingParam) {
|
transcode_obj *transcodingParam) {
|
||||||
int j, k, n;
|
int j, k, n;
|
||||||
WebRtc_Word16 posQQ, pos2QQ, gainpos;
|
WebRtc_Word16 posQQ, pos2QQ, gainpos;
|
||||||
WebRtc_Word16 pos, pos2, poss, posg, offsg, offss, offs2;
|
WebRtc_Word16 pos, poss, posg, offsg;
|
||||||
WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN], index_sQQ[KLT_ORDER_SHAPE];
|
WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN], index_sQQ[KLT_ORDER_SHAPE];
|
||||||
WebRtc_Word16 index_ovr_gQQ[KLT_ORDER_GAIN], index_ovr_sQQ[KLT_ORDER_SHAPE];
|
WebRtc_Word16 index_ovr_gQQ[KLT_ORDER_GAIN], index_ovr_sQQ[KLT_ORDER_SHAPE];
|
||||||
WebRtc_Word32 BitsQQ;
|
WebRtc_Word32 BitsQQ;
|
||||||
@ -1034,73 +1131,38 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
|
|||||||
|
|
||||||
/* left transform */
|
/* left transform */
|
||||||
offsg = 0;
|
offsg = 0;
|
||||||
offss = 0;
|
posg = 0;
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
for (j=0; j<SUBFRAMES; j++) {
|
||||||
posg = offsg;
|
// Q21 = Q6 * Q15
|
||||||
for (k=0; k<2; k++) {
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
|
||||||
sumQQ = 0;
|
WebRtcIsacfix_kT1GainQ15[0][0]);
|
||||||
pos = offsg;
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
|
||||||
pos2 = k;
|
WebRtcIsacfix_kT1GainQ15[0][2]);
|
||||||
for (n=0; n<2; n++) {
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
|
posg++;
|
||||||
pos++;
|
|
||||||
pos2 += 2;
|
// Q21 = Q6 * Q15
|
||||||
}
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
|
||||||
tmpcoeffs2_gQ21[posg] = sumQQ;
|
WebRtcIsacfix_kT1GainQ15[0][1]);
|
||||||
posg++;
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
|
||||||
}
|
WebRtcIsacfix_kT1GainQ15[0][3]);
|
||||||
poss = offss;
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
posg++;
|
||||||
sumQQ = 0;
|
|
||||||
pos = offss;
|
|
||||||
pos2 = k;
|
|
||||||
for (n=0; n<LPC_SHAPE_ORDER; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1ShapeQ15[0][pos2], tmpcoeffs_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
|
|
||||||
pos++;
|
|
||||||
pos2 += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
tmpcoeffs2_sQ17[poss] = sumQQ; //Q17
|
|
||||||
poss++;
|
|
||||||
}
|
|
||||||
offsg += 2;
|
offsg += 2;
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1ShapeQ15[0], tmpcoeffs_sQ17,
|
||||||
|
tmpcoeffs2_sQ17, kTIndexFactor4, kTIndexFactor1, kTInitCase0,
|
||||||
|
kTIndexStep1, kTIndexStep3, kTLoopCount3, kTLoopCount3, kTMatrix1_shift1);
|
||||||
|
|
||||||
/* right transform */
|
/* right transform */
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
|
||||||
offss = 0;
|
tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
|
||||||
offs2 = 0;
|
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[0], tmpcoeffs2_sQ17,
|
||||||
posg = offsg;
|
tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor3, kTInitCase1, kTIndexStep3,
|
||||||
for (k=0; k<2; k++) {
|
kTIndexStep1, kTLoopCount1, kTLoopCount3, kTMatrix1_shift1);
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
|
|
||||||
pos += 2;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
|
|
||||||
posg++;
|
|
||||||
}
|
|
||||||
poss = offss;
|
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[0][pos2], tmpcoeffs2_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
|
|
||||||
pos += LPC_SHAPE_ORDER;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs_sQ17[poss] = sumQQ;
|
|
||||||
poss++;
|
|
||||||
}
|
|
||||||
offs2 += SUBFRAMES;
|
|
||||||
offsg += 2;
|
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* quantize coefficients */
|
/* quantize coefficients */
|
||||||
|
|
||||||
@ -1191,47 +1253,14 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
|
|||||||
/* inverse KLT */
|
/* inverse KLT */
|
||||||
|
|
||||||
/* left transform */ // Transpose matrix!
|
/* left transform */ // Transpose matrix!
|
||||||
offss = 0;
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1ShapeQ15[0], tmpcoeffs_sQ17,
|
||||||
poss = 0;
|
tmpcoeffs2_sQ17, kTIndexFactor4, kTIndexFactor4, kTInitCase0,
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
kTIndexStep1, kTIndexStep1, kTLoopCount3, kTLoopCount3, kTMatrix1_shift1);
|
||||||
offs2 = 0;
|
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = offss;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<LPC_SHAPE_ORDER; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1ShapeQ15[0][pos2], tmpcoeffs_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
|
|
||||||
pos++;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs2_sQ17[poss] = sumQQ;
|
|
||||||
|
|
||||||
poss++;
|
|
||||||
offs2 += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* right transform */ // Transpose matrix
|
/* right transform */ // Transpose matrix
|
||||||
offss = 0;
|
WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[0], tmpcoeffs2_sQ17,
|
||||||
poss = 0;
|
tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor1, kTInitCase1, kTIndexStep3,
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
kTIndexStep2, kTLoopCount1, kTLoopCount3, kTMatrix1_shift1);
|
||||||
poss = offss;
|
|
||||||
for (k=0; k<LPC_SHAPE_ORDER; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = j;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[0][pos2], tmpcoeffs2_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
|
|
||||||
pos += LPC_SHAPE_ORDER;
|
|
||||||
pos2 += SUBFRAMES;
|
|
||||||
}
|
|
||||||
tmpcoeffs_sQ17[poss] = sumQQ;
|
|
||||||
poss++;
|
|
||||||
}
|
|
||||||
offss += LPC_SHAPE_ORDER;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* scaling, mean addition, and gain restoration */
|
/* scaling, mean addition, and gain restoration */
|
||||||
poss = 0;pos=0;
|
poss = 0;pos=0;
|
||||||
@ -1266,42 +1295,26 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
|
|||||||
offsg = 0;
|
offsg = 0;
|
||||||
posg = 0;
|
posg = 0;
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
for (j=0; j<SUBFRAMES; j++) {
|
||||||
offs2 = 0;
|
// (Q15 * Q17) >> (16 - 1) = Q17; Q17 << 4 = Q21.
|
||||||
for (k=0; k<2; k++) {
|
sumQQ = (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][0],
|
||||||
sumQQ = 0;
|
tmpcoeffs_gQ17[offsg]) << 1);
|
||||||
pos = offsg;
|
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][1],
|
||||||
pos2 = offs2;
|
tmpcoeffs_gQ17[offsg + 1]) << 1);
|
||||||
for (n=0; n<2; n++) {
|
tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4);
|
||||||
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][pos2], tmpcoeffs_gQ17[pos])<<1); // (Q15*Q17)>>(16-1) = Q17
|
posg++;
|
||||||
pos++;
|
|
||||||
pos2++;
|
sumQQ = (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][2],
|
||||||
}
|
tmpcoeffs_gQ17[offsg]) << 1);
|
||||||
tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4); //Q17<<4 = Q21
|
sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][3],
|
||||||
posg++;
|
tmpcoeffs_gQ17[offsg + 1]) << 1);
|
||||||
offs2 += 2;
|
tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4);
|
||||||
}
|
posg++;
|
||||||
offsg += 2;
|
offsg += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* right transform */ // Transpose matrix
|
/* right transform */ // Transpose matrix
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
|
||||||
posg = 0;
|
tmpcoeffs_gQ17, kTIndexFactor1, kTIndexStep2);
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
|
||||||
posg = offsg;
|
|
||||||
for (k=0; k<2; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = j;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
|
|
||||||
pos += 2;
|
|
||||||
pos2 += SUBFRAMES;
|
|
||||||
}
|
|
||||||
tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
|
|
||||||
posg++;
|
|
||||||
}
|
|
||||||
offsg += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* scaling, mean addition, and gain restoration */
|
/* scaling, mean addition, and gain restoration */
|
||||||
posg = 0;
|
posg = 0;
|
||||||
@ -1323,9 +1336,9 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
|
|||||||
int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
|
int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
|
||||||
Bitstr_enc *streamdata,
|
Bitstr_enc *streamdata,
|
||||||
ISAC_SaveEncData_t* encData) {
|
ISAC_SaveEncData_t* encData) {
|
||||||
int j, k, n;
|
int j, k;
|
||||||
WebRtc_Word16 posQQ, pos2QQ, gainpos;
|
WebRtc_Word16 posQQ, pos2QQ, gainpos;
|
||||||
WebRtc_Word16 pos, pos2, posg, offsg, offs2;
|
WebRtc_Word16 posg;
|
||||||
WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN];
|
WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN];
|
||||||
|
|
||||||
WebRtc_Word16 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
|
WebRtc_Word16 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
|
||||||
@ -1343,7 +1356,7 @@ int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* log gains, mean removal and scaling */
|
/* log gains, mean removal and scaling */
|
||||||
posg = 0; pos = 0; gainpos = 0;
|
posg = 0; gainpos = 0;
|
||||||
|
|
||||||
for (k=0; k<SUBFRAMES; k++) {
|
for (k=0; k<SUBFRAMES; k++) {
|
||||||
/* log gains */
|
/* log gains */
|
||||||
@ -1369,44 +1382,27 @@ int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
|
|||||||
/* KLT */
|
/* KLT */
|
||||||
|
|
||||||
/* left transform */
|
/* left transform */
|
||||||
offsg = 0;
|
posg = 0;
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
for (j=0; j<SUBFRAMES; j++) {
|
||||||
posg = offsg;
|
// Q21 = Q6 * Q15
|
||||||
for (k=0; k<2; k++) {
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2],
|
||||||
sumQQ = 0;
|
WebRtcIsacfix_kT1GainQ15[0][0]);
|
||||||
pos = offsg;
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2 + 1],
|
||||||
pos2 = k;
|
WebRtcIsacfix_kT1GainQ15[0][2]);
|
||||||
for (n=0; n<2; n++) {
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
|
posg++;
|
||||||
pos++;
|
|
||||||
pos2 += 2;
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2],
|
||||||
}
|
WebRtcIsacfix_kT1GainQ15[0][1]);
|
||||||
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2 + 1],
|
||||||
|
WebRtcIsacfix_kT1GainQ15[0][3]);
|
||||||
tmpcoeffs2_gQ21[posg] = sumQQ;
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
posg++;
|
posg++;
|
||||||
}
|
|
||||||
offsg += 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* right transform */
|
/* right transform */
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
|
||||||
offs2 = 0;
|
tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
|
||||||
posg = offsg;
|
|
||||||
for (k=0; k<2; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
|
|
||||||
pos += 2;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
|
|
||||||
posg++;
|
|
||||||
}
|
|
||||||
offsg += 2;
|
|
||||||
offs2 += SUBFRAMES;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* quantize coefficients */
|
/* quantize coefficients */
|
||||||
|
|
||||||
@ -1454,7 +1450,8 @@ int WebRtcIsacfix_EncodeLpc(WebRtc_Word32 *gain_lo_hiQ17,
|
|||||||
|
|
||||||
Poly2LarFix(LPCCoef_loQ15, ORDERLO, LPCCoef_hiQ15, ORDERHI, SUBFRAMES, larsQ17);
|
Poly2LarFix(LPCCoef_loQ15, ORDERLO, LPCCoef_hiQ15, ORDERHI, SUBFRAMES, larsQ17);
|
||||||
|
|
||||||
status = EstCodeLpcCoef(larsQ17, gain_lo_hiQ17, model, sizeQ11, streamdata, encData, transcodeParam);
|
status = EstCodeLpcCoef(larsQ17, gain_lo_hiQ17, model, sizeQ11,
|
||||||
|
streamdata, encData, transcodeParam);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
return (status);
|
return (status);
|
||||||
}
|
}
|
||||||
@ -1978,9 +1975,9 @@ int WebRtcIsacfix_EncodeReceiveBandwidth(WebRtc_Word16 *BWno, Bitstr_enc *stream
|
|||||||
/* estimate code length of LPC Coef */
|
/* estimate code length of LPC Coef */
|
||||||
void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
|
void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
|
||||||
WebRtc_Word16 *index_gQQ) {
|
WebRtc_Word16 *index_gQQ) {
|
||||||
int j, k, n;
|
int j, k;
|
||||||
WebRtc_Word16 posQQ, pos2QQ;
|
WebRtc_Word16 posQQ, pos2QQ;
|
||||||
WebRtc_Word16 pos, pos2, posg, offsg, offs2, gainpos;
|
WebRtc_Word16 posg, offsg, gainpos;
|
||||||
WebRtc_Word32 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
|
WebRtc_Word32 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
|
||||||
WebRtc_Word32 tmpcoeffs_gQ17[KLT_ORDER_GAIN];
|
WebRtc_Word32 tmpcoeffs_gQ17[KLT_ORDER_GAIN];
|
||||||
WebRtc_Word32 tmpcoeffs2_gQ21[KLT_ORDER_GAIN];
|
WebRtc_Word32 tmpcoeffs2_gQ21[KLT_ORDER_GAIN];
|
||||||
@ -1988,7 +1985,7 @@ void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
|
|||||||
|
|
||||||
|
|
||||||
/* log gains, mean removal and scaling */
|
/* log gains, mean removal and scaling */
|
||||||
posg = 0;pos=0; gainpos=0;
|
posg = 0; gainpos=0;
|
||||||
|
|
||||||
for (k=0; k<SUBFRAMES; k++) {
|
for (k=0; k<SUBFRAMES; k++) {
|
||||||
/* log gains */
|
/* log gains */
|
||||||
@ -2017,43 +2014,26 @@ void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
|
|||||||
/* left transform */
|
/* left transform */
|
||||||
offsg = 0;
|
offsg = 0;
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
for (j=0; j<SUBFRAMES; j++) {
|
||||||
posg = offsg;
|
// Q21 = Q6 * Q15
|
||||||
for (k=0; k<2; k++) {
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
|
||||||
sumQQ = 0;
|
WebRtcIsacfix_kT1GainQ15[0][0]);
|
||||||
pos = offsg;
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
|
||||||
pos2 = k;
|
WebRtcIsacfix_kT1GainQ15[0][2]);
|
||||||
for (n=0; n<2; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
|
|
||||||
pos++;
|
|
||||||
pos2 += 2;
|
|
||||||
}
|
|
||||||
tmpcoeffs2_gQ21[posg] = sumQQ;
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
posg++;
|
posg++;
|
||||||
}
|
|
||||||
|
|
||||||
offsg += 2;
|
// Q21 = Q6 * Q15
|
||||||
|
sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
|
||||||
|
WebRtcIsacfix_kT1GainQ15[0][1]);
|
||||||
|
sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
|
||||||
|
WebRtcIsacfix_kT1GainQ15[0][3]);
|
||||||
|
tmpcoeffs2_gQ21[posg] = sumQQ;
|
||||||
|
posg++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* right transform */
|
/* right transform */
|
||||||
offsg = 0;
|
WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
|
||||||
offs2 = 0;
|
tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
|
||||||
for (j=0; j<SUBFRAMES; j++) {
|
|
||||||
posg = offsg;
|
|
||||||
for (k=0; k<2; k++) {
|
|
||||||
sumQQ = 0;
|
|
||||||
pos = k;
|
|
||||||
pos2 = offs2;
|
|
||||||
for (n=0; n<SUBFRAMES; n++) {
|
|
||||||
sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
|
|
||||||
pos += 2;
|
|
||||||
pos2++;
|
|
||||||
}
|
|
||||||
tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
|
|
||||||
posg++;
|
|
||||||
}
|
|
||||||
offsg += 2;
|
|
||||||
offs2 += SUBFRAMES;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* quantize coefficients */
|
/* quantize coefficients */
|
||||||
for (k=0; k<KLT_ORDER_GAIN; k++) //ATTN: ok?
|
for (k=0; k<KLT_ORDER_GAIN; k++) //ATTN: ok?
|
||||||
|
@ -108,4 +108,62 @@ int WebRtcIsacfix_EncodeReceiveBandwidth(WebRtc_Word16 *BWno,
|
|||||||
void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *tmpcoeffs_gQ6,
|
void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *tmpcoeffs_gQ6,
|
||||||
WebRtc_Word16 *index_gQQ);
|
WebRtc_Word16 *index_gQQ);
|
||||||
|
|
||||||
#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_ */
|
// Pointer functions for LPC transforms.
|
||||||
|
|
||||||
|
typedef void (*MatrixProduct1)(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix1_index_factor1,
|
||||||
|
const int matrix0_index_factor1,
|
||||||
|
const int matrix1_index_init_case,
|
||||||
|
const int matrix1_index_step,
|
||||||
|
const int matrix0_index_step,
|
||||||
|
const int inner_loop_count,
|
||||||
|
const int mid_loop_count,
|
||||||
|
const int shift);
|
||||||
|
typedef void (*MatrixProduct2)(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix0_index_factor,
|
||||||
|
const int matrix0_index_step);
|
||||||
|
|
||||||
|
extern MatrixProduct1 WebRtcIsacfix_MatrixProduct1;
|
||||||
|
extern MatrixProduct2 WebRtcIsacfix_MatrixProduct2;
|
||||||
|
|
||||||
|
void WebRtcIsacfix_MatrixProduct1C(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix1_index_factor1,
|
||||||
|
const int matrix0_index_factor1,
|
||||||
|
const int matrix1_index_init_case,
|
||||||
|
const int matrix1_index_step,
|
||||||
|
const int matrix0_index_step,
|
||||||
|
const int inner_loop_count,
|
||||||
|
const int mid_loop_count,
|
||||||
|
const int shift);
|
||||||
|
void WebRtcIsacfix_MatrixProduct2C(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix0_index_factor,
|
||||||
|
const int matrix0_index_step);
|
||||||
|
|
||||||
|
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
||||||
|
void WebRtcIsacfix_MatrixProduct1Neon(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix1_index_factor1,
|
||||||
|
const int matrix0_index_factor1,
|
||||||
|
const int matrix1_index_init_case,
|
||||||
|
const int matrix1_index_step,
|
||||||
|
const int matrix0_index_step,
|
||||||
|
const int inner_loop_count,
|
||||||
|
const int mid_loop_count,
|
||||||
|
const int shift);
|
||||||
|
void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix0_index_factor,
|
||||||
|
const int matrix0_index_step);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_
|
||||||
|
@ -0,0 +1,220 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file contains WebRtcIsacfix_MatrixProduct1Neon() and
|
||||||
|
* WebRtcIsacfix_MatrixProduct2Neon() for ARM Neon platform. API's are in
|
||||||
|
* entropy_coding.c. Results are bit exact with the c code for
|
||||||
|
* generic platforms.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "entropy_coding.h"
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
|
void WebRtcIsacfix_MatrixProduct1Neon(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix1_index_factor1,
|
||||||
|
const int matrix0_index_factor1,
|
||||||
|
const int matrix1_index_init_case,
|
||||||
|
const int matrix1_index_step,
|
||||||
|
const int matrix0_index_step,
|
||||||
|
const int inner_loop_count,
|
||||||
|
const int mid_loop_count,
|
||||||
|
const int shift) {
|
||||||
|
int j = 0, k = 0, n = 0;
|
||||||
|
int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
|
||||||
|
int* matrix1_index_factor2 = &j;
|
||||||
|
int* matrix0_index_factor2 = &k;
|
||||||
|
if (matrix1_index_init_case != 0) {
|
||||||
|
matrix1_index_factor2 = &k;
|
||||||
|
matrix0_index_factor2 = &j;
|
||||||
|
}
|
||||||
|
int32x4_t shift32x4 = vdupq_n_s32(shift);
|
||||||
|
int32x2_t shift32x2 = vdup_n_s32(shift);
|
||||||
|
|
||||||
|
assert(inner_loop_count % 2 == 0);
|
||||||
|
assert(mid_loop_count % 2 == 0);
|
||||||
|
|
||||||
|
if (matrix1_index_init_case != 0 && matrix1_index_factor1 == 1) {
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
matrix_prod_index = mid_loop_count * j;
|
||||||
|
for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) {
|
||||||
|
// Initialize sum_32x4 to zeros.
|
||||||
|
int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
|
||||||
|
matrix1_index = k;
|
||||||
|
matrix0_index = matrix0_index_factor1 * j;
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
int32x4_t matrix0_32x4 =
|
||||||
|
vdupq_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
|
||||||
|
int32x4_t matrix1_32x4 =
|
||||||
|
vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4);
|
||||||
|
int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
|
||||||
|
sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4);
|
||||||
|
matrix_prod_index += 4;
|
||||||
|
}
|
||||||
|
if (mid_loop_count % 4 > 1) {
|
||||||
|
// Initialize sum_32x2 to zeros.
|
||||||
|
int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
|
||||||
|
matrix1_index = k;
|
||||||
|
k += 2;
|
||||||
|
matrix0_index = matrix0_index_factor1 * j;
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
int32x2_t matrix0_32x2 =
|
||||||
|
vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
|
||||||
|
int32x2_t matrix1_32x2 =
|
||||||
|
vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2);
|
||||||
|
int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2);
|
||||||
|
sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
|
||||||
|
matrix_prod_index += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (matrix1_index_init_case == 0 && matrix0_index_factor1 == 1) {
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
matrix_prod_index = mid_loop_count * j;
|
||||||
|
for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) {
|
||||||
|
// Initialize sum_32x4 to zeros.
|
||||||
|
int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
|
||||||
|
matrix1_index = matrix1_index_factor1 * j;
|
||||||
|
matrix0_index = k;
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
int32x4_t matrix1_32x4 = vdupq_n_s32(matrix1[matrix1_index] << shift);
|
||||||
|
int32x4_t matrix0_32x4 =
|
||||||
|
vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15);
|
||||||
|
int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
|
||||||
|
sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4);
|
||||||
|
matrix_prod_index += 4;
|
||||||
|
}
|
||||||
|
if (mid_loop_count % 4 > 1) {
|
||||||
|
// Initialize sum_32x2 to zeros.
|
||||||
|
int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
|
||||||
|
matrix1_index = matrix1_index_factor1 * j;
|
||||||
|
matrix0_index = k;
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
int32x2_t multi_32x2;
|
||||||
|
int32x2_t matrix1_32x2 = vdup_n_s32(matrix1[matrix1_index] << shift);
|
||||||
|
int32x2_t matrix0_32x2 =
|
||||||
|
vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0);
|
||||||
|
matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1],
|
||||||
|
matrix0_32x2, 1);
|
||||||
|
matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15);
|
||||||
|
multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2);
|
||||||
|
sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
|
||||||
|
matrix_prod_index += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (matrix1_index_init_case == 0 &&
|
||||||
|
matrix1_index_step == 1 &&
|
||||||
|
matrix0_index_step == 1) {
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
matrix_prod_index = mid_loop_count * j;
|
||||||
|
for (k = 0; k < mid_loop_count; k++) {
|
||||||
|
int32x2_t sum_32x2;
|
||||||
|
// Initialize sum_32x4 to zeros.
|
||||||
|
int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
|
||||||
|
matrix1_index = matrix1_index_factor1 * j;
|
||||||
|
matrix0_index = matrix0_index_factor1 * k;
|
||||||
|
for (n = 0; n < (inner_loop_count >> 2) << 2; n += 4) {
|
||||||
|
int32x4_t matrix1_32x4 =
|
||||||
|
vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4);
|
||||||
|
int32x4_t matrix0_32x4 =
|
||||||
|
vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15);
|
||||||
|
int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
|
||||||
|
sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
|
||||||
|
matrix1_index += 4;
|
||||||
|
matrix0_index += 4;
|
||||||
|
}
|
||||||
|
sum_32x2 = vqadd_s32(vget_low_s32(sum_32x4), vget_high_s32(sum_32x4));
|
||||||
|
if (inner_loop_count % 4 > 1) {
|
||||||
|
int32x2_t multi_32x2;
|
||||||
|
int32x2_t matrix1_32x2 =
|
||||||
|
vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2);
|
||||||
|
int32x2_t matrix0_32x2 =
|
||||||
|
vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0);
|
||||||
|
matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1],
|
||||||
|
matrix0_32x2, 1);
|
||||||
|
matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15);
|
||||||
|
multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2);
|
||||||
|
sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
|
||||||
|
}
|
||||||
|
sum_32x2 = vpadd_s32(sum_32x2, sum_32x2);
|
||||||
|
vst1_lane_s32(&matrix_product[matrix_prod_index], sum_32x2, 0);
|
||||||
|
matrix_prod_index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
matrix_prod_index = mid_loop_count * j;
|
||||||
|
for (k=0; k < mid_loop_count; k++) {
|
||||||
|
int32_t sum32 = 0;
|
||||||
|
matrix1_index = matrix1_index_factor1 * (*matrix1_index_factor2);
|
||||||
|
matrix0_index = matrix0_index_factor1 * (*matrix0_index_factor2);
|
||||||
|
for (n = 0; n < inner_loop_count; n++) {
|
||||||
|
sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
|
||||||
|
matrix1[matrix1_index] << shift));
|
||||||
|
matrix1_index += matrix1_index_step;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
matrix_product[matrix_prod_index] = sum32;
|
||||||
|
matrix_prod_index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[],
|
||||||
|
const int32_t matrix1[],
|
||||||
|
int32_t matrix_product[],
|
||||||
|
const int matrix0_index_factor,
|
||||||
|
const int matrix0_index_step) {
|
||||||
|
int j = 0, n = 0;
|
||||||
|
int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
|
||||||
|
for (j = 0; j < SUBFRAMES; j++) {
|
||||||
|
// Initialize sum_32x2 to zeros.
|
||||||
|
int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
|
||||||
|
matrix1_index = 0;
|
||||||
|
matrix0_index = matrix0_index_factor * j;
|
||||||
|
for (n = SUBFRAMES; n > 0; n--) {
|
||||||
|
int32x2_t matrix0_32x2 =
|
||||||
|
vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
|
||||||
|
int32x2_t matrix1_32x2 = vld1_s32(&matrix1[matrix1_index]);
|
||||||
|
int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2);
|
||||||
|
sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
|
||||||
|
matrix1_index += 2;
|
||||||
|
matrix0_index += matrix0_index_step;
|
||||||
|
}
|
||||||
|
sum_32x2 = vshr_n_s32(sum_32x2, 3);
|
||||||
|
vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
|
||||||
|
matrix_prod_index += 2;
|
||||||
|
}
|
||||||
|
}
|
@ -31,6 +31,8 @@
|
|||||||
FilterMaLoopFix WebRtcIsacfix_FilterMaLoopFix;
|
FilterMaLoopFix WebRtcIsacfix_FilterMaLoopFix;
|
||||||
Spec2Time WebRtcIsacfix_Spec2Time;
|
Spec2Time WebRtcIsacfix_Spec2Time;
|
||||||
Time2Spec WebRtcIsacfix_Time2Spec;
|
Time2Spec WebRtcIsacfix_Time2Spec;
|
||||||
|
MatrixProduct1 WebRtcIsacfix_MatrixProduct1;
|
||||||
|
MatrixProduct2 WebRtcIsacfix_MatrixProduct2;
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
* WebRtcIsacfix_AssignSize(...)
|
* WebRtcIsacfix_AssignSize(...)
|
||||||
@ -192,6 +194,8 @@ static void WebRtcIsacfix_InitNeon(void) {
|
|||||||
WebRtcIsacfix_CalculateResidualEnergyNeon;
|
WebRtcIsacfix_CalculateResidualEnergyNeon;
|
||||||
WebRtcIsacfix_AllpassFilter2FixDec16 =
|
WebRtcIsacfix_AllpassFilter2FixDec16 =
|
||||||
WebRtcIsacfix_AllpassFilter2FixDec16Neon;
|
WebRtcIsacfix_AllpassFilter2FixDec16Neon;
|
||||||
|
WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1Neon;
|
||||||
|
WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2Neon;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -281,6 +285,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
|
|||||||
WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C;
|
WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C;
|
||||||
WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC;
|
WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC;
|
||||||
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC;
|
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC;
|
||||||
|
WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1C;
|
||||||
|
WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C ;
|
||||||
|
|
||||||
#ifdef WEBRTC_DETECT_ARM_NEON
|
#ifdef WEBRTC_DETECT_ARM_NEON
|
||||||
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
|
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
|
||||||
|
@ -97,6 +97,7 @@
|
|||||||
'<(webrtc_root)/common_audio/common_audio.gyp:signal_processing',
|
'<(webrtc_root)/common_audio/common_audio.gyp:signal_processing',
|
||||||
],
|
],
|
||||||
'sources': [
|
'sources': [
|
||||||
|
'entropy_coding_neon.c',
|
||||||
'filterbanks_neon.S',
|
'filterbanks_neon.S',
|
||||||
'filters_neon.S',
|
'filters_neon.S',
|
||||||
'lattice_neon.S',
|
'lattice_neon.S',
|
||||||
|
Loading…
Reference in New Issue
Block a user