From 8fe5f32ccc1673757a00da97ad135d721bc9cd7c Mon Sep 17 00:00:00 2001
From: "kma@webrtc.org" <kma@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Mon, 6 Aug 2012 20:19:56 +0000
Subject: [PATCH] Refactor three signal processing library files. WebRTC issue
 545 is solved by the way. Review URL:
 https://webrtc-codereview.appspot.com/692007

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2562 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../signal_processing/auto_correlation.c      | 165 +++------
 .../signal_processing/cross_correlation.c     | 265 +--------------
 .../dot_product_with_scale.c                  |  95 +-----
 .../include/signal_processing_library.h       | 313 ++++++++----------
 .../signal_processing_unittest.cc             |  55 ++-
 5 files changed, 264 insertions(+), 629 deletions(-)

diff --git a/src/common_audio/signal_processing/auto_correlation.c b/src/common_audio/signal_processing/auto_correlation.c
index a00fde4bc..bd954cff9 100644
--- a/src/common_audio/signal_processing/auto_correlation.c
+++ b/src/common_audio/signal_processing/auto_correlation.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -8,134 +8,61 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
-/*
- * This file contains the function WebRtcSpl_AutoCorrelation().
- * The description header can be found in signal_processing_library.h
- *
- */
-
 #include "signal_processing_library.h"
 
-int WebRtcSpl_AutoCorrelation(G_CONST WebRtc_Word16* in_vector,
+int WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
                               int in_vector_length,
                               int order,
-                              WebRtc_Word32* result,
-                              int* scale)
-{
-    WebRtc_Word32 sum;
-    int i, j;
-    WebRtc_Word16 smax; // Sample max
-    G_CONST WebRtc_Word16* xptr1;
-    G_CONST WebRtc_Word16* xptr2;
-    WebRtc_Word32* resultptr;
-    int scaling = 0;
+                              int32_t* result,
+                              int* scale) {
+  int32_t sum = 0;
+  int i = 0, j = 0;
+  int16_t smax = 0;
+  int scaling = 0;
 
-#ifdef _ARM_OPT_
-#pragma message("NOTE: _ARM_OPT_ optimizations are used")
-    WebRtc_Word16 loops4;
-#endif
+  if (order > in_vector_length) {
+    /* Undefined */
+    return -1;
+  } else if (order < 0) {
+    order = in_vector_length;
+  }
 
-    if (order < 0)
-        order = in_vector_length;
+  // Find the maximum absolute value of the samples.
+  smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
 
-    // Find the max. sample
-    smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
-
-    // In order to avoid overflow when computing the sum we should scale the samples so that
-    // (in_vector_length * smax * smax) will not overflow.
-
-    if (smax == 0)
-    {
-        scaling = 0;
-    } else
-    {
-        int nbits = WebRtcSpl_GetSizeInBits(in_vector_length); // # of bits in the sum loop
-        int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); // # of bits to normalize smax
-
-        if (t > nbits)
-        {
-            scaling = 0;
-        } else
-        {
-            scaling = nbits - t;
-        }
+  // In order to avoid overflow when computing the sum we should scale the
+  // samples so that (in_vector_length * smax * smax) will not overflow.
+  if (smax == 0) {
+    scaling = 0;
+  } else {
+    // Number of bits in the sum loop.
+    int nbits = WebRtcSpl_GetSizeInBits(in_vector_length);
+    // Number of bits to normalize smax.
+    int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
 
+    if (t > nbits) {
+      scaling = 0;
+    } else {
+      scaling = nbits - t;
     }
+  }
 
-    resultptr = result;
-
-    // Perform the actual correlation calculation
-    for (i = 0; i < order + 1; i++)
-    {
-        int loops = (in_vector_length - i);
-        sum = 0;
-        xptr1 = in_vector;
-        xptr2 = &in_vector[i];
-#ifndef _ARM_OPT_
-        for (j = loops; j > 0; j--)
-        {
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1++, *xptr2++, scaling);
-        }
-#else
-        loops4 = (loops >> 2) << 2;
-
-        if (scaling == 0)
-        {
-            for (j = 0; j < loops4; j = j + 4)
-            {
-                sum += WEBRTC_SPL_MUL_16_16(*xptr1, *xptr2);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16(*xptr1, *xptr2);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16(*xptr1, *xptr2);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16(*xptr1, *xptr2);
-                xptr1++;
-                xptr2++;
-            }
-
-            for (j = loops4; j < loops; j++)
-            {
-                sum += WEBRTC_SPL_MUL_16_16(*xptr1, *xptr2);
-                xptr1++;
-                xptr2++;
-            }
-        }
-        else
-        {
-            for (j = 0; j < loops4; j = j + 4)
-            {
-                sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1, *xptr2, scaling);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1, *xptr2, scaling);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1, *xptr2, scaling);
-                xptr1++;
-                xptr2++;
-                sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1, *xptr2, scaling);
-                xptr1++;
-                xptr2++;
-            }
-
-            for (j = loops4; j < loops; j++)
-            {
-                sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1, *xptr2, scaling);
-                xptr1++;
-                xptr2++;
-            }
-        }
-
-#endif
-        *resultptr++ = sum;
+  // Perform the actual correlation calculation.
+  for (i = 0; i < order + 1; i++) {
+    sum = 0;
+    /* Unroll the loop to improve performance. */
+    for (j = 0; j < in_vector_length - i - 3; j += 4) {
+      sum += (in_vector[j + 0] * in_vector[i + j + 0]) >> scaling;
+      sum += (in_vector[j + 1] * in_vector[i + j + 1]) >> scaling;
+      sum += (in_vector[j + 2] * in_vector[i + j + 2]) >> scaling;
+      sum += (in_vector[j + 3] * in_vector[i + j + 3]) >> scaling;
     }
+    for (; j < in_vector_length - i; j++) {
+      sum += (in_vector[j] * in_vector[i + j]) >> scaling;
+    }
+    *result++ = sum;
+  }
 
-    *scale = scaling;
-
-    return order + 1;
+  *scale = scaling;
+  return order + 1;
 }
diff --git a/src/common_audio/signal_processing/cross_correlation.c b/src/common_audio/signal_processing/cross_correlation.c
index 39eeb3cc3..cf7705c74 100644
--- a/src/common_audio/signal_processing/cross_correlation.c
+++ b/src/common_audio/signal_processing/cross_correlation.c
@@ -8,258 +8,23 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
-/*
- * This file contains the function WebRtcSpl_CrossCorrelation().
- * The description header can be found in signal_processing_library.h
- *
- */
-
-/* TODO(kma): Clean up the code in this file, and break it up for
- * various platforms (Xscale, ARM/Neon etc.).
- */
-
 #include "signal_processing_library.h"
 
-void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation, WebRtc_Word16* seq1,
-                                WebRtc_Word16* seq2, WebRtc_Word16 dim_seq,
-                                WebRtc_Word16 dim_cross_correlation,
-                                WebRtc_Word16 right_shifts,
-                                WebRtc_Word16 step_seq2)
-{
-    int i, j;
-    WebRtc_Word16* seq1Ptr;
-    WebRtc_Word16* seq2Ptr;
-    WebRtc_Word32* CrossCorrPtr;
-
-#ifdef _XSCALE_OPT_
-
-#ifdef _WIN32
-#pragma message("NOTE: _XSCALE_OPT_ optimizations are used (overrides _ARM_OPT_ and requires /QRxscale compiler flag)")
-#endif
-
-    __int64 macc40;
-
-    int iseq1[250];
-    int iseq2[250];
-    int iseq3[250];
-    int * iseq1Ptr;
-    int * iseq2Ptr;
-    int * iseq3Ptr;
-    int len, i_len;
-
-    seq1Ptr = seq1;
-    iseq1Ptr = iseq1;
-    for(i = 0; i < ((dim_seq + 1) >> 1); i++)
-    {
-        *iseq1Ptr = (unsigned short)*seq1Ptr++;
-        *iseq1Ptr++ |= (WebRtc_Word32)*seq1Ptr++ << 16;
+void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
+                                const int16_t* seq1,
+                                const int16_t* seq2,
+                                int16_t dim_seq,
+                                int16_t dim_cross_correlation,
+                                int16_t right_shifts,
+                                int16_t step_seq2) {
+  int i = 0, j = 0;
 
+  for (i = 0; i < dim_cross_correlation; i++) {
+    *cross_correlation = 0;
+    /* Unrolling doesn't seem to improve performance. */
+    for (j = 0; j < dim_seq; j++) {
+      *cross_correlation += (seq1[j] * seq2[step_seq2 * i + j]) >> right_shifts;
     }
-
-    if(dim_seq%2)
-    {
-        *(iseq1Ptr-1) &= 0x0000ffff;
-    }
-    *iseq1Ptr = 0;
-    iseq1Ptr++;
-    *iseq1Ptr = 0;
-    iseq1Ptr++;
-    *iseq1Ptr = 0;
-
-    if(step_seq2 < 0)
-    {
-        seq2Ptr = seq2 - dim_cross_correlation + 1;
-        CrossCorrPtr = &cross_correlation[dim_cross_correlation - 1];
-    }
-    else
-    {
-        seq2Ptr = seq2;
-        CrossCorrPtr = cross_correlation;
-    }
-
-    len = dim_seq + dim_cross_correlation - 1;
-    i_len = (len + 1) >> 1;
-    iseq2Ptr = iseq2;
-
-    iseq3Ptr = iseq3;
-    for(i = 0; i < i_len; i++)
-    {
-        *iseq2Ptr = (unsigned short)*seq2Ptr++;
-        *iseq3Ptr = (unsigned short)*seq2Ptr;
-        *iseq2Ptr++ |= (WebRtc_Word32)*seq2Ptr++ << 16;
-        *iseq3Ptr++ |= (WebRtc_Word32)*seq2Ptr << 16;
-    }
-
-    if(len % 2)
-    {
-        iseq2[i_len - 1] &= 0x0000ffff;
-        iseq3[i_len - 1] = 0;
-    }
-    else
-    iseq3[i_len - 1] &= 0x0000ffff;
-
-    iseq2[i_len] = 0;
-    iseq3[i_len] = 0;
-    iseq2[i_len + 1] = 0;
-    iseq3[i_len + 1] = 0;
-    iseq2[i_len + 2] = 0;
-    iseq3[i_len + 2] = 0;
-
-    // Set pointer to start value
-    iseq2Ptr = iseq2;
-    iseq3Ptr = iseq3;
-
-    i_len = (dim_seq + 7) >> 3;
-    for (i = 0; i < dim_cross_correlation; i++)
-    {
-
-        iseq1Ptr = iseq1;
-
-        macc40 = 0;
-
-        _WriteCoProcessor(macc40, 0);
-
-        if((i & 1))
-        {
-            iseq3Ptr = iseq3 + (i >> 1);
-            for (j = i_len; j > 0; j--)
-            {
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
-            }
-        }
-        else
-        {
-            iseq2Ptr = iseq2 + (i >> 1);
-            for (j = i_len; j > 0; j--)
-            {
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
-                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
-            }
-
-        }
-
-        macc40 = _ReadCoProcessor(0);
-        *CrossCorrPtr = (WebRtc_Word32)(macc40 >> right_shifts);
-        CrossCorrPtr += step_seq2;
-    }
-#else // #ifdef _XSCALE_OPT_
-#ifdef _ARM_OPT_
-    WebRtc_Word16 dim_seq8 = (dim_seq >> 3) << 3;
-#endif
-
-    CrossCorrPtr = cross_correlation;
-
-    for (i = 0; i < dim_cross_correlation; i++)
-    {
-        // Set the pointer to the static vector, set the pointer to the sliding vector
-        // and initialize cross_correlation
-        seq1Ptr = seq1;
-        seq2Ptr = seq2 + (step_seq2 * i);
-        (*CrossCorrPtr) = 0;
-
-#ifndef _ARM_OPT_ 
-        // Perform the cross correlation
-        for (j = 0; j < dim_seq; j++)
-        {
-            (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts);
-            seq1Ptr++;
-            seq2Ptr++;
-        }
-#else
-        if (right_shifts == 0)
-        {
-            // Perform the optimized cross correlation
-            for (j = 0; j < dim_seq8; j = j + 8)
-            {
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-            }
-
-            for (j = dim_seq8; j < dim_seq; j++)
-            {
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
-                seq1Ptr++;
-                seq2Ptr++;
-            }
-        }
-        else // right_shifts != 0
-
-        {
-            // Perform the optimized cross correlation
-            for (j = 0; j < dim_seq8; j = j + 8)
-            {
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-            }
-
-            for (j = dim_seq8; j < dim_seq; j++)
-            {
-                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
-                                                             right_shifts);
-                seq1Ptr++;
-                seq2Ptr++;
-            }
-        }
-#endif
-        CrossCorrPtr++;
-    }
-#endif
+    cross_correlation++;
+  }
 }
diff --git a/src/common_audio/signal_processing/dot_product_with_scale.c b/src/common_audio/signal_processing/dot_product_with_scale.c
index 6e085fdb6..486826096 100644
--- a/src/common_audio/signal_processing/dot_product_with_scale.c
+++ b/src/common_audio/signal_processing/dot_product_with_scale.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -8,84 +8,25 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
-/*
- * This file contains the function WebRtcSpl_DotProductWithScale().
- * The description header can be found in signal_processing_library.h
- *
- */
-
 #include "signal_processing_library.h"
 
-WebRtc_Word32 WebRtcSpl_DotProductWithScale(WebRtc_Word16 *vector1, WebRtc_Word16 *vector2,
-                                            int length, int scaling)
-{
-    WebRtc_Word32 sum;
-    int i;
-#ifdef _ARM_OPT_
-#pragma message("NOTE: _ARM_OPT_ optimizations are used")
-    WebRtc_Word16 len4 = (length >> 2) << 2;
-#endif
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      int length,
+                                      int scaling) {
+  int32_t sum = 0;
+  int i = 0;
 
-    sum = 0;
+  /* Unroll the loop to improve performance. */
+  for (i = 0; i < length - 3; i += 4) {
+    sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
+    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
+    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
+    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
+  }
+  for (; i < length; i++) {
+    sum += (vector1[i] * vector2[i]) >> scaling;
+  }
 
-#ifndef _ARM_OPT_
-    for (i = 0; i < length; i++)
-    {
-        sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1++, *vector2++, scaling);
-    }
-#else
-    if (scaling == 0)
-    {
-        for (i = 0; i < len4; i = i + 4)
-        {
-            sum += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
-            vector1++;
-            vector2++;
-        }
-
-        for (i = len4; i < length; i++)
-        {
-            sum += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
-            vector1++;
-            vector2++;
-        }
-    }
-    else
-    {
-        for (i = 0; i < len4; i = i + 4)
-        {
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1, *vector2, scaling);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1, *vector2, scaling);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1, *vector2, scaling);
-            vector1++;
-            vector2++;
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1, *vector2, scaling);
-            vector1++;
-            vector2++;
-        }
-
-        for (i = len4; i < length; i++)
-        {
-            sum += WEBRTC_SPL_MUL_16_16_RSFT(*vector1, *vector2, scaling);
-            vector1++;
-            vector2++;
-        }
-    }
-#endif
-
-    return sum;
+  return sum;
 }
diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h
index 4bcf68af9..38b89cb26 100644
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@@ -419,32 +419,146 @@ void WebRtcSpl_AffineTransformVector(WebRtc_Word16* out_vector,
                                      int vector_length);
 // End: iLBC specific functions.
 
-// Signal processing operations. Descriptions at bottom of this file.
-int WebRtcSpl_AutoCorrelation(G_CONST WebRtc_Word16* vector,
-                              int vector_length, int order,
-                              WebRtc_Word32* result_vector,
+// Signal processing operations.
+
+// A 32-bit fix-point implementation of auto-correlation computation
+//
+// Input:
+//      - in_vector        : Vector to calculate autocorrelation upon
+//      - in_vector_length : Length (in samples) of |in_vector|
+//      - order            : The order up to which the autocorrelation should be
+//                           calculated
+//
+// Output:
+//      - result           : auto-correlation values (values should be seen
+//                           relative to each other since the absolute values
+//                           might have been down shifted to avoid overflow)
+//
+//      - scale            : The number of left shifts required to obtain the
+//                           auto-correlation in Q0
+//
+// Return value            :
+//      - -1, if |order| > |in_vector_length|;
+//      - Number of samples in |result|, i.e. (order+1), otherwise.
+int WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+                              int in_vector_length,
+                              int order,
+                              int32_t* result,
                               int* scale);
+
+// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
+// does NOT use the 64 bit class
+//
+// Input:
+//      - auto_corr : Vector with autocorrelation values of length >=
+//                    |use_order|+1
+//      - use_order : The LPC filter order (support up to order 20)
+//
+// Output:
+//      - lpc_coef  : lpc_coef[0..use_order] LPC coefficients in Q12
+//      - refl_coef : refl_coef[0...use_order-1] Reflection coefficients in
+//                    Q15
+//
+// Return value     : 1 for stable 0 for unstable
 WebRtc_Word16 WebRtcSpl_LevinsonDurbin(WebRtc_Word32* auto_corr,
                                        WebRtc_Word16* lpc_coef,
                                        WebRtc_Word16* refl_coef,
                                        WebRtc_Word16 order);
+
+// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
+// This version is a 16 bit operation.
+//
+// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
+// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
+// "rare" cases even if the reflection coefficients are stable.
+//
+// Input:
+//      - refl_coef : Reflection coefficients in Q15 that should be converted
+//                    to LPC coefficients
+//      - use_order : Number of coefficients in |refl_coef|
+//
+// Output:
+//      - lpc_coef  : LPC coefficients in Q12
 void WebRtcSpl_ReflCoefToLpc(G_CONST WebRtc_Word16* refl_coef,
                              int use_order,
                              WebRtc_Word16* lpc_coef);
+
+// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
+// This version is a 16 bit operation.
+// The conversion is implemented by the step-down algorithm.
+//
+// Input:
+//      - lpc_coef  : LPC coefficients in Q12, that should be converted to
+//                    reflection coefficients
+//      - use_order : Number of coefficients in |lpc_coef|
+//
+// Output:
+//      - refl_coef : Reflection coefficients in Q15.
 void WebRtcSpl_LpcToReflCoef(WebRtc_Word16* lpc_coef,
                              int use_order,
                              WebRtc_Word16* refl_coef);
+
+// Calculates reflection coefficients (16 bit) from auto-correlation values
+//
+// Input:
+//      - auto_corr : Auto-correlation values
+//      - use_order : Number of coefficients to be calculated
+//
+// Output:
+//      - refl_coef : Reflection coefficients in Q15.
 void WebRtcSpl_AutoCorrToReflCoef(G_CONST WebRtc_Word32* auto_corr,
                                   int use_order,
                                   WebRtc_Word16* refl_coef);
-void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_corr,
-                                WebRtc_Word16* vector1,
-                                WebRtc_Word16* vector2,
-                                WebRtc_Word16 dim_vector,
-                                WebRtc_Word16 dim_cross_corr,
-                                WebRtc_Word16 right_shifts,
-                                WebRtc_Word16 step_vector2);
+
+// Calculates the cross-correlation between two sequences |seq1| and |seq2|.
+// |seq1| is fixed and |seq2| slides as the pointer is increased with the
+// amount |step_seq2|. Note the arguments should obey the relationship:
+// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
+//      buffer size of |seq2|
+//
+// Input:
+//      - seq1           : First sequence (fixed throughout the correlation)
+//      - seq2           : Second sequence (slides |step_seq2| for each
+//                            new correlation)
+//      - dim_seq        : Number of samples to use in the cross-correlation
+//      - dim_cross_correlation : Number of cross-correlations to calculate (the
+//                            start position for |seq2| is updated for each
+//                            new one)
+//      - right_shifts   : Number of right bit shifts to use. This will
+//                            become the output Q-domain.
+//      - step_seq2      : How many (positive or negative) steps the
+//                            |seq2| pointer should be updated for each new
+//                            cross-correlation value.
+//
+// Output:
+//      - cross_correlation : The cross-correlation in Q(-right_shifts)
+void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
+                                const int16_t* seq1,
+                                const int16_t* seq2,
+                                int16_t dim_seq,
+                                int16_t dim_cross_correlation,
+                                int16_t right_shifts,
+                                int16_t step_seq2);
+
+// Creates (the first half of) a Hanning window. Size must be at least 1 and
+// at most 512.
+//
+// Input:
+//      - size      : Length of the requested Hanning window (1 to 512)
+//
+// Output:
+//      - window    : Hanning vector in Q14.
 void WebRtcSpl_GetHanningWindow(WebRtc_Word16* window, WebRtc_Word16 size);
+
+// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
+// |in_vector|. Input and output values are in Q15.
+//
+// Inputs:
+//      - in_vector     : Values to calculate sqrt(1 - x^2) of
+//      - vector_length : Length of vector |in_vector|
+//
+// Output:
+//      - out_vector    : Output values in Q15
 void WebRtcSpl_SqrtOfOneMinusXSquared(WebRtc_Word16* in_vector,
                                       int vector_length,
                                       WebRtc_Word16* out_vector);
@@ -478,10 +592,21 @@ WebRtc_Word32 WebRtcSpl_Energy(WebRtc_Word16* vector,
                                int vector_length,
                                int* scale_factor);
 
-WebRtc_Word32 WebRtcSpl_DotProductWithScale(WebRtc_Word16* vector1,
-                                            WebRtc_Word16* vector2,
-                                            int vector_length,
-                                            int scaling);
+// Calculates the dot product between two (int16_t) vectors.
+//
+// Input:
+//      - vector1       : Vector 1
+//      - vector2       : Vector 2
+//      - length        : Number of samples used in the dot product
+//      - scaling       : The number of right bit shifts to apply on each term
+//                        during calculation to avoid overflow, i.e., the
+//                        output will be in Q(-|scaling|)
+//
+// Return value         : The dot product in Q(-scaling)
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      int length,
+                                      int scaling);
 
 // Filter operations.
 int WebRtcSpl_FilterAR(G_CONST WebRtc_Word16* ar_coef, int ar_coef_length,
@@ -1116,147 +1241,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 //      - out_vector    : Vector with the output
 //
 
-//
-// WebRtcSpl_AutoCorrelation(...)
-//
-// A 32-bit fix-point implementation of auto-correlation computation
-//
-// Input:
-//      - vector        : Vector to calculate autocorrelation upon
-//      - vector_length : Length (in samples) of |vector|
-//      - order         : The order up to which the autocorrelation should be
-//                        calculated
-//
-// Output:
-//      - result_vector : auto-correlation values (values should be seen
-//                        relative to each other since the absolute values
-//                        might have been down shifted to avoid overflow)
-//
-//      - scale         : The number of left shifts required to obtain the
-//                        auto-correlation in Q0
-//
-// Return value         : Number of samples in |result_vector|, i.e., (order+1)
-//
-
-//
-// WebRtcSpl_LevinsonDurbin(...)
-//
-// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
-// does NOT use the 64 bit class
-//
-// Input:
-//      - auto_corr : Vector with autocorrelation values of length >=
-//                    |use_order|+1
-//      - use_order : The LPC filter order (support up to order 20)
-//
-// Output:
-//      - lpc_coef  : lpc_coef[0..use_order] LPC coefficients in Q12
-//      - refl_coef : refl_coef[0...use_order-1]| Reflection coefficients in
-//                    Q15
-//
-// Return value     : 1 for stable 0 for unstable
-//
-
-//
-// WebRtcSpl_ReflCoefToLpc(...)
-//
-// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
-// This version is a 16 bit operation.
-//
-// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
-// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
-// "rare" cases even if the reflection coefficients are stable.
-//
-// Input:
-//      - refl_coef : Reflection coefficients in Q15 that should be converted
-//                    to LPC coefficients
-//      - use_order : Number of coefficients in |refl_coef|
-//
-// Output:
-//      - lpc_coef  : LPC coefficients in Q12
-//
-
-//
-// WebRtcSpl_LpcToReflCoef(...)
-//
-// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
-// This version is a 16 bit operation.
-// The conversion is implemented by the step-down algorithm.
-//
-// Input:
-//      - lpc_coef  : LPC coefficients in Q12, that should be converted to
-//                    reflection coefficients
-//      - use_order : Number of coefficients in |lpc_coef|
-//
-// Output:
-//      - refl_coef : Reflection coefficients in Q15.
-//
-
-//
-// WebRtcSpl_AutoCorrToReflCoef(...)
-//
-// Calculates reflection coefficients (16 bit) from auto-correlation values
-//
-// Input:
-//      - auto_corr : Auto-correlation values
-//      - use_order : Number of coefficients wanted be calculated
-//
-// Output:
-//      - refl_coef : Reflection coefficients in Q15.
-//
-
-//
-// WebRtcSpl_CrossCorrelation(...)
-//
-// Calculates the cross-correlation between two sequences |vector1| and
-// |vector2|. |vector1| is fixed and |vector2| slides as the pointer is
-// increased with the amount |step_vector2|
-//
-// Input:
-//      - vector1           : First sequence (fixed throughout the correlation)
-//      - vector2           : Second sequence (slides |step_vector2| for each
-//                            new correlation)
-//      - dim_vector        : Number of samples to use in the cross-correlation
-//      - dim_cross_corr    : Number of cross-correlations to calculate (the
-//                            start position for |vector2| is updated for each
-//                            new one)
-//      - right_shifts      : Number of right bit shifts to use. This will
-//                            become the output Q-domain.
-//      - step_vector2      : How many (positive or negative) steps the
-//                            |vector2| pointer should be updated for each new
-//                            cross-correlation value.
-//
-// Output:
-//      - cross_corr        : The cross-correlation in Q(-right_shifts)
-//
-
-//
-// WebRtcSpl_GetHanningWindow(...)
-//
-// Creates (the first half of) a Hanning window. Size must be at least 1 and
-// at most 512.
-//
-// Input:
-//      - size      : Length of the requested Hanning window (1 to 512)
-//
-// Output:
-//      - window    : Hanning vector in Q14.
-//
-
-//
-// WebRtcSpl_SqrtOfOneMinusXSquared(...)
-//
-// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
-// |in_vector|. Input and output values are in Q15.
-//
-// Inputs:
-//      - in_vector     : Values to calculate sqrt(1 - x^2) of
-//      - vector_length : Length of vector |in_vector|
-//
-// Output:
-//      - out_vector    : Output values in Q15
-//
-
 //
 // WebRtcSpl_IncreaseSeed(...)
 //
@@ -1492,23 +1476,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 //      - out_vector        : Filtered samples
 //
 
-
-//
-// WebRtcSpl_DotProductWithScale(...)
-//
-// Calculates the dot product between two (WebRtc_Word16) vectors
-//
-// Input:
-//      - vector1       : Vector 1
-//      - vector2       : Vector 2
-//      - vector_length : Number of samples used in the dot product
-//      - scaling       : The number of right bit shifts to apply on each term
-//                        during calculation to avoid overflow, i.e., the
-//                        output will be in Q(-|scaling|)
-//
-// Return value         : The dot product in Q(-scaling)
-//
-
 //
 // WebRtcSpl_ComplexIFFT(...)
 //
diff --git a/src/common_audio/signal_processing/signal_processing_unittest.cc b/src/common_audio/signal_processing/signal_processing_unittest.cc
index f6d11233d..1a371613d 100644
--- a/src/common_audio/signal_processing/signal_processing_unittest.cc
+++ b/src/common_audio/signal_processing/signal_processing_unittest.cc
@@ -11,6 +11,10 @@
 #include "signal_processing_library.h"
 #include "gtest/gtest.h"
 
+static const int kVector16Size = 9;  // Number of samples in |vector16|.
+static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
+  WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345};
+
 class SplTest : public testing::Test {
  protected:
   virtual ~SplTest() {
@@ -253,7 +257,6 @@ TEST_F(SplTest, VectorOperationsTest) {
     int B[] = {4, 12, 133, 1100};
     WebRtc_Word16 a16[kVectorSize];
     WebRtc_Word16 b16[kVectorSize];
-    WebRtc_Word32 b32[kVectorSize];
     WebRtc_Word16 bTmp16[kVectorSize];
 
     for (int kk = 0; kk < kVectorSize; ++kk) {
@@ -275,13 +278,6 @@ TEST_F(SplTest, VectorOperationsTest) {
         EXPECT_EQ(((B[kk]*3+B[kk]*2+2)>>2)+((b16[kk]*3+7)>>2), bTmp16[kk]);
     }
 
-    WebRtcSpl_CrossCorrelation(b32, b16, bTmp16, kVectorSize, 2, 2, 0);
-    for (int kk = 0; kk < 2; ++kk) {
-        EXPECT_EQ(614236, b32[kk]);
-    }
-//    EXPECT_EQ(, WebRtcSpl_DotProduct(b16, bTmp16, 4));
-    EXPECT_EQ(306962, WebRtcSpl_DotProductWithScale(b16, b16, kVectorSize, 2));
-
     WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2);
     for (int kk = 0; kk < kVectorSize; ++kk) {
         EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
@@ -391,6 +387,47 @@ TEST_F(SplTest, RandTest) {
     }
 }
 
+TEST_F(SplTest, DotProductWithScaleTest) {
+  EXPECT_EQ(605362796, WebRtcSpl_DotProductWithScale(vector16,
+      vector16, kVector16Size, 2));  // Each term is right-shifted by 2.
+}
+
+TEST_F(SplTest, CrossCorrelationTest) {
+  // Note the function arguments relation specified by the API.
+  const int kCrossCorrelationDimension = 3;  // Correlation values computed.
+  const int kShift = 2;  // Right shift applied to each product.
+  const int kStep = 1;  // Offset step into |vector16_b| per output value.
+  const int kSeqDimension = 6;  // Samples used per correlation value.
+
+  const int16_t vector16_b[kVector16Size] = {1, 4323, 1963,
+    WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, -876, 8483, 142};
+  const int32_t expected[3] = {-266947903, -15579555, -171282001};
+  int32_t vector32[kCrossCorrelationDimension] = {0};
+
+  WebRtcSpl_CrossCorrelation(vector32, vector16, vector16_b, kSeqDimension,
+                             kCrossCorrelationDimension, kShift, kStep);
+
+  for (int i = 0; i < kCrossCorrelationDimension; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
+TEST_F(SplTest, AutoCorrelationTest) {
+  int scale = 0;
+  int32_t vector32[kVector16Size];
+  const int32_t expected[kVector16Size] = {302681398, 14223410, -121705063,
+    -85221647, -17104971, 61806945, 6644603, -669329, 43};
+
+  EXPECT_EQ(-1, WebRtcSpl_AutoCorrelation(vector16,
+      kVector16Size, kVector16Size + 1, vector32, &scale));  // Order > length.
+  EXPECT_EQ(kVector16Size, WebRtcSpl_AutoCorrelation(vector16,
+      kVector16Size, kVector16Size - 1, vector32, &scale));  // Max lag.
+  EXPECT_EQ(3, scale);  // Right-shift count written to |scale|.
+  for (int i = 0; i < kVector16Size; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
 TEST_F(SplTest, SignalProcessingTest) {
     const int kVectorSize = 4;
     int A[] = {1, 2, 33, 100};
@@ -398,7 +435,6 @@ TEST_F(SplTest, SignalProcessingTest) {
     WebRtc_Word16 b16[kVectorSize];
 
     WebRtc_Word16 bTmp16[kVectorSize];
-    WebRtc_Word32 bTmp32[kVectorSize];
 
     int bScale = 0;
 
@@ -406,7 +442,6 @@ TEST_F(SplTest, SignalProcessingTest) {
         b16[kk] = A[kk];
     }
 
-    EXPECT_EQ(2, WebRtcSpl_AutoCorrelation(b16, kVectorSize, 1, bTmp32, &bScale));
     // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring.
 //    WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16);
 ////    for (int kk = 0; kk < kVectorSize; ++kk) {