From ecbe0aa543575664fd2fa05e6d9ea5aa81c85720 Mon Sep 17 00:00:00 2001
From: "minyue@webrtc.org"
 <minyue@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Mon, 12 Aug 2013 06:48:09 +0000
Subject: [PATCH] Added Opus stereo support

TESTED=git try
BUG=webrtc:1360
R=tina.legrand@webrtc.org, turaj@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/1868004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4521 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../codecs/opus/interface/opus_interface.h    |  4 +-
 .../audio_coding/neteq4/audio_decoder_impl.cc | 14 ++-
 .../neteq4/audio_decoder_unittest.cc          | 88 ++++++++++++++++---
 .../neteq4/interface/audio_decoder.h          |  6 +-
 4 files changed, 89 insertions(+), 23 deletions(-)
diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
index 15d42c223..1370aff06 100644
--- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
+++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h
@@ -33,7 +33,7 @@ int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst);
  * Input:
  *      - inst                  : Encoder context
  *      - audio_in              : Input speech data buffer
- *      - samples               : Samples in audio_in
+ *      - samples               : Samples per channel in audio_in
  *      - length_encoded_buffer : Output buffer size
  *
  * Output:
@@ -101,7 +101,7 @@ int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst);
  *                             always return 1 since we're not using Opus's
  *                             built-in DTX/CNG scheme)
  *
- * Return value              : >0 - Samples in decoded vector
+ * Return value              : >0 - Samples per channel in decoded vector
  *                             -1 - Error
  */
 int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
index b80c829c3..6d25e42a3 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
@@ -393,19 +393,17 @@ AudioDecoderOpus::~AudioDecoderOpus() {
 int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len,
                              int16_t* decoded, SpeechType* speech_type) {
   int16_t temp_type = 1;  // Default is speech.
-  assert(channels_ == 1);
-  // TODO(hlundin): Allow 2 channels when WebRtcOpus_Decode provides both
-  // channels interleaved.
-  int16_t ret = WebRtcOpus_Decode(
-      static_cast<OpusDecInst*>(state_),
-      const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)),
-      static_cast<int16_t>(encoded_len), decoded, &temp_type);
+  int16_t ret = WebRtcOpus_DecodeNew(static_cast<OpusDecInst*>(state_), encoded,
+                                     static_cast<int16_t>(encoded_len), decoded,
+                                     &temp_type);
+  if (ret > 0)
+    ret *= channels_; // Per-channel count from DecodeNew; return the total.
   *speech_type = ConvertSpeechType(temp_type);
   return ret;
 }
 
 int AudioDecoderOpus::Init() {
-  return WebRtcOpus_DecoderInit(static_cast<OpusDecInst*>(state_));
+  return WebRtcOpus_DecoderInitNew(static_cast<OpusDecInst*>(state_));
 }
 
 int AudioDecoderOpus::PacketDuration(const uint8_t* encoded,
diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
index 0f0f64d70..e4885dfdd 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
@@ -92,10 +92,14 @@ class AudioDecoderTest : public ::testing::Test {
 
   // Encodes and decodes audio. The absolute difference between the input and
   // output is compared vs |tolerance|, and the mean-squared error is compared
-  // with |mse|. The encoded stream should contain |expected_bytes|.
+  // with |mse|. The encoded stream should contain |expected_bytes|. For stereo
+  // audio, the absolute difference between the two channels is compared vs
+  // |channel_diff_tolerance|.
   void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
-                        int delay = 0) {
+                        int delay = 0, int channel_diff_tolerance = 0) {
     ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
+    ASSERT_GE(channel_diff_tolerance, 0) <<
+        "Test must define a channel_diff_tolerance >= 0";
     size_t processed_samples = 0u;
     encoded_bytes_ = 0u;
     InitEncoder();
@@ -116,22 +120,19 @@ class AudioDecoderTest : public ::testing::Test {
 #if !(defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS))
     EXPECT_EQ(expected_bytes, encoded_bytes_);
     CompareInputOutput(processed_samples, tolerance, delay);
+    if (channels_ == 2)
+      CompareTwoChannels(processed_samples, channel_diff_tolerance);
     EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
 #endif
   }
 
   // The absolute difference between the input and output (the first channel) is
   // compared vs |tolerance|. The parameter |delay| is used to correct for codec
-  // delays. If |channels_| is 2, the method verifies that the two channels are
-  // identical.
+  // delays.
   virtual void CompareInputOutput(size_t num_samples, int tolerance,
                                   int delay) const {
     assert(num_samples <= data_length_);
     for (unsigned int n = 0; n < num_samples - delay; ++n) {
-      if (channels_ == 2) {
-        ASSERT_EQ(decoded_[channels_ * n], decoded_[channels_ * n + 1]) <<
-            "Stereo samples differ.";
-      }
       ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) <<
           "Exit test on first diff; n = " << n;
       DataLog::InsertCell("CodecTest", "input", input_[n]);
@@ -140,6 +141,15 @@ class AudioDecoderTest : public ::testing::Test {
     }
   }
 
+  // For stereo output, the absolute difference between the two interleaved
+  // channels is compared vs |tolerance| for the first |num_samples| samples.
+  virtual void CompareTwoChannels(size_t num_samples, int tolerance) const {
+    assert(num_samples <= data_length_);
+    for (unsigned int n = 0; n < num_samples; ++n)
+        ASSERT_NEAR(decoded_[channels_ * n], decoded_[channels_ * n + 1],
+                    tolerance) << "Stereo samples differ.";
+  }
+
   // Calculates mean-squared error between input and output (the first channel).
   // The parameter |delay| is used to correct for codec delays.
   virtual double MseInputOutput(size_t num_samples, int delay) const {
@@ -162,13 +172,14 @@ class AudioDecoderTest : public ::testing::Test {
     int16_t* output2 = decoded_ + frame_size_;
     InitEncoder();
     size_t enc_len = EncodeFrame(input_, frame_size_, encoded);
+    size_t dec_len;
     // Copy payload since iSAC fix destroys it during decode.
     // Issue: http://code.google.com/p/webrtc/issues/detail?id=845.
     // TODO(hlundin): Remove if the iSAC bug gets fixed.
     memcpy(encoded_copy, encoded, enc_len);
     AudioDecoder::SpeechType speech_type1, speech_type2;
     EXPECT_EQ(0, decoder_->Init());
-    size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
+    dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
     EXPECT_EQ(frame_size_ * channels_, dec_len);
     // Re-init decoder and decode again.
     EXPECT_EQ(0, decoder_->Init());
@@ -551,6 +562,51 @@ class AudioDecoderOpusTest : public AudioDecoderTest {
   OpusEncInst* encoder_;
 };
 
+class AudioDecoderOpusStereoTest : public AudioDecoderTest {
+ protected:
+  AudioDecoderOpusStereoTest() : AudioDecoderTest() {
+    channels_ = 2;
+    frame_size_ = 320;  // Per channel; 10 ms at the 32 kHz input rate.
+    data_length_ = 10 * frame_size_;
+    decoder_ = new AudioDecoderOpus(kDecoderOpus_2ch);
+    assert(decoder_);
+    WebRtcOpus_EncoderCreate(&encoder_, 2);
+  }
+
+  ~AudioDecoderOpusStereoTest() {
+    WebRtcOpus_EncoderFree(encoder_);
+  }
+
+  virtual void InitEncoder() {}  // Encoder is created in the constructor.
+
+  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
+                          uint8_t* output) {
+    // Create interleaved stereo by duplicating each sample in |input|.
+    const int input_stereo_samples = input_len_samples * 2;
+    int16_t* input_stereo = new int16_t[input_stereo_samples];
+    for (size_t i = 0; i < input_len_samples; i++)
+      input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i];
+    // Upsample from 32 to 48 kHz; the output is 3/2 times the input length.
+    Resampler rs;
+    rs.Reset(32000, 48000, kResamplerSynchronousStereo);
+    const int max_resamp_len_samples = input_stereo_samples * 3 / 2;
+    int16_t* resamp_input = new int16_t[max_resamp_len_samples];
+    int resamp_len_samples;
+    EXPECT_EQ(0, rs.Push(input_stereo, input_stereo_samples, resamp_input,
+                         max_resamp_len_samples, resamp_len_samples));
+    EXPECT_EQ(max_resamp_len_samples, resamp_len_samples);
+    int enc_len_bytes =  // Encoder takes samples per channel, hence the / 2.
+        WebRtcOpus_Encode(encoder_, resamp_input, resamp_len_samples / 2,
+                          data_length_, output);
+    EXPECT_GT(enc_len_bytes, 0);
+    delete [] resamp_input;
+    delete [] input_stereo;
+    return enc_len_bytes;
+  }
+
+  OpusEncInst* encoder_;
+};
+
 TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
   int tolerance = 251;
   double mse = 1734.0;
@@ -651,10 +707,11 @@ TEST_F(AudioDecoderG722StereoTest, CreateAndDestroy) {
 
 TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
   int tolerance = 6176;
+  int channel_diff_tolerance = 0;  // Both channels must match exactly.
   double mse = 238630.0;
   int delay = 22;  // Delay from input to output.
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
-  EncodeDecodeTest(data_length_, tolerance, mse, delay);
+  EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance);
   ReInitTest();
   EXPECT_FALSE(decoder_->HasDecodePlc());
 }
@@ -669,6 +726,17 @@ TEST_F(AudioDecoderOpusTest, EncodeDecode) {
   EXPECT_FALSE(decoder_->HasDecodePlc());
 }
 
+TEST_F(AudioDecoderOpusStereoTest, EncodeDecode) {
+  int tolerance = 6176;  // NOTE(review): same as G.722 stereo test; confirm.
+  int channel_diff_tolerance = 0;  // Both channels must match exactly.
+  double mse = 238630.0;  // NOTE(review): same as G.722 stereo test; confirm.
+  int delay = 22;  // Delay from input to output.
+  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus_2ch));
+  EncodeDecodeTest(1383, tolerance, mse, delay, channel_diff_tolerance);
+  ReInitTest();
+  EXPECT_FALSE(decoder_->HasDecodePlc());
+}
+
 TEST(AudioDecoder, CodecSampleRateHz) {
   EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu));
   EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa));
diff --git a/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h
index 6e6b8e85e..f89f887f9 100644
--- a/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h
+++ b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h
@@ -72,9 +72,9 @@ class AudioDecoder {
   virtual ~AudioDecoder() {}
 
   // Decodes |encode_len| bytes from |encoded| and writes the result in
-  // |decoded|. The number of samples produced is in the return value. If the
-  // decoder produced comfort noise, |speech_type| is set to kComfortNoise,
-  // otherwise it is kSpeech.
+  // |decoded|. The total number of samples produced, summed over all channels,
+  // is in the return value. If the decoder produced comfort noise,
+  // |speech_type| is set to kComfortNoise, otherwise it is kSpeech.
   virtual int Decode(const uint8_t* encoded, size_t encoded_len,
                      int16_t* decoded, SpeechType* speech_type) = 0;