Consolidate audio conversion from Channel and TransmitMixer.

Replace the two versions with a single DownConvertToCodecFormat. As
noted in the code comments, this could be consolidated further with
RemixAndResample, but doing that properly would call for a full audio
converter class.

Along the way:
- Fix the bug in Channel::Demultiplex when mono input is used with a
stereo codec (see the sketch after this list).
- Remove the 32 kHz max from the OnDataAvailable path. This avoids a
48 -> 32 -> 48 conversion when VoE is passed 48 kHz audio; instead we
get a straight pass-through to ACM. The 32 kHz conversion is still
needed in the RecordedDataIsAvailable path until APM natively supports
48 kHz.
- Merge resampler improvements from ACM1 to ACM2. This allows ACM to
handle 44.1 kHz audio passed to VoE and was originally done here:
https://webrtc-codereview.appspot.com/1590004
- Reuse the RemixAndResample unit tests for DownConvertToCodecFormat.
- Remove unused functions from utility.cc.
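
To make the Demultiplex fix concrete, here is a minimal sketch of the
new path for mono captured audio with a stereo send codec. The call
site and buffer names are hypothetical; the signatures are from this
change. DownConvertToCodecFormat never upmixes, so the frame stays mono
and the rate is clamped to min(codec rate, capture rate):

  // Sketch only: 10 ms of 48 kHz mono capture feeding a stereo codec.
  int16_t mono_buffer[kMaxMonoDataSizeSamples];  // scratch for downmixing
  PushResampler resampler;
  AudioFrame frame;
  DownConvertToCodecFormat(captured_audio,  // hypothetical capture buffer
                           480,    // samples per channel (10 ms at 48 kHz)
                           1,      // captured channels
                           48000,  // capture rate
                           2,      // codec channels
                           48000,  // codec rate
                           mono_buffer, &resampler, &frame);
  // frame ends up mono at 48 kHz; the old Demultiplex mislabeled this
  // mono-input/stereo-codec case as stereo.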

BUG=3155,3000,b/12867572
TESTED=voe_cmd_test using both the OnDataAvailable and
RecordedDataIsAvailable paths, with a captured audio format of all
combinations of {44.1,48} kHz and {1,2} channels, running through all
codecs, and finally using both ACM1 and ACM2.

R=henrika@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/11019005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5843 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date: 2014-04-03 21:56:01 +00:00
Parent: cca888a5bf
Commit: 40ee3d07ed
18 changed files with 1663 additions and 1791 deletions

webrtc/modules/audio_coding/main/acm2/acm_resampler.cc

@@ -13,20 +13,15 @@
 #include <string.h>

 #include "webrtc/common_audio/resampler/include/resampler.h"
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
-#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/system_wrappers/interface/logging.h"

 namespace webrtc {
 namespace acm2 {

-ACMResampler::ACMResampler()
-    : resampler_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
+ACMResampler::ACMResampler() {
 }

 ACMResampler::~ACMResampler() {
-  delete resampler_crit_sect_;
 }

 int ACMResampler::Resample10Msec(const int16_t* in_audio,
@@ -34,37 +29,28 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
                                  int out_freq_hz,
                                  int num_audio_channels,
                                  int16_t* out_audio) {
-  CriticalSectionScoped cs(resampler_crit_sect_);
+  int in_length = in_freq_hz * num_audio_channels / 100;
+  int out_length = out_freq_hz * num_audio_channels / 100;
   if (in_freq_hz == out_freq_hz) {
-    size_t length = static_cast<size_t>(in_freq_hz * num_audio_channels / 100);
-    memcpy(out_audio, in_audio, length * sizeof(int16_t));
-    return static_cast<int16_t>(in_freq_hz / 100);
+    memcpy(out_audio, in_audio, in_length * sizeof(int16_t));
+    return in_length / num_audio_channels;
   }

-  // |maxLen| is maximum number of samples for 10ms at 48kHz.
-  int max_len = 480 * num_audio_channels;
-  int length_in = (in_freq_hz / 100) * num_audio_channels;
-  int out_len;
-
-  ResamplerType type = (num_audio_channels == 1) ? kResamplerSynchronous :
-      kResamplerSynchronousStereo;
-
-  if (resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type) < 0) {
-    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
-                 "Error in reset of resampler");
+  if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
+                                    num_audio_channels) != 0) {
+    LOG_FERR3(LS_ERROR, InitializeIfNeeded, in_freq_hz, out_freq_hz,
+              num_audio_channels);
     return -1;
   }

-  if (resampler_.Push(in_audio, length_in, out_audio, max_len, out_len) < 0) {
-    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
-                 "Error in resampler: resampler.Push");
+  out_length = resampler_.Resample(in_audio, in_length, out_audio, out_length);
+  if (out_length == -1) {
+    LOG_FERR4(LS_ERROR, Resample, in_audio, in_length, out_audio, out_length);
    return -1;
  }

-  return out_len / num_audio_channels;
+  return out_length / num_audio_channels;
 }

 }  // namespace acm2
 }  // namespace webrtc
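
A note on the length conventions in the new Resample10Msec: buffer
lengths passed to PushResampler are total interleaved samples, while
the return value is samples per channel. A hedged usage sketch (buffers
hypothetical):

  // 10 ms of stereo audio, 44100 Hz -> 32000 Hz.
  int16_t in[441 * 2];   // interleaved: 441 samples per channel
  int16_t out[320 * 2];
  webrtc::acm2::ACMResampler resampler;
  int samples_per_channel = resampler.Resample10Msec(in, 44100, 32000, 2, out);
  // samples_per_channel == 320 on success, -1 on failure.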

webrtc/modules/audio_coding/main/acm2/acm_resampler.h

@@ -11,13 +11,10 @@
 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_

-#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/common_audio/resampler/include/push_resampler.h"
 #include "webrtc/typedefs.h"

 namespace webrtc {
-
-class CriticalSectionWrapper;
-
 namespace acm2 {

 class ACMResampler {
@@ -32,13 +29,10 @@ class ACMResampler {
                      int16_t* out_audio);

  private:
-  // Use the Resampler class.
-  Resampler resampler_;
-  CriticalSectionWrapper* resampler_crit_sect_;
+  PushResampler resampler_;
 };

 }  // namespace acm2
 }  // namespace webrtc

 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_

webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc

@@ -1205,11 +1205,7 @@ int AudioCodingModuleImpl::Add10MsData(
     return -1;
   }

-  // Allow for 8, 16, 32 and 48kHz input audio.
-  if ((audio_frame.sample_rate_hz_ != 8000)
-      && (audio_frame.sample_rate_hz_ != 16000)
-      && (audio_frame.sample_rate_hz_ != 32000)
-      && (audio_frame.sample_rate_hz_ != 48000)) {
+  if (audio_frame.sample_rate_hz_ > 48000) {
     assert(false);
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                  "Cannot Add 10 ms audio, input frequency not valid");
@@ -1371,7 +1367,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   if (preprocess_frame_.samples_per_channel_ < 0) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                 "Cannot add 10 ms audio, resmapling failed");
+                 "Cannot add 10 ms audio, resampling failed");
     return -1;
   }
   preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;
webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc

@@ -1273,11 +1273,7 @@ int32_t AudioCodingModuleImpl::Add10MsData(
     return -1;
   }

-  // Allow for 8, 16, 32 and 48kHz input audio.
-  if ((audio_frame.sample_rate_hz_ != 8000)
-      && (audio_frame.sample_rate_hz_ != 16000)
-      && (audio_frame.sample_rate_hz_ != 32000)
-      && (audio_frame.sample_rate_hz_ != 48000)) {
+  if (audio_frame.sample_rate_hz_ > 48000) {
     assert(false);
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                  "Cannot Add 10 ms audio, input frequency not valid");
@@ -1444,7 +1440,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   if (preprocess_frame_.samples_per_channel_ < 0) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                 "Cannot add 10 ms audio, resmapling failed");
+                 "Cannot add 10 ms audio, resampling failed");
     return -1;
   }
   preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;

webrtc/modules/audio_processing/include/audio_processing.h

@@ -61,6 +61,8 @@ struct ExperimentalAgc {
   bool enabled;
 };

+static const int kAudioProcMaxNativeSampleRateHz = 32000;
+
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //

webrtc/voice_engine/channel.cc

@@ -4150,61 +4150,26 @@ Channel::Demultiplex(const AudioFrame& audioFrame)
     return 0;
 }

-// TODO(xians): This method borrows quite some code from
-// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce
-// code duplication.
 void Channel::Demultiplex(const int16_t* audio_data,
                           int sample_rate,
                           int number_of_frames,
                           int number_of_channels) {
-  // The highest sample rate that WebRTC supports for mono audio is 96kHz.
-  static const int kMaxNumberOfFrames = 960;
-  assert(number_of_frames <= kMaxNumberOfFrames);
-
-  // Get the send codec information for doing resampling or downmixing later on.
   CodecInst codec;
   GetSendCodec(codec);
-  assert(codec.channels == 1 || codec.channels == 2);
-  int support_sample_rate = std::min(32000,
-                                     std::min(sample_rate, codec.plfreq));
-
-  // Downmix the data to mono if needed.
-  const int16_t* audio_ptr = audio_data;
-  if (number_of_channels == 2 && codec.channels == 1) {
-    if (!mono_recording_audio_.get())
-      mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]);
-
-    AudioFrameOperations::StereoToMono(audio_data, number_of_frames,
-                                       mono_recording_audio_.get());
-    audio_ptr = mono_recording_audio_.get();
-  }
-
-  // Resample the data to the sample rate that the codec is using.
-  if (input_resampler_.InitializeIfNeeded(sample_rate,
-                                          support_sample_rate,
-                                          codec.channels)) {
-    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
-                 "Channel::Demultiplex() unable to resample");
-    return;
-  }
-
-  int out_length = input_resampler_.Resample(audio_ptr,
-                                             number_of_frames * codec.channels,
-                                             _audioFrame.data_,
-                                             AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
-                 "Channel::Demultiplex() resampling failed");
-    return;
-  }
-
-  _audioFrame.samples_per_channel_ = out_length / codec.channels;
-  _audioFrame.timestamp_ = -1;
-  _audioFrame.sample_rate_hz_ = support_sample_rate;
-  _audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
-  _audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
-  _audioFrame.num_channels_ = codec.channels;
-  _audioFrame.id_ = _channelId;
+  if (!mono_recording_audio_.get()) {
+    // Temporary space for DownConvertToCodecFormat.
+    mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
+  }
+  DownConvertToCodecFormat(audio_data,
+                           number_of_frames,
+                           number_of_channels,
+                           sample_rate,
+                           codec.channels,
+                           codec.plfreq,
+                           mono_recording_audio_.get(),
+                           &input_resampler_,
+                           &_audioFrame);
 }

 uint32_t
@@ -4694,11 +4659,11 @@ Channel::MixOrReplaceAudioWithFile(int mixingFrequency)
     {
        // Currently file stream is always mono.
        // TODO(xians): Change the code when FilePlayer supports real stereo.
-       Utility::MixWithSat(_audioFrame.data_,
-                           _audioFrame.num_channels_,
-                           fileBuffer.get(),
-                           1,
-                           fileSamples);
+       MixWithSat(_audioFrame.data_,
+                  _audioFrame.num_channels_,
+                  fileBuffer.get(),
+                  1,
+                  fileSamples);
     }
     else
     {
@@ -4754,11 +4719,11 @@ Channel::MixAudioWithFile(AudioFrame& audioFrame,
     {
        // Currently file stream is always mono.
        // TODO(xians): Change the code when FilePlayer supports real stereo.
-       Utility::MixWithSat(audioFrame.data_,
-                           audioFrame.num_channels_,
-                           fileBuffer.get(),
-                           1,
-                           fileSamples);
+       MixWithSat(audioFrame.data_,
+                  audioFrame.num_channels_,
+                  fileBuffer.get(),
+                  1,
+                  fileSamples);
     }
     else
     {

webrtc/voice_engine/channel.h

@@ -545,7 +545,7 @@ private:
     AudioLevel _outputAudioLevel;
     bool _externalTransport;
     AudioFrame _audioFrame;
-    scoped_array<int16_t> mono_recording_audio_;
+    scoped_ptr<int16_t[]> mono_recording_audio_;
     // Resampler is used when input data is stereo while codec is mono.
     PushResampler input_resampler_;
     uint8_t _audioLevel_dBov;

webrtc/voice_engine/output_mixer.cc

@@ -16,11 +16,10 @@
#include "webrtc/system_wrappers/interface/file_wrapper.h" #include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h" #include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/include/voe_external_media.h" #include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/voice_engine/statistics.h" #include "webrtc/voice_engine/statistics.h"
#include "webrtc/voice_engine/utility.h"
namespace webrtc { namespace webrtc {
namespace voe { namespace voe {
void void
@@ -528,7 +527,8 @@ int OutputMixer::GetMixedAudio(int sample_rate_hz,
frame->sample_rate_hz_ = sample_rate_hz; frame->sample_rate_hz_ = sample_rate_hz;
// TODO(andrew): Ideally the downmixing would occur much earlier, in // TODO(andrew): Ideally the downmixing would occur much earlier, in
// AudioCodingModule. // AudioCodingModule.
return RemixAndResample(_audioFrame, &resampler_, frame); RemixAndResample(_audioFrame, &resampler_, frame);
return 0;
} }
int32_t int32_t
@@ -565,7 +565,9 @@ OutputMixer::DoOperationsOnCombinedSignal()
} }
// --- Far-end Voice Quality Enhancement (AudioProcessing Module) // --- Far-end Voice Quality Enhancement (AudioProcessing Module)
// TODO(ajm): Check with VoEBase if |need_audio_processing| is false.
// If so, we don't need to call this method and can avoid the subsequent
// resampling. See: https://code.google.com/p/webrtc/issues/detail?id=3147
APMAnalyzeReverseStream(); APMAnalyzeReverseStream();
// --- External media processing // --- External media processing
@@ -603,8 +605,7 @@ void OutputMixer::APMAnalyzeReverseStream() {
AudioFrame frame; AudioFrame frame;
frame.num_channels_ = 1; frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz(); frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
if (RemixAndResample(_audioFrame, &audioproc_resampler_, &frame) == -1) RemixAndResample(_audioFrame, &audioproc_resampler_, &frame);
return;
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) { if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1), WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
@@ -657,5 +658,4 @@ OutputMixer::InsertInbandDtmfTone()
} }
} // namespace voe } // namespace voe
} // namespace webrtc } // namespace webrtc

webrtc/voice_engine/output_mixer_internal.cc (deleted)

@@ -1,70 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/voice_engine/output_mixer_internal.h"
-
-#include "webrtc/common_audio/resampler/include/push_resampler.h"
-#include "webrtc/modules/interface/module_common_types.h"
-#include "webrtc/modules/utility/interface/audio_frame_operations.h"
-#include "webrtc/system_wrappers/interface/logging.h"
-#include "webrtc/system_wrappers/interface/trace.h"
-
-namespace webrtc {
-namespace voe {
-
-int RemixAndResample(const AudioFrame& src_frame,
-                     PushResampler* resampler,
-                     AudioFrame* dst_frame) {
-  const int16_t* audio_ptr = src_frame.data_;
-  int audio_ptr_num_channels = src_frame.num_channels_;
-  int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
-
-  // Downmix before resampling.
-  if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
-    AudioFrameOperations::StereoToMono(src_frame.data_,
-                                       src_frame.samples_per_channel_,
-                                       mono_audio);
-    audio_ptr = mono_audio;
-    audio_ptr_num_channels = 1;
-  }
-
-  if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
-                                    dst_frame->sample_rate_hz_,
-                                    audio_ptr_num_channels) == -1) {
-    dst_frame->CopyFrom(src_frame);
-    LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
-              dst_frame->sample_rate_hz_, audio_ptr_num_channels);
-    return -1;
-  }
-
-  const int src_length = src_frame.samples_per_channel_ *
-                         audio_ptr_num_channels;
-  int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
-                                       AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    dst_frame->CopyFrom(src_frame);
-    LOG_FERR3(LS_ERROR, Resample, src_length, dst_frame->data_,
-              AudioFrame::kMaxDataSizeSamples);
-    return -1;
-  }
-  dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
-
-  // Upmix after resampling.
-  if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
-    // The audio in dst_frame really is mono at this point; MonoToStereo will
-    // set this back to stereo.
-    dst_frame->num_channels_ = 1;
-    AudioFrameOperations::MonoToStereo(dst_frame);
-  }
-
-  return 0;
-}
-
-}  // namespace voe
-}  // namespace webrtc

webrtc/voice_engine/output_mixer_internal.h (deleted)

@@ -1,33 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
-#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
-
-namespace webrtc {
-
-class AudioFrame;
-class PushResampler;
-
-namespace voe {
-
-// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
-// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
-// the desired values. Updates |samples_per_channel_| accordingly.
-//
-// On failure, returns -1 and copies |src_frame| to |dst_frame|.
-int RemixAndResample(const AudioFrame& src_frame,
-                     PushResampler* resampler,
-                     AudioFrame* dst_frame);
-
-}  // namespace voe
-}  // namespace webrtc
-
-#endif  // VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_

webrtc/voice_engine/output_mixer_unittest.cc

@@ -11,13 +11,20 @@
 #include <math.h>

 #include "testing/gtest/include/gtest/gtest.h"
-#include "webrtc/voice_engine/output_mixer.h"
-#include "webrtc/voice_engine/output_mixer_internal.h"
+#include "webrtc/common_audio/resampler/include/push_resampler.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/voice_engine/utility.h"
+#include "webrtc/voice_engine/voice_engine_defines.h"

 namespace webrtc {
 namespace voe {
 namespace {

+enum FunctionToTest {
+  TestRemixAndResample,
+  TestDownConvertToCodecFormat
+};
+
 class OutputMixerTest : public ::testing::Test {
  protected:
   OutputMixerTest() {
@@ -29,7 +36,8 @@ class OutputMixerTest : public ::testing::Test {
   }

   void RunResampleTest(int src_channels, int src_sample_rate_hz,
-                       int dst_channels, int dst_sample_rate_hz);
+                       int dst_channels, int dst_sample_rate_hz,
+                       FunctionToTest function);

   PushResampler resampler_;
   AudioFrame src_frame_;
@@ -121,7 +129,8 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
 void OutputMixerTest::RunResampleTest(int src_channels,
                                       int src_sample_rate_hz,
                                       int dst_channels,
-                                      int dst_sample_rate_hz) {
+                                      int dst_sample_rate_hz,
+                                      FunctionToTest function) {
   PushResampler resampler;  // Create a new one with every test.
   const int16_t kSrcLeft = 30;  // Shouldn't overflow for any used sample rate.
   const int16_t kSrcRight = 15;
@@ -157,7 +166,21 @@ void OutputMixerTest::RunResampleTest(int src_channels,
       / src_sample_rate_hz * kInputKernelDelaySamples * dst_channels * 2;
   printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
       src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
+  if (function == TestRemixAndResample) {
+    RemixAndResample(src_frame_, &resampler, &dst_frame_);
+  } else {
+    int16_t mono_buffer[kMaxMonoDataSizeSamples];
+    DownConvertToCodecFormat(src_frame_.data_,
+                             src_frame_.samples_per_channel_,
+                             src_frame_.num_channels_,
+                             src_frame_.sample_rate_hz_,
+                             dst_frame_.num_channels_,
+                             dst_frame_.sample_rate_hz_,
+                             mono_buffer,
+                             &resampler,
+                             &dst_frame_);
+  }

   if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
     // The sinc resampler gives poor SNR at this extreme conversion, but we
     // expect to see this rarely in practice.
@@ -171,13 +194,13 @@ TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
   // Stereo -> stereo.
   SetStereoFrame(&src_frame_, 10, 10);
   SetStereoFrame(&dst_frame_, 0, 0);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);

   // Mono -> mono.
   SetMonoFrame(&src_frame_, 20);
   SetMonoFrame(&dst_frame_, 0);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);
 }

@@ -186,20 +209,18 @@ TEST_F(OutputMixerTest, RemixAndResampleMixingOnlySucceeds) {
   SetStereoFrame(&dst_frame_, 0, 0);
   SetMonoFrame(&src_frame_, 10);
   SetStereoFrame(&golden_frame_, 10, 10);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(dst_frame_, golden_frame_);

   // Mono -> stereo.
   SetMonoFrame(&dst_frame_, 0);
   SetStereoFrame(&src_frame_, 10, 20);
   SetMonoFrame(&golden_frame_, 15);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(golden_frame_, dst_frame_);
 }

 TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
-  // TODO(ajm): convert this to the parameterized TEST_P style used in
-  // sinc_resampler_unittest.cc. We can then easily add tighter SNR thresholds.
   const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
   const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
   const int kChannels[] = {1, 2};
@@ -209,7 +230,28 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
     for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
       for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
         RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                        kChannels[dst_channel], kSampleRates[dst_rate]);
+                        kChannels[dst_channel], kSampleRates[dst_rate],
+                        TestRemixAndResample);
+      }
+    }
+  }
+}
+
+TEST_F(OutputMixerTest, ConvertToCodecFormatSucceeds) {
+  const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
+  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
+  const int kChannels[] = {1, 2};
+  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
+  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
+    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
+      for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
+        for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
+          if (dst_rate <= src_rate && dst_channel <= src_channel) {
+            RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
+                            kChannels[src_channel], kSampleRates[dst_rate],
+                            TestDownConvertToCodecFormat);
+          }
+        }
       }
     }
   }
File diff suppressed because it is too large.

webrtc/voice_engine/transmit_mixer.h

@@ -17,6 +17,7 @@
#include "webrtc/modules/interface/module_common_types.h" #include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/file_player.h" #include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h" #include "webrtc/modules/utility/interface/file_recorder.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/voice_engine/include/voe_base.h" #include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/level_indicator.h" #include "webrtc/voice_engine/level_indicator.h"
#include "webrtc/voice_engine/monitor_module.h" #include "webrtc/voice_engine/monitor_module.h"
@@ -36,9 +37,7 @@ class MixedAudio;
class Statistics; class Statistics;
class TransmitMixer : public MonitorObserver, class TransmitMixer : public MonitorObserver,
public FileCallback public FileCallback {
{
public: public:
static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId); static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId);
@@ -175,10 +174,10 @@ private:
// sending codecs. // sending codecs.
void GetSendCodecInfo(int* max_sample_rate, int* max_channels); void GetSendCodecInfo(int* max_sample_rate, int* max_channels);
int GenerateAudioFrame(const int16_t audioSamples[], void GenerateAudioFrame(const int16_t audioSamples[],
int nSamples, int nSamples,
int nChannels, int nChannels,
int samplesPerSec); int samplesPerSec);
int32_t RecordAudioToFile(uint32_t mixingFrequency); int32_t RecordAudioToFile(uint32_t mixingFrequency);
int32_t MixOrReplaceAudioWithFile( int32_t MixOrReplaceAudioWithFile(
@@ -232,6 +231,7 @@ private:
int32_t _remainingMuteMicTimeMs; int32_t _remainingMuteMicTimeMs;
bool stereo_codec_; bool stereo_codec_;
bool swap_stereo_channels_; bool swap_stereo_channels_;
scoped_ptr<int16_t[]> mono_buffer_;
}; };
} // namespace voe } // namespace voe

webrtc/voice_engine/utility.cc

@@ -10,116 +10,150 @@
#include "webrtc/voice_engine/utility.h" #include "webrtc/voice_engine/utility.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/interface/module.h" #include "webrtc/common_types.h"
#include "webrtc/system_wrappers/interface/trace.h" #include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc namespace webrtc {
{ namespace voe {
namespace voe // TODO(ajm): There is significant overlap between RemixAndResample and
{ // ConvertToCodecFormat, but if we're to consolidate we should probably make a
enum{kMaxTargetLen = 2*32*10}; // stereo 32KHz 10ms // real converter class.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
void Utility::MixWithSat(int16_t target[], // Downmix before resampling.
int target_channel, if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
const int16_t source[], AudioFrameOperations::StereoToMono(src_frame.data_,
int source_channel, src_frame.samples_per_channel_,
int source_len) mono_audio);
{ audio_ptr = mono_audio;
assert((target_channel == 1) || (target_channel == 2)); audio_ptr_num_channels = 1;
assert((source_channel == 1) || (source_channel == 2)); }
assert(source_len <= kMaxTargetLen);
if ((target_channel == 2) && (source_channel == 1)) if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
{ dst_frame->sample_rate_hz_,
// Convert source from mono to stereo. audio_ptr_num_channels) == -1) {
int32_t left = 0; dst_frame->CopyFrom(src_frame);
int32_t right = 0; LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
for (int i = 0; i < source_len; ++i) { dst_frame->sample_rate_hz_, audio_ptr_num_channels);
left = source[i] + target[i*2]; assert(false);
right = source[i] + target[i*2 + 1]; }
target[i*2] = WebRtcSpl_SatW32ToW16(left);
target[i*2 + 1] = WebRtcSpl_SatW32ToW16(right); const int src_length = src_frame.samples_per_channel_ *
} audio_ptr_num_channels;
} int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
else if ((target_channel == 1) && (source_channel == 2)) AudioFrame::kMaxDataSizeSamples);
{ if (out_length == -1) {
// Convert source from stereo to mono. dst_frame->CopyFrom(src_frame);
int32_t temp = 0; LOG_FERR3(LS_ERROR, Resample, audio_ptr, src_length, dst_frame->data_);
for (int i = 0; i < source_len/2; ++i) { assert(false);
temp = ((source[i*2] + source[i*2 + 1])>>1) + target[i]; }
target[i] = WebRtcSpl_SatW32ToW16(temp); dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
}
} // Upmix after resampling.
else if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
{ // The audio in dst_frame really is mono at this point; MonoToStereo will
int32_t temp = 0; // set this back to stereo.
for (int i = 0; i < source_len; ++i) { dst_frame->num_channels_ = 1;
temp = source[i] + target[i]; AudioFrameOperations::MonoToStereo(dst_frame);
target[i] = WebRtcSpl_SatW32ToW16(temp); }
}
}
} }
void Utility::MixSubtractWithSat(int16_t target[], void DownConvertToCodecFormat(const int16_t* src_data,
const int16_t source[], int samples_per_channel,
uint16_t len) int num_channels,
{ int sample_rate_hz,
int32_t temp(0); int codec_num_channels,
for (int i = 0; i < len; i++) int codec_rate_hz,
{ int16_t* mono_buffer,
temp = target[i] - source[i]; PushResampler* resampler,
if (temp > 32767) AudioFrame* dst_af) {
target[i] = 32767; assert(samples_per_channel <= kMaxMonoDataSizeSamples);
else if (temp < -32768) assert(num_channels == 1 || num_channels == 2);
target[i] = -32768; assert(codec_num_channels == 1 || codec_num_channels == 2);
else
target[i] = (int16_t) temp; // Never upsample the capture signal here. This should be done at the
} // end of the send chain.
int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && codec_num_channels == 1) {
AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
mono_buffer);
src_data = mono_buffer;
num_channels = 1;
}
if (resampler->InitializeIfNeeded(
sample_rate_hz, destination_rate, num_channels) != 0) {
LOG_FERR3(LS_ERROR,
InitializeIfNeeded,
sample_rate_hz,
destination_rate,
num_channels);
assert(false);
}
const int in_length = samples_per_channel * num_channels;
int out_length = resampler->Resample(
src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
assert(false);
}
dst_af->samples_per_channel_ = out_length / num_channels;
dst_af->sample_rate_hz_ = destination_rate;
dst_af->num_channels_ = num_channels;
dst_af->timestamp_ = -1;
dst_af->speech_type_ = AudioFrame::kNormalSpeech;
dst_af->vad_activity_ = AudioFrame::kVadUnknown;
} }
void Utility::MixAndScaleWithSat(int16_t target[], void MixWithSat(int16_t target[],
const int16_t source[], float scale, int target_channel,
uint16_t len) const int16_t source[],
{ int source_channel,
int32_t temp(0); int source_len) {
for (int i = 0; i < len; i++) assert(target_channel == 1 || target_channel == 2);
{ assert(source_channel == 1 || source_channel == 2);
temp = (int32_t) (target[i] + scale * source[i]);
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
}
void Utility::Scale(int16_t vector[], float scale, uint16_t len) if (target_channel == 2 && source_channel == 1) {
{ // Convert source from mono to stereo.
for (int i = 0; i < len; i++) int32_t left = 0;
{ int32_t right = 0;
vector[i] = (int16_t) (scale * vector[i]); for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i * 2];
right = source[i] + target[i * 2 + 1];
target[i * 2] = WebRtcSpl_SatW32ToW16(left);
target[i * 2 + 1] = WebRtcSpl_SatW32ToW16(right);
} }
} } else if (target_channel == 1 && source_channel == 2) {
// Convert source from stereo to mono.
void Utility::ScaleWithSat(int16_t vector[], float scale, int32_t temp = 0;
uint16_t len) for (int i = 0; i < source_len / 2; ++i) {
{ temp = ((source[i * 2] + source[i * 2 + 1]) >> 1) + target[i];
int32_t temp(0); target[i] = WebRtcSpl_SatW32ToW16(temp);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (scale * vector[i]);
if (temp > 32767)
vector[i] = 32767;
else if (temp < -32768)
vector[i] = -32768;
else
vector[i] = (int16_t) temp;
} }
} else {
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
} }
} // namespace voe } // namespace voe
} // namespace webrtc } // namespace webrtc
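
For reference, the now-free MixWithSat mixes |source| into |target| in
place with saturation, converting channel layout on the fly. A small
worked example (values hypothetical):

  // Mix 2 mono samples into 2 stereo sample pairs.
  int16_t target[] = {32000, -32000, 100, -100};
  int16_t source[] = {1000, 1000};
  webrtc::voe::MixWithSat(target, 2, source, 1, 2);
  // target[0] saturates to 32767 instead of wrapping; target[1] == -31000.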

webrtc/voice_engine/utility.h

@@ -12,47 +12,48 @@
  * Contains functions often used by different parts of VoiceEngine.
  */

-#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H
-#define WEBRTC_VOICE_ENGINE_UTILITY_H
+#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H_
+#define WEBRTC_VOICE_ENGINE_UTILITY_H_

 #include "webrtc/typedefs.h"
-#include "webrtc/voice_engine/voice_engine_defines.h"

-namespace webrtc
-{
+namespace webrtc {

-class Module;
+class AudioFrame;
+class PushResampler;

-namespace voe
-{
+namespace voe {

-class Utility
-{
-public:
-    static void MixWithSat(int16_t target[],
-                           int target_channel,
-                           const int16_t source[],
-                           int source_channel,
-                           int source_len);
-
-    static void MixSubtractWithSat(int16_t target[],
-                                   const int16_t source[],
-                                   uint16_t len);
-
-    static void MixAndScaleWithSat(int16_t target[],
-                                   const int16_t source[],
-                                   float scale,
-                                   uint16_t len);
-
-    static void Scale(int16_t vector[], float scale, uint16_t len);
-
-    static void ScaleWithSat(int16_t vector[],
-                             float scale,
-                             uint16_t len);
-};
+// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
+// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
+// the desired values. Updates |samples_per_channel_| accordingly.
+//
+// On failure, returns -1 and copies |src_frame| to |dst_frame|.
+void RemixAndResample(const AudioFrame& src_frame,
+                      PushResampler* resampler,
+                      AudioFrame* dst_frame);
+
+// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
+// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
+// temporary space and must be of sufficient size to hold the downmixed source
+// audio (recommend using a size of kMaxMonoDataSizeSamples).
+void DownConvertToCodecFormat(const int16_t* src_data,
+                              int samples_per_channel,
+                              int num_channels,
+                              int sample_rate_hz,
+                              int codec_num_channels,
+                              int codec_rate_hz,
+                              int16_t* mono_buffer,
+                              PushResampler* resampler,
+                              AudioFrame* dst_af);
+
+void MixWithSat(int16_t target[],
+                int target_channel,
+                const int16_t source[],
+                int source_channel,
+                int source_len);

 }  // namespace voe
 }  // namespace webrtc

-#endif  // WEBRTC_VOICE_ENGINE_UTILITY_H
+#endif  // WEBRTC_VOICE_ENGINE_UTILITY_H_

webrtc/voice_engine/voe_base_impl.cc

@@ -25,13 +25,6 @@
#include "webrtc/voice_engine/utility.h" #include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_impl.h" #include "webrtc/voice_engine/voice_engine_impl.h"
#if (defined(_WIN32) && defined(_DLL) && (_MSC_VER == 1400))
// Fix for VS 2005 MD/MDd link problem
#include <stdio.h>
extern "C"
{ FILE _iob[3] = { __iob_func()[0], __iob_func()[1], __iob_func()[2]}; }
#endif
namespace webrtc namespace webrtc
{ {
@@ -223,6 +216,9 @@ int VoEBaseImpl::OnDataAvailable(const int voe_channels[],
// No need to go through the APM, demultiplex the data to each VoE channel, // No need to go through the APM, demultiplex the data to each VoE channel,
// encode and send to the network. // encode and send to the network.
for (int i = 0; i < number_of_voe_channels; ++i) { for (int i = 0; i < number_of_voe_channels; ++i) {
// TODO(ajm): In the case where multiple channels are using the same codec
// rate, this path needlessly does extra conversions. We should convert once
// and share between channels.
OnData(voe_channels[i], audio_data, 16, sample_rate, OnData(voe_channels[i], audio_data, 16, sample_rate,
number_of_channels, number_of_frames); number_of_channels, number_of_frames);
} }

webrtc/voice_engine/voice_engine_core.gypi

@@ -57,8 +57,6 @@
             'monitor_module.h',
             'output_mixer.cc',
             'output_mixer.h',
-            'output_mixer_internal.cc',
-            'output_mixer_internal.h',
             'shared_data.cc',
             'shared_data.h',
             'statistics.cc',

webrtc/voice_engine/voice_engine_defines.h

@@ -27,6 +27,10 @@
 namespace webrtc {

+// Internal buffer size required for mono audio, based on the highest sample
+// rate voice engine supports (10 ms of audio at 192 kHz).
+static const int kMaxMonoDataSizeSamples = 1920;
+
 // VolumeControl
 enum { kMinVolumeLevel = 0 };
 enum { kMaxVolumeLevel = 255 };
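
The value follows from 10 ms at the highest supported mono rate:
192000 samples/s * 0.010 s = 1920 samples. A compile-time sanity check
could read (illustration only, not part of this change):

  static_assert(192000 / 100 == 1920, "10 ms of mono audio at 192 kHz");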