Consolidate audio conversion from Channel and TransmitMixer.

Replace the two versions with a single DownConvertToCodecFormat. As
mentioned in the code comments, this could be consolidated further with
RemixAndResample, but in that case we should write a full audio
converter class.
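
For reference, a rough usage sketch of the new entry point (signature
from the utility.h diff below; the capture format and the |capture_data|
pointer are illustrative only, and webrtc/voice_engine/utility.h is
assumed included):

  // Down-convert 10 ms of stereo 48 kHz capture to a mono 16 kHz codec.
  int16_t mono_buffer[kMaxMonoDataSizeSamples];  // Scratch for the downmix.
  PushResampler resampler;
  AudioFrame frame;
  DownConvertToCodecFormat(capture_data,  // const int16_t*, interleaved.
                           480,           // Samples per channel (10 ms @ 48 kHz).
                           2,             // Capture channels.
                           48000,         // Capture rate.
                           1,             // Codec channels.
                           16000,         // Codec rate.
                           mono_buffer,
                           &resampler,
                           &frame);
  // |frame| now holds 160 samples per channel of mono 16 kHz audio.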

Along the way:
- Fix the bug present in Channel::Demultiplex with mono input and a
stereo codec.
- Remove the 32 kHz max from the OnDataAvailable path. This avoids a
48 -> 32 -> 48 conversion when VoE is passed 48 kHz audio; instead we
get a straight pass-through to ACM (see the sketch after this list).
The 32 kHz conversion is still needed in the RecordedDataIsAvailable
path until APM natively supports 48 kHz.
- Merge resampler improvements from ACM1 to ACM2. This allows ACM to
handle 44.1 kHz audio passed to VoE and was originally done here:
https://webrtc-codereview.appspot.com/1590004
- Reuse the RemixAndResample unit tests for DownConvertToCodecFormat.
- Remove unused functions from utility.cc.
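
As a sketch of the relaxed input check (applied to both the ACM1 and
ACM2 Add10MsData hunks below), any rate up to 48 kHz is now accepted
rather than a fixed list:

  // Before: only 8, 16, 32 and 48 kHz input was accepted.
  if (audio_frame.sample_rate_hz_ > 48000) {
    assert(false);
    return -1;  // Input frequency not valid.
  }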

BUG=3155,3000,b/12867572
TESTED=voe_cmd_test using both the OnDataAvailable and
RecordedDataIsAvailable paths, with a captured audio format of all
combinations of {44.1,48} kHz and {1,2} channels, running through all
codecs, and finally using both ACM1 and ACM2.

R=henrika@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/11019005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5843 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date:   2014-04-03 21:56:01 +00:00
parent cca888a5bf
commit 40ee3d07ed
18 changed files with 1663 additions and 1791 deletions


@@ -13,20 +13,15 @@
#include <string.h>
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace acm2 {
ACMResampler::ACMResampler()
: resampler_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
ACMResampler::ACMResampler() {
}
ACMResampler::~ACMResampler() {
delete resampler_crit_sect_;
}
int ACMResampler::Resample10Msec(const int16_t* in_audio,
@@ -34,37 +29,28 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
int out_freq_hz,
int num_audio_channels,
int16_t* out_audio) {
CriticalSectionScoped cs(resampler_crit_sect_);
int in_length = in_freq_hz * num_audio_channels / 100;
int out_length = out_freq_hz * num_audio_channels / 100;
if (in_freq_hz == out_freq_hz) {
size_t length = static_cast<size_t>(in_freq_hz * num_audio_channels / 100);
memcpy(out_audio, in_audio, length * sizeof(int16_t));
return static_cast<int16_t>(in_freq_hz / 100);
memcpy(out_audio, in_audio, in_length * sizeof(int16_t));
return in_length / num_audio_channels;
}
// |maxLen| is maximum number of samples for 10ms at 48kHz.
int max_len = 480 * num_audio_channels;
int length_in = (in_freq_hz / 100) * num_audio_channels;
int out_len;
ResamplerType type = (num_audio_channels == 1) ? kResamplerSynchronous :
kResamplerSynchronousStereo;
if (resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type) < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
"Error in reset of resampler");
if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
num_audio_channels) != 0) {
LOG_FERR3(LS_ERROR, InitializeIfNeeded, in_freq_hz, out_freq_hz,
num_audio_channels);
return -1;
}
if (resampler_.Push(in_audio, length_in, out_audio, max_len, out_len) < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
"Error in resampler: resampler.Push");
out_length = resampler_.Resample(in_audio, in_length, out_audio, out_length);
if (out_length == -1) {
LOG_FERR4(LS_ERROR, Resample, in_audio, in_length, out_audio, out_length);
return -1;
}
return out_len / num_audio_channels;
return out_length / num_audio_channels;
}
} // namespace acm2
} // namespace webrtc
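
For reference, a minimal sketch of the PushResampler pattern adopted
above, replacing the legacy Resampler and its per-call critical section
(buffer sizes and rates illustrative):

  PushResampler resampler;
  int16_t in[441] = {0};   // 10 ms of mono audio at 44.1 kHz.
  int16_t out[480] = {0};  // Room for 10 ms at 48 kHz.
  if (resampler.InitializeIfNeeded(44100, 48000, 1) != 0) {
    // Initialization failed; Resample10Msec returns -1 here.
  }
  int out_length = resampler.Resample(in, 441, out, 480);
  // |out_length| is the total sample count across channels, or -1 on error.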


@@ -11,13 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class CriticalSectionWrapper;
namespace acm2 {
class ACMResampler {
@@ -32,13 +29,10 @@ class ACMResampler {
int16_t* out_audio);
private:
// Use the Resampler class.
Resampler resampler_;
CriticalSectionWrapper* resampler_crit_sect_;
PushResampler resampler_;
};
} // namespace acm2
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_


@@ -1205,11 +1205,7 @@ int AudioCodingModuleImpl::Add10MsData(
return -1;
}
// Allow for 8, 16, 32 and 48kHz input audio.
if ((audio_frame.sample_rate_hz_ != 8000)
&& (audio_frame.sample_rate_hz_ != 16000)
&& (audio_frame.sample_rate_hz_ != 32000)
&& (audio_frame.sample_rate_hz_ != 48000)) {
if (audio_frame.sample_rate_hz_ > 48000) {
assert(false);
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot Add 10 ms audio, input frequency not valid");
@@ -1371,7 +1367,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
if (preprocess_frame_.samples_per_channel_ < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot add 10 ms audio, resmapling failed");
"Cannot add 10 ms audio, resampling failed");
return -1;
}
preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;


@@ -1273,11 +1273,7 @@ int32_t AudioCodingModuleImpl::Add10MsData(
return -1;
}
// Allow for 8, 16, 32 and 48kHz input audio.
if ((audio_frame.sample_rate_hz_ != 8000)
&& (audio_frame.sample_rate_hz_ != 16000)
&& (audio_frame.sample_rate_hz_ != 32000)
&& (audio_frame.sample_rate_hz_ != 48000)) {
if (audio_frame.sample_rate_hz_ > 48000) {
assert(false);
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot Add 10 ms audio, input frequency not valid");
@@ -1444,7 +1440,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
if (preprocess_frame_.samples_per_channel_ < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot add 10 ms audio, resmapling failed");
"Cannot add 10 ms audio, resampling failed");
return -1;
}
preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;


@@ -61,6 +61,8 @@ struct ExperimentalAgc {
bool enabled;
};
static const int kAudioProcMaxNativeSampleRateHz = 32000;
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//


@@ -4150,61 +4150,26 @@ Channel::Demultiplex(const AudioFrame& audioFrame)
return 0;
}
// TODO(xians): This method borrows quite some code from
// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce
// code duplication.
void Channel::Demultiplex(const int16_t* audio_data,
int sample_rate,
int number_of_frames,
int number_of_channels) {
// The highest sample rate that WebRTC supports for mono audio is 96kHz.
static const int kMaxNumberOfFrames = 960;
assert(number_of_frames <= kMaxNumberOfFrames);
// Get the send codec information for doing resampling or downmixing later on.
CodecInst codec;
GetSendCodec(codec);
assert(codec.channels == 1 || codec.channels == 2);
int support_sample_rate = std::min(32000,
std::min(sample_rate, codec.plfreq));
// Downmix the data to mono if needed.
const int16_t* audio_ptr = audio_data;
if (number_of_channels == 2 && codec.channels == 1) {
if (!mono_recording_audio_.get())
mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]);
AudioFrameOperations::StereoToMono(audio_data, number_of_frames,
mono_recording_audio_.get());
audio_ptr = mono_recording_audio_.get();
if (!mono_recording_audio_.get()) {
// Temporary space for DownConvertToCodecFormat.
mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
}
// Resample the data to the sample rate that the codec is using.
if (input_resampler_.InitializeIfNeeded(sample_rate,
support_sample_rate,
codec.channels)) {
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"Channel::Demultiplex() unable to resample");
return;
}
int out_length = input_resampler_.Resample(audio_ptr,
number_of_frames * codec.channels,
_audioFrame.data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"Channel::Demultiplex() resampling failed");
return;
}
_audioFrame.samples_per_channel_ = out_length / codec.channels;
_audioFrame.timestamp_ = -1;
_audioFrame.sample_rate_hz_ = support_sample_rate;
_audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
_audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
_audioFrame.num_channels_ = codec.channels;
_audioFrame.id_ = _channelId;
DownConvertToCodecFormat(audio_data,
number_of_frames,
number_of_channels,
sample_rate,
codec.channels,
codec.plfreq,
mono_recording_audio_.get(),
&input_resampler_,
&_audioFrame);
}
uint32_t
@@ -4694,11 +4659,11 @@ Channel::MixOrReplaceAudioWithFile(int mixingFrequency)
{
// Currently file stream is always mono.
// TODO(xians): Change the code when FilePlayer supports real stereo.
Utility::MixWithSat(_audioFrame.data_,
_audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
MixWithSat(_audioFrame.data_,
_audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
}
else
{
@@ -4754,11 +4719,11 @@ Channel::MixAudioWithFile(AudioFrame& audioFrame,
{
// Currently file stream is always mono.
// TODO(xians): Change the code when FilePlayer supports real stereo.
Utility::MixWithSat(audioFrame.data_,
audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
MixWithSat(audioFrame.data_,
audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
}
else
{


@@ -545,7 +545,7 @@ private:
AudioLevel _outputAudioLevel;
bool _externalTransport;
AudioFrame _audioFrame;
scoped_array<int16_t> mono_recording_audio_;
scoped_ptr<int16_t[]> mono_recording_audio_;
// Resampler is used when input data is stereo while codec is mono.
PushResampler input_resampler_;
uint8_t _audioLevel_dBov;


@@ -16,11 +16,10 @@
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/voice_engine/statistics.h"
#include "webrtc/voice_engine/utility.h"
namespace webrtc {
namespace voe {
void
@@ -528,7 +527,8 @@ int OutputMixer::GetMixedAudio(int sample_rate_hz,
frame->sample_rate_hz_ = sample_rate_hz;
// TODO(andrew): Ideally the downmixing would occur much earlier, in
// AudioCodingModule.
return RemixAndResample(_audioFrame, &resampler_, frame);
RemixAndResample(_audioFrame, &resampler_, frame);
return 0;
}
int32_t
@@ -565,7 +565,9 @@ OutputMixer::DoOperationsOnCombinedSignal()
}
// --- Far-end Voice Quality Enhancement (AudioProcessing Module)
// TODO(ajm): Check with VoEBase if |need_audio_processing| is false.
// If so, we don't need to call this method and can avoid the subsequent
// resampling. See: https://code.google.com/p/webrtc/issues/detail?id=3147
APMAnalyzeReverseStream();
// --- External media processing
@@ -603,8 +605,7 @@ void OutputMixer::APMAnalyzeReverseStream() {
AudioFrame frame;
frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
if (RemixAndResample(_audioFrame, &audioproc_resampler_, &frame) == -1)
return;
RemixAndResample(_audioFrame, &audioproc_resampler_, &frame);
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
@@ -657,5 +658,4 @@ OutputMixer::InsertInbandDtmfTone()
}
} // namespace voe
} // namespace webrtc


@@ -1,70 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/trace.h"
namespace webrtc {
namespace voe {
int RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
// Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_,
src_frame.samples_per_channel_,
mono_audio);
audio_ptr = mono_audio;
audio_ptr_num_channels = 1;
}
if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
audio_ptr_num_channels) == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_, audio_ptr_num_channels);
return -1;
}
const int src_length = src_frame.samples_per_channel_ *
audio_ptr_num_channels;
int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, Resample, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
return -1;
}
dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
// Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo.
dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame);
}
return 0;
}
} // namespace voe
} // namespace webrtc


@@ -1,33 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
namespace webrtc {
class AudioFrame;
class PushResampler;
namespace voe {
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
// the desired values. Updates |samples_per_channel_| accordingly.
//
// On failure, returns -1 and copies |src_frame| to |dst_frame|.
int RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame);
} // namespace voe
} // namespace webrtc
#endif // VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_


@@ -11,13 +11,20 @@
#include <math.h>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/voice_engine/output_mixer.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc {
namespace voe {
namespace {
enum FunctionToTest {
TestRemixAndResample,
TestDownConvertToCodecFormat
};
class OutputMixerTest : public ::testing::Test {
protected:
OutputMixerTest() {
@@ -29,7 +36,8 @@ class OutputMixerTest : public ::testing::Test {
}
void RunResampleTest(int src_channels, int src_sample_rate_hz,
int dst_channels, int dst_sample_rate_hz);
int dst_channels, int dst_sample_rate_hz,
FunctionToTest function);
PushResampler resampler_;
AudioFrame src_frame_;
@@ -121,7 +129,8 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
void OutputMixerTest::RunResampleTest(int src_channels,
int src_sample_rate_hz,
int dst_channels,
int dst_sample_rate_hz) {
int dst_sample_rate_hz,
FunctionToTest function) {
PushResampler resampler; // Create a new one with every test.
const int16_t kSrcLeft = 30; // Shouldn't overflow for any used sample rate.
const int16_t kSrcRight = 15;
@@ -157,7 +166,21 @@ void OutputMixerTest::RunResampleTest(int src_channels,
/ src_sample_rate_hz * kInputKernelDelaySamples * dst_channels * 2;
printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later.
src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
if (function == TestRemixAndResample) {
RemixAndResample(src_frame_, &resampler, &dst_frame_);
} else {
int16_t mono_buffer[kMaxMonoDataSizeSamples];
DownConvertToCodecFormat(src_frame_.data_,
src_frame_.samples_per_channel_,
src_frame_.num_channels_,
src_frame_.sample_rate_hz_,
dst_frame_.num_channels_,
dst_frame_.sample_rate_hz_,
mono_buffer,
&resampler,
&dst_frame_);
}
if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
// The sinc resampler gives poor SNR at this extreme conversion, but we
// expect to see this rarely in practice.
@@ -171,13 +194,13 @@ TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
// Stereo -> stereo.
SetStereoFrame(&src_frame_, 10, 10);
SetStereoFrame(&dst_frame_, 0, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(src_frame_, dst_frame_);
// Mono -> mono.
SetMonoFrame(&src_frame_, 20);
SetMonoFrame(&dst_frame_, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(src_frame_, dst_frame_);
}
@@ -186,20 +209,18 @@ TEST_F(OutputMixerTest, RemixAndResampleMixingOnlySucceeds) {
SetStereoFrame(&dst_frame_, 0, 0);
SetMonoFrame(&src_frame_, 10);
SetStereoFrame(&golden_frame_, 10, 10);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(dst_frame_, golden_frame_);
// Mono -> stereo.
SetMonoFrame(&dst_frame_, 0);
SetStereoFrame(&src_frame_, 10, 20);
SetMonoFrame(&golden_frame_, 15);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(golden_frame_, dst_frame_);
}
TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
// TODO(ajm): convert this to the parameterized TEST_P style used in
// sinc_resampler_unittest.cc. We can then easily add tighter SNR thresholds.
const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
@@ -209,7 +230,28 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate]);
kChannels[dst_channel], kSampleRates[dst_rate],
TestRemixAndResample);
}
}
}
}
}
TEST_F(OutputMixerTest, ConvertToCodecFormatSucceeds) {
const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
if (dst_rate <= src_rate && dst_channel <= src_channel) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate],
TestDownConvertToCodecFormat);
}
}
}
}

File diff suppressed because it is too large.


@@ -17,6 +17,7 @@
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/level_indicator.h"
#include "webrtc/voice_engine/monitor_module.h"
@@ -36,9 +37,7 @@ class MixedAudio;
class Statistics;
class TransmitMixer : public MonitorObserver,
public FileCallback
{
public FileCallback {
public:
static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId);
@@ -175,10 +174,10 @@ private:
// sending codecs.
void GetSendCodecInfo(int* max_sample_rate, int* max_channels);
int GenerateAudioFrame(const int16_t audioSamples[],
int nSamples,
int nChannels,
int samplesPerSec);
void GenerateAudioFrame(const int16_t audioSamples[],
int nSamples,
int nChannels,
int samplesPerSec);
int32_t RecordAudioToFile(uint32_t mixingFrequency);
int32_t MixOrReplaceAudioWithFile(
@@ -232,6 +231,7 @@ private:
int32_t _remainingMuteMicTimeMs;
bool stereo_codec_;
bool swap_stereo_channels_;
scoped_ptr<int16_t[]> mono_buffer_;
};
} // namespace voe


@@ -10,116 +10,150 @@
#include "webrtc/voice_engine/utility.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/interface/module.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc
{
namespace webrtc {
namespace voe {
namespace voe
{
enum{kMaxTargetLen = 2*32*10}; // stereo 32KHz 10ms
// TODO(ajm): There is significant overlap between RemixAndResample and
// ConvertToCodecFormat, but if we're to consolidate we should probably make a
// real converter class.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
void Utility::MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len)
{
assert((target_channel == 1) || (target_channel == 2));
assert((source_channel == 1) || (source_channel == 2));
assert(source_len <= kMaxTargetLen);
// Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_,
src_frame.samples_per_channel_,
mono_audio);
audio_ptr = mono_audio;
audio_ptr_num_channels = 1;
}
if ((target_channel == 2) && (source_channel == 1))
{
// Convert source from mono to stereo.
int32_t left = 0;
int32_t right = 0;
for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i*2];
right = source[i] + target[i*2 + 1];
target[i*2] = WebRtcSpl_SatW32ToW16(left);
target[i*2 + 1] = WebRtcSpl_SatW32ToW16(right);
}
}
else if ((target_channel == 1) && (source_channel == 2))
{
// Convert source from stereo to mono.
int32_t temp = 0;
for (int i = 0; i < source_len/2; ++i) {
temp = ((source[i*2] + source[i*2 + 1])>>1) + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
else
{
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
audio_ptr_num_channels) == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_, audio_ptr_num_channels);
assert(false);
}
const int src_length = src_frame.samples_per_channel_ *
audio_ptr_num_channels;
int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, Resample, audio_ptr, src_length, dst_frame->data_);
assert(false);
}
dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
// Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo.
dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame);
}
}
void Utility::MixSubtractWithSat(int16_t target[],
const int16_t source[],
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = target[i] - source[i];
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
void DownConvertToCodecFormat(const int16_t* src_data,
int samples_per_channel,
int num_channels,
int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler* resampler,
AudioFrame* dst_af) {
assert(samples_per_channel <= kMaxMonoDataSizeSamples);
assert(num_channels == 1 || num_channels == 2);
assert(codec_num_channels == 1 || codec_num_channels == 2);
// Never upsample the capture signal here. This should be done at the
// end of the send chain.
int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && codec_num_channels == 1) {
AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
mono_buffer);
src_data = mono_buffer;
num_channels = 1;
}
if (resampler->InitializeIfNeeded(
sample_rate_hz, destination_rate, num_channels) != 0) {
LOG_FERR3(LS_ERROR,
InitializeIfNeeded,
sample_rate_hz,
destination_rate,
num_channels);
assert(false);
}
const int in_length = samples_per_channel * num_channels;
int out_length = resampler->Resample(
src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
assert(false);
}
dst_af->samples_per_channel_ = out_length / num_channels;
dst_af->sample_rate_hz_ = destination_rate;
dst_af->num_channels_ = num_channels;
dst_af->timestamp_ = -1;
dst_af->speech_type_ = AudioFrame::kNormalSpeech;
dst_af->vad_activity_ = AudioFrame::kVadUnknown;
}
void Utility::MixAndScaleWithSat(int16_t target[],
const int16_t source[], float scale,
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (target[i] + scale * source[i]);
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
}
void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len) {
assert(target_channel == 1 || target_channel == 2);
assert(source_channel == 1 || source_channel == 2);
void Utility::Scale(int16_t vector[], float scale, uint16_t len)
{
for (int i = 0; i < len; i++)
{
vector[i] = (int16_t) (scale * vector[i]);
if (target_channel == 2 && source_channel == 1) {
// Convert source from mono to stereo.
int32_t left = 0;
int32_t right = 0;
for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i * 2];
right = source[i] + target[i * 2 + 1];
target[i * 2] = WebRtcSpl_SatW32ToW16(left);
target[i * 2 + 1] = WebRtcSpl_SatW32ToW16(right);
}
}
void Utility::ScaleWithSat(int16_t vector[], float scale,
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (scale * vector[i]);
if (temp > 32767)
vector[i] = 32767;
else if (temp < -32768)
vector[i] = -32768;
else
vector[i] = (int16_t) temp;
} else if (target_channel == 1 && source_channel == 2) {
// Convert source from stereo to mono.
int32_t temp = 0;
for (int i = 0; i < source_len / 2; ++i) {
temp = ((source[i * 2] + source[i * 2 + 1]) >> 1) + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
} else {
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
}
} // namespace voe
} // namespace webrtc
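
A small usage sketch of the now free-standing MixWithSat, for the
mono-into-stereo case (buffer contents and sizes illustrative):

  int16_t target[2 * 160] = {0};  // 10 ms of stereo at 16 kHz.
  int16_t source[160] = {0};      // 10 ms of mono at 16 kHz.
  // Each mono source sample is added into both the left and right target
  // samples, saturated to the int16_t range.
  MixWithSat(target, 2, source, 1, 160);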


@@ -12,47 +12,48 @@
* Contains functions often used by different parts of VoiceEngine.
*/
#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H
#define WEBRTC_VOICE_ENGINE_UTILITY_H
#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H_
#define WEBRTC_VOICE_ENGINE_UTILITY_H_
#include "webrtc/typedefs.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc
{
namespace webrtc {
class Module;
class AudioFrame;
class PushResampler;
namespace voe
{
namespace voe {
class Utility
{
public:
static void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len);
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
// the desired values. Updates |samples_per_channel_| accordingly.
//
// On failure, copies |src_frame| to |dst_frame|.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame);
static void MixSubtractWithSat(int16_t target[],
const int16_t source[],
uint16_t len);
// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
// temporary space and must be of sufficient size to hold the downmixed source
// audio (recommend using a size of kMaxMonoDataSizeSamples).
void DownConvertToCodecFormat(const int16_t* src_data,
int samples_per_channel,
int num_channels,
int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler* resampler,
AudioFrame* dst_af);
static void MixAndScaleWithSat(int16_t target[],
const int16_t source[],
float scale,
uint16_t len);
static void Scale(int16_t vector[], float scale, uint16_t len);
static void ScaleWithSat(int16_t vector[],
float scale,
uint16_t len);
};
void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len);
} // namespace voe
} // namespace webrtc
#endif // WEBRTC_VOICE_ENGINE_UTILITY_H
#endif // WEBRTC_VOICE_ENGINE_UTILITY_H_
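
A short sketch of the RemixAndResample contract described above: the
destination frame carries the desired format before the call (values
illustrative):

  PushResampler resampler;
  AudioFrame src;  // Assume filled with 10 ms of stereo 48 kHz audio.
  AudioFrame dst;
  dst.num_channels_ = 1;        // Desired channel count.
  dst.sample_rate_hz_ = 16000;  // Desired rate.
  RemixAndResample(src, &resampler, &dst);
  // dst.samples_per_channel_ is now 160 (10 ms at 16 kHz); on failure,
  // |src| is copied to |dst| and the function asserts.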


@@ -25,13 +25,6 @@
#include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_impl.h"
#if (defined(_WIN32) && defined(_DLL) && (_MSC_VER == 1400))
// Fix for VS 2005 MD/MDd link problem
#include <stdio.h>
extern "C"
{ FILE _iob[3] = { __iob_func()[0], __iob_func()[1], __iob_func()[2]}; }
#endif
namespace webrtc
{
@@ -223,6 +216,9 @@ int VoEBaseImpl::OnDataAvailable(const int voe_channels[],
// No need to go through the APM, demultiplex the data to each VoE channel,
// encode and send to the network.
for (int i = 0; i < number_of_voe_channels; ++i) {
// TODO(ajm): In the case where multiple channels are using the same codec
// rate, this path needlessly does extra conversions. We should convert once
// and share between channels.
OnData(voe_channels[i], audio_data, 16, sample_rate,
number_of_channels, number_of_frames);
}


@@ -57,8 +57,6 @@
'monitor_module.h',
'output_mixer.cc',
'output_mixer.h',
'output_mixer_internal.cc',
'output_mixer_internal.h',
'shared_data.cc',
'shared_data.h',
'statistics.cc',


@@ -27,6 +27,10 @@
namespace webrtc {
// Internal buffer size required for mono audio, based on the highest sample
// rate voice engine supports (10 ms of audio at 192 kHz).
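// That is, 192000 samples/s * 0.01 s = 1920 samples per channel.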
static const int kMaxMonoDataSizeSamples = 1920;
// VolumeControl
enum { kMinVolumeLevel = 0 };
enum { kMaxVolumeLevel = 255 };