This is related to an earlier CL of enabling Opus 48 kHz.
https://webrtc-codereview.appspot.com/16619005/ It was reverted due to a build bot error, which this CL fixes. The problem was that when the audio conference mixer receives audio frames all at 48 kHz and mixes them, it uses the Audio Processing Module (APM) to do post-processing. However, the APM cannot handle 48 kHz input. The current solution is to not allow the mixer to output 48 kHz. TEST=locally solved https://webrtc-codereview.appspot.com/16619005/ BUG= R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/20779004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@6730 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
e6f84ae8a6
commit
026859b983
@ -32,10 +32,13 @@ typedef std::list<ParticipantFramePair*> ParticipantFramePairList;
|
||||
// stereo at most.
|
||||
//
|
||||
// TODO(andrew): consider not modifying |frame| here.
|
||||
void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame) {
|
||||
void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame, bool use_limiter) {
|
||||
assert(mixed_frame->num_channels_ >= frame->num_channels_);
|
||||
// Divide by two to avoid saturation in the mixing.
|
||||
*frame >>= 1;
|
||||
if (use_limiter) {
|
||||
// Divide by two to avoid saturation in the mixing.
|
||||
// This is only meaningful if the limiter will be used.
|
||||
*frame >>= 1;
|
||||
}
|
||||
if (mixed_frame->num_channels_ > frame->num_channels_) {
|
||||
// We only support mono-to-stereo.
|
||||
assert(mixed_frame->num_channels_ == 2 &&
|
||||
@ -131,6 +134,7 @@ AudioConferenceMixerImpl::AudioConferenceMixerImpl(int id)
|
||||
_participantList(),
|
||||
_additionalParticipantList(),
|
||||
_numMixedParticipants(0),
|
||||
use_limiter_(true),
|
||||
_timeStamp(0),
|
||||
_timeScheduler(kProcessPeriodicityInMs),
|
||||
_mixedAudioLevel(),
|
||||
@ -308,6 +312,11 @@ int32_t AudioConferenceMixerImpl::Process() {
|
||||
|
||||
_timeStamp += _sampleSize;
|
||||
|
||||
// We only use the limiter if it supports the output sample rate and
|
||||
// we're actually mixing multiple streams.
|
||||
use_limiter_ = _numMixedParticipants > 1 &&
|
||||
_outputFrequency <= kAudioProcMaxNativeSampleRateHz;
|
||||
|
||||
MixFromList(*mixedAudio, &mixList);
|
||||
MixAnonomouslyFromList(*mixedAudio, &additionalFramesList);
|
||||
MixAnonomouslyFromList(*mixedAudio, &rampOutList);
|
||||
@ -946,14 +955,6 @@ int32_t AudioConferenceMixerImpl::MixFromList(
|
||||
if(audioFrameList->empty()) return 0;
|
||||
|
||||
uint32_t position = 0;
|
||||
if(_numMixedParticipants == 1) {
|
||||
// No mixing required here; skip the saturation protection.
|
||||
AudioFrame* audioFrame = audioFrameList->front();
|
||||
mixedAudio.CopyFrom(*audioFrame);
|
||||
SetParticipantStatistics(&_scratchMixedParticipants[position],
|
||||
*audioFrame);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (_numMixedParticipants == 1) {
|
||||
mixedAudio.timestamp_ = audioFrameList->front()->timestamp_;
|
||||
@ -979,7 +980,7 @@ int32_t AudioConferenceMixerImpl::MixFromList(
|
||||
assert(false);
|
||||
position = 0;
|
||||
}
|
||||
MixFrames(&mixedAudio, (*iter));
|
||||
MixFrames(&mixedAudio, (*iter), use_limiter_);
|
||||
|
||||
SetParticipantStatistics(&_scratchMixedParticipants[position],
|
||||
**iter);
|
||||
@ -999,24 +1000,17 @@ int32_t AudioConferenceMixerImpl::MixAnonomouslyFromList(
|
||||
|
||||
if(audioFrameList->empty()) return 0;
|
||||
|
||||
if(_numMixedParticipants == 1) {
|
||||
// No mixing required here; skip the saturation protection.
|
||||
AudioFrame* audioFrame = audioFrameList->front();
|
||||
mixedAudio.CopyFrom(*audioFrame);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (AudioFrameList::const_iterator iter = audioFrameList->begin();
|
||||
iter != audioFrameList->end();
|
||||
++iter) {
|
||||
MixFrames(&mixedAudio, *iter);
|
||||
MixFrames(&mixedAudio, *iter, use_limiter_);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool AudioConferenceMixerImpl::LimitMixedAudio(AudioFrame& mixedAudio) {
|
||||
if(_numMixedParticipants == 1) {
|
||||
return true;
|
||||
if (!use_limiter_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Smoothly limit the mixed frame.
|
||||
|
@ -192,6 +192,9 @@ private:
|
||||
MixerParticipantList _additionalParticipantList;
|
||||
|
||||
size_t _numMixedParticipants;
|
||||
// Determines if we will use a limiter for clipping protection during
|
||||
// mixing.
|
||||
bool use_limiter_;
|
||||
|
||||
uint32_t _timeStamp;
|
||||
|
||||
|
@ -20,8 +20,12 @@ namespace {
|
||||
|
||||
const int16_t kLimiterHeadroom = 29204; // == -1 dbFS
|
||||
const int16_t kInt16Max = 0x7fff;
|
||||
const int kSampleRateHz = 16000;
|
||||
const int kPayloadType = 105;
|
||||
const int kInSampleRateHz = 16000; // Input file taken as 16 kHz by default.
|
||||
const int kRecSampleRateHz = 16000; // Recorded with 16 kHz L16.
|
||||
const int kTestDurationMs = 3000;
|
||||
const CodecInst kCodecL16 = {kPayloadType, "L16", 16000, 160, 1, 256000};
|
||||
const CodecInst kCodecOpus = {kPayloadType, "opus", 48000, 960, 1, 32000};
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -54,7 +58,8 @@ class MixingTest : public AfterInitializationFixture {
|
||||
bool real_audio,
|
||||
int16_t input_value,
|
||||
int16_t max_output_value,
|
||||
int16_t min_output_value) {
|
||||
int16_t min_output_value,
|
||||
const CodecInst& codec_inst) {
|
||||
ASSERT_LE(num_remote_streams_using_mono, num_remote_streams);
|
||||
|
||||
if (real_audio) {
|
||||
@ -77,7 +82,8 @@ class MixingTest : public AfterInitializationFixture {
|
||||
remote_streams[i] = voe_base_->CreateChannel();
|
||||
EXPECT_NE(-1, remote_streams[i]);
|
||||
}
|
||||
StartRemoteStreams(remote_streams, num_remote_streams_using_mono);
|
||||
StartRemoteStreams(remote_streams, num_remote_streams_using_mono,
|
||||
codec_inst);
|
||||
TEST_LOG("Playing %d remote streams.\n", num_remote_streams);
|
||||
|
||||
// Give it plenty of time to get started.
|
||||
@ -106,7 +112,7 @@ class MixingTest : public AfterInitializationFixture {
|
||||
void GenerateInputFile(int16_t input_value) {
|
||||
FILE* input_file = fopen(input_filename_.c_str(), "wb");
|
||||
ASSERT_TRUE(input_file != NULL);
|
||||
for (int i = 0; i < kSampleRateHz / 1000 * (kTestDurationMs * 2); i++) {
|
||||
for (int i = 0; i < kInSampleRateHz / 1000 * (kTestDurationMs * 2); i++) {
|
||||
ASSERT_EQ(1u, fwrite(&input_value, sizeof(input_value), 1, input_file));
|
||||
}
|
||||
ASSERT_EQ(0, fclose(input_file));
|
||||
@ -129,7 +135,7 @@ class MixingTest : public AfterInitializationFixture {
|
||||
// Ensure we've at least recorded half as much file as the duration of the
|
||||
// test. We have to use a relaxed tolerance here due to filesystem flakiness
|
||||
// on the bots.
|
||||
ASSERT_GE((samples_read * 1000.0) / kSampleRateHz, kTestDurationMs);
|
||||
ASSERT_GE((samples_read * 1000.0) / kRecSampleRateHz, kTestDurationMs);
|
||||
// Ensure we read the entire file.
|
||||
ASSERT_NE(0, feof(output_file));
|
||||
ASSERT_EQ(0, fclose(output_file));
|
||||
@ -153,17 +159,8 @@ class MixingTest : public AfterInitializationFixture {
|
||||
|
||||
// Start up remote streams ("normal" participants).
|
||||
void StartRemoteStreams(const std::vector<int>& streams,
|
||||
int num_remote_streams_using_mono) {
|
||||
// Use L16 at 16kHz to minimize distortion (file recording is 16kHz and
|
||||
// resampling will cause distortion).
|
||||
CodecInst codec_inst;
|
||||
strcpy(codec_inst.plname, "L16");
|
||||
codec_inst.channels = 1;
|
||||
codec_inst.plfreq = kSampleRateHz;
|
||||
codec_inst.pltype = 105;
|
||||
codec_inst.pacsize = codec_inst.plfreq / 100;
|
||||
codec_inst.rate = codec_inst.plfreq * sizeof(int16_t) * 8; // 8 bits/byte.
|
||||
|
||||
int num_remote_streams_using_mono,
|
||||
const CodecInst& codec_inst) {
|
||||
for (int i = 0; i < num_remote_streams_using_mono; ++i) {
|
||||
// Add some delay between starting up the channels in order to give them
|
||||
// different energies in the "real audio" test and hopefully exercise
|
||||
@ -173,10 +170,11 @@ class MixingTest : public AfterInitializationFixture {
|
||||
}
|
||||
|
||||
// The remainder of the streams will use stereo.
|
||||
codec_inst.channels = 2;
|
||||
codec_inst.pltype++;
|
||||
CodecInst codec_inst_stereo = codec_inst;
|
||||
codec_inst_stereo.channels = 2;
|
||||
codec_inst_stereo.pltype++;
|
||||
for (size_t i = num_remote_streams_using_mono; i < streams.size(); ++i) {
|
||||
StartRemoteStream(streams[i], codec_inst, 1234 + 2 * i);
|
||||
StartRemoteStream(streams[i], codec_inst_stereo, 1234 + 2 * i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,7 +208,7 @@ class MixingTest : public AfterInitializationFixture {
|
||||
EXPECT_NE(-1, size);
|
||||
fclose(fid);
|
||||
// Divided by 2 due to 2 bytes/sample.
|
||||
return size * 1000 / kSampleRateHz / 2;
|
||||
return size * 1000 / kRecSampleRateHz / 2;
|
||||
}
|
||||
|
||||
std::string input_filename_;
|
||||
@ -222,7 +220,11 @@ class MixingTest : public AfterInitializationFixture {
|
||||
// somewhat more realistic scenario using real audio. It can at least hunt for
|
||||
// asserts and crashes.
|
||||
TEST_F(MixingTest, MixManyChannelsForStress) {
|
||||
RunMixingTest(10, 0, 10, true, 0, 0, 0);
|
||||
RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecL16);
|
||||
}
|
||||
|
||||
TEST_F(MixingTest, MixManyChannelsForStressOpus) {
|
||||
RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecOpus);
|
||||
}
|
||||
|
||||
// These tests assume a maximum of three mixed participants. We typically allow
|
||||
@ -232,7 +234,7 @@ TEST_F(MixingTest, FourChannelsWithOnlyThreeMixed) {
|
||||
const int16_t kInputValue = 1000;
|
||||
const int16_t kExpectedOutput = kInputValue * 3;
|
||||
RunMixingTest(4, 0, 4, false, kInputValue, 1.1 * kExpectedOutput,
|
||||
0.9 * kExpectedOutput);
|
||||
0.9 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
// Ensure the mixing saturation protection is working. We can do this because
|
||||
@ -245,7 +247,7 @@ TEST_F(MixingTest, VerifySaturationProtection) {
|
||||
ASSERT_GT(kInputValue * 3, kInt16Max);
|
||||
ASSERT_LT(1.1 * kExpectedOutput, kInt16Max);
|
||||
RunMixingTest(3, 0, 3, false, kInputValue, 1.1 * kExpectedOutput,
|
||||
0.9 * kExpectedOutput);
|
||||
0.9 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) {
|
||||
@ -255,21 +257,21 @@ TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) {
|
||||
ASSERT_GT(0.95 * kExpectedOutput, kLimiterHeadroom);
|
||||
// Tighter constraints are required here to properly test this.
|
||||
RunMixingTest(1, 0, 1, false, kInputValue, kExpectedOutput,
|
||||
0.95 * kExpectedOutput);
|
||||
0.95 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
TEST_F(MixingTest, VerifyAnonymousAndNormalParticipantMixing) {
|
||||
const int16_t kInputValue = 1000;
|
||||
const int16_t kExpectedOutput = kInputValue * 2;
|
||||
RunMixingTest(1, 1, 1, false, kInputValue, 1.1 * kExpectedOutput,
|
||||
0.9 * kExpectedOutput);
|
||||
0.9 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
TEST_F(MixingTest, AnonymousParticipantsAreAlwaysMixed) {
|
||||
const int16_t kInputValue = 1000;
|
||||
const int16_t kExpectedOutput = kInputValue * 4;
|
||||
RunMixingTest(3, 1, 3, false, kInputValue, 1.1 * kExpectedOutput,
|
||||
0.9 * kExpectedOutput);
|
||||
0.9 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
TEST_F(MixingTest, VerifyStereoAndMonoMixing) {
|
||||
@ -277,7 +279,7 @@ TEST_F(MixingTest, VerifyStereoAndMonoMixing) {
|
||||
const int16_t kExpectedOutput = kInputValue * 2;
|
||||
RunMixingTest(2, 0, 1, false, kInputValue, 1.1 * kExpectedOutput,
|
||||
// Lower than 0.9 due to observed flakiness on bots.
|
||||
0.8 * kExpectedOutput);
|
||||
0.8 * kExpectedOutput, kCodecL16);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
Loading…
Reference in New Issue
Block a user