Setting OPUS_SIGNAL_VOICE when enable DTX.

A better solution than forcing OPUS_APPLICATION_VOIP when enabling DTX has been found, which is to set OPUS_SIGNAL_VOICE. This reduces the uncertainty of entering DTX over silence period of audio. This CL contains the setup of OPUS_SIGNAL_VOICE and decoupling opus application mode with DTX. BUG=4559 R=henrik.lundin@webrtc.org, henrika@webrtc.org Review URL: https://webrtc-codereview.appspot.com/46959004 Cr-Commit-Position: refs/heads/master@{#9168}
2015-05-11 12:19:35 +02:00
parent 9f7908e497
commit 092041c1cd
13 changed files with 97 additions and 133 deletions
--- a/webrtc/modules/audio_coding/codecs/audio_encoder.h
+++ b/webrtc/modules/audio_coding/codecs/audio_encoder.h
@@ -124,14 +124,12 @@ class AudioEncoderMutable : public AudioEncoder {
  virtual bool SetFec(bool enable) = 0;

  // Enables or disables codec-internal VAD/DTX, if the implementation supports
-  // it. Otherwise, false is returned. If |force| is true, other configurations
-  // may be changed to allow the operation.
-  virtual bool SetDtx(bool enable, bool force) = 0;
+  // it.
+  virtual bool SetDtx(bool enable) = 0;

  // Sets the application mode. The implementation is free to disregard this
-  // setting. If |force| is true, other configurations may be changed to allow
-  // the operation.
-  virtual bool SetApplication(Application application, bool force) = 0;
+  // setting.
+  virtual bool SetApplication(Application application) = 0;

  // Sets an upper limit on the payload size produced by the encoder. The
  // implementation is free to disregard this setting.
--- a/webrtc/modules/audio_coding/codecs/audio_encoder_mutable_impl.h
+++ b/webrtc/modules/audio_coding/codecs/audio_encoder_mutable_impl.h
@@ -28,10 +28,9 @@ class AudioEncoderMutableImpl : public P {

  bool SetFec(bool enable) override { return false; }

-  bool SetDtx(bool enable, bool force) override { return false; }
+  bool SetDtx(bool enable) override { return false; }

-  bool SetApplication(AudioEncoderMutable::Application application,
-                      bool force) override {
+  bool SetApplication(AudioEncoderMutable::Application application) override {
    return false;
  }

--- a/webrtc/modules/audio_coding/codecs/opus/audio_encoder_mutable_opus_test.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/audio_encoder_mutable_opus_test.cc
@@ -47,7 +47,7 @@ TEST_F(AudioEncoderMutableOpusTest, DefaultApplicationModeStereo) {
 TEST_F(AudioEncoderMutableOpusTest, ChangeApplicationMode) {
  CreateCodec(2);
  EXPECT_TRUE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech, false));
+      encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech));
  EXPECT_EQ(AudioEncoderOpus::kVoip, encoder_->application());
 }

@@ -61,7 +61,7 @@ TEST_F(AudioEncoderMutableOpusTest, ResetWontChangeApplicationMode) {

  // Now change to kVoip.
  EXPECT_TRUE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech, false));
+      encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech));
  EXPECT_EQ(AudioEncoderOpus::kVoip, encoder_->application());

  // Trigger a reset again.
@@ -72,41 +72,12 @@ TEST_F(AudioEncoderMutableOpusTest, ResetWontChangeApplicationMode) {

 TEST_F(AudioEncoderMutableOpusTest, ToggleDtx) {
  CreateCodec(2);
-
-  // DTX is not allowed in audio mode, if mode forcing flag is false.
-  EXPECT_FALSE(encoder_->SetDtx(true, false));
+  // Enable DTX
+  EXPECT_TRUE(encoder_->SetDtx(true));
+  // Verify that the mode is still kAudio.
  EXPECT_EQ(AudioEncoderOpus::kAudio, encoder_->application());
-
-  // DTX will be on, if mode forcing flag is true. Then application mode is
-  // switched to kVoip.
-  EXPECT_TRUE(encoder_->SetDtx(true, true));
-  EXPECT_EQ(AudioEncoderOpus::kVoip, encoder_->application());
-
-  // Audio mode is not allowed when DTX is on, and DTX forcing flag is false.
-  EXPECT_FALSE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationAudio, false));
-  EXPECT_TRUE(encoder_->dtx_enabled());
-
-  // Audio mode will be set, if DTX forcing flag is true. Then DTX is switched
-  // off.
-  EXPECT_TRUE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationAudio, true));
-  EXPECT_FALSE(encoder_->dtx_enabled());
-
-  // Now we set VOIP mode. The DTX forcing flag has no effect.
-  EXPECT_TRUE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech, true));
-  EXPECT_FALSE(encoder_->dtx_enabled());
-
-  // In VOIP mode, we can enable DTX with mode forcing flag being false.
-  EXPECT_TRUE(encoder_->SetDtx(true, false));
-
  // Turn off DTX.
-  EXPECT_TRUE(encoder_->SetDtx(false, false));
-
-  // When DTX is off, we can set Audio mode with DTX forcing flag being false.
-  EXPECT_TRUE(
-      encoder_->SetApplication(AudioEncoderMutable::kApplicationAudio, false));
+  EXPECT_TRUE(encoder_->SetDtx(false));
 }
 #endif  // WEBRTC_CODEC_OPUS

--- a/webrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
@@ -67,8 +67,6 @@ bool AudioEncoderOpus::Config::IsOk() const {
    return false;
  if (complexity < 0 || complexity > 10)
    return false;
-  if (dtx_enabled && application != kVoip)
-    return false;
  return true;
 }

@@ -239,24 +237,19 @@ bool AudioEncoderMutableOpus::SetFec(bool enable) {
  return Reconstruct(conf);
 }

-bool AudioEncoderMutableOpus::SetDtx(bool enable, bool force) {
+bool AudioEncoderMutableOpus::SetDtx(bool enable) {
  auto conf = config();
-  if (enable && force)
-    conf.application = AudioEncoderOpus::kVoip;
  conf.dtx_enabled = enable;
  return Reconstruct(conf);
 }

-bool AudioEncoderMutableOpus::SetApplication(Application application,
-                                             bool force) {
+bool AudioEncoderMutableOpus::SetApplication(Application application) {
  auto conf = config();
  switch (application) {
    case kApplicationSpeech:
      conf.application = AudioEncoderOpus::kVoip;
      break;
    case kApplicationAudio:
-      if (force)
-        conf.dtx_enabled = false;
      conf.application = AudioEncoderOpus::kAudio;
      break;
  }
--- a/webrtc/modules/audio_coding/codecs/opus/interface/audio_encoder_opus.h
+++ b/webrtc/modules/audio_coding/codecs/opus/interface/audio_encoder_opus.h
@@ -84,8 +84,13 @@ class AudioEncoderMutableOpus
 public:
  explicit AudioEncoderMutableOpus(const CodecInst& codec_inst);
  bool SetFec(bool enable) override;
-  bool SetDtx(bool enable, bool force) override;
-  bool SetApplication(Application application, bool force) override;
+
+  // Set Opus DTX. Once enabled, Opus stops transmission, when it detects voice
+  // being inactive. During that, it still sends 2 packets (one for content, one
+  // for signaling) about every 400 ms.
+  bool SetDtx(bool enable) override;
+
+  bool SetApplication(Application application) override;
  bool SetMaxPlaybackRate(int frequency_hz) override;
  AudioEncoderOpus::ApplicationMode application() const {
    return encoder()->application();
--- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
@@ -168,15 +168,29 @@ int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
-  if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
-  } else {
+  if (!inst) {
    return -1;
  }
+
+  // To prevent Opus from entering CELT-only mode by forcing signal type to
+  // voice to make sure that DTX behaves correctly. Currently, DTX does not
+  // last long during a pure silence, if the signal type is not forced.
+  // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
+  // without it.
+  int ret = opus_encoder_ctl(inst->encoder,
+                             OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
+  if (ret != OPUS_OK)
+    return ret;
+
+  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
 }

 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (inst) {
+    int ret = opus_encoder_ctl(inst->encoder,
+                               OPUS_SET_SIGNAL(OPUS_AUTO));
+    if (ret != OPUS_OK)
+      return ret;
    return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
  } else {
    return -1;
--- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@@ -171,15 +171,49 @@ void OpusTest::TestDtxEffect(bool dtx) {
    }
  }

-  // DTX mode is maintained 400 ms.
-  for (int i = 0; i < 19; ++i) {
+  // When Opus is in DTX, it wakes up in a regular basis. It sends two packets,
+  // one with an arbitrary size and the other of 1-byte, then stops sending for
+  // 19 frames.
+  const int cycles = 5;
+  for (int j = 0; j < cycles; ++j) {
+    // DTX mode is maintained 19 frames.
+    for (int i = 0; i < 19; ++i) {
+      EXPECT_EQ(kOpus20msFrameSamples,
+                EncodeDecode(opus_encoder_, silence,
+                             kOpus20msFrameSamples, opus_decoder_,
+                             output_data_decode, &audio_type));
+      if (dtx) {
+        EXPECT_EQ(0, encoded_bytes_)  // Send 0 byte.
+            << "Opus should have entered DTX mode.";
+        EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
+        EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
+        EXPECT_EQ(2, audio_type);  // Comfort noise.
+      } else {
+        EXPECT_GT(encoded_bytes_, 1);
+        EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+        EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+        EXPECT_EQ(0, audio_type);  // Speech.
+      }
+    }
+
+    // Quit DTX after 19 frames.
+    EXPECT_EQ(kOpus20msFrameSamples,
+              EncodeDecode(opus_encoder_, silence,
+                           kOpus20msFrameSamples, opus_decoder_,
+                           output_data_decode, &audio_type));
+
+    EXPECT_GT(encoded_bytes_, 1);
+    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
+    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
+    EXPECT_EQ(0, audio_type);  // Speech.
+
+    // Enters DTX again immediately.
    EXPECT_EQ(kOpus20msFrameSamples,
              EncodeDecode(opus_encoder_, silence,
                           kOpus20msFrameSamples, opus_decoder_,
                           output_data_decode, &audio_type));
    if (dtx) {
-      EXPECT_EQ(0, encoded_bytes_)  // Send 0 byte.
-          << "Opus should have entered DTX mode.";
+      EXPECT_EQ(1, encoded_bytes_);  // Send 1 byte.
      EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
      EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
      EXPECT_EQ(2, audio_type);  // Comfort noise.
@@ -191,34 +225,6 @@ void OpusTest::TestDtxEffect(bool dtx) {
    }
  }

-  // Quit DTX after 400 ms
-  EXPECT_EQ(kOpus20msFrameSamples,
-            EncodeDecode(opus_encoder_, silence,
-                         kOpus20msFrameSamples, opus_decoder_,
-                         output_data_decode, &audio_type));
-
-  EXPECT_GT(encoded_bytes_, 1);
-  EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
-  EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
-  EXPECT_EQ(0, audio_type);  // Speech.
-
-  // Enters DTX again immediately.
-  EXPECT_EQ(kOpus20msFrameSamples,
-            EncodeDecode(opus_encoder_, silence,
-                         kOpus20msFrameSamples, opus_decoder_,
-                         output_data_decode, &audio_type));
-  if (dtx) {
-    EXPECT_EQ(1, encoded_bytes_);  // Send 1 byte.
-    EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
-    EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
-    EXPECT_EQ(2, audio_type);  // Comfort noise.
-  } else {
-    EXPECT_GT(encoded_bytes_, 1);
-    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
-    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
-    EXPECT_EQ(0, audio_type);  // Speech.
-  }
-
  silence[0] = 10000;
  if (dtx) {
    // Verify that encoder/decoder can jump out from DTX mode.
@@ -436,10 +442,6 @@ TEST_P(OpusTest, OpusDtxOff) {
 }

 TEST_P(OpusTest, OpusDtxOn) {
-  if (application_ == 1) {
-    // We do not check DTX under OPUS_APPLICATION_AUDIO mode.
-    return;
-  }
  TestDtxEffect(true);
 }

--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc
@@ -829,8 +829,7 @@ int AudioCodingModuleImpl::ConfigISACBandwidthEstimator(
 //      frame_size_ms, rate_bit_per_sec, enforce_frame_size);
 }

-int AudioCodingModuleImpl::SetOpusApplication(OpusApplicationMode application,
-                                              bool disable_dtx_if_needed) {
+int AudioCodingModuleImpl::SetOpusApplication(OpusApplicationMode application) {
  CriticalSectionScoped lock(acm_crit_sect_);
  if (!HaveValidEncoder("SetOpusApplication")) {
    return -1;
@@ -847,10 +846,7 @@ int AudioCodingModuleImpl::SetOpusApplication(OpusApplicationMode application,
      FATAL();
      return 0;
  }
-  return codec_manager_.CurrentSpeechEncoder()->SetApplication(
-             app, disable_dtx_if_needed)
-             ? 0
-             : -1;
+  return codec_manager_.CurrentSpeechEncoder()->SetApplication(app) ? 0 : -1;
 }

 // Informs Opus encoder of the maximum playback rate the receiver will render.
@@ -864,13 +860,12 @@ int AudioCodingModuleImpl::SetOpusMaxPlaybackRate(int frequency_hz) {
             : -1;
 }

-int AudioCodingModuleImpl::EnableOpusDtx(bool force_voip) {
+int AudioCodingModuleImpl::EnableOpusDtx() {
  CriticalSectionScoped lock(acm_crit_sect_);
  if (!HaveValidEncoder("EnableOpusDtx")) {
    return -1;
  }
-  return codec_manager_.CurrentSpeechEncoder()->SetDtx(true, force_voip) ? 0
-                                                                         : -1;
+  return codec_manager_.CurrentSpeechEncoder()->SetDtx(true) ? 0 : -1;
 }

 int AudioCodingModuleImpl::DisableOpusDtx() {
@@ -878,7 +873,7 @@ int AudioCodingModuleImpl::DisableOpusDtx() {
  if (!HaveValidEncoder("DisableOpusDtx")) {
    return -1;
  }
-  return codec_manager_.CurrentSpeechEncoder()->SetDtx(false, false) ? 0 : -1;
+  return codec_manager_.CurrentSpeechEncoder()->SetDtx(false) ? 0 : -1;
 }

 int AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) {
--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.h
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.h
@@ -221,14 +221,13 @@ class AudioCodingModuleImpl : public AudioCodingModule {
                                   int rate_bit_per_sec,
                                   bool enforce_frame_size = false) override;

-  int SetOpusApplication(OpusApplicationMode application,
-                         bool disable_dtx_if_needed) override;
+  int SetOpusApplication(OpusApplicationMode application) override;

  // If current send codec is Opus, informs it about the maximum playback rate
  // the receiver will render.
  int SetOpusMaxPlaybackRate(int frequency_hz) override;

-  int EnableOpusDtx(bool force_voip) override;
+  int EnableOpusDtx() override;

  int DisableOpusDtx() override;

--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
@@ -1156,7 +1156,7 @@ TEST_F(AcmSenderBitExactnessOldApi, MAYBE_Opus_stereo_20ms) {
 TEST_F(AcmSenderBitExactnessOldApi, MAYBE_Opus_stereo_20ms_voip) {
  ASSERT_NO_FATAL_FAILURE(SetUpTest("opus", 48000, 2, 120, 960, 960));
  // If not set, default will be kAudio in case of stereo.
-  EXPECT_EQ(0, send_test_->acm()->SetOpusApplication(kVoip, false));
+  EXPECT_EQ(0, send_test_->acm()->SetOpusApplication(kVoip));
  Run(AcmReceiverBitExactnessOldApi::PlatformChecksum(
          "9b9e12bc3cc793740966e11cbfa8b35b",
          "57412a4b5771d19ff03ec35deffe7067",
--- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
+++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
@@ -867,26 +867,20 @@ class AudioCodingModule {
      bool enforce_frame_size = false) = 0;

  ///////////////////////////////////////////////////////////////////////////
-  // int SetOpusApplication(OpusApplicationMode application,
-  //                        bool disable_dtx_if_needed)
+  // int SetOpusApplication()
  // Sets the intended application if current send codec is Opus. Opus uses this
  // to optimize the encoding for applications like VOIP and music. Currently,
-  // two modes are supported: kVoip and kAudio. kAudio is only allowed when Opus
-  // DTX is switched off. If DTX is on, and |application| == kAudio, a failure
-  // will be triggered unless |disable_dtx_if_needed| == true, for which, the
-  // DTX will be forced off.
+  // two modes are supported: kVoip and kAudio.
  //
  // Input:
  //   - application            : intended application.
-  //   - disable_dtx_if_needed  : whether to force Opus DTX to stop.
  //
  // Return value:
  //   -1 if current send codec is not Opus or error occurred in setting the
  //      Opus application mode.
  //    0 if the Opus application mode is successfully set.
  //
-  virtual int SetOpusApplication(OpusApplicationMode application,
-                                 bool force_dtx) = 0;
+  virtual int SetOpusApplication(OpusApplicationMode application) = 0;

  ///////////////////////////////////////////////////////////////////////////
  // int SetOpusMaxPlaybackRate()
@@ -905,18 +899,15 @@ class AudioCodingModule {
  virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;

  ///////////////////////////////////////////////////////////////////////////
-  // EnableOpusDtx(bool force_voip)
-  // Enable the DTX, if current send codec is Opus. Currently, DTX can only be
-  // enabled when the application mode is kVoip. If |force_voip| == true,
-  // the application mode will be forced to kVoip. Otherwise, a failure will be
-  // triggered if current application mode is kAudio.
-  // Input:
-  //   - force_application    : whether to force application mode to kVoip.
+  // EnableOpusDtx()
+  // Enable the DTX, if current send codec is Opus.
+  //
  // Return value:
  //   -1 if current send codec is not Opus or error occurred in enabling the
  //      Opus DTX.
-  //    0 if Opus DTX is enabled successfully..
-  virtual int EnableOpusDtx(bool force_application) = 0;
+  //    0 if Opus DTX is enabled successfully.
+  //
+  virtual int EnableOpusDtx() = 0;

  ///////////////////////////////////////////////////////////////////////////
  // int DisableOpusDtx()
--- a/webrtc/modules/audio_coding/main/test/TestVADDTX.cc
+++ b/webrtc/modules/audio_coding/main/test/TestVADDTX.cc
@@ -239,7 +239,7 @@ void TestOpusDtx::Perform() {
 #ifdef WEBRTC_CODEC_ISAC
  // If we set other codec than Opus, DTX cannot be toggled.
  RegisterCodec(kIsacWb);
-  EXPECT_EQ(-1, acm_send_->EnableOpusDtx(false));
+  EXPECT_EQ(-1, acm_send_->EnableOpusDtx());
  EXPECT_EQ(-1, acm_send_->DisableOpusDtx());
 #endif

@@ -255,7 +255,7 @@ void TestOpusDtx::Perform() {
  Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
      32000, 1, out_filename, false, expects);

-  EXPECT_EQ(0, acm_send_->EnableOpusDtx(false));
+  EXPECT_EQ(0, acm_send_->EnableOpusDtx());
  expects[kFrameEmpty] = 1;
  Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
      32000, 1, out_filename, true, expects);
@@ -268,10 +268,7 @@ void TestOpusDtx::Perform() {
  Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"),
      32000, 2, out_filename, false, expects);

-  // Opus should be now in kAudio mode. Opus DTX should not be set without
-  // forcing kVoip mode.
-  EXPECT_EQ(-1, acm_send_->EnableOpusDtx(false));
-  EXPECT_EQ(0, acm_send_->EnableOpusDtx(true));
+  EXPECT_EQ(0, acm_send_->EnableOpusDtx());

  expects[kFrameEmpty] = 1;
  Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"),
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -1574,7 +1574,7 @@ int Channel::SetOpusMaxPlaybackRate(int frequency_hz) {
 int Channel::SetOpusDtx(bool enable_dtx) {
  WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, _channelId),
               "Channel::SetOpusDtx(%d)", enable_dtx);
-  int ret = enable_dtx ? audio_coding_->EnableOpusDtx(true)
+  int ret = enable_dtx ? audio_coding_->EnableOpusDtx()
                       : audio_coding_->DisableOpusDtx();
  if (ret != 0) {
    _engineStatisticsPtr->SetLastError(