Merge r4326 from stable to trunk.
r4326 was mistakenly committed to stable, so this is to re-merge it back to trunk. Add a new interface to support multiple sources in webrtc. CaptureData() will be called by chrome with a flag |need_audio_processing| to indicate whether the data needs to be processed by APM or not. Unlike the old interface, which sends the data to all voe channels, the new interface specifies a list of voe channels that the data is demultiplexed to. R=tommi@webrtc.org Review URL: https://webrtc-codereview.appspot.com/1919004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4449 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -72,6 +72,32 @@ public:
|
||||
void* audioSamples,
|
||||
uint32_t& nSamplesOut) = 0;
|
||||
|
||||
// Method to pass captured data directly and unmixed to network channels.
// |voe_channels| contains a list of VoE channels which are the
// sinks to the capture data. |audio_delay_milliseconds| is the sum of
// recording delay and playout delay of the hardware. |current_volume| is
// in the range of [0, 255], representing the current microphone analog
// volume. |key_pressed| is used by the typing detection.
// |need_audio_processing| specifies if the data needs to be processed by
// APM. Currently WebRtc supports only one APM, and Chrome will make sure
// only one stream goes through APM. When |need_audio_processing| is false,
// the values of |audio_delay_milliseconds|, |current_volume| and
// |key_pressed| will be ignored.
// The return value is the new microphone volume, in the range of [0, 255].
// When the volume does not need to be updated, it returns 0.
// The default implementation is a no-op that reports "no volume change".
// TODO(xians): Make the interface pure virtual after libjingle has its
// implementation.
virtual int OnDataAvailable(int voe_channels[],
                            int number_of_voe_channels,
                            const int16_t* audio_data,
                            int sample_rate,
                            int number_of_channels,
                            int number_of_frames,
                            int audio_delay_milliseconds,
                            int current_volume,
                            bool key_pressed,
                            bool need_audio_processing) { return 0; }
|
||||
|
||||
protected:
|
||||
virtual ~AudioTransport() {}
|
||||
};
|
||||
|
||||
@@ -129,6 +129,19 @@ class AudioTransportAPI: public AudioTransport {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stub override of AudioTransport::OnDataAvailable; this test transport
// does not exercise the unmixed multi-channel capture path, so it always
// returns 0 ("no microphone volume change").
virtual int OnDataAvailable(int voe_channels[],
                            int number_of_voe_channels,
                            const int16_t* audio_data,
                            int sample_rate,
                            int number_of_channels,
                            int number_of_frames,
                            int audio_delay_milliseconds,
                            int current_volume,
                            bool key_pressed,
                            bool need_audio_processing) {
  return 0;
}
|
||||
|
||||
private:
|
||||
uint32_t rec_count_;
|
||||
uint32_t play_count_;
|
||||
|
||||
@@ -557,6 +557,19 @@ int32_t AudioTransportImpl::NeedMorePlayData(
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stub implementation of the multi-channel capture callback. The functional
// test transport does not route unmixed data to VoE channels; it always
// returns 0, i.e. no microphone volume change is requested.
int AudioTransportImpl::OnDataAvailable(int voe_channels[],
                                        int number_of_voe_channels,
                                        const int16_t* audio_data,
                                        int sample_rate,
                                        int number_of_channels,
                                        int number_of_frames,
                                        int audio_delay_milliseconds,
                                        int current_volume,
                                        bool key_pressed,
                                        bool need_audio_processing) {
  return 0;
}
|
||||
|
||||
FuncTestManager::FuncTestManager() :
|
||||
_processThread(NULL),
|
||||
_audioDevice(NULL),
|
||||
|
||||
@@ -111,6 +111,17 @@ public:
|
||||
void* audioSamples,
|
||||
uint32_t& nSamplesOut);
|
||||
|
||||
// AudioTransport callback for passing captured data unmixed to a list of
// VoE channels. Returns the new microphone volume in [0, 255], or 0 when
// no volume change is needed. See AudioTransport::OnDataAvailable for the
// full parameter contract.
virtual int OnDataAvailable(int voe_channels[],
                            int number_of_voe_channels,
                            const int16_t* audio_data,
                            int sample_rate,
                            int number_of_channels,
                            int number_of_frames,
                            int audio_delay_milliseconds,
                            int current_volume,
                            bool key_pressed,
                            bool need_audio_processing);
|
||||
|
||||
AudioTransportImpl(AudioDeviceModule* audioDevice);
|
||||
~AudioTransportImpl();
|
||||
|
||||
|
||||
@@ -4418,6 +4418,63 @@ Channel::Demultiplex(const AudioFrame& audioFrame)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(xians): This method borrows quite some code from
// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce
// code duplication.
//
// Copies |audio_data| into this channel's |_audioFrame|, downmixing to mono
// and/or resampling as required by the channel's current send codec. On any
// resampler error the method logs and returns, leaving |_audioFrame| with
// whatever state it had before (no error is propagated to the caller).
void Channel::Demultiplex(const int16_t* audio_data,
                          int number_of_frames,
                          int number_of_channels,
                          int sample_rate) {
  // The highest sample rate that WebRTC supports for mono audio is 96kHz.
  static const int kMaxNumberOfFrames = 960;
  assert(number_of_frames <= kMaxNumberOfFrames);

  // Get the send codec information for doing resampling or downmixing later on.
  CodecInst codec;
  GetSendCodec(codec);
  assert(codec.channels == 1 || codec.channels == 2);
  // Target rate is capped at 32 kHz and never exceeds either the input rate
  // or the codec's payload frequency.
  int support_sample_rate = std::min(32000,
                                     std::min(sample_rate, codec.plfreq));

  // Downmix the data to mono if needed. The mono scratch buffer is lazily
  // allocated once and reused across calls.
  const int16_t* audio_ptr = audio_data;
  if (number_of_channels == 2 && codec.channels == 1) {
    if (!mono_recording_audio_.get())
      mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]);

    AudioFrameOperations::StereoToMono(audio_data, number_of_frames,
                                       mono_recording_audio_.get());
    audio_ptr = mono_recording_audio_.get();
  }

  // Resample the data to the sample rate that the codec is using.
  // NOTE(review): the source length below is number_of_frames *
  // codec.channels — this assumes the mono-input/stereo-codec combination
  // cannot occur (it would over-read |audio_ptr|); TODO confirm callers
  // guarantee number_of_channels >= codec.channels.
  if (input_resampler_.InitializeIfNeeded(sample_rate,
                                          support_sample_rate,
                                          codec.channels)) {
    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
                 "Channel::Demultiplex() unable to resample");
    return;
  }

  int out_length = input_resampler_.Resample(audio_ptr,
                                             number_of_frames * codec.channels,
                                             _audioFrame.data_,
                                             AudioFrame::kMaxDataSizeSamples);
  if (out_length == -1) {
    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
                 "Channel::Demultiplex() resampling failed");
    return;
  }

  // Populate frame metadata. timestamp_ is set to -1 — presumably a
  // "no RTP timestamp" marker for this unmixed path; verify against
  // PrepareEncodeAndSend's handling.
  _audioFrame.samples_per_channel_ = out_length / codec.channels;
  _audioFrame.timestamp_ = -1;
  _audioFrame.sample_rate_hz_ = support_sample_rate;
  _audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
  _audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
  _audioFrame.num_channels_ = codec.channels;
  _audioFrame.id_ = _channelId;
}
|
||||
|
||||
uint32_t
|
||||
Channel::PrepareEncodeAndSend(int mixingFrequency)
|
||||
{
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
|
||||
#define WEBRTC_VOICE_ENGINE_CHANNEL_H
|
||||
|
||||
#include "webrtc/common_audio/resampler/include/resampler.h"
|
||||
#include "webrtc/common_audio/resampler/include/push_resampler.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
|
||||
#include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h"
|
||||
@@ -422,6 +422,13 @@ public:
|
||||
return _outputAudioLevel.Level();
|
||||
}
|
||||
uint32_t Demultiplex(const AudioFrame& audioFrame);
|
||||
// Demultiplex the data to the channel's |_audioFrame|. The difference
// between this method and the overloaded method above is that |audio_data|
// does not go through transmit_mixer and APM. The data is downmixed and
// resampled to match the channel's send codec as needed; resampling errors
// are logged and swallowed (the method returns void).
void Demultiplex(const int16_t* audio_data,
                 int number_of_frames,
                 int number_of_channels,
                 int sample_rate);
|
||||
uint32_t PrepareEncodeAndSend(int mixingFrequency);
|
||||
uint32_t EncodeAndSend();
|
||||
|
||||
@@ -454,6 +461,9 @@ private:
|
||||
AudioLevel _outputAudioLevel;
|
||||
bool _externalTransport;
|
||||
AudioFrame _audioFrame;
|
||||
scoped_array<int16_t> mono_recording_audio_;
|
||||
// Resampler is used when input data is stereo while codec is mono.
|
||||
PushResampler input_resampler_;
|
||||
uint8_t _audioLevel_dBov;
|
||||
FilePlayer* _inputFilePlayerPtr;
|
||||
FilePlayer* _outputFilePlayerPtr;
|
||||
|
||||
@@ -443,6 +443,23 @@ TransmitMixer::DemuxAndMix()
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Demux the mixer's |_audioFrame| to the given subset of VoE channels only
// (unlike the parameterless overload above). Channel ids that do not
// resolve to a live channel are silently skipped; channels whose input is
// on hold only get their local timestamp updated.
void TransmitMixer::DemuxAndMix(int voe_channels[],
                                int number_of_voe_channels) {
  for (int i = 0; i < number_of_voe_channels; ++i) {
    // ScopedChannel presumably keeps the channel alive for the duration of
    // this scope — confirm against ChannelManager's locking semantics.
    voe::ScopedChannel sc(*_channelManagerPtr, voe_channels[i]);
    voe::Channel* channel_ptr = sc.ChannelPtr();
    if (channel_ptr) {
      if (channel_ptr->InputIsOnHold()) {
        channel_ptr->UpdateLocalTimeStamp();
      } else if (channel_ptr->Sending()) {
        // Demultiplex makes a copy of its input.
        channel_ptr->Demultiplex(_audioFrame);
        channel_ptr->PrepareEncodeAndSend(_audioFrame.sample_rate_hz_);
      }
    }
  }
}
|
||||
|
||||
int32_t
|
||||
TransmitMixer::EncodeAndSend()
|
||||
{
|
||||
@@ -463,6 +480,16 @@ TransmitMixer::EncodeAndSend()
|
||||
return 0;
|
||||
}
|
||||
|
||||
void TransmitMixer::EncodeAndSend(int voe_channels[],
|
||||
int number_of_voe_channels) {
|
||||
for (int i = 0; i < number_of_voe_channels; ++i) {
|
||||
voe::ScopedChannel sc(*_channelManagerPtr, voe_channels[i]);
|
||||
voe::Channel* channel_ptr = sc.ChannelPtr();
|
||||
if (channel_ptr && channel_ptr->Sending() && !channel_ptr->InputIsOnHold())
|
||||
channel_ptr->EncodeAndSend();
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TransmitMixer::CaptureLevel() const
|
||||
{
|
||||
CriticalSectionScoped cs(&_critSect);
|
||||
|
||||
@@ -61,8 +61,14 @@ public:
|
||||
|
||||
|
||||
int32_t DemuxAndMix();
// Used by Chrome to pass the recording data to the specific VoE
// channels for demux.
void DemuxAndMix(int voe_channels[], int number_of_voe_channels);

int32_t EncodeAndSend();
// Used by Chrome to pass the recording data to the specific VoE
// channels for encoding and sending to the network.
void EncodeAndSend(int voe_channels[], int number_of_voe_channels);

uint32_t CaptureLevel() const;
|
||||
|
||||
|
||||
@@ -274,6 +274,69 @@ int32_t VoEBaseImpl::NeedMorePlayData(
|
||||
return 0;
|
||||
}
|
||||
|
||||
int VoEBaseImpl::OnDataAvailable(int voe_channels[],
|
||||
int number_of_voe_channels,
|
||||
const int16_t* audio_data,
|
||||
int sample_rate,
|
||||
int number_of_channels,
|
||||
int number_of_frames,
|
||||
int audio_delay_milliseconds,
|
||||
int current_volume,
|
||||
bool key_pressed,
|
||||
bool need_audio_processing) {
|
||||
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1),
|
||||
"VoEBaseImpl::OnDataAvailable(number_of_voe_channels=%d, "
|
||||
"sample_rate=%d, number_of_channels=%d, number_of_frames=%d, "
|
||||
"audio_delay_milliseconds=%d, current_volume=%d, "
|
||||
"key_pressed=%d, need_audio_processing=%d)",
|
||||
number_of_voe_channels, sample_rate, number_of_channels,
|
||||
number_of_frames, audio_delay_milliseconds, current_volume,
|
||||
key_pressed, need_audio_processing);
|
||||
|
||||
if (need_audio_processing) {
|
||||
// Perform channel-independent operations
|
||||
// (APM, mix with file, record to file, mute, etc.)
|
||||
_shared->transmit_mixer()->PrepareDemux(
|
||||
audio_data, number_of_frames, number_of_channels,
|
||||
sample_rate, static_cast<uint16_t>(audio_delay_milliseconds), 0,
|
||||
current_volume, key_pressed);
|
||||
_shared->transmit_mixer()->DemuxAndMix(voe_channels,
|
||||
number_of_voe_channels);
|
||||
_shared->transmit_mixer()->EncodeAndSend(voe_channels,
|
||||
number_of_voe_channels);
|
||||
// Update the volume if the analog AGC is working.
|
||||
if (_shared->audio_processing() &&
|
||||
_shared->audio_processing()->gain_control()->mode() ==
|
||||
GainControl::kAdaptiveAnalog) {
|
||||
return _shared->transmit_mixer()->CaptureLevel();
|
||||
}
|
||||
|
||||
// Return 0 to indicate no need to change the volume.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// No need to go through the APM, demultiplex the data to each VoE channel,
|
||||
// encode and send to the network.
|
||||
for (int i = 0; i < number_of_voe_channels; ++i) {
|
||||
voe::ScopedChannel sc(_shared->channel_manager(), voe_channels[i]);
|
||||
voe::Channel* channel_ptr = sc.ChannelPtr();
|
||||
if (!channel_ptr)
|
||||
continue;
|
||||
|
||||
if (channel_ptr->InputIsOnHold()) {
|
||||
channel_ptr->UpdateLocalTimeStamp();
|
||||
} else if (channel_ptr->Sending()) {
|
||||
channel_ptr->Demultiplex(audio_data, sample_rate, number_of_frames,
|
||||
number_of_channels);
|
||||
channel_ptr->PrepareEncodeAndSend(sample_rate);
|
||||
channel_ptr->EncodeAndSend();
|
||||
}
|
||||
}
|
||||
|
||||
// Return 0 to indicate no need to change the volume.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
|
||||
{
|
||||
WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
|
||||
|
||||
@@ -90,6 +90,17 @@ public:
|
||||
void* audioSamples,
|
||||
uint32_t& nSamplesOut);
|
||||
|
||||
// AudioTransport implementation: receives captured audio unmixed and
// dispatches it to the listed VoE channels, optionally through the
// transmit mixer / APM (see the .cc for the routing logic). Returns the
// new microphone volume in [0, 255], or 0 for no change.
virtual int OnDataAvailable(int voe_channels[],
                            int number_of_voe_channels,
                            const int16_t* audio_data,
                            int sample_rate,
                            int number_of_channels,
                            int number_of_frames,
                            int audio_delay_milliseconds,
                            int current_volume,
                            bool key_pressed,
                            bool need_audio_processing);
|
||||
|
||||
// AudioDeviceObserver
|
||||
virtual void OnErrorIsReported(ErrorCode error);
|
||||
virtual void OnWarningIsReported(WarningCode warning);
|
||||
|
||||
Reference in New Issue
Block a user