// webrtc/modules/audio_processing/main/source/audio_processing_impl.cc

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio_processing_impl.h"

#include <cassert>

#include "module_common_types.h"

#include "critical_section_wrapper.h"
#include "file_wrapper.h"

#include "audio_buffer.h"
#include "echo_cancellation_impl.h"
#include "echo_control_mobile_impl.h"
#include "high_pass_filter_impl.h"
#include "gain_control_impl.h"
#include "level_estimator_impl.h"
#include "noise_suppression_impl.h"
#include "processing_component.h"
#include "splitting_filter.h"
#include "voice_detection_impl.h"

namespace webrtc {
namespace {

// One-byte tags written to the debug recording in front of each record.
// The numeric values are part of the on-disk format, so they are spelled
// out explicitly and must not be reordered.
enum Events {
  kInitializeEvent = 0,
  kRenderEvent = 1,
  kCaptureEvent = 2
};

// Text header written as the first line of every debug recording.
const char kMagicNumber[] = "#!vqetrace1.2";

}  // namespace

// Factory for the audio processing module.
//
// Allocates an AudioProcessingImpl tagged with |id| and initializes it.
// Returns the new instance, or NULL if initialization reported anything
// other than kNoError (the half-built instance is freed in that case).
AudioProcessing* AudioProcessing::Create(int id) {
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
             webrtc::kTraceVqe,
             id,
             "AudioProcessing::Create()");*/

  AudioProcessingImpl* impl = new AudioProcessingImpl(id);
  if (impl->Initialize() == kNoError) {
    return impl;
  }

  // Initialization failed: do not hand out a partially set up module.
  delete impl;
  return NULL;
}

// Destroys an instance previously returned by Create(). The pointer is cast
// down to the concrete implementation type before it is deleted.
void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

// Builds a module with default settings: 16 kHz sample rate, one channel for
// each of render input, capture input and capture output, and no stream
// delay set. The render/capture AudioBuffers are left NULL here; they are
// allocated by InitializeLocked().
//
// NOTE(review): samples_per_channel_ is initialized from sample_rate_hz_, so
// this relies on sample_rate_hz_ being declared before samples_per_channel_
// in the class definition -- confirm against the header.
AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      debug_file_(FileWrapper::Create()),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),  // One 10 ms frame.
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_render_input_channels_(1),
      num_capture_input_channels_(1),
      num_capture_output_channels_(1) {

  // Create every processing component and register it in component_list_.
  // The list is what InitializeLocked(), Version() and the destructor walk,
  // so a component must be in it to be initialized and freed.
  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

// Releases everything the module owns: the processing components, the debug
// file (closing it first if a recording is in progress), the critical
// section and both audio buffers.
AudioProcessingImpl::~AudioProcessingImpl() {
  // Drain the component list front to back; each component is shut down via
  // Destroy() before its memory is released.
  for (; !component_list_.empty(); component_list_.pop_front()) {
    ProcessingComponent* const current = component_list_.front();
    current->Destroy();
    delete current;
  }

  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
  delete debug_file_;
  debug_file_ = NULL;

  delete crit_;
  crit_ = NULL;

  // Deleting a NULL pointer is a no-op, so no explicit guards are needed for
  // buffers that were never allocated.
  delete render_audio_;
  render_audio_ = NULL;

  delete capture_audio_;
  capture_audio_ = NULL;
}

// Returns the critical section that this class locks in its public entry
// points (Initialize, the setters, ProcessStream, ...).
// NOTE(review): presumably exposed so components can take the same lock --
// confirm against the component implementations.
CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

// Returns the per-band sample rate: 16 kHz when the module runs at 32 kHz,
// otherwise the same as sample_rate_hz() (see set_sample_rate_hz()).
int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}

// Re-initializes the module with its current settings while holding the
// instance lock. Returns the result of InitializeLocked().
int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(*crit_);
  return InitializeLocked();
}

int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_render_input_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_capture_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

  return kNoError;
}

// Selects the processing sample rate and re-initializes the module.
//
// Only 8, 16 and 32 kHz are accepted; any other value yields
// kBadParameterError and leaves the configuration untouched. At 32 kHz the
// split (per-band) rate is 16 kHz; otherwise it equals the full rate.
// Returns the result of InitializeLocked() on success.
int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(*crit_);

  const bool is_supported = rate == kSampleRate8kHz ||
                            rate == kSampleRate16kHz ||
                            rate == kSampleRate32kHz;
  if (!is_supported) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;  // Samples in one 10 ms frame.
  split_sample_rate_hz_ =
      (rate == kSampleRate32kHz) ? kSampleRate16kHz : rate;

  return InitializeLocked();
}

// Returns the configured processing sample rate in Hz. Read without taking
// the instance lock.
int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

// Sets the channel count of the reverse (render) stream and re-initializes
// the module. Only mono (1) and stereo (2) are accepted; anything else
// returns kBadParameterError without changing state.
int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(*crit_);

  const bool in_range = channels >= 1 && channels <= 2;
  if (!in_range) {
    return kBadParameterError;
  }

  num_render_input_channels_ = channels;
  return InitializeLocked();
}

// Returns the configured number of reverse (render) stream channels.
int AudioProcessingImpl::num_reverse_channels() const {
  return num_render_input_channels_;
}

// Sets the capture stream's input and output channel counts and
// re-initializes the module.
//
// Each count must be 1 (mono) or 2 (stereo), and the output count may not
// exceed the input count. Any violation returns kBadParameterError and
// leaves the current configuration in place.
int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(*crit_);

  const bool input_ok = input_channels >= 1 && input_channels <= 2;
  const bool output_ok = output_channels >= 1 && output_channels <= 2;
  if (!input_ok || !output_ok || output_channels > input_channels) {
    return kBadParameterError;
  }

  num_capture_input_channels_ = input_channels;
  num_capture_output_channels_ = output_channels;

  return InitializeLocked();
}

// Returns the configured number of capture stream input channels.
int AudioProcessingImpl::num_input_channels() const {
  return num_capture_input_channels_;
}

// Returns the configured number of capture stream output channels.
int AudioProcessingImpl::num_output_channels() const {
  return num_capture_output_channels_;
}

// Processes one 10 ms capture (near-end) frame in place.
//
// The frame must match the configured sample rate, capture input channel
// count and samples-per-channel; otherwise the corresponding kBad*Error is
// returned before any audio is touched. While a debug recording is open, the
// unprocessed frame is appended to it first.
//
// Processing order: deinterleave, optional down-mix to the output channel
// count, band split (32 kHz only), high-pass filter, gain analysis, echo
// cancellation, noise suppression, mobile echo control, voice detection,
// gain control, band merge (32 kHz only), interleave back into |frame|.
//
// Returns kNoError on success, kNullPointerError for a NULL frame,
// kFileError if the debug dump cannot be written, or the first error
// reported by a component (the frame is then not written back).
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz !=
      static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_capture_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

  // Debug dump record: event tag, sample rate, channel count, samples per
  // channel, then the interleaved samples.
  if (debug_file_->Open()) {
    WebRtc_UWord8 event = kCaptureEvent;
    if (!debug_file_->Write(&event, sizeof(event))) {
      return kFileError;
    }

    if (!debug_file_->Write(&frame->_frequencyInHz,
                                   sizeof(frame->_frequencyInHz))) {
      return kFileError;
    }

    if (!debug_file_->Write(&frame->_audioChannel,
                                   sizeof(frame->_audioChannel))) {
      return kFileError;
    }

    if (!debug_file_->Write(&frame->_payloadDataLengthInSamples,
        sizeof(frame->_payloadDataLengthInSamples))) {
      return kFileError;
    }

    if (!debug_file_->Write(frame->_payloadData,
        sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples *
        frame->_audioChannel)) {
      return kFileError;
    }
  }

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_capture_output_channels_ < num_capture_input_channels_) {
    capture_audio_->Mix(num_capture_output_channels_);

    frame->_audioChannel = num_capture_output_channels_;
  }

  if (sample_rate_hz_ == kSampleRate32kHz) {
    // From here on the buffer holds num_capture_output_channels_ channels:
    // either it was just mixed down to that count, or input and output
    // counts are equal. Iterating over the input count would touch channels
    // that no longer exist after a down-mix, and would disagree with the
    // synthesis loop below.
    for (int i = 0; i < num_capture_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  // Snapshot the low band before noise suppression modifies it, but only
  // when both mobile echo control and noise suppression are active.
  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  //err = level_estimator_->ProcessCaptureAudio(capture_audio_);
  //if (err != kNoError) {
  //  return err;
  //}

  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_capture_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  capture_audio_->InterleaveTo(frame);

  return kNoError;
}

// Feeds one 10 ms reverse (render / far-end) frame to the components that
// need it. The frame itself is only read, never written back.
//
// The frame must match the configured sample rate, reverse channel count and
// samples-per-channel. While a debug recording is open the frame is appended
// to it before analysis. On success the "stream delay was set" flag is
// cleared.
//
// Returns kNoError on success, kNullPointerError for a NULL frame, the
// matching kBad*Error for a format mismatch, kFileError if the debug dump
// cannot be written, or the first error reported by a component.
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);

  if (frame == NULL) {
    return kNullPointerError;
  }

  // Format validation, in the same order as for the capture stream.
  if (frame->_frequencyInHz !=
      static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
    return kBadSampleRateError;
  }
  if (frame->_audioChannel != num_render_input_channels_) {
    return kBadNumberChannelsError;
  }
  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

  if (debug_file_->Open()) {
    // Debug dump record: event tag, sample rate, channel count, samples per
    // channel, then the interleaved samples. The && chain stops at the first
    // failed write.
    WebRtc_UWord8 event = kRenderEvent;
    const bool record_written =
        debug_file_->Write(&event, sizeof(event)) &&
        debug_file_->Write(&frame->_frequencyInHz,
                           sizeof(frame->_frequencyInHz)) &&
        debug_file_->Write(&frame->_audioChannel,
                           sizeof(frame->_audioChannel)) &&
        debug_file_->Write(&frame->_payloadDataLengthInSamples,
                           sizeof(frame->_payloadDataLengthInSamples)) &&
        debug_file_->Write(frame->_payloadData,
                           sizeof(WebRtc_Word16) *
                           frame->_payloadDataLengthInSamples *
                           frame->_audioChannel);
    if (!record_written) {
      return kFileError;
    }
  }

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    // Split every channel into a low and a high band.
    for (int channel = 0; channel < num_render_input_channels_; ++channel) {
      SplittingFilterAnalysis(render_audio_->data(channel),
                              render_audio_->low_pass_split_data(channel),
                              render_audio_->high_pass_split_data(channel),
                              render_audio_->analysis_filter_state1(channel),
                              render_audio_->analysis_filter_state2(channel));
    }
  }

  // TODO(ajm): warnings possible from components?
  int status = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (status != kNoError) {
    return status;
  }

  status = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (status != kNoError) {
    return status;
  }

  status = gain_control_->ProcessRenderAudio(render_audio_);
  if (status != kNoError) {
    return status;
  }

  //err = level_estimator_->AnalyzeReverseStream(render_audio_);
  //if (err != kNoError) {
  //  return err;
  //}

  was_stream_delay_set_ = false;
  // |status| is necessarily kNoError here.
  return status;  // TODO(ajm): this is for returning warnings; necessary?
}

// Records the stream delay, in milliseconds, for subsequent processing.
//
// The "delay was set" flag is raised on every call, including calls that
// are rejected. A negative delay returns kBadParameterError and keeps the
// previously stored value. A delay above 500 ms is clamped to 500 and
// reported with kBadStreamParameterWarning. Otherwise the value is stored
// and kNoError is returned. This function does not take the instance lock.
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  const int kMaxDelayMs = 500;

  was_stream_delay_set_ = true;

  if (delay < 0) {
    return kBadParameterError;
  }

  if (delay <= kMaxDelayMs) {
    stream_delay_ms_ = delay;
    return kNoError;
  }

  stream_delay_ms_ = kMaxDelayMs;
  return kBadStreamParameterWarning;
}

// Returns the last stored stream delay in milliseconds (0 until one is set;
// values above 500 are stored as 500 by set_stream_delay_ms()).
int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

// Returns true if set_stream_delay_ms() has been called since the flag was
// last cleared. The flag is cleared by InitializeLocked() and at the end of
// a successful AnalyzeReverseStream().
bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

// Starts dumping the audio passed to ProcessStream() and
// AnalyzeReverseStream() into |filename|.
//
// Any recording already in progress is stopped first. The new file begins
// with the magic-number text line, followed by an initialize event carrying
// the current sample rate.
//
// Returns kNoError on success, kNullPointerError if |filename| is NULL, or
// kFileError if the previous file cannot be closed or the new file cannot be
// opened or written. On every failure after the open attempt the file is
// closed again, so a failed start never leaves a recording half active.
int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(*crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  if (debug_file_->WriteText("%s\n", kMagicNumber) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  // TODO(ajm): should we do this? If so, we need the number of channels etc.
  // Record the default sample rate.
  WebRtc_UWord8 event = kInitializeEvent;
  if (!debug_file_->Write(&event, sizeof(event)) ||
      !debug_file_->Write(&sample_rate_hz_, sizeof(sample_rate_hz_))) {
    // Close the file so the stream functions, which dump whenever the file
    // is open, do not keep appending to a recording with a truncated header
    // after the caller has been told that starting failed.
    debug_file_->CloseFile();
    return kFileError;
  }

  return kNoError;
}

// Stops the debug recording, if one is active.
//
// Returns kFileError only when an open debug file fails to close; calling
// this with no recording in progress is a successful no-op.
int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(*crit_);

  const bool close_failed =
      debug_file_->Open() && debug_file_->CloseFile() == -1;
  return close_failed ? kFileError : kNoError;
}

// Returns the echo cancellation component. The module keeps ownership; the
// pointer is freed by the destructor.
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

// Returns the mobile echo control component. The module keeps ownership; the
// pointer is freed by the destructor.
EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

// Returns the gain control component. The module keeps ownership; the
// pointer is freed by the destructor.
GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

// Returns the high-pass filter component. The module keeps ownership; the
// pointer is freed by the destructor.
HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

// Returns the level estimator component. The module keeps ownership; the
// pointer is freed by the destructor. Note that the stream functions in this
// file currently have their level estimator calls commented out.
LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

// Returns the noise suppression component. The module keeps ownership; the
// pointer is freed by the destructor.
NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

// Returns the voice detection component. The module keeps ownership; the
// pointer is freed by the destructor.
VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

// Writes the module version, followed by one "\n<component version>" entry
// for every component that reports a non-empty version, into |version|
// starting at |position|.
//
// The |bytes_remaining| bytes at |position| are zeroed first. Each appended
// piece advances |position| and shrinks |bytes_remaining| by its length,
// which does NOT include a terminating null. The output is therefore only
// null-terminated if at least one zeroed byte is left over at the end.
//
// Returns kNoError on success, kNullPointerError if |version| is NULL,
// kBadParameterError if a piece does not fit (earlier pieces stay written
// and counted), or the error from a component's get_version().
WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version,
    WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const {
  if (version == NULL) {
    /*WEBRTC_TRACE(webrtc::kTraceError,
               webrtc::kTraceVqe,
               -1,
               "Null version pointer");*/
    return kNullPointerError;
  }
  memset(&version[position], 0, bytes_remaining);

  static const char kModuleVersion[] = "AudioProcessing 1.0.0";
  // Length of the text only; the terminating null is not copied.
  WebRtc_UWord32 piece_length =
      static_cast<WebRtc_UWord32>(sizeof(kModuleVersion) - 1);
  if (piece_length > bytes_remaining) {
    /*WEBRTC_TRACE(webrtc::kTraceError,
               webrtc::kTraceVqe,
               -1,
               "Buffer of insufficient length");*/
    return kBadParameterError;
  }
  memcpy(&version[position], kModuleVersion, piece_length);
  position += piece_length;
  bytes_remaining -= piece_length;

  typedef std::list<ProcessingComponent*>::const_iterator ConstComponentIter;
  for (ConstComponentIter component = component_list_.begin();
       component != component_list_.end();
       ++component) {
    // Scratch buffer: a leading newline separator, then the component's own
    // text. Pre-terminating at index 1 makes "nothing written" read as empty.
    char component_version[256];
    component_version[0] = '\n';
    component_version[1] = '\0';

    const int status = (*component)->get_version(
        &component_version[1], sizeof(component_version) - 1);
    if (status != kNoError) {
      return status;
    }

    if (component_version[1] == '\0') {
      // Empty version string: skip this component entirely.
      continue;
    }

    piece_length = static_cast<WebRtc_UWord32>(strlen(component_version));
    if (piece_length > bytes_remaining) {
      /*WEBRTC_TRACE(webrtc::kTraceError,
                 webrtc::kTraceVqe,
                 -1,
                 "Buffer of insufficient length");*/
      return kBadParameterError;
    }
    memcpy(&version[position], component_version, piece_length);
    position += piece_length;
    bytes_remaining -= piece_length;
  }

  return kNoError;
}

// Replaces the identifier stored in id_ with |id| while holding the instance
// lock. Always returns kNoError. Within this file id_ is only referenced by
// tracing calls that are currently commented out.
WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
             webrtc::kTraceVqe,
             id_,
             "ChangeUniqueId(new id = %d)",
             id);*/
  id_ = id;

  return kNoError;
}
}  // namespace webrtc