Add keyboard channel support to AudioBuffer.

Also use local aliases for AudioBuffers for brevity.

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/13369005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5973 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org 2014-04-24 18:28:56 +00:00
parent d57b8149c2
commit 103657b484
4 changed files with 135 additions and 41 deletions

View File

@ -23,6 +23,35 @@ enum {
kSamplesPer32kHzChannel = 320 kSamplesPer32kHzChannel = 320
}; };
// Returns true for layouts whose last channel is a keyboard-mic channel.
bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return false;
}
// Returns the index of the keyboard channel within |layout|. Layouts without
// a keyboard channel trigger an assertion and yield -1.
int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMonoAndKeyboard:
      return 1;
    case AudioProcessing::kStereoAndKeyboard:
      return 2;
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      // Caller should have checked HasKeyboardChannel() first.
      assert(false);
      return -1;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return -1;
}
void StereoToMono(const float* left, const float* right, float* out, void StereoToMono(const float* left, const float* right, float* out,
int samples_per_channel) { int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i) { for (int i = 0; i < samples_per_channel; ++i) {
@ -32,8 +61,9 @@ void StereoToMono(const float* left, const float* right, float* out,
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
int samples_per_channel) { int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++) for (int i = 0; i < samples_per_channel; ++i) {
out[i] = (left[i] + right[i]) >> 1; out[i] = (left[i] + right[i]) >> 1;
}
} }
} // namespace } // namespace
@ -72,6 +102,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
activity_(AudioFrame::kVadUnknown), activity_(AudioFrame::kVadUnknown),
is_muted_(false), is_muted_(false),
data_(NULL), data_(NULL),
keyboard_data_(NULL),
channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_, channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
num_proc_channels_)) { num_proc_channels_)) {
assert(input_samples_per_channel_ > 0); assert(input_samples_per_channel_ > 0);
@ -118,6 +149,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
} }
} }
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data, void AudioBuffer::CopyFrom(const float* const* data,
int samples_per_channel, int samples_per_channel,
AudioProcessing::ChannelLayout layout) { AudioProcessing::ChannelLayout layout) {
@ -125,6 +158,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
assert(ChannelsFromLayout(layout) == num_input_channels_); assert(ChannelsFromLayout(layout) == num_input_channels_);
InitForNewData(); InitForNewData();
if (HasKeyboardChannel(layout)) {
keyboard_data_ = data[KeyboardChannelIndex(layout)];
}
// Downmix. // Downmix.
const float* const* data_ptr = data; const float* const* data_ptr = data;
if (num_input_channels_ == 2 && num_proc_channels_ == 1) { if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
@ -180,10 +217,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
} }
} }
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::InitForNewData() { void AudioBuffer::InitForNewData() {
data_ = NULL; data_ = NULL;
keyboard_data_ = NULL;
data_was_mixed_ = false; data_was_mixed_ = false;
num_mixed_channels_ = 0; num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0; num_mixed_low_pass_channels_ = 0;
@ -240,6 +276,10 @@ int16_t* AudioBuffer::low_pass_reference(int channel) const {
return low_pass_reference_channels_->channel(channel); return low_pass_reference_channels_->channel(channel);
} }
// Returns the keyboard channel captured by the most recent CopyFrom() call,
// or NULL if no input with a keyboard channel has been provided since the
// last InitForNewData().
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
SplitFilterStates* AudioBuffer::filter_states(int channel) const { SplitFilterStates* AudioBuffer::filter_states(int channel) const {
assert(channel >= 0 && channel < num_proc_channels_); assert(channel >= 0 && channel < num_proc_channels_);
return &filter_states_[channel]; return &filter_states_[channel];
@ -269,6 +309,11 @@ int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_; return samples_per_split_channel_;
} }
// Number of samples in the keyboard channel. This can differ from
// samples_per_channel() because the keyboard data stays at the input rate.
int AudioBuffer::samples_per_keyboard_channel() const {
// We don't resample the keyboard channel.
return input_samples_per_channel_;
}
// TODO(andrew): Do deinterleaving and mixing in one step? // TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(proc_samples_per_channel_ == input_samples_per_channel_); assert(proc_samples_per_channel_ == input_samples_per_channel_);

View File

@ -53,6 +53,7 @@ class AudioBuffer {
int num_channels() const; int num_channels() const;
int samples_per_channel() const; int samples_per_channel() const;
int samples_per_split_channel() const; int samples_per_split_channel() const;
int samples_per_keyboard_channel() const;
int16_t* data(int channel) const; int16_t* data(int channel) const;
int16_t* low_pass_split_data(int channel) const; int16_t* low_pass_split_data(int channel) const;
@ -60,6 +61,7 @@ class AudioBuffer {
int16_t* mixed_data(int channel) const; int16_t* mixed_data(int channel) const;
int16_t* mixed_low_pass_data(int channel) const; int16_t* mixed_low_pass_data(int channel) const;
int16_t* low_pass_reference(int channel) const; int16_t* low_pass_reference(int channel) const;
const float* keyboard_data() const;
SplitFilterStates* filter_states(int channel) const; SplitFilterStates* filter_states(int channel) const;
@ -106,6 +108,7 @@ class AudioBuffer {
bool is_muted_; bool is_muted_;
int16_t* data_; int16_t* data_;
const float* keyboard_data_;
scoped_ptr<ChannelBuffer<int16_t> > channels_; scoped_ptr<ChannelBuffer<int16_t> > channels_;
scoped_ptr<SplitChannelBuffer> split_channels_; scoped_ptr<SplitChannelBuffer> split_channels_;
scoped_ptr<SplitFilterStates[]> filter_states_; scoped_ptr<SplitFilterStates[]> filter_states_;

View File

@ -468,48 +468,46 @@ int AudioProcessingImpl::ProcessStreamLocked() {
} }
#endif #endif
AudioBuffer* ca = capture_audio_.get(); // For brevity.
bool data_processed = is_data_processed(); bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) { if (analysis_needed(data_processed)) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
SplitFilterStates* filter_states = capture_audio_->filter_states(i);
// Split into a low and high band. // Split into a low and high band.
WebRtcSpl_AnalysisQMF(capture_audio_->data(i), WebRtcSpl_AnalysisQMF(ca->data(i),
capture_audio_->samples_per_channel(), ca->samples_per_channel(),
capture_audio_->low_pass_split_data(i), ca->low_pass_split_data(i),
capture_audio_->high_pass_split_data(i), ca->high_pass_split_data(i),
filter_states->analysis_filter_state1, ca->filter_states(i)->analysis_filter_state1,
filter_states->analysis_filter_state2); ca->filter_states(i)->analysis_filter_state2);
} }
} }
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) { if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference(); ca->CopyLowPassToReference();
} }
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
RETURN_ON_ERR( RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get()));
if (synthesis_needed(data_processed)) { if (synthesis_needed(data_processed)) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
// Recombine low and high bands. // Recombine low and high bands.
SplitFilterStates* filter_states = capture_audio_->filter_states(i); WebRtcSpl_SynthesisQMF(ca->low_pass_split_data(i),
WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i), ca->high_pass_split_data(i),
capture_audio_->high_pass_split_data(i), ca->samples_per_split_channel(),
capture_audio_->samples_per_split_channel(), ca->data(i),
capture_audio_->data(i), ca->filter_states(i)->synthesis_filter_state1,
filter_states->synthesis_filter_state1, ca->filter_states(i)->synthesis_filter_state2);
filter_states->synthesis_filter_state2);
} }
} }
// The level estimator operates on the recombined data. // The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get())); RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
was_stream_delay_set_ = false; was_stream_delay_set_ = false;
return kNoError; return kNoError;
@ -592,27 +590,23 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
return AnalyzeReverseStreamLocked(); return AnalyzeReverseStreamLocked();
} }
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() { int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
AudioBuffer* ra = render_audio_.get(); // For brevity.
if (rev_proc_format_.rate() == kSampleRate32kHz) { if (rev_proc_format_.rate() == kSampleRate32kHz) {
for (int i = 0; i < rev_proc_format_.num_channels(); i++) { for (int i = 0; i < rev_proc_format_.num_channels(); i++) {
// Split into low and high band. // Split into low and high band.
SplitFilterStates* filter_states = render_audio_->filter_states(i); WebRtcSpl_AnalysisQMF(ra->data(i),
WebRtcSpl_AnalysisQMF(render_audio_->data(i), ra->samples_per_channel(),
render_audio_->samples_per_channel(), ra->low_pass_split_data(i),
render_audio_->low_pass_split_data(i), ra->high_pass_split_data(i),
render_audio_->high_pass_split_data(i), ra->filter_states(i)->analysis_filter_state1,
filter_states->analysis_filter_state1, ra->filter_states(i)->analysis_filter_state2);
filter_states->analysis_filter_state2);
} }
} }
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
return kNoError; return kNoError;
} }

View File

@ -81,6 +81,21 @@ void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
ConvertToFloat(frame.data_, cb); ConvertToFloat(frame.data_, cb);
} }
// Returns the channel count of |layout| counting the keyboard channel
// (unlike ChannelsFromLayout, which excludes it).
int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
      return 1;
    case AudioProcessing::kStereo:
    case AudioProcessing::kMonoAndKeyboard:
      return 2;
    case AudioProcessing::kStereoAndKeyboard:
      return 3;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return -1;
}
int TruncateToMultipleOf10(int value) { int TruncateToMultipleOf10(int value) {
return (value / 10) * 10; return (value / 10) * 10;
} }
@ -1916,6 +1931,43 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
#endif // WEBRTC_AUDIOPROC_BIT_EXACT #endif // WEBRTC_AUDIOPROC_BIT_EXACT
// Verifies ProcessStream() reports no errors for each supported pairing of a
// keyboard-bearing input layout with a keyboard-free output layout, across a
// 44.1 kHz -> 48 kHz rate change.
TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
  struct ChannelFormat {
    AudioProcessing::ChannelLayout in_layout;
    AudioProcessing::ChannelLayout out_layout;
  };
  ChannelFormat cf[] = {
    {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
  };
  const size_t num_formats = sizeof(cf) / sizeof(*cf);
  const int in_rate = 44100;
  const int out_rate = 48000;

  scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
  // Enable one component just to ensure some processing takes place.
  ap->noise_suppression()->Enable(true);
  for (size_t i = 0; i < num_formats; ++i) {
    ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
                               TotalChannelsFromLayout(cf[i].in_layout));
    ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
                                ChannelsFromLayout(cf[i].out_layout));

    // Run over a few chunks so state carried between calls is exercised.
    for (int chunk = 0; chunk < 10; ++chunk) {
      EXPECT_NOERR(ap->ProcessStream(in_cb.channels(),
                                     in_cb.samples_per_channel(),
                                     in_rate,
                                     cf[i].in_layout,
                                     out_rate,
                                     cf[i].out_layout,
                                     out_cb.channels()));
    }
  }
}
// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
// stereo) file, converts to deinterleaved float (optionally downmixing) and // stereo) file, converts to deinterleaved float (optionally downmixing) and
// returns the result in |cb|. Returns false if the file ended (or on error) and // returns the result in |cb|. Returns false if the file ended (or on error) and