Add keyboard channel support to AudioBuffer.

Also use local aliases for AudioBuffers for brevity.

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/13369005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5973 4adac7df-926f-26a2-2b94-8c16560cd09d
parent d57b8149c2
commit 103657b484
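This change lets an AudioBuffer carry a keyboard-microphone channel alongside the regular audio channels. The kMonoAndKeyboard and kStereoAndKeyboard layouts mark its presence; the channel is kept as a raw pointer into the caller's input, is never resampled or processed, and is exposed through the new keyboard_data() accessor.

A minimal caller sketch (not part of this change; the buffer sizes and rates are illustrative, while ProcessStream() and the layout constants are the real APIs exercised by the new unit test below):

  scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
  const int rate = 16000;
  const int samples = rate / 100;           // One 10 ms chunk.
  ChannelBuffer<float> in_cb(samples, 3);   // L, R, keyboard.
  ChannelBuffer<float> out_cb(samples, 2);  // L, R after processing.
  ap->ProcessStream(in_cb.channels(), samples, rate,
                    AudioProcessing::kStereoAndKeyboard,
                    rate, AudioProcessing::kStereo,
                    out_cb.channels());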
webrtc/modules/audio_processing/audio_buffer.cc

@@ -23,6 +23,35 @@ enum {
   kSamplesPer32kHzChannel = 320
 };
 
+bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      return false;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereoAndKeyboard:
+      return true;
+  }
+  assert(false);
+  return false;
+}
+
+int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      assert(false);
+      return -1;
+    case AudioProcessing::kMonoAndKeyboard:
+      return 1;
+    case AudioProcessing::kStereoAndKeyboard:
+      return 2;
+  }
+  assert(false);
+  return -1;
+}
+
+
 void StereoToMono(const float* left, const float* right, float* out,
                   int samples_per_channel) {
   for (int i = 0; i < samples_per_channel; ++i) {
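Note: the two helpers above encode the layout convention that the keyboard channel, when present, is always the last channel, so its index equals the number of regular audio channels. A few illustrative invariants (not in the change itself):

  assert(!HasKeyboardChannel(AudioProcessing::kStereo));
  assert(KeyboardChannelIndex(AudioProcessing::kMonoAndKeyboard) == 1);
  assert(KeyboardChannelIndex(AudioProcessing::kStereoAndKeyboard) == 2);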
@@ -32,8 +61,9 @@ void StereoToMono(const float* left, const float* right, float* out,
 
 void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; i++)
+  for (int i = 0; i < samples_per_channel; ++i) {
     out[i] = (left[i] + right[i]) >> 1;
+  }
 }
 
 }  // namespace
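Note: besides the brace and ++i style fixes, the int16_t downmix is safe as written: left[i] and right[i] are promoted to int before the addition, so the sum cannot overflow before the >> 1 average.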
@@ -72,6 +102,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       activity_(AudioFrame::kVadUnknown),
       is_muted_(false),
       data_(NULL),
+      keyboard_data_(NULL),
      channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
                                           num_proc_channels_)) {
   assert(input_samples_per_channel_ > 0);
@@ -118,6 +149,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
   }
 }
 
+AudioBuffer::~AudioBuffer() {}
+
 void AudioBuffer::CopyFrom(const float* const* data,
                            int samples_per_channel,
                            AudioProcessing::ChannelLayout layout) {
@@ -125,6 +158,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
   assert(ChannelsFromLayout(layout) == num_input_channels_);
   InitForNewData();
+
+  if (HasKeyboardChannel(layout)) {
+    keyboard_data_ = data[KeyboardChannelIndex(layout)];
+  }
 
   // Downmix.
   const float* const* data_ptr = data;
   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
@@ -180,10 +217,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
   }
 }
 
-AudioBuffer::~AudioBuffer() {}
-
 void AudioBuffer::InitForNewData() {
   data_ = NULL;
+  keyboard_data_ = NULL;
   data_was_mixed_ = false;
   num_mixed_channels_ = 0;
   num_mixed_low_pass_channels_ = 0;
@@ -240,6 +276,10 @@ int16_t* AudioBuffer::low_pass_reference(int channel) const {
   return low_pass_reference_channels_->channel(channel);
 }
 
+const float* AudioBuffer::keyboard_data() const {
+  return keyboard_data_;
+}
+
 SplitFilterStates* AudioBuffer::filter_states(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   return &filter_states_[channel];
@@ -269,6 +309,11 @@ int AudioBuffer::samples_per_split_channel() const {
   return samples_per_split_channel_;
 }
 
+int AudioBuffer::samples_per_keyboard_channel() const {
+  // We don't resample the keyboard channel.
+  return input_samples_per_channel_;
+}
+
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
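With the plumbing above, a downstream component can read the keyboard samples straight off the buffer. A hypothetical consumer sketch (keyboard_data() and samples_per_keyboard_channel() are the accessors added here; DetectKeyTap() is a made-up stand-in for a real consumer):

  void AnalyzeKeyboard(AudioBuffer* audio) {
    const float* kb = audio->keyboard_data();
    if (kb == NULL)
      return;  // The input layout had no keyboard channel.
    // Unlike data(i), this channel stays at the input rate.
    DetectKeyTap(kb, audio->samples_per_keyboard_channel());
  }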
webrtc/modules/audio_processing/audio_buffer.h

@@ -53,6 +53,7 @@ class AudioBuffer {
   int num_channels() const;
   int samples_per_channel() const;
   int samples_per_split_channel() const;
+  int samples_per_keyboard_channel() const;
 
   int16_t* data(int channel) const;
   int16_t* low_pass_split_data(int channel) const;
@@ -60,6 +61,7 @@ class AudioBuffer {
   int16_t* mixed_data(int channel) const;
   int16_t* mixed_low_pass_data(int channel) const;
   int16_t* low_pass_reference(int channel) const;
+  const float* keyboard_data() const;
 
   SplitFilterStates* filter_states(int channel) const;
 
@@ -106,6 +108,7 @@ class AudioBuffer {
   bool is_muted_;
 
   int16_t* data_;
+  const float* keyboard_data_;
   scoped_ptr<ChannelBuffer<int16_t> > channels_;
   scoped_ptr<SplitChannelBuffer> split_channels_;
   scoped_ptr<SplitFilterStates[]> filter_states_;
webrtc/modules/audio_processing/audio_processing_impl.cc

@@ -468,48 +468,46 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   }
 #endif
 
+  AudioBuffer* ca = capture_audio_.get();  // For brevity.
   bool data_processed = is_data_processed();
   if (analysis_needed(data_processed)) {
     for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
-      SplitFilterStates* filter_states = capture_audio_->filter_states(i);
       // Split into a low and high band.
-      WebRtcSpl_AnalysisQMF(capture_audio_->data(i),
-                            capture_audio_->samples_per_channel(),
-                            capture_audio_->low_pass_split_data(i),
-                            capture_audio_->high_pass_split_data(i),
-                            filter_states->analysis_filter_state1,
-                            filter_states->analysis_filter_state2);
+      WebRtcSpl_AnalysisQMF(ca->data(i),
+                            ca->samples_per_channel(),
+                            ca->low_pass_split_data(i),
+                            ca->high_pass_split_data(i),
+                            ca->filter_states(i)->analysis_filter_state1,
+                            ca->filter_states(i)->analysis_filter_state2);
     }
   }
 
-  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get()));
+  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
+  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
 
   if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
-    capture_audio_->CopyLowPassToReference();
+    ca->CopyLowPassToReference();
   }
-  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(
-      echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get()));
+  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
 
   if (synthesis_needed(data_processed)) {
     for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
       // Recombine low and high bands.
-      SplitFilterStates* filter_states = capture_audio_->filter_states(i);
-      WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i),
-                             capture_audio_->high_pass_split_data(i),
-                             capture_audio_->samples_per_split_channel(),
-                             capture_audio_->data(i),
-                             filter_states->synthesis_filter_state1,
-                             filter_states->synthesis_filter_state2);
+      WebRtcSpl_SynthesisQMF(ca->low_pass_split_data(i),
+                             ca->high_pass_split_data(i),
+                             ca->samples_per_split_channel(),
+                             ca->data(i),
+                             ca->filter_states(i)->synthesis_filter_state1,
+                             ca->filter_states(i)->synthesis_filter_state2);
     }
   }
 
   // The level estimator operates on the recombined data.
-  RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get()));
+  RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
 
   was_stream_delay_set_ = false;
   return kNoError;
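Note: when the stream runs at 32 kHz, each 320-sample channel (kSamplesPer32kHzChannel) is split by WebRtcSpl_AnalysisQMF into 160-sample low and high bands, the components run in between, and WebRtcSpl_SynthesisQMF recombines them. A minimal sketch of that round trip, assuming 6-element filter-state arrays as in SplitFilterStates (the buffers and zeroed state here are illustrative):

  int16_t in[320], low[160], high[160], out[320];
  int state1[6] = {0}, state2[6] = {0};  // Analysis filter state.
  int synth1[6] = {0}, synth2[6] = {0};  // Synthesis filter state.
  WebRtcSpl_AnalysisQMF(in, 320, low, high, state1, state2);
  // ... per-band processing (NS, AEC, AGC, ...) happens here ...
  WebRtcSpl_SynthesisQMF(low, high, 160, out, synth1, synth2);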
@@ -592,27 +590,23 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   return AnalyzeReverseStreamLocked();
 }
 
-// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
-// primary stream and convert ourselves rather than having the user manage it.
-// We can be smarter and use the splitting filter when appropriate. Similarly,
-// perform downmixing here.
 int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
+  AudioBuffer* ra = render_audio_.get();  // For brevity.
   if (rev_proc_format_.rate() == kSampleRate32kHz) {
     for (int i = 0; i < rev_proc_format_.num_channels(); i++) {
       // Split into low and high band.
-      SplitFilterStates* filter_states = render_audio_->filter_states(i);
-      WebRtcSpl_AnalysisQMF(render_audio_->data(i),
-                            render_audio_->samples_per_channel(),
-                            render_audio_->low_pass_split_data(i),
-                            render_audio_->high_pass_split_data(i),
-                            filter_states->analysis_filter_state1,
-                            filter_states->analysis_filter_state2);
+      WebRtcSpl_AnalysisQMF(ra->data(i),
+                            ra->samples_per_channel(),
+                            ra->low_pass_split_data(i),
+                            ra->high_pass_split_data(i),
+                            ra->filter_states(i)->analysis_filter_state1,
+                            ra->filter_states(i)->analysis_filter_state2);
     }
   }
 
-  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get()));
-  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get()));
-  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get()));
+  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
+  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
 
   return kNoError;
 }
webrtc/modules/audio_processing/test/audio_processing_unittest.cc

@@ -81,6 +81,21 @@ void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
   ConvertToFloat(frame.data_, cb);
 }
 
+// Number of channels including the keyboard channel.
+int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+      return 1;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereo:
+      return 2;
+    case AudioProcessing::kStereoAndKeyboard:
+      return 3;
+  }
+  assert(false);
+  return -1;
+}
+
 int TruncateToMultipleOf10(int value) {
   return (value / 10) * 10;
 }
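Note: TotalChannelsFromLayout() differs from the existing ChannelsFromLayout() only by counting the keyboard channel. An equivalent, purely illustrative formulation:

  int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
    return ChannelsFromLayout(layout) + (HasKeyboardChannel(layout) ? 1 : 0);
  }

(HasKeyboardChannel() lives in an unnamed namespace in audio_buffer.cc, so the test defines its own switch instead.)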
@@ -1916,6 +1931,43 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
 
 #endif  // WEBRTC_AUDIOPROC_BIT_EXACT
 
+TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
+  struct ChannelFormat {
+    AudioProcessing::ChannelLayout in_layout;
+    AudioProcessing::ChannelLayout out_layout;
+  };
+  ChannelFormat cf[] = {
+    {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
+    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
+    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
+  };
+  size_t channel_format_size = sizeof(cf) / sizeof(*cf);
+
+  scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
+  // Enable one component just to ensure some processing takes place.
+  ap->noise_suppression()->Enable(true);
+  for (size_t i = 0; i < channel_format_size; ++i) {
+    const int in_rate = 44100;
+    const int out_rate = 48000;
+    ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
+                               TotalChannelsFromLayout(cf[i].in_layout));
+    ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
+                                ChannelsFromLayout(cf[i].out_layout));
+
+    // Run over a few chunks.
+    for (int j = 0; j < 10; ++j) {
+      EXPECT_NOERR(ap->ProcessStream(
+          in_cb.channels(),
+          in_cb.samples_per_channel(),
+          in_rate,
+          cf[i].in_layout,
+          out_rate,
+          cf[i].out_layout,
+          out_cb.channels()));
+    }
+  }
+}
+
 // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
 // stereo) file, converts to deinterleaved float (optionally downmixing) and
 // returns the result in |cb|. Returns false if the file ended (or on error) and
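Note: the new test drives the three keyboard-bearing format pairs through ten 10 ms chunks each, covering both downmixing (stereo plus keyboard to mono) and resampling (44.1 kHz in, 48 kHz out) with a keyboard channel present, and asserts only that ProcessStream() returns no error.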