579 lines
16 KiB
C++
579 lines
16 KiB
C++
/*************************************************************************/
|
|
/* */
|
|
/* Centre for Speech Technology Research */
|
|
/* University of Edinburgh, UK */
|
|
/* Copyright (c) 1996 */
|
|
/* All Rights Reserved. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to use and distribute */
|
|
/* this software and its documentation without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of this work, and to */
|
|
/* permit persons to whom this work is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* 1. The code must retain the above copyright notice, this list of */
|
|
/* conditions and the following disclaimer. */
|
|
/* 2. Any modifications must be clearly marked as such. */
|
|
/* 3. Original authors' names are not deleted. */
|
|
/* 4. The authors' names are not used to endorse or promote products */
|
|
/* derived from this software without specific prior written */
|
|
/* permission. */
|
|
/* */
|
|
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
|
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
|
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
|
|
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
|
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
|
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
|
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
|
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
|
|
/* THIS SOFTWARE. */
|
|
/* */
|
|
/*************************************************************************/
|
|
/* Authors: Paul Taylor and Simon King */
|
|
/* Date : March 1998 */
|
|
/*-----------------------------------------------------------------------*/
|
|
/* Signal processing functions which operate on entire utterances */
|
|
/* */
|
|
/*=======================================================================*/
|
|
|
|
|
|
#include "EST_error.h"
|
|
#include "EST_track_aux.h"
|
|
#include "EST_inline_utils.h"
|
|
#include "sigpr/EST_fft.h"
|
|
#include "sigpr/EST_sigpr_frame.h"
|
|
#include "sigpr/EST_sigpr_utt.h"
|
|
|
|
#include "EST_Features.h"
|
|
#include "EST_types.h"
|
|
#include "EST_string_aux.h"
|
|
|
|
// Forward declarations of the list-taking overloads that are defined
// further down in this file.  The single-type sigpr_acc() below calls
// the list form of sigpr_delta() before that overload's definition
// appears, so it has to be declared here.
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
|
|
const EST_StrList &slist);
|
|
|
|
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
|
|
const EST_StrList &slist);
|
|
|
|
|
|
|
|
// Pull the windowing settings out of the option set.
//
//   op  options consulted for "window_type" and "frame_factor"
//   wf  receives the window function created for the chosen window name
//   f   receives the frame factor
//
// An option that is not present in op falls back to DEFAULT_WINDOW_NAME
// or DEFAULT_FRAME_FACTOR respectively.
static void parse_op_settings(EST_Features &op, EST_WindowFunc *&wf, float &f)
{
    EST_String window_name;

    if (!op.present("window_type"))
        window_name = DEFAULT_WINDOW_NAME;
    else
        window_name = op.S("window_type");

    wf = EST_Window::creator(window_name);

    if (op.present("frame_factor"))
        f = op.F("frame_factor");
    else
        f = DEFAULT_FRAME_FACTOR;
}
|
|
|
|
// Append one channel-map entry to `map` for every type listed in `types`.
//
//   map          list that receives the entries
//   types        names of the coefficient types wanted
//   op           options; "<type>_order" and "include_c0" are consulted
//   delta_order  0 for plain coefficients, 1 for deltas ("_d" suffix),
//                2 for accelerations ("_a" suffix); any other value is
//                reported through EST_error
//
// A type with no "<type>_order" option, or with an order of 1, yields the
// plain entry "<type><suffix>".  Otherwise the entry has the form
// "$<type><suffix>-<first>+<last>".
void add_channels_to_map(EST_StrList &map, EST_StrList &types,
                         EST_Features &op, int delta_order)
{
    EST_String suffix;

    switch (delta_order)
    {
    case 0:
        suffix = "";
        break;
    case 1:
        suffix = "_d";
        break;
    case 2:
        suffix = "_a";
        break;
    default:
        EST_error("Requested delta order too high: %d\n", delta_order);
    }

    for (EST_Litem *item = types.head(); item != 0; item = item->next())
    {
        EST_String type_name;
        type_name = types(item);

        // No order given: the type occupies a single, plainly named channel.
        if (!op.present(type_name + "_order"))
        {
            map.append(type_name + suffix);
            continue;
        }

        int order = op.I(type_name + "_order");
        if (order < 1)
        {
            cerr << "Invalid " << type_name << "_order" << " : "
                 << order
                 << " (using 1 instead) " << endl;
            order = 1;
        }

        // Default numbering is 0 ... order-1.
        int first_coef = 0;
        int last_coef = order - 1;

        if (type_name == "lpc")
        {
            // For lpc coefficients, we ALWAYS include energy as the
            // 0th coefficient, so an lpc_order of 16 produces 17
            // coefficients (0 to 16).
            last_coef = order;
        }
        else if (type_name == "melcep")
        {
            // Mel cepstra have special names - without c0 the
            // coefficients are numbered 1...order, NOT 0...order-1.
            last_coef = order;
            first_coef = op.present("include_c0") ? 0 : 1;
        }

        if (order != 1)
            map.append("$" + type_name + suffix + "-" + itoString(first_coef)
                       + "+" + itoString(last_coef));
        else
            map.append(type_name + suffix);
    }
}
|
|
|
|
// Run the base (non-delta) analysis for every type named in slist,
// writing into the matching channels of fv.
//
//   sig    waveform to analyse
//   fv     track whose channels receive the results
//   op     options: "window_type"/"frame_factor" (via parse_op_settings),
//          "include_c0", "usepower", "lifter", "fbank_order" and
//          "frame_shift" are read; the "f0" branch also WRITES
//          "srpd_resize" and "pda_frame_shift" into op
//   slist  processing types: lpc, cep, ref, lsf, power, energy, f0,
//          fbank or melcep; anything else is reported through EST_error
//
// For each type the destination is the single channel called <type> if
// fv has one, otherwise the channel range <type>_<start> .. <type>_N,
// where <start> is 1 for melcep without "include_c0" and 0 otherwise.
void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op,
                const EST_StrList &slist)
{
    EST_Track fill;
    float frame_factor;
    EST_WindowFunc *wf;

    // Deliberately declared outside the loop: once a "lifter" value has
    // been read it stays in force for later melcep entries.
    float liftering_parameter = 0;
    const bool take_logs = true;

    parse_op_settings(op, wf, frame_factor);

    for (EST_Litem *s = slist.head(); s; s = s->next())
    {
        const EST_String k = slist(s);

        EST_String start_channel = "0";
        if ((k == "melcep") && !op.present("include_c0"))
            start_channel = "1";

        // NOTE(review): fill is presumably a view onto fv - the results
        // are never copied back explicitly.
        if (fv.has_channel(k))
            fv.sub_track(fill, 0, EST_ALL, k, 1);
        else
            fv.sub_track(fill, 0, EST_ALL, k + "_" + start_channel, k + "_N");

        if (op.present("usepower"))
            cerr << "USING POWER" << endl;

        if ((k == "lpc") || (k == "cep") || (k == "ref") || (k == "lsf"))
            sig2coef(sig, fill, k, frame_factor, wf);
        else if (k == "power")
            power(sig, fill, frame_factor);
        else if (k == "energy")
            energy(sig, fill, frame_factor);
        else if (k == "f0")
        {
            op.set("srpd_resize", 0);
            op.set("pda_frame_shift", op.F("frame_shift"));
            pda(sig, fill, op, "srpd");
        }
        // else if (k == "rasta")
        //     rasta(sig, fill, op);
        else if (k == "fbank")
        {
            const bool use_power_rather_than_energy = op.present("usepower");
            fbank(sig, fill, frame_factor, wf, use_power_rather_than_energy,
                  take_logs);
        }
        else if (k == "melcep")
        {
            const int fbank_order = op.I("fbank_order");
            const bool use_power_rather_than_energy = op.present("usepower");
            const bool include_c0 = op.present("include_c0");

            if (op.present("lifter"))
                liftering_parameter = op.F("lifter");

            melcep(sig, fill, frame_factor, fbank_order,
                   liftering_parameter, wf, include_c0,
                   use_power_rather_than_energy);
        }
        else
            EST_error("Error: Unknown type of processing requested: %s\n",
                      ((const char *) k));
    }
}
|
|
|
|
// Fill the delta channels of fv for the single coefficient type k.
//
//   sig  waveform, only used if the base coefficients must be computed
//   fv   track holding the "<k>_d" (or "<k>_d_<start>" .. "<k>_d_N")
//        channels to be filled, and possibly the base coefficients too
//   op   options; "include_c0" decides whether melcep numbering starts
//        at 1 instead of 0
//   k    coefficient type name
//
// The base coefficients are taken from fv when a channel "<k>" or
// "<k>_<start>" exists; otherwise they are generated into a temporary
// track by sigpr_base().  delta() is then handed the base track and the
// destination sub-track.
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
                 const EST_String &k)
{
    EST_Track source, target;

    const bool skip_c0 = (k == "melcep") && !op.present("include_c0");
    EST_String first_index(skip_c0 ? "1" : "0");

    // Locate the base coefficients, building them if fv has none.
    if (fv.has_channel(k))
    {
        fv.sub_track(source, 0, EST_ALL, k, 1);
    }
    else if (fv.has_channel(k + "_" + first_index))
    {
        fv.sub_track(source, 0, EST_ALL, k + "_" + first_index, k + "_N");
    }
    else
    {
        EST_StrList wanted, channel_map;
        wanted.append(k);
        add_channels_to_map(channel_map, wanted, op, 0);

        source.resize(fv.num_frames(), channel_map);
        source.fill_time(fv);
        source.set_equal_space(false);

        sigpr_base(sig, source, op, wanted);
    }

    // Pick out the destination channels for the deltas.
    if (fv.has_channel(k + "_d"))
        fv.sub_track(target, 0, EST_ALL, k + "_d", 1);
    else
        fv.sub_track(target, 0, EST_ALL, k + "_d_" + first_index, k + "_d_N");

    delta(source, target);
}
|
|
|
|
// Fill the acceleration channels of fv for the single coefficient type k.
//
//   sig  waveform, only used if the delta coefficients must be computed
//   fv   track holding the "<k>_a" (or "<k>_a_<start>" .. "<k>_a_N")
//        channels to be filled, and possibly the delta coefficients too
//   op   options; "include_c0" decides whether melcep numbering starts
//        at 1 instead of 0
//   k    coefficient type name
//
// The delta coefficients are taken from fv when a channel "<k>_d" or
// "<k>_d_<start>" exists; otherwise they are generated into a temporary
// track through the list form of sigpr_delta().  delta() is then handed
// the delta track and the destination sub-track.
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
               const EST_String &k)
{
    EST_Track source, target;

    const bool skip_c0 = (k == "melcep") && !op.present("include_c0");
    EST_String first_index(skip_c0 ? "1" : "0");

    // Locate the delta coefficients, building them if fv has none.
    if (fv.has_channel(k + "_d"))
    {
        fv.sub_track(source, 0, EST_ALL, k + "_d", 1);
    }
    else if (fv.has_channel(k + "_d_" + first_index))
    {
        fv.sub_track(source, 0, EST_ALL, k + "_d_" + first_index, k + "_d_N");
    }
    else
    {
        EST_StrList wanted, channel_map;
        wanted.append(k);
        add_channels_to_map(channel_map, wanted, op, 1);

        source.resize(fv.num_frames(), channel_map);
        source.fill_time(fv);
        source.set_equal_space(false);

        sigpr_delta(sig, source, op, wanted);
    }

    // Pick out the destination channels for the accelerations.
    if (fv.has_channel(k + "_a"))
        fv.sub_track(target, 0, EST_ALL, k + "_a", 1);
    else
        fv.sub_track(target, 0, EST_ALL, k + "_a_" + first_index, k + "_a_N");

    delta(source, target);
}
|
|
|
|
// List form: apply the single-type sigpr_acc() to each entry of slist,
// in list order.
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
               const EST_StrList &slist)
{
    EST_Litem *item = slist.head();

    while (item != 0)
    {
        sigpr_acc(sig, fv, op, slist(item));
        item = item->next();
    }
}
|
|
|
|
// List form: apply the single-type sigpr_delta() to each entry of slist,
// in list order.
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
                 const EST_StrList &slist)
{
    EST_Litem *item = slist.head();

    while (item != 0)
    {
        sigpr_delta(sig, fv, op, slist(item));
        item = item->next();
    }
}
|
|
|
|
|
|
// Size of frame i of pms, in samples.
//
//   pms          track whose frame times are read
//   i            frame index
//   sample_rate  multiplier applied to the time differences
//   prefer_prev  non-zero: try the gap to the previous frame first;
//                zero: try the gap to the next frame first
//
// A gap is only computed when the neighbouring frame exists; a gap that
// is missing or negative is skipped in favour of the other one.  Returns
// 0 when neither gap is usable.
int get_frame_size(EST_Track &pms,
                   int i, int sample_rate, int prefer_prev)
{
    int gap_before = -1;
    int gap_after = -1;

    if (i > 0)
        gap_before = irint((pms.t(i) - pms.t(i-1))*sample_rate);
    if (i < pms.num_frames()-1)
        gap_after = irint((pms.t(i+1) - pms.t(i))*sample_rate);

    const int first_choice = prefer_prev ? gap_before : gap_after;
    const int second_choice = prefer_prev ? gap_after : gap_before;

    if (first_choice >= 0)
        return first_choice;
    if (second_choice >= 0)
        return second_choice;
    return 0;
}
|
|
|
|
// Size of frame i of pms, expressed as a difference of frame times.
//
//   pms          track whose frame times are read
//   i            frame index
//   prefer_prev  non-zero: try the gap to the previous frame first;
//                zero: try the gap to the next frame first
//
// A gap is only computed when the neighbouring frame exists; a gap that
// is missing or negative is skipped in favour of the other one.  Returns
// 0 when neither gap is usable.
float get_time_frame_size(EST_Track &pms, int i, int prefer_prev)
{
    float gap_before = -1;
    float gap_after = -1;

    if (i > 0)
        gap_before = pms.t(i) - pms.t(i-1);
    if (i < pms.num_frames() - 1)
        gap_after = pms.t(i+1) - pms.t(i);

    const float first_choice = prefer_prev ? gap_before : gap_after;
    const float second_choice = prefer_prev ? gap_after : gap_before;

    if (first_choice >= 0)
        return first_choice;
    if (second_choice >= 0)
        return second_choice;
    return 0.0;
}
|
|
|
|
/*void sig2lpc(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf, float factor)
|
|
{
|
|
int order = lpc.num_channels() - 1;
|
|
EST_FVector coefs(order + 1);
|
|
int k;
|
|
int window_start, window_size, length; // can be merged with window_size
|
|
|
|
int sample_rate = sig.sample_rate();
|
|
|
|
EST_FVector frame;
|
|
|
|
for (k = 0; k < lpc.num_frames(); ++k)
|
|
{
|
|
int pos = irint(lpc.t(k) * sample_rate);
|
|
|
|
length = get_local_frame_size(lpc, k, sig.sample_rate());
|
|
window_size = irint(length * factor);
|
|
window_start = pos - (window_size/2);
|
|
|
|
EST_Window::window_signal(sig, wf, window_start,
|
|
window_size, frame, 1);
|
|
|
|
lpc.frame(coefs, k);
|
|
sig2lpc(frame, coefs);
|
|
}
|
|
lpc.save("test.est", "est");
|
|
}
|
|
*/
|
|
|
|
/*typedef void EST_FrameFunc(const EST_FVector &in_frame,
|
|
EST_FVector &out_frame);
|
|
|
|
void sig2coef(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf,
|
|
EST_FrameFunc *ff, float factor)
|
|
{
|
|
EST_FVector coefs, frame;
|
|
int start, size;
|
|
|
|
for (int k = 0; k < lpc.num_frames(); ++k)
|
|
{
|
|
size = irint(get_local_frame_size(lpc, k, sig.sample_rate())* factor);
|
|
start = (irint(lpc.t(k) * sig.sample_rate()) - (size/2));
|
|
|
|
EST_Window::window_signal(sig, wf, start, size, frame, 1);
|
|
|
|
lpc.frame(coefs, k);
|
|
(*ff)(frame, coefs);
|
|
}
|
|
}
|
|
*/
|
|
|
|
// Convert the waveform into coefficients of the given type, one frame
// of tr at a time.
//
//   sig     waveform to analyse
//   tr      track supplying the frame times and receiving the output
//   type    target coefficient type handed to frame_convert()
//   factor  negative: fixed window of -factor * sample_rate samples;
//           otherwise: window is the local frame size times factor
//   wf      window function applied to each stretch of signal
//
// Each window is placed so that its middle sits on the frame time.
void sig2coef(EST_Wave &sig, EST_Track &tr, EST_String type,
              float factor, EST_WindowFunc *wf)
{
    EST_FVector coefs, frame;

    int k = 0;
    while (k < tr.num_frames())
    {
        // A negative factor requests a fixed frame rate.
        const int size = (factor < 0)
            ? static_cast<int>(-1.0 * factor * static_cast<float>(sig.sample_rate()))
            : irint(get_frame_size(tr, k, sig.sample_rate()) * factor);
        const int start = (irint(tr.t(k) * sig.sample_rate()) - (size/2));

        EST_Window::window_signal(sig, wf, start, size, frame, 1);

        tr.frame(coefs, k);
        frame_convert(frame, "sig", coefs, type);

        ++k;
    }
}
|
|
|
|
// Per-frame power: sig2pow() is applied to a rectangular window of the
// signal around each frame time of pow, and the result is stored through
// pow.a(k).
//
//   sig     waveform to analyse
//   pow     track supplying the frame times and receiving the values
//   factor  negative: fixed window of -factor * sample_rate samples;
//           otherwise: window is the local frame size times factor
void power(EST_Wave &sig, EST_Track &pow, float factor)
{
    EST_FVector frame;
    EST_WindowFunc *rectangular = EST_Window::creator("rectangular");

    int k = 0;
    while (k < pow.num_frames())
    {
        const int centre = irint(pow.t(k) * sig.sample_rate());

        // A negative factor requests a fixed frame rate.
        const int length = (factor < 0)
            ? static_cast<int>(-1.0 * factor * static_cast<float>(sig.sample_rate()))
            : irint(get_frame_size(pow, k, sig.sample_rate()) * factor);
        const int first_sample = centre - length/2;

        EST_Window::window_signal(sig, rectangular, first_sample, length, frame, 1);

        sig2pow(frame, pow.a(k));

        ++k;
    }
}
|
|
|
|
// Per-frame energy: sig2rms() is applied to a rectangular window of the
// signal around each frame time of pow, and the result is stored through
// pow.a(k).
//
//   sig     waveform to analyse
//   pow     track supplying the frame times and receiving the values
//   factor  negative: fixed window of -factor * sample_rate samples;
//           otherwise: window is the local frame size times factor
void energy(EST_Wave &sig, EST_Track &pow, float factor)
{
    EST_FVector frame;
    EST_WindowFunc *rectangular = EST_Window::creator("rectangular");

    int k = 0;
    while (k < pow.num_frames())
    {
        const int centre = irint(pow.t(k) * sig.sample_rate());

        // A negative factor requests a fixed frame rate.
        const int length = (factor < 0)
            ? static_cast<int>(-1.0 * factor * static_cast<float>(sig.sample_rate()))
            : irint(get_frame_size(pow, k, sig.sample_rate()) * factor);
        const int first_sample = centre - length/2;

        EST_Window::window_signal(sig, rectangular, first_sample, length, frame, 1);

        sig2rms(frame, pow.a(k));

        ++k;
    }
}
|
|
|
|
// Derive a type name from a channel name: whatever before("_") yields
// when the name contains an underscore, otherwise the name unchanged.
static EST_String determine_type(const EST_String &intype)
{
    if (intype.contains("_"))
        return intype.before("_");
    return intype;
}
|
|
|
|
// Convert every frame of in_track into the representation out_type,
// writing into the corresponding frame of out_track.
//
//   in_track   source track
//   out_track  destination track; must have the same number of frames,
//              otherwise the mismatch is reported through EST_error
//   out_type   coefficient type to produce
//   in_type    coefficient type of the source; when empty it is derived
//              from the name of in_track's channel 0
void convert_track(EST_Track &in_track, EST_Track &out_track,
                   const EST_String &out_type, const EST_String &in_type)
{
    if (in_track.num_frames() != out_track.num_frames())
        EST_error("In track has %d frames, out track has %d\n",
                  in_track.num_frames(), out_track.num_frames());

    EST_String source_type;
    if (in_type == "")
        source_type = determine_type(in_track.channel_name(0));
    else
        source_type = in_type;

    EST_FVector in_frame(in_track.num_channels());
    EST_FVector out_frame(out_track.num_channels());

    int i = 0;
    while (i < in_track.num_frames())
    {
        in_track.frame(in_frame, i);
        out_track.frame(out_frame, i);
        frame_convert(in_frame, source_type, out_frame, out_type);
        ++i;
    }
}
|
|
|
|
|
|
|
|
// Filterbank analysis of sig, one frame of fbank_track at a time.
//
//   sig          waveform to analyse
//   fbank_track  supplies the frame times and receives the output; its
//                channel count is taken as the filterbank order
//   factor       negative: fixed window of -factor * sample_rate samples;
//                otherwise: window is the local frame size times factor
//   wf           window function applied to each stretch of signal
//   use_power_rather_than_energy, take_log
//                passed straight through to sig2fbank()
//
// A track with fewer than one channel is reported through EST_error.
void fbank(EST_Wave &sig,
           EST_Track &fbank_track,
           const float factor,
           EST_WindowFunc *wf,
           const bool use_power_rather_than_energy,
           const bool take_log)
{
    // still to add : high/low pass filtering

    EST_FVector samples, bands;

    // get_order(...) gives wrong answer ... Paul ?
    const int n_bands = fbank_track.num_channels();

    // sanity check
    if (n_bands < 1)
    {
        EST_error("Filterbank order of %i makes no sense.\n",n_bands);
        return;
    }

    int k = 0;
    while (k < fbank_track.num_frames())
    {
        // A negative factor requests a fixed frame rate.
        const int length = (factor < 0)
            ? static_cast<int>(-1.0 * factor * static_cast<float>(sig.sample_rate()))
            : irint(get_frame_size(fbank_track, k, sig.sample_rate()) * factor);
        const int centre = irint(fbank_track.t(k) * sig.sample_rate());
        const int first_sample = centre - length/2;

        EST_Window::window_signal(sig, wf, first_sample, length, samples, 1);

        fbank_track.frame(bands, k);
        sig2fbank(samples, bands, sig.sample_rate(),
                  use_power_rather_than_energy, take_log);

        ++k;
    }
}
|
|
|
|
|
|
// Mel cepstral analysis of sig, written into mfcc_track.
//
//   sig                  waveform to analyse
//   mfcc_track           supplies the frame times and receives the output
//   factor               window-size factor, forwarded to fbank()
//   fbank_order          number of channels in the intermediate
//                        filterbank track
//   liftering_parameter  forwarded to fbank2melcep()
//   wf                   window function, forwarded to fbank()
//   include_c0           forwarded to fbank2melcep()
//   use_power_rather_than_energy
//                        forwarded to fbank()
//
// Works in two passes: fbank() fills a temporary track that has the same
// frame times as mfcc_track, then fbank2melcep() converts it frame by
// frame.
void melcep(EST_Wave &sig, EST_Track &mfcc_track,
            float factor,
            int fbank_order,
            float liftering_parameter,
            EST_WindowFunc *wf,
            const bool include_c0,
            const bool use_power_rather_than_energy)
{
    EST_FVector mfcc_frame, fbank_frame;

    // first, do filterbank analysis
    // need a temporary track, with the same setup as mfcc_track
    EST_Track fbank_track;

    fbank_track.resize(mfcc_track.num_frames(), fbank_order);
    fbank_track.fill_time(mfcc_track);
    fbank_track.set_equal_space(false);

    // temp removed by pault 24/02/99
    // make_timed_track(mfcc_track, fbank_track, "filter", fbank_order, 0);

    // 'true' makes fbank(...) take logs
    fbank(sig, fbank_track, factor, wf, use_power_rather_than_energy, true);

    for (int k = 0; k < mfcc_track.num_frames(); ++k)
    {
        mfcc_track.frame(mfcc_frame, k);
        fbank_track.frame(fbank_frame, k);

        fbank2melcep(fbank_frame, mfcc_frame, liftering_parameter, include_c0);
    }
}
|