583 lines
16 KiB
C++
583 lines
16 KiB
C++
|
/*************************************************************************/
|
||
|
/* */
|
||
|
/* Centre for Speech Technology Research */
|
||
|
/* University of Edinburgh, UK */
|
||
|
/* Copyright (c) 1995,1996 */
|
||
|
/* All Rights Reserved. */
|
||
|
/* */
|
||
|
/* Permission is hereby granted, free of charge, to use and distribute */
|
||
|
/* this software and its documentation without restriction, including */
|
||
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||
|
/* distribute, sublicense, and/or sell copies of this work, and to */
|
||
|
/* permit persons to whom this work is furnished to do so, subject to */
|
||
|
/* the following conditions: */
|
||
|
/* 1. The code must retain the above copyright notice, this list of */
|
||
|
/* conditions and the following disclaimer. */
|
||
|
/* 2. Any modifications must be clearly marked as such. */
|
||
|
/* 3. Original authors' names are not deleted. */
|
||
|
/* 4. The authors' names are not used to endorse or promote products */
|
||
|
/* derived from this software without specific prior written */
|
||
|
/* permission. */
|
||
|
/* */
|
||
|
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
|
||
|
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
|
||
|
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
|
||
|
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
|
||
|
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
|
||
|
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
|
||
|
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
|
||
|
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
|
||
|
/* THIS SOFTWARE. */
|
||
|
/* */
|
||
|
/*************************************************************************/
|
||
|
/* Author : Alan W Black */
|
||
|
/* Date : May 1998 */
|
||
|
/*-----------------------------------------------------------------------*/
|
||
|
/* EST_Utterance class source file */
|
||
|
/* */
|
||
|
/*=======================================================================*/
|
||
|
#include <cstdlib>
|
||
|
#include <cstdio>
|
||
|
#include <iostream>
|
||
|
#include <fstream>
|
||
|
#include "EST_error.h"
|
||
|
#include "EST_string_aux.h"
|
||
|
#include "ling_class/EST_Utterance.h"
|
||
|
#include "EST_UtteranceFile.h"
|
||
|
#include "EST_string_aux.h"
|
||
|
|
||
|
const EST_String DEF_FILE_TYPE = "est_ascii";
|
||
|
|
||
|
static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s);
|
||
|
static EST_Item *map_ling_item(EST_Item *si,
|
||
|
EST_TKVL<EST_Item_Content *,
|
||
|
EST_Item *> &s);
|
||
|
static void copy_relation(EST_Item *to,EST_Item *from,
|
||
|
EST_TKVL<EST_Item_Content *,EST_Item *> &slist);
|
||
|
|
||
|
// Template bookkeeping for the EST_Item_Content* -> EST_Item* key/value
// list that the helpers in this file use as their "already copied" map.
Declare_KVL_T(EST_Item_Content *, EST_Item *, KVL_ICP_IP)

#ifdef INSTANTIATE_TEMPLATES

// Explicit instantiation needs the template implementations in scope.
#include "../base_class/EST_TList.cc"
#include "../base_class/EST_TKVL.cc"

Instantiate_KVL_T(EST_Item_Content *, EST_Item *, KVL_ICP_IP)

#endif
|
||
|
|
||
|
// Default constructor: all set-up is delegated to init().
EST_Utterance::EST_Utterance()
{
    this->init();
}
|
||
|
|
||
|
void EST_Utterance::init()
|
||
|
{
|
||
|
highest_id = 0;
|
||
|
f.set("max_id", 0);
|
||
|
}
|
||
|
|
||
|
int EST_Utterance::next_id()
|
||
|
{
|
||
|
int i = f.val("max_id").Int();
|
||
|
f.set("max_id", i+1);
|
||
|
return i+1;
|
||
|
}
|
||
|
|
||
|
void EST_Utterance::clear()
|
||
|
{
|
||
|
relations.clear();
|
||
|
}
|
||
|
|
||
|
void EST_Utterance::clear_relations()
|
||
|
{
|
||
|
EST_Features::Entries p;
|
||
|
for (p.begin(relations); p; p++)
|
||
|
::relation(p->v)->clear();
|
||
|
}
|
||
|
|
||
|
// Return an empty relation called n.  If the utterance already holds a
// relation of that name it is cleared and reused; otherwise a new one
// is allocated, attached to this utterance and stored in relations.
EST_Relation *EST_Utterance::create_relation(const EST_String &n)
{
    EST_Relation *existing = relation(n, FALSE);

    if (existing != 0)
    {
        // Already present: recycle it rather than allocating again.
        existing->clear();
        return existing;
    }

    EST_Relation *fresh = new EST_Relation(n);
    fresh->set_utt(this);
    relations.set_val(n, est_val(fresh));
    return fresh;
}
|
||
|
|
||
|
// Depth-first search below p for an item whose "id" feature equals n.
// Items without an "id" are compared as if their id were "0".
// Returns p itself when p is null or matches, the first matching
// descendant otherwise, or 0 when nothing below p matches.
static EST_Item *item_id(EST_Item *p, const EST_String &n)
{
    if (p == 0)
        return p;
    if (p->S("id","0") == n)
        return p;

    for (EST_Item *child = daughter1(p); child != 0; child = child->next())
    {
        EST_Item *hit = item_id(child, n);
        if (hit != 0)
            return hit;
    }

    return 0;
}
|
||
|
|
||
|
// Find the item whose "id" feature equals n, searching every relation
// of the utterance in turn.  When nothing matches EST_error() is
// invoked; the final return is only reached if EST_error() returns.
EST_Item *EST_Utterance::id(const EST_String &n) const
{
    EST_Features::Entries rel_entry;

    for (rel_entry.begin(relations); rel_entry; rel_entry++)
    {
        EST_Item *start = ::relation(rel_entry->v)->head();
        while (start != 0)
        {
            EST_Item *found = item_id(start, n);
            if (found != 0)
                return found;
            start = next_item(start);
        }
    }

    EST_error("Could not find item matching id %s\n", (const char *)n);
    return 0;
}
|
||
|
|
||
|
void EST_Utterance::evaluate_all_features()
|
||
|
{
|
||
|
EST_Features::Entries p;
|
||
|
|
||
|
for (p.begin(relations); p; p++)
|
||
|
::relation(p->v)->evaluate_item_features();
|
||
|
}
|
||
|
|
||
|
void EST_Utterance::remove_relation(const EST_String &n)
|
||
|
{
|
||
|
EST_Relation *r = relation(n,FALSE);
|
||
|
|
||
|
if (r != 0)
|
||
|
relations.remove(n);
|
||
|
}
|
||
|
|
||
|
// Look up the relation called name.
//   err != 0 : plain lookup with no default, so a missing name is handled
//              however EST_Features::f() handles a missing feature.
//   err == 0 : a null relation is supplied as the default; callers in
//              this file rely on getting 0 back for a missing name.
EST_Relation *EST_Utterance::relation(const char *name,int err) const
{
    if (!err)
    {
        EST_Relation *none = 0;
        return ::relation(relations.f(name, est_val(none)));
    }

    return ::relation(relations.f(name));
}
|
||
|
|
||
|
bool EST_Utterance::relation_present(const EST_String name) const
|
||
|
{
|
||
|
if (!name.contains("("))
|
||
|
return relations.present(name);
|
||
|
EST_StrList s;
|
||
|
BracketStringtoStrList(name, s);
|
||
|
return relation_present(s);
|
||
|
}
|
||
|
|
||
|
// True only if every name in the list is a relation of this utterance
// (so an empty list yields true).
bool EST_Utterance::relation_present(EST_StrList &names) const
{
    EST_Litem *cursor = names.head();

    while (cursor != 0)
    {
        if (!relations.present(names(cursor)))
            return false;
        cursor = cursor->next();
    }

    return true;
}
|
||
|
|
||
|
// Copy assignment: make this utterance a copy of s and return *this.
EST_Utterance &EST_Utterance::operator=(const EST_Utterance &s)
{
    // copy() begins by clearing this utterance, so assigning an
    // utterance to itself would wipe its relations before they could
    // be read.  Self-assignment is therefore treated as a no-op.
    if (this != &s)
        copy(s);

    return *this;
}
|
||
|
|
||
|
// Stream insertion: write the utterance to st in "est_ascii" format.
// The status returned by save() is not reported to the caller.
ostream& operator << (ostream &st, const EST_Utterance &u)
{
    (void) u.save(st, "est_ascii");

    return st;
}
|
||
|
|
||
|
void EST_Utterance::copy(const EST_Utterance &u)
|
||
|
{
|
||
|
// Make a copy of the utterance
|
||
|
EST_TKVL<EST_Item_Content *,EST_Item *> sisilist;
|
||
|
EST_Relation *nrel;
|
||
|
EST_Item *rnode;
|
||
|
|
||
|
clear();
|
||
|
f = u.f;
|
||
|
|
||
|
EST_Features::Entries r;
|
||
|
for (r.begin(u.relations); r; r++)
|
||
|
{
|
||
|
EST_Relation *rr = ::relation(r->v);
|
||
|
nrel = create_relation(rr->name());
|
||
|
nrel->f = rr->f;
|
||
|
if (rr->head() != 0)
|
||
|
{
|
||
|
rnode = nrel->append(map_ling_item(rr->head(),sisilist));
|
||
|
copy_relation(rnode,rr->head(),sisilist);
|
||
|
}
|
||
|
}
|
||
|
clear_up_sisilist(sisilist);
|
||
|
}
|
||
|
|
||
|
// File-local forwarder to the free function sub_utterance().
// Presumably needed because, inside EST_Utterance member functions, the
// name sub_utterance refers to the member of the same name.
static void extra_sub_utterance(EST_Utterance &u,EST_Item *i)
{
    ::sub_utterance(u, i);
}
|
||
|
|
||
|
// Rebuild this utterance as the sub-utterance rooted at item i.  The
// work is done by the free function sub_utterance(), reached through
// extra_sub_utterance().
void EST_Utterance::sub_utterance(EST_Item *i)
{
    EST_Utterance &self = *this;

    extra_sub_utterance(self, i);
}
|
||
|
|
||
|
// Recursively merge the structure rooted at root (source) into uroot
// (destination).
//
// The features of root are merged into uroot, then the source's next
// sibling and first daughter are handled in that order.  For each, the
// value of `feature` on the source node is looked up in `items`: if an
// existing item is registered under that value it is inserted into the
// destination structure, otherwise a fresh item is inserted.
// urel and rel are only passed along to the recursive calls.
static void merge_tree(EST_Relation *urel,
                       EST_Relation *rel,
                       EST_Item *uroot,
                       EST_Item *root,
                       EST_Features &items,
                       EST_String feature)
{
    EST_Item *no_item = 0;   // default for lookups that find nothing

    merge_features(uroot->features(), root->features());

    // Horizontal step: the sibling following root.
    EST_Item *sibling = root->next();
    if (sibling != NULL)
    {
        EST_Item *known = item(items.f(sibling->S(feature), est_val(no_item)));
        EST_Item *target;
        if (known)
            target = uroot->insert_after(known);
        else
            target = uroot->insert_after();
        merge_tree(urel, rel, target, sibling, items, feature);
    }

    // Vertical step: the first daughter of root.
    EST_Item *child = root->down();
    if (child != NULL)
    {
        EST_Item *known = item(items.f(child->S(feature), est_val(no_item)));
        EST_Item *target;
        if (known)
            target = uroot->insert_below(known);
        else
            target = uroot->insert_below();
        merge_tree(urel, rel, target, child, items, feature);
    }
}
|
||
|
|
||
|
// Global merge of `extra` into `utt`, using the value of `feature` to
// decide which items correspond.
//
// Every relation of `extra` becomes a new relation in `utt`; when the
// name is already taken, "+" is appended until it is unique.  Items of
// `extra` whose feature value matches an item already in `utt` reuse
// that item, all others get fresh items.  Always returns TRUE.
int utterance_merge(EST_Utterance &utt,
                    EST_Utterance &extra,
                    EST_String feature)
{
    // Pass 1: index the items already in utt by their feature value.
    EST_Features items;
    EST_Features::Entries utt_entry;

    for (utt_entry.begin(utt.relations); utt_entry; utt_entry++)
    {
        EST_Item *it = relation(utt_entry->v)->head();
        while (it != NULL)
        {
            EST_String key = it->S(feature);
            items.set_val(key, est_val(it));
            it = it->next_item();
        }
    }

    // Pass 2: graft each relation of extra onto utt.
    EST_Features::Entries extra_entry;

    for (extra_entry.begin(extra.relations); extra_entry; extra_entry++)
    {
        EST_Relation *src = relation(extra_entry->v);

        // Pick a name that does not clash with an existing relation.
        EST_String target_name = src->name();
        while (utt.relation_present(target_name))
            target_name += "+";

        EST_Relation *dest = utt.create_relation(target_name);

        EST_Item *src_head = src->head();
        if (src_head == NULL)
            continue;   // empty relation: created, nothing to copy

        EST_Item *no_item = 0;
        EST_Item *known = item(items.f(src_head->S(feature), est_val(no_item)));
        EST_Item *dest_head = known ? dest->append(known) : dest->append();
        merge_tree(dest, src, dest_head, src_head, items, feature);
    }

    return TRUE;
}
|
||
|
|
||
|
// Join sub_utt onto utt at the item utt_root.
//
// sub_root (an item of sub_utt) is merged with utt_root: its features
// are merged into utt_root (utt_root keeps its own "id") and the
// structure hanging off sub_root is copied under utt_root.  Every other
// relation of sub_utt has its contents appended (not merged) to the
// relation of the same name in utt, which is created if necessary.
//
// utt_root and sub_root must be non-null and belong to relations of the
// same name; violations are reported through EST_error().
// Returns TRUE on completion, or FALSE if EST_error() returns after a
// null root was reported.
int utterance_merge(EST_Utterance &utt,
                    EST_Utterance &sub_utt,
                    EST_Item *utt_root,
                    EST_Item *sub_root)
{
    // Maps source item contents to the item standing in for them in utt.
    EST_TKVL<EST_Item_Content *,EST_Item *> sisilist;
    EST_Item *rnode;
    EST_Relation *nrel;

    // The null check has to come before anything dereferences the roots.
    if ((utt_root == 0) || (sub_root == 0))
    {
        EST_error("utterance_merge: items are null");
        return FALSE;   // only reached if EST_error() returns
    }

    if (utt_root->relation_name() != sub_root->relation_name())
        EST_error("utterance_merge: items not is same relation");

    // merge features but preserve root id
    EST_String root_id = utt_root->S("id");
    merge_features(utt_root->features(), sub_root->features());
    utt_root->set("id", root_id);

    // in case root item in sub is referenced elsewhere in the structure
    sisilist.add_item(sub_root->contents(),utt_root);
    copy_relation(utt_root,sub_root,sisilist);

    EST_Features::Entries r;
    for (r.begin(sub_utt.relations); r; r++)
    {
        EST_Relation *rr = ::relation(r->v);

        // The relation holding the roots has been handled above.
        if (rr->name() != utt_root->relation_name())
        {
            if (!utt.relation_present(rr->name()))
                nrel = utt.create_relation(rr->name());
            else
                nrel = utt.relation(rr->name());

            if (rr->head() != 0)
            {
                EST_Item *nn = map_ling_item(rr->head(),sisilist);
                rnode = nrel->append(nn);
                copy_relation(rnode,rr->head(),sisilist);
            }
        }
    }

    // utt_root is a live item of utt, not a temporary: take it out of
    // the map so that clear_up_sisilist() does not delete it.
    sisilist.remove_item(sub_root->contents());
    clear_up_sisilist(sisilist);
    return TRUE;
}
|
||
|
|
||
|
// Recreate, starting at `to`, the structure that follows `from`: first
// the chain of next siblings, then the daughters.  Each source item is
// translated through map_ling_item() so that contents already seen
// (recorded in slist) are reused rather than copied again.
static void copy_relation(EST_Item *to,EST_Item *from,
                          EST_TKVL<EST_Item_Content *,EST_Item *> &slist)
{
    EST_Item *src_next = from->next();
    if (src_next != 0)
    {
        EST_Item *dst_next = to->insert_after(map_ling_item(src_next, slist));
        copy_relation(dst_next, src_next, slist);
    }

    EST_Item *src_down = from->down();
    if (src_down != 0)
    {
        EST_Item *dst_down = to->insert_below(map_ling_item(src_down, slist));
        copy_relation(dst_down, src_down, slist);
    }
}
|
||
|
|
||
|
// Translate the source item si into its counterpart for the copy.
// If the contents of si are already recorded in s, the recorded item is
// returned.  Otherwise si is copied, the copy's "id" feature is
// removed, and the copy is recorded in s under si's contents.
static EST_Item *map_ling_item(EST_Item *si,
                               EST_TKVL<EST_Item_Content *,EST_Item *> &s)
{
    EST_Item *absent = 0;
    EST_Item *mapped = s.val_def(si->contents(), absent);

    if (mapped != absent)
        return mapped;

    // First sighting of these contents: copy and remember.
    EST_Item *fresh = new EST_Item(*si);
    fresh->f_remove("id");
    s.add_item(si->contents(), fresh);
    return fresh;
}
|
||
|
|
||
|
// Delete every EST_Item stored as a value in s.  Per the original
// author's note, only the item objects are freed; their contents stay
// alive because the copied utterance still refers to them.
// The list entries themselves are left in place.
static void clear_up_sisilist(EST_TKVL<EST_Item_Content *,EST_Item *> &s)
{
    EST_Litem *node = s.list.head();

    while (node != 0)
    {
        delete s.list(node).v;
        node = node->next();
    }
}
|
||
|
|
||
|
// Return the copy of i's parent in relation relname, viewed in that
// relation.  Yields 0 when i has no parent in relname or when the
// parent's contents have not been recorded in s yet.
static EST_Item *mapped_parent(EST_Item *i,const EST_String &relname,
                               EST_TKVL<EST_Item_Content *,EST_Item *> &s)
{
    EST_Item *orig_parent = parent(i, relname);

    if (orig_parent == 0)
        return 0;
    if (!s.present(orig_parent->contents()))
        return 0;

    return map_ling_item(orig_parent, s)->as_relation(relname);
}
|
||
|
|
||
|
// Copy item i, and recursively its daughters in every relation it
// belongs to, into the utterance sub.  s records which contents have
// already been copied; an item whose contents are in s is skipped.
//
// For each relation of i the copy is placed as follows:
//   - relation not yet in sub: create it and append the copy;
//   - i's parent in that relation already copied: append the copy as a
//     daughter of the copied parent;
//   - otherwise: append the copy to the existing relation.
static void sub_utt_copy(EST_Utterance &sub,EST_Item *i,
                         EST_TKVL<EST_Item_Content *,EST_Item *> &s)
{
    if (s.present(i->contents()))
        return;

    EST_Item *copied = map_ling_item(i, s);

    for (EST_Litem *rp = i->relations().list.head(); rp != 0; rp = rp->next())
    {
        EST_String relname = i->relations().list(rp).k;
        EST_Item *new_parent = 0;

        if (!sub.relation_present(relname))
            sub.create_relation(relname)->append(copied);
        else if ((new_parent = mapped_parent(i, relname, s)) != 0)
            new_parent->append_daughter(copied);
        else
            sub.relation(relname)->append(copied);

        // Descend into the daughters of i within this relation.
        for (EST_Item *d = daughter1(i, relname); d != 0; d = d->next())
            sub_utt_copy(sub, d, s);
    }
}
|
||
|
|
||
|
// Build in sub a new utterance made of item i, the relations it takes
// part in, and its daughters.  Any relations previously held by sub
// are discarded first.
void sub_utterance(EST_Utterance &sub,EST_Item *i)
{
    sub.clear();

    // Records which item contents have been copied so far.
    EST_TKVL<EST_Item_Content *,EST_Item *> content_map;

    sub_utt_copy(sub, i, content_map);
    clear_up_sisilist(content_map);
}
|
||
|
|
||
|
// Load an utterance from the named file; the name "-" means standard
// input.  Returns misc_read_error if the input cannot be opened,
// otherwise whatever load(EST_TokenStream &) returns.  When that
// status equals read_ok the "filename" feature is set to the name used.
EST_read_status EST_Utterance::load(const EST_String &filename)
{
    EST_TokenStream ts;

    const bool open_failed =
        ((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0;

    if (open_failed)
    {
        cerr << "load_utt: can't open utterance input file "
             << filename << endl;
        return misc_read_error;
    }

    EST_read_status status = load(ts);

    if (status == read_ok)
        f.set("filename", filename);

    ts.close();

    return status;
}
|
||
|
|
||
|
// Load an utterance from an already-open token stream.
//
// Each registered utterance file type that is flagged as recognisable
// and has a loader is tried in turn, rewinding the stream to its
// starting position before every attempt; the first loader to return
// read_ok wins.  highest_id is set from the max_id reported by the
// loaders.  Returns the status of the last loader tried, or read_error
// if none was tried.
EST_read_status EST_Utterance::load(EST_TokenStream &ts)
{
    EST_read_status stat=read_error;
    int pos = ts.tell();
    // Start from 0 so that highest_id gets a defined value even when no
    // loader runs or a loader fails without writing max_id.
    int max_id = 0;

    init(); // we're committed to reading something so clear utterance

    for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
    {
        EST_UtteranceFileType t = EST_UtteranceFile::map.token(n);

        if (t == uff_none)
            continue;

        EST_UtteranceFile::Info *info = &(EST_UtteranceFile::map.info(t));

        // Skip formats that are not flagged as recognisable.
        if (! info->recognise)
            continue;

        EST_UtteranceFile::Load_TokenStream * l_fun = info->load;

        if (l_fun == NULL)
            continue;

        // Every candidate loader starts from the same stream position.
        ts.seek(pos);

        stat = (*l_fun)(ts, *this, max_id);

        if (stat == read_ok)
        {
            // set_file_type(EST_UtteranceFile::map.value(t));
            break;
        }
    }

    highest_id = max_id;
    return stat;
}
|
||
|
|
||
|
// Save the utterance to the named file in the given format; the name
// "-" means standard output.  Returns write_fail if the output stream
// is not usable, otherwise the status of save(ostream &, type).
EST_write_status EST_Utterance::save(const EST_String &filename,
                                     const EST_String &type) const
{
    if (filename == "-")
    {
        if (!cout)
            return write_fail;
        return save(cout, type);
    }

    // An automatic stream object is closed and released on every return
    // path, including the open-failure one (the previous heap-allocated
    // stream was leaked in that case).
    ofstream file(filename);

    if (!file)
        return write_fail;

    ostream &outf = file;
    return save(outf, type);
}
|
||
|
|
||
|
// Write the utterance to outf in the given format.  An empty type
// selects DEF_FILE_TYPE.  Returns write_fail (after a message on cerr)
// if the type is unknown or has no save function registered, otherwise
// the status returned by the format's save function.
EST_write_status EST_Utterance::save(ostream &outf,
                                     const EST_String &type) const
{
    EST_String save_type = type;
    if (type == "")
        save_type = DEF_FILE_TYPE;

    EST_UtteranceFileType t = EST_UtteranceFile::map.token(save_type);
    if (t == uff_none)
    {
        cerr << "Utterance: unknown filetype in saving " << save_type << endl;
        return write_fail;
    }

    EST_UtteranceFile::Save_TokenStream *writer =
        EST_UtteranceFile::map.info(t).save;
    if (writer == NULL)
    {
        cerr << "Can't save utterances to files type " << save_type << endl;
        return write_fail;
    }

    return (*writer)(outf, *this);
}
|
||
|
|
||
|
void utt_2_flat_repr( const EST_Utterance &utt,
|
||
|
EST_String &flat_repr )
|
||
|
{
|
||
|
EST_Item *phrase = utt.relation("Phrase")->head();
|
||
|
for( ; phrase; phrase=phrase->next() ){
|
||
|
flat_repr += "<";
|
||
|
|
||
|
EST_Item *word = daughter1(phrase);
|
||
|
for( ; word; word=word->next() ){
|
||
|
flat_repr += "{";
|
||
|
|
||
|
EST_Item *syllable = daughter1(word, "SylStructure");
|
||
|
for( ; syllable; syllable=syllable->next() ){
|
||
|
flat_repr += EST_String::cat( "(", syllable->S("stress") );
|
||
|
|
||
|
EST_Item *phone = daughter1(syllable);
|
||
|
for( ; phone; phone=phone->next() )
|
||
|
flat_repr += EST_String::cat( " ", phone->S("name"), " " );
|
||
|
flat_repr += ")";
|
||
|
}
|
||
|
flat_repr += "}";
|
||
|
}
|
||
|
flat_repr += EST_String::cat( "> _", phrase->S("name"), " " );
|
||
|
}
|
||
|
}
|