speech-tools/ling_class/EST_Relation.cc
2015-09-19 10:52:26 +02:00

703 lines
18 KiB
C++

/*************************************************************************/
/* */
/* Centre for Speech Technology Research */
/* University of Edinburgh, UK */
/* Copyright (c) 1998 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author : Alan W Black */
/* Date : February 1998 */
/*-----------------------------------------------------------------------*/
/* Generalised relations in utterances */
/* */
/*=======================================================================*/
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <fstream>
#include "ling_class/EST_Relation.h"
#include "ling_class/EST_Item.h"
#include "relation_io.h"
VAL_REGISTER_CLASS(relation,EST_Relation)
// Build an empty relation called `name`: no head, no tail and not attached
// to any utterance.
EST_Relation::EST_Relation(const EST_String &name)
    : p_name(name)
{
    p_head = p_tail = 0;
    p_utt = 0;
}
// Build an empty relation with a default-constructed name: no head, no tail
// and not attached to any utterance.
EST_Relation::EST_Relation()
{
    p_utt = 0;
    p_head = p_tail = 0;
}
void EST_Relation::copy(const EST_Relation &r)
{
// Do a *full* copy include the contents of all the items
// But not the name (?)
EST_String tmp_name;
p_name = r.p_name;
p_head = 0;
p_tail = 0;
p_utt = 0; // can't be in the same utterance as r
tmp_name = f.S("name", "");
f = r.f;
f.set("name", tmp_name);
if (r.root() != 0)
{
EST_Item i = *r.root();
EST_Item *to_root = append(&i);
copy_node_tree_contents(r.root(),to_root);
}
}
// Add a new item, built from si, at the end of this relation and return it.
// When the relation is empty the new item becomes both head and tail;
// otherwise it is inserted after the current tail.
// Returns 0 (after a warning) when invoked through a null relation pointer.
// NOTE(review): the `this == 0` test is only reachable through undefined
// behaviour and an optimising compiler may drop it; it is kept because
// callers may depend on it.
EST_Item *EST_Relation::append(EST_Item *si)
{
    if (this == 0)
    {
        EST_warning("EST_Relation: no relation to append to");
        return 0;
    }

    EST_Item *added;
    if (p_tail != 0)
        added = p_tail->insert_after(si);
    else
    {
        added = new EST_Item(this, si);
        p_head = added;
    }
    p_tail = added;

    //    if (!si->f_present("id") && utt())
    //        si->fset("id", utt()->next_id());
    return added;
}
// Append a new item, passing a null source item to append(EST_Item *).
EST_Item *EST_Relation::append()
{
    EST_Item *no_source = 0;
    return append(no_source);
}
// Prepend a new item, passing a null source item to prepend(EST_Item *).
EST_Item *EST_Relation::prepend()
{
    EST_Item *no_source = 0;
    return prepend(no_source);
}
// Add a new item, built from si, at the start of this relation and return
// it.  When the relation is empty the new item becomes both head and tail;
// otherwise it is inserted before the current head.
// Returns 0 (after a warning) when invoked through a null relation pointer.
// NOTE(review): the `this == 0` test is only reachable through undefined
// behaviour and an optimising compiler may drop it; it is kept because
// callers may depend on it.
EST_Item *EST_Relation::prepend(EST_Item *si)
{
    if (this == 0)
    {
        EST_warning("EST_Relation: no relation to prepend to");
        return 0;
    }

    EST_Item *added;
    if (p_head != 0)
        added = p_head->insert_before(si);
    else
    {
        added = new EST_Item(this,si);
        p_tail = added;
    }
    p_head = added;
    return added;
}
// Destroying a relation deletes every item it still holds (see clear()).
EST_Relation::~EST_Relation()
{
    this->clear();
}
int EST_Relation::length() const
{
EST_Item *node;
int i;
if (this == 0)
return 0;
for (i=0,node=p_head; node; node=node->next())
i++;
return i;
}
void EST_Relation::evaluate_item_features()
{
for (EST_Item *s = head(); s; s = s->next())
s->evaluate_features();
}
void EST_Relation::clear()
{
EST_Item *nn,*nnn;
for (nn = p_head; nn != 0; nn = nnn)
{
nnn = nn->next();
delete nn;
}
p_head = p_tail = 0;
}
// Delete node, first moving the head/tail pointers off it when it is the
// first or last item of this relation.
//
// A null node is ignored.  Previously a null node passed to an empty
// relation (p_head == 0) matched the comparisons below and called
// next()/prev() through a null pointer.
void EST_Relation::remove_item(EST_Item *node)
{
    if (node == 0)
        return;

    if (p_head == node)
        p_head = node->next();
    if (p_tail == node)
        p_tail = node->prev();
    delete node;
}
void EST_Relation::remove_item_feature(const EST_String &name)
{
for (EST_Item *s = p_head; s; s = next_item(s))
s->f_remove(name);
}
// Clone the relation structure of `from` into `to`, deleting any existing
// nodes in `to`.
//
// Copying a relation onto itself is a no-op.  Without this check the
// initial clear() would empty the source before it could be read.
void copy_relation(const EST_Relation &from, EST_Relation &to)
{
    if (&from == &to)
        return;

    to.clear();

    if (from.root() != 0)
    {
        // Create the root of the new tree from the source root, then let
        // copy_node_tree() fill in the rest below and beside it.
        EST_Item *to_root = to.append(from.root());
        copy_node_tree(from.root(),to_root);
    }
}
// Write this relation to outf as a label file of the given type.
// "esps" and "htk" are supported (evaluate_ff is only passed on for
// "esps"); any other type produces a warning and write_fail.
EST_write_status EST_Relation::save(ostream &outf,
                                    const EST_String &type,
                                    bool evaluate_ff) const
{
    if (type == "esps")
        return save_esps_label(&outf,*this,evaluate_ff);

    if (type == "htk")
        return save_htk_label(&outf,*this);

    EST_warning("EST_Relation: unsupported type: \"%s\"", (const char *)type);
    return write_fail;
}
// Write this relation to the named file as a label file of the given type.
// "esps" and "htk" are supported (evaluate_ff is only passed on for
// "esps"); any other type produces a warning and write_fail.
EST_write_status EST_Relation::save(const EST_String &filename,
                                    const EST_String &type,
                                    bool evaluate_ff) const
{
    if (type == "esps")
        return save_esps_label(filename,*this,evaluate_ff);

    if (type == "htk")
        return save_htk_label(filename,*this);

    EST_warning("EST_Relation: unsupported type: \"%s\"", (const char *)type);
    return write_fail;
}
// Write this relation to the named file using the "esps" label type.
EST_write_status EST_Relation::save(const EST_String &filename,
                                    bool evaluate_ff) const
{
    const EST_String default_type("esps");
    return save(filename,default_type,evaluate_ff);
}
// Write this relation to outf in the textual form read back by load():
// a "Relation <name> ; <features>" line, one line per node (written by
// save_items()), then "End_of_Relation".
// `contents` supplies the number written for each item content.
// Always returns write_ok.
EST_write_status EST_Relation::save(ostream &outf,
                                    EST_TKVL<void *,int> contents) const
{
    // Header line: relation name followed by the relation's features.
    outf << "Relation " << name() << " ; ";
    f.save(outf);
    outf << endl;

    // Node numbering starts at 1; save_items() treats 0 as "no node".
    EST_TKVL<void *,int> nodenames;
    int node_count = 1;
    save_items(p_head,outf,contents,nodenames,node_count);

    outf << "End_of_Relation" << endl;
    return write_ok;
}
// Write `node`, its following siblings and everything below them to outf.
//
// Each node is written as one line of six integers: its own number, the
// number of its contents (looked up in cnames), then the numbers of the
// nodes above, below, after and before it.  0 stands for "none".
// nodenames collects the number given to each node and node_count is the
// next free number; both are updated as nodes are visited.
// Always returns write_ok.
EST_write_status EST_Relation::save_items(EST_Item *node,
                                          ostream &outf,
                                          EST_TKVL<void *,int> &cnames,
                                          EST_TKVL<void *,int> &nodenames,
                                          int &node_count) const
{
    if (node == 0)
        return write_ok;

    // Pass 1: number every node on this level, so that the next/prev
    // references written in pass 2 can always be resolved.
    for (EST_Item *sib = node; sib; sib = sib->next())
    {
        const int assigned = node_count++;
        nodenames.add_item(sib,assigned);
    }

    // Pass 2: write each node's subtree, then the node itself.
    // This will need to be expanded if the we make Relations
    // have more complex structures
    for (EST_Item *sib = node; sib; sib = sib->next())
    {
        save_items(sib->down(),outf,cnames,nodenames,node_count);

        const int self_id    = nodenames.val(sib);
        const int content_id =
            (sib->contents() == 0 ? 0 : cnames.val(sib->contents()));
        const int up_id   = (sib->up() == 0 ? 0 : nodenames.val(sib->up()));
        const int down_id = (sib->down() == 0 ? 0 : nodenames.val(sib->down()));
        const int next_id = (sib->next() == 0 ? 0 : nodenames.val(sib->next()));
        const int prev_id = (sib->prev() == 0 ? 0 : nodenames.val(sib->prev()));

        outf << self_id << " " <<
            content_id << " " <<
            up_id << " " <<
            down_id << " " <<
            next_id << " " <<
            prev_id << endl;
    }
    return write_ok;
}
// Disabled code: everything up to the matching #endif is compiled out.
// It is a variant of load() whose `contents` argument is an
// EST_THash<int,EST_Val> instead of an EST_TVector of item contents.
#if 0
EST_read_status EST_Relation::load(EST_TokenStream &ts,
const EST_THash<int,EST_Val> &contents)
{
if (ts.get() != "Relation")
{
cerr << "load_relation: " << ts.pos_description() <<
" no new Relation" << endl;
return misc_read_error;
}
p_name = ts.get().string();
if (ts.get() != ";")
{
cerr << "load_relation: " << ts.pos_description() <<
" semicolon missing after Relation name \"" <<
p_name << "\"" << endl;
return misc_read_error;
}
if (f.load(ts) != format_ok)
return misc_read_error;
if (load_items(ts,contents) != format_ok)
return misc_read_error;
return format_ok;
}
#endif
// Read one relation from ts: the token "Relation", the relation name, a
// ";", the relation's features, and finally the node table (see
// load_items()).  `contents` is handed on to load_items() to resolve the
// content index of each node.
// Returns format_ok on success and misc_read_error otherwise; the two
// header errors are also reported on cerr.
EST_read_status EST_Relation::load(EST_TokenStream &ts,
                                   const EST_TVector < EST_Item_Content * > &contents
                                   )
{
    if (ts.get() != "Relation")
    {
        cerr << "load_relation: " << ts.pos_description() <<
            " no new Relation" << endl;
        return misc_read_error;
    }

    p_name = ts.get().string();

    if (ts.get() != ";")
    {
        cerr << "load_relation: " << ts.pos_description() <<
            " semicolon missing after Relation name \"" <<
            p_name << "\"" << endl;
        return misc_read_error;
    }

    // Features first, then the nodes; stop at the first part that fails.
    if (f.load(ts) != format_ok || load_items(ts,contents) != format_ok)
        return misc_read_error;

    return format_ok;
}
void EST_Relation::node_tidy_up_val(int &k, EST_Val &v)
{
// Called to delete the nodes in the hash table when a load
// fails
(void)k;
EST_Item *node = item(v);
node->u = 0;
node->d = 0;
node->n = 0;
node->p = 0;
delete node;
}
// Called to delete a node created during a load that failed.
// The node's up/down/next/previous links are cleared before it is deleted.
// The key k is not used.
void EST_Relation::node_tidy_up(int &k, EST_Item *node)
{
    (void)k;

    node->u = node->d = node->n = node->p = 0;
    delete node;
}
// Disabled code: everything up to the final matching #endif is compiled
// out.  It is a variant of load_items() that looks nodes and contents up in
// EST_THash<int,EST_Val> tables instead of EST_TVector arrays.
#if 0
EST_read_status EST_Relation::load_items(EST_TokenStream &ts,
const EST_THash<int,EST_Val> &contents)
{
// Load a set of nodes from a TokenStream, the file contains node
// descriptions one per line as 5 ints, this nodes name, the
// stream item it is to be related to, then the name of the
// nodes above, below, before and after it.
EST_THash<int,EST_Val> nodenames(100);
EST_read_status r = format_ok;
EST_Item *node = 0;
EST_Relation *rel=NULL;
// int expect_links=0;
while (ts.peek() != "End_of_Relation")
{
int name = atoi(ts.get().string());
int siname;
node = get_item_from_name(nodenames,name);
if (!node)
EST_error("Unknown item %d", name);
if (rel==NULL)
{
rel=node->relation();
// EST_String type = rel->f.S("type", "");
// expect_links = (type == "ladder");
}
siname = atoi(ts.get().string());
if (siname != 0)
{
int found;
EST_Val v = contents.val(siname,found);
if (!found)
{
cerr << "load_nodes: " << ts.pos_description() <<
" node's item contents" << siname << " doesn't exist\n";
r = misc_read_error;
break;
}
else
node->set_contents(icontent(v));
}
// up down next previous
node->u = get_item_from_name(nodenames,atoi(ts.get().string()));
node->d = get_item_from_name(nodenames,atoi(ts.get().string()));
node->n = get_item_from_name(nodenames,atoi(ts.get().string()));
node->p = get_item_from_name(nodenames,atoi(ts.get().string()));
// Read ladder links
#if 0
if (expect_links)
{
int numlinks = atoi(ts.get().string());
// node->link_feats.set("num_links",numlinks);
for (int i=0;i<numlinks;++i)
{
EST_Item * item = get_item_from_name(nodenames,atoi(ts.get().string()));
node->link_feats.set_val("link" + itoString(i),est_val(item));
}
}
#endif
}
ts.get(); // skip End_of_Relation
if (r == format_ok)
{
if (node != 0) // at least one node
{
p_head = get_item_from_name(nodenames,1);
p_tail = p_head->last();
if (!p_head->verify())
{
cerr << "load_nodes: " << ts.pos_description() <<
" nodes do not form consistent graph" << endl;
r = misc_read_error;
}
}
}
if (r != format_ok)
{
// failed to read this relation so clear the created nodes
// before returning, no idea what state the links are in so
// explicitly unlink them before deleting them
nodenames.map(node_tidy_up_val);
}
return r;
}
#endif
// Load a set of nodes from a TokenStream, up to and including the
// "End_of_Relation" token.
//
// The stream holds one node description after another, each made of six
// integers: this node's name, the index in `contents` of the item content
// it refers to, then the names of the nodes above, below, after and before
// it (up, down, next, previous).  0 means "none" in every field but the
// first.
//
// On success the node named 1 becomes the head of the relation and the
// tail is taken from it with last(); format_ok is returned.  If a content
// index does not resolve, or the loaded nodes fail verify(),
// misc_read_error is returned and every node created here is unlinked and
// deleted.  An unknown node name is reported through EST_error().
EST_read_status EST_Relation::load_items(EST_TokenStream &ts,
                                         const EST_TVector < EST_Item_Content * > &contents
                                         )
{
    EST_TVector < EST_Item * > nodenames(100);
    EST_read_status r = format_ok;
    EST_Item *node = 0;

    while (ts.peek() != "End_of_Relation")
    {
        int name = atoi(ts.get().string());
        node = get_item_from_name(nodenames,name);
        if (!node)
            EST_error("Unknown item %d", name);

        int siname = atoi(ts.get().string());
        if (siname != 0)
        {
            // siname comes straight from the input, so make sure it is a
            // valid index before using it on contents.
            EST_Item_Content *c = NULL;
            if (siname > 0 && siname < contents.length())
                c = contents(siname);

            if (c == NULL)
            {
                cerr << "load_nodes: " << ts.pos_description() <<
                    " node's stream item " << siname << " doesn't exist\n";
                r = misc_read_error;
                break;
            }
            node->set_contents(c);
        }

        // up down next previous
        node->u = get_item_from_name(nodenames,atoi(ts.get().string()));
        node->d = get_item_from_name(nodenames,atoi(ts.get().string()));
        node->n = get_item_from_name(nodenames,atoi(ts.get().string()));
        node->p = get_item_from_name(nodenames,atoi(ts.get().string()));
    }
    ts.get(); // skip End_of_Relation

    // Only touch the head when at least one node was read.  All three
    // steps belong under that test: an empty relation must not call
    // last()/verify() through a null head.
    if (r == format_ok && node != 0)
    {
        p_head = get_item_from_name(nodenames,1);
        p_tail = p_head->last();
        if (!p_head->verify())
        {
            cerr << "load_nodes: " << ts.pos_description() <<
                " nodes do not form consistent graph" << endl;
            r = misc_read_error;
        }
    }

    if (r != format_ok)
    {
        // failed to read this relation so clear the created nodes
        // before returning, no idea what state the links are in so
        // explicitly unlink them before deleting them
        // NOTE(review): after a verify() failure p_head/p_tail were set
        // from these nodes; this relies on deleting an EST_Item detaching
        // it from its relation's head/tail -- confirm in EST_Item.
        for (int ni = 0; ni < nodenames.length(); ni++)
        {
            EST_Item *stale = nodenames(ni);
            if (stale != NULL)
                node_tidy_up(ni, stale);
        }
    }
    return r;
}
// Return the node stored in nodenames under `name`, creating a new item in
// this relation and recording it when there is none yet.
// The name 0 means "no node" and yields 0.
EST_Item *EST_Relation::get_item_from_name(EST_THash<int,EST_Val> &nodenames,
                                           int name)
{
    if (name == 0)
        return 0;

    int present;
    EST_Val stored = nodenames.val(name,present);
    if (present)
        return item(stored);

    EST_Item *fresh = new EST_Item(this, 0);
    nodenames.add_item(name,est_val(fresh));
    return fresh;
}
// Return the node stored at index `name` of nodenames, creating a new item
// in this relation and storing it there when the slot is still empty.
// nodenames is grown to twice `name` when the index is past its end.
//
// The name 0 means "no node" and yields 0.  A negative name cannot be a
// valid index (names may come straight from a file being loaded), so it is
// reported with a warning and also yields 0 rather than being used to
// index the vector.
EST_Item *EST_Relation::get_item_from_name(EST_TVector< EST_Item * > &nodenames,
                                           int name)
{
    if (name == 0)
        return 0;

    if (name < 0)
    {
        EST_warning("EST_Relation: invalid negative item name %d", name);
        return 0;
    }

    if (name >= nodenames.length())
    {
        // NOTE(review): the second argument is presumably the "keep
        // existing values" flag -- confirm against EST_TVector::resize.
        nodenames.resize(name*2, 1);
    }

    EST_Item *node = nodenames(name);
    if (node == NULL)
    {
        node = new EST_Item(this, 0);
        nodenames[name] = node;
    }
    return node;
}
// Load this relation from an already opened token stream, choosing the
// label reader from `type`.  The file name is recorded in the relation's
// "filename" feature first.
//   "ogi"             -> load_ogi_label
//   "htk"             -> load_sample_label with 10000000
//   "ascii", "timit"  -> load_sample_label with 1
//   "words"           -> load_words_label
//   "esps", or any other value -> load_esps_label (the default)
// Returns whatever the selected reader returns.
EST_read_status EST_Relation::load(const EST_String &filename,
                                   EST_TokenStream &ts,
                                   const EST_String &type)
{
    f.set("filename",filename);

    if (type == "ogi")
        return load_ogi_label(ts,*this);

    if (type == "htk")
        return load_sample_label(ts,*this,10000000);

    if ((type == "ascii") || (type == "timit"))
        return load_sample_label(ts,*this,1);

    if (type == "words")
        return load_words_label(ts,*this);

    // currently esps is the default
    return load_esps_label(ts,*this);
}
// Load an isolated relation from a file in the label format named by
// `type`.  The file name "-" reads from standard input.
// Returns misc_read_error (after a message on cerr) when the input cannot
// be opened, otherwise the status of the stream-based load().
EST_read_status EST_Relation::load(const EST_String &filename,
                                   const EST_String &type)
{
    EST_TokenStream ts;

    const bool open_failed =
        ((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0;
    if (open_failed)
    {
        cerr << "load_relation: can't open relation input file "
             << filename << endl;
        return misc_read_error;
    }

    const EST_read_status status = load(filename, ts, type);
    ts.close();
    return status;
}
int num_leaves(const EST_Item *h)
{
int count = 0;
EST_Item *n;
for (n = h->first_leaf(); n != 0; n=n->next_leaf())
count++;
return count;
}
// First leaf of the tree that starts at head().
// Returns 0 for an empty relation; the head is tested first so that no
// member function is called through a null pointer.
EST_Item *EST_Relation::first_leaf() const
{
    EST_Item *top = head();
    if (top == 0)
        return 0;
    return top->first_leaf();
}
// Last leaf as reported by head()->last_leaf().
// Returns 0 for an empty relation; the head is tested first so that no
// member function is called through a null pointer.
EST_Item *EST_Relation::last_leaf() const
{
    EST_Item *top = head();
    if (top == 0)
        return 0;
    return top->last_leaf();
}
// Occasionally you need to get the utterance from an item.  This follows
// the item's relation to that relation's utterance.
// Returns 0 when s is null or is not in any relation.
EST_Utterance *get_utt(EST_Item *s)
{
    if (s == 0 || !s->relation())
        return 0;               // can't find an utterance

    return s->relation()->utt();
}
// Assign s to this relation by making a full copy of it (see copy()).
//
// Self-assignment is a no-op: copy() resets the head pointer before it
// reads the source, so copying a relation onto itself would empty it.
// Items held before the assignment are deleted first, because copy() only
// resets the head/tail pointers and would otherwise leak them.
EST_Relation &EST_Relation::operator=(const EST_Relation &s)
{
    if (this != &s)
    {
        clear();
        copy(s);
    }
    return *this;
}
// Print a relation: its features on one line, then each item from head()
// onwards (following next()), one per line.
ostream& operator << (ostream &s, const EST_Relation &a)
{
    s << a.f << endl;

    EST_Item *cur = a.head();
    while (cur)
    {
        s << *cur << endl;
        cur = cur->next();
    }
    return s;
}