speech-tools/include/EST_SCFG_Chart.h
2015-09-19 10:52:26 +02:00

185 lines
7.8 KiB
C++

/*************************************************************************/
/* */
/* Centre for Speech Technology Research */
/* University of Edinburgh, UK */
/* Copyright (c) 1997 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author : Alan W Black */
/* Date : June 1997 */
/*-----------------------------------------------------------------------*/
/* */
/* A SCFG chart parser, general functions */
/* */
/*=======================================================================*/
#ifndef __EST_SCFG_CHART_H__
#define __EST_SCFG_CHART_H__
#include "EST_String.h"
#include "EST_simplestats.h"
#include "EST_string_aux.h"
#include "EST_SCFG.h"
#include "ling_class/EST_Relation.h"
class EST_SCFG_Chart_Edge;
/** An internal class for \Ref{EST_SCFG_Chart} for representing edges
in the chart during parsing with SCFGs.
A standard Earley type chart edge, with representations for two
daughters and a position or what has been recognised. A probability
is also included.
*/
class EST_SCFG_Chart_Edge {
private:
int p_d1;
int p_d2;
int p_pos;
double p_prob;
public:
/**@name Constructor and initialisation functions */
//@{
EST_SCFG_Chart_Edge();
EST_SCFG_Chart_Edge(double prob, int d1, int d2, int pos);
~EST_SCFG_Chart_Edge();
//@}
/// Postion, 0 1 or 2, where 0 is empty, 1 is incomplete 2 is complete.
int pos(void) { return p_pos; }
/// Edge probability
double prob(void) { return p_prob; }
/// (Non)terminal of daughter 1
int d1() { return p_d1; }
/// (Non)terminal of daughter 2
int d2() { return p_d2; }
};
/** A class for parsing with a probabilistic grammars.
The chart (sort of closer to CKY table) consists of indexes of
edges indexed by vertex number of mother non-terminal.
The initial values (well-formed substring table) are taken from
an \Ref{EST_Stream} with a given feature. The grammar may be
specified as LISP rules or as an already constructed \Ref{EST_SCFG}.
This produces a single best parse. It treats the grammar as
strictly context free in that the probability of a nonterminal
over vertex n to m, is the sum of all the possible analyses
of that sub-tree. Only the best analysis is kept for the
resulting parse tree.
@author Alan W Black (awb@cstr.ed.ac.uk): October 1997
*/
class EST_SCFG_Chart {
private:
/// pointer to grammar
EST_SCFG *grammar;
/// TRUE is grammar was created internally, FALSE is can't be freed
int grammar_local;
/// Number of vertices (number of words + 1)
int n_vertices;
/// Index of edges by vertex start x vertex end x nonterminal
EST_SCFG_Chart_Edge ****edges;
/// Index of basic symbols indexed by (start) vertex.
EST_SCFG_Chart_Edge **wfst;
/// An empty edge, denotes 0 probability edge.
EST_SCFG_Chart_Edge *emptyedge;
// Find the best analysis of nonterminal {\tt p} from {\tt start}
// to {\tt end}. Used after parsing
double find_best_tree(int start,int end,int p)
{ EST_SCFG_Chart_Edge *r;
if ((r=edges[start][end][p]) != 0) return r->prob();
else return find_best_tree_cal(start,end,p); }
// Calculate best tree/probability
double find_best_tree_cal(int start,int end,int p);
void setup_edge_table();
void delete_edge_table();
LISP print_edge(int start, int end, int name, EST_SCFG_Chart_Edge *e);
// Extract edge from chart and add it to stream
void extract_edge(int start, int end, int p,
EST_SCFG_Chart_Edge *e,
EST_Item *s,
EST_Item **word);
// Build parse from distinguished symbol alone
void extract_forced_parse(int start, int end, EST_Item *s, EST_Item *w);
public:
/**@name Constructor and initialisation functions */
//@{
EST_SCFG_Chart();
~EST_SCFG_Chart();
//@}
/**@name Grammar and parse string initialisation functions */
//@{
/// Initialize from LISP rules set
void set_grammar_rules(LISP r);
/// Initialize from existing \Ref{EST_SCFG} grammar
void set_grammar_rules(EST_SCFG &grammar);
/** Initialize for parsing from relation using {\tt name} feature
setting up the "Well Formed Substring Table" */
void setup_wfst(EST_Relation *s,const EST_String &name="name");
/** Initialize for parsing from s to e using {\tt name} feature
setting up the "Well Formed Substring Table" */
void setup_wfst(EST_Item *s, EST_Item *e,const EST_String &name="name");
//@}
/**@name parsing functions */
//@{
/// Parses the loaded WFST with the loaded grammar.
void parse();
/// Return the parse in full LISP form.
LISP find_parse();
/// Extract parse tree and add it to syn linking leafs to word
void extract_parse(EST_Relation *syn,EST_Relation *word,int force=0);
/// Extract parse tree and add it to syn linking leafs to items s to e
void extract_parse(EST_Relation *syn,EST_Item *s,
EST_Item *e,int force=0);
//@}
};
/** Build a relation from a LISP list of items.
*/
void EST_SCFG_chart_load_relation(EST_Relation *s,LISP sent);
/** Parse a given string using the given grammar.
*/
LISP scfg_parse(LISP string,LISP grammar);
/** Parse the given string using the given \Ref{EST_SCFG}.
*/
LISP scfg_parse(LISP string,EST_SCFG &grammar);
/** Parse named features in (list) relation Word into (tree)
** relation Syntax
*/
void scfg_parse(class EST_Relation *Word, const EST_String &name,
class EST_Relation *Syntax, EST_SCFG &grammar);
#endif