speech-tools/include/EST_PST.h
2015-09-19 10:52:26 +02:00

154 lines
6.8 KiB
C++

/*************************************************************************/
/* */
/* Centre for Speech Technology Research */
/* University of Edinburgh, UK */
/* Copyright (c) 1996 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author : Alan W Black */
/* Date : July 1996 */
/*-----------------------------------------------------------------------*/
/* */
/* A general class for PredictionSuffixTrees */
/* */
/*=======================================================================*/
#ifndef __PredictionSuffixTree_H__
#define __PredictionSuffixTree_H__
#include "EST_simplestats.h"
#include "EST_types.h"
#include "EST_Features.h"
class EST_PredictionSuffixTree_tree_node {
private:
protected:
int p_level;
int state;
EST_DiscreteProbDistribution pd;
EST_String path; /* context */
void delete_node(void *n) { if (n != 0) delete (EST_PredictionSuffixTree_tree_node *)n;}
public:
// EST_StringTrie nodes;
// EST_TKVL <EST_String, EST_PredictionSuffixTree_tree_node *> nodes;
EST_Features nodes;
EST_PredictionSuffixTree_tree_node() {p_level=0;}
~EST_PredictionSuffixTree_tree_node();
void clear(void);
const EST_String &get_path(void) const {return path;}
void set_path(const EST_String &s) {path=s;}
void set_level(int l) {p_level=l;}
void set_state(int s) {state=s;}
int get_state(void) const {return state;}
int get_level(void) const {return p_level;}
void cumulate(const EST_String &s,double count=1) {pd.cumulate(s,count);}
void cumulate(const int i,double count=1) {pd.cumulate(i,count);}
const EST_String &most_probable(double *p) const;
const EST_DiscreteProbDistribution &prob_dist() const {return pd;}
void print_freqs(ostream &os);
void print_probs(ostream &os);
};
VAL_REGISTER_CLASS_DCLS(pstnode,EST_PredictionSuffixTree_tree_node)
class EST_PredictionSuffixTree {
private:
enum EST_filetype {PredictionSuffixTree_ascii, PredictionSuffixTree_binary};
protected:
int p_order;
int num_states;
EST_PredictionSuffixTree_tree_node *nodes;
EST_DiscreteProbDistribution *pd; // distribution of predictees
const EST_String &ppredict(EST_PredictionSuffixTree_tree_node *node,
const EST_StrVector &words,
double *prob, int *state,
const int index=0) const;
void p_accumulate(EST_PredictionSuffixTree_tree_node *node,
const EST_StrVector &words,
double count,
const int index=0);
const EST_DiscreteProbDistribution &p_prob_dist(
EST_PredictionSuffixTree_tree_node *node,
const EST_StrVector &words,
const int index=0) const;
public:
EST_PredictionSuffixTree();
EST_PredictionSuffixTree(const int order) {init(order);}
EST_PredictionSuffixTree(const EST_String filename);
EST_PredictionSuffixTree(const EST_TList<EST_String> &vocab,int order=2);
~EST_PredictionSuffixTree();
void clear(void);
void init(const int order);
double samples() const { return pd->samples(); }
int states() const { return num_states; }
int order(void) const {return p_order;}
void accumulate(const EST_StrVector &words,const double count=1,const int index=0);
int load(const EST_String filename);
int save(const EST_String filename,const EST_PredictionSuffixTree::EST_filetype type=PredictionSuffixTree_ascii);
// build EST_PredictionSuffixTree from train data
void build(const EST_String filename,
const EST_String prev,
const EST_String prev_prev,
const EST_String last);
void build(const EST_StrList &input); // to go
void test(const EST_String filename); // test EST_PredictionSuffixTree against test data
void print_freqs(ostream &os);
void print_probs(ostream &os);
const EST_String &predict(const EST_StrVector &words) const;
const EST_String &predict(const EST_StrVector &words,double *prob) const;
const EST_String &predict(const EST_StrVector &words,double *prob,int *state) const;
const EST_DiscreteProbDistribution &prob_dist(const EST_StrVector &words)
const
{return p_prob_dist(nodes,words);}
/* Reverse probability, given X what is prob of EST_PredictionSuffixTree Y */
double rev_prob(const EST_StrVector &words) const;
double rev_prob(const EST_StrVector &words,
const EST_DiscreteProbDistribution &pd) const;
/* print frequency or probability models */
/* build model from file */
/* predict and measure success */
};
#endif // __PredictionSuffixTree_H__