speech-tools/stats/EST_viterbi.cc

/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                       Copyright (c) 1996,1997                         */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                 Authors:  Alan W Black                                */
/*                 Date   :  July 1996                                   */
/*-----------------------------------------------------------------------*/
/*  A viterbi decoder                                                    */
/*                                                                       */
/*  User provides the candidates, target and combine score function      */
/*  and it searches for the best path through the candidates             */
/*                                                                       */
/*=======================================================================*/
#include <cstdio>
#include "EST_viterbi.h"

static void init_paths_array(EST_VTPoint *n,int num_states);
static void debug_output_1(EST_VTPoint *p,EST_VTCandidate *c,
			   EST_VTPath *np, int i);

EST_VTPoint::~EST_VTPoint()
{
    int i;

    if (paths != 0) delete paths;
    if (num_states != 0)
    {
	for (i=0; i<num_states; i++)
	    if (st_paths[i] != 0)
		delete st_paths[i];
	delete [] st_paths;
    }
    if (cands != 0) delete cands;
    if (next != 0) delete next;
}

EST_Viterbi_Decoder::EST_Viterbi_Decoder(uclist_f_t a, unpath_f_t b)
  : vit_a_big_number(1.0e10)
{
    beam_width = 0;
    cand_width = 0;
    user_clist = a;
    user_npath = b;
    num_states = 0;

    do_pruning = FALSE;
    overall_path_pruning_envelope_width = -1;
    candidate_pruning_envelope_width = -1;

    debug = FALSE;
    trace = FALSE;
    big_is_good = TRUE;  // for probabilities it is
}

EST_Viterbi_Decoder::EST_Viterbi_Decoder(uclist_f_t a, unpath_f_t b, int s)
  : vit_a_big_number(1.0e10)
{
    beam_width = 0;
    cand_width = 0;
    user_clist = a;
    user_npath = b;

    do_pruning = FALSE;
    overall_path_pruning_envelope_width = -1;
    candidate_pruning_envelope_width = -1;

    // if s == -1 number of states is determined by number of cands
    // this is specific to a particular search but very efficient
    num_states = s;  // can do the lattice search more directly
    debug = FALSE;
    trace = FALSE;
    big_is_good = TRUE;  // for probabilities it is
}

EST_Viterbi_Decoder::~EST_Viterbi_Decoder()
{
    delete timeline;
}

void EST_Viterbi_Decoder::initialise(EST_Relation *p)
{
    // Creates a time line with each point pointing at each item in p
    EST_Item *i;
    EST_VTPoint *t = 0,*n;

    for (i=p->head(); i != 0; i=i->next())
    {
	n = new EST_VTPoint;
	n->s = i;
	if (num_states > 0)
	    init_paths_array(n,num_states);
	if (t == 0)
	    timeline = n;
	else
	    t->next = n;
	t=n;
    }
    // Extra one at the end for final paths
    n = new EST_VTPoint;
    if (num_states > 0)
	init_paths_array(n,num_states);

    // Need init path on first point so a search can start
    if (num_states == 0)   // general search case
	timeline->paths = new EST_VTPath;
    if (num_states == -1)
	init_paths_array(timeline,1);  // a start point

    if (t == 0)
	timeline = n;   // its an empty stream
    else
	t->next = n;
}

static void init_paths_array(EST_VTPoint *n,int num_states)
{
    // Create the states array and initialize it
    int j;

    n->num_states = num_states;
    n->st_paths = new EST_VTPath*[num_states];
    for (j=0;j<num_states;j++)
	n->st_paths[j] = 0;
}

const int EST_Viterbi_Decoder::betterthan(const float a,const float b) const
{
    // Some thing big is better, others want it to be as small as possible
    // this just tells you if a is better than b by checking the variable
    // in the decoder itself.

    // For probabilities big_is_good == TRUE (or log probabilities)

    if (big_is_good)
	return (a > b);
    else
	return (a < b);
}

static int init_dynamic_states(EST_VTPoint *p, EST_VTCandidate *cands)
{
    // In a special (hmm maybe not so special), the number of "states"
    // is the number of candidates
    EST_VTCandidate *c;
    int i;

    for (i=0, c=cands; c != 0; c=c->next,i++)
	c->pos = i;
    init_paths_array(p,i);

    return i;
}

void EST_Viterbi_Decoder::set_pruning_parameters(float beam, float
						 ob_beam)
{

    do_pruning = TRUE;
    overall_path_pruning_envelope_width = beam;
    candidate_pruning_envelope_width = ob_beam;

}

void EST_Viterbi_Decoder::turn_on_debug()
{
    debug = TRUE;
}

void EST_Viterbi_Decoder::turn_on_trace()
{
    trace = TRUE;
}


void EST_Viterbi_Decoder::search(void)
{
    // Searches for the best path
    EST_VTPoint *p;
    EST_VTPath *t,*np;
    EST_VTCandidate *c;
    int i=0;

    double best_score=0.0,score_cutoff=0.0;
    double best_candidate_score=0.0,candidate_cutoff=0;
    int dcount,pcount;
    int cand_count=0, cands_considered=0;

    for (p=timeline; p->next != 0; p=p->next)
    {   // For each point in time
	// Find the candidates
	p->cands  = (*user_clist)(p->s,f);  // P(S|B)
	if (do_pruning)
	    prune_initialize(p,best_score,best_candidate_score,
			     score_cutoff,candidate_cutoff,
			     cand_count);
	if (num_states != 0)  // true viterbi -- optimized for states
	{
	    if (num_states == -1)  // special case, dynamic state size
		init_dynamic_states(p->next,p->cands);

	    cands_considered=0; // moved by simonk
	    for (i=0; i<p->num_states; i++)
	    {   // for each path up to here
		//cands_considered=0;
		if (((p == timeline) && i==0) || (p->st_paths[i] != 0))
		    for (c=p->cands; c != 0; c=c->next)
		    {
			// for each new candidate
			// candidate pruning (a.k.a. observation pruning)
			if(!do_pruning ||
			   betterthan(c->score,candidate_cutoff))
			{
			    cands_considered++;
			    // Find path extension costs
			    np = (*user_npath)(p->st_paths[i],c,f);
			    if (debug) debug_output_1(p,c,np,i);
			    if (do_pruning && betterthan(np->score,best_score))
			    {
				best_score = np->score;
				if(big_is_good)
				    score_cutoff = best_score
					- overall_path_pruning_envelope_width;
				else
				    score_cutoff = best_score
					+ overall_path_pruning_envelope_width;
			    }
			    // can prune here, although score_cutoff will
			    // be generally too generous at this point.
			    // It's unclear whether this pruning takes more
			    // time than it saves !
			    if(!do_pruning||betterthan(np->score,score_cutoff))
				vit_add_paths(p->next,np);
			    else
				delete np;
			}
		    }
	    }

	    if (do_pruning)
	    {
		if(big_is_good)
		    score_cutoff =
			best_score - overall_path_pruning_envelope_width;
		else
		    score_cutoff =
			best_score + overall_path_pruning_envelope_width;
		if(trace)
		{
		    cerr << "Considered " << cands_considered << " of ";
		    cerr << cand_count*p->num_states << " candidate paths" << endl;
		    cerr << "FRAME: best score " << best_score;
		    cerr << "  score cutoff " << score_cutoff << endl;
		    cerr << "       best candidate score " << best_candidate_score;
		    cerr << "  candidate cutoff " << candidate_cutoff << endl;
		}

		dcount=0; pcount=0;
		for (i=0; i<p->next->num_states; i++)
		    if(p->next->st_paths[i] != 0)
		    {
			pcount++;
			if(!betterthan(p->next->st_paths[i]->score,
				       score_cutoff))
			{
			    delete p->next->st_paths[i];
			    p->next->st_paths[i] = 0;
			    dcount++;
			}
		    }
		if(trace)
		{
		    cerr << "Pruned " << dcount << " of " << pcount;
		    cerr << " paths" << endl << endl;
		}
	    }
	}
	else                  // general beam search
	    for (t=p->paths; t != 0; t=t->next)
	    {   // for each path up to here
		for (c=p->cands; c != 0; c=c->next)
		{   // for each new candidate
		    np = (*user_npath)(t,c,f);
		    add_path(p->next,np);      // be a little cleverer
		}
	    }
	if (debug) fprintf(stdout,"\n");
    }

}

void EST_Viterbi_Decoder::
     prune_initialize(EST_VTPoint *p,
		      double &best_score, double &best_candidate_score,
		      double &score_cutoff, double &candidate_cutoff,
		      int &cand_count)
{   // Find best candidate, count them and set some vars.
    EST_VTCandidate *c;
    if (big_is_good)
    {
	best_score = -vit_a_big_number;
	best_candidate_score = -vit_a_big_number;
	score_cutoff = -vit_a_big_number;
	candidate_cutoff = - candidate_pruning_envelope_width;
    }
    else
    {
	best_candidate_score = vit_a_big_number;
	best_score = vit_a_big_number;
	score_cutoff = vit_a_big_number;
	candidate_cutoff = candidate_pruning_envelope_width;
    }

    for (cand_count=0,c=p->cands; c; c=c->next,cand_count++)
	if (betterthan(c->score,best_candidate_score))
	    best_candidate_score = c->score;
    candidate_cutoff += best_candidate_score;
}

static void debug_output_1(EST_VTPoint *p,EST_VTCandidate *c,
			   EST_VTPath *np,int i)
{
    printf("%s: ",(const char *)p->s->name());
    cout << c->name;
    printf(" %1.3f B %1.3f (%1.3f) st %d s %1.3f ",
	   np->c->score,
	   (np->c->score==0 ? 0 :
	    ((float)np->f("lscore"))/np->c->score),
	   (float)np->f("lscore"),np->state,
	   np->score);
    if (p->st_paths[i] == 0)
	cout << "(I)" << endl;
    else
	cout << p->st_paths[i]->c->name << endl;
}

void EST_Viterbi_Decoder::vit_add_paths(EST_VTPoint *pt, EST_VTPath *np)
{
    // Add set of paths
    EST_VTPath *p,*next_p;

    for (p=np; p != 0; p=next_p)
    {
	next_p = p->next;  // need to save this as p could be deleted
	vit_add_path(pt,p);
    }

}
void EST_Viterbi_Decoder::vit_add_path(EST_VTPoint *p, EST_VTPath *np)
{
    // We are doing true viterbi so we need only keep the best
    // path for each state.  This means we can index into the
    // array of paths ending at P and only keep np if its score
    // is better than any other path of that state

    if ((np->state < 0) || (np->state > p->num_states))
    {
	cerr << "EST_Viterbi: state too big (" << np->state << ")" << endl;
    }
    else if ((p->st_paths[np->state] == 0) ||
	     (betterthan(np->score,p->st_paths[np->state]->score)))
    {
	// This new one is better so replace it
	if (p->st_paths[np->state] != 0)
	    delete p->st_paths[np->state];
	p->st_paths[np->state] = np;
    }
    else
	delete np;
}

bool EST_Viterbi_Decoder::vit_prune_path(double path_score, double score_cutoff)
{

    // a bit of a simple function !!

    // if it falls below cutoff, then prune point
    // typically will only be one path at this point anyway (Viterbi)
    if(!betterthan(path_score,score_cutoff))
	return TRUE;

    return FALSE;
}


void EST_Viterbi_Decoder::add_path(EST_VTPoint *p, EST_VTPath *np)
{
    // Add new path to point,  Prunes as appropriate to strategy
    EST_VTPath *pp;

    if (p == 0)
	cerr << "Viterbi: tried to add path to NULL point\n";
    else
    {
	if ((beam_width == 0) ||            // Add if no beam restrictions or
	    (p->num_paths < beam_width) ||  //        beam not filled  or
	    (betterthan(np->score,p->paths->score)))//this is better than best
//	    (np->score > p->paths->score))  //        this is better than best
	{
	    EST_VTPath **l = &p->paths;
	    EST_VTPath *a;

	    for (a=p->paths; /* scary */ ; a=a->next)
	    {
		if ((a == 0) || (betterthan(a->score,np->score)))
		{   // Add new path here
		    np->next = a;
		    *l = np;
		    p->num_paths++;
		    break;
		}
		l = &a->next;
	    }
	    // Prune the first one of the list
	    if ((beam_width > 0) &&
		(p->num_paths > beam_width))
	    {
		pp = p->paths;
		p->paths = pp->next;
		pp->next = 0;
		p->num_paths--;
		delete pp;
	    }
	}
	else
	{
	    delete np;  // failed to make it
	}
    }

}

EST_VTCandidate *EST_Viterbi_Decoder::add_cand_prune(EST_VTCandidate *newcand,
					     EST_VTCandidate *allcands)
{
    // Add newcand to allcand, in score order and prune it to
    // cand_width, delete newcand if its not good enough
    EST_VTCandidate *newlist = allcands;
    EST_VTCandidate *pp;
    int numcands;

    if (allcands == 0)
	numcands = 0;
    else
	numcands = allcands->pos;

    if ((cand_width == 0) ||	// Add if no candbeam restrictions or
	(numcands < cand_width) || //        candbeam not filled  or
	(betterthan(newcand->score,allcands->score))) //this better than best
    {
	EST_VTCandidate **l = &newlist;
	EST_VTCandidate *a;

	for (a=newlist;		/* scary */ ; a=a->next)
	{
	    if ((a == 0) || (betterthan(a->score,newcand->score)))
	    {			// Add new path here
		newcand->next = a;
		*l = newcand;
		numcands++;
		break;
	    }
	    l = &a->next;
	}
	// Prune the first one off the list
	if ((cand_width > 0) &&
	    (numcands > cand_width))
	{
	    pp = newlist;
	    newlist = pp->next;
	    pp->next = 0;
	    numcands--;
	    delete pp;
	}
    }
    else
	delete newcand;		// failed to make it

    newlist->pos = numcands;
    return newlist;
}

bool EST_Viterbi_Decoder::result(const EST_String &n)
{
    // Finds best path through the search space (previously searched)
    // Adds field to the EST_Items, named n, with chosen value
    EST_VTPath *p;

    if ((timeline == 0) || (timeline->next == 0))
	return TRUE;  // it's an empty list so it has succeeded
    p = find_best_end();
    if (p == 0)
	return FALSE; // there isn't any answer

    for (; p != 0; p=p->from)
    {
	// Hmm need the original EST_Item
	if (p->c != 0)
	{
	    p->c->s->set_val(n,p->c->name);
	    p->c->s->set(n+"_score",p->f.F("lscore",0.0));
	}
    }
    return TRUE;
}

bool EST_Viterbi_Decoder::result( EST_VTPath **bestPathEnd )
{
  // Finds best path through the search space (previously searched)
  *bestPathEnd = 0;

  if ((timeline == 0) || (timeline->next == 0))
    return TRUE;  // it's an empty list so it has succeeded

  *bestPathEnd = find_best_end();

  if (*bestPathEnd == 0)
    return FALSE; // there isn't any answer

  return TRUE;
}


void EST_Viterbi_Decoder::copy_feature(const EST_String &n)
{
    // Copy feature from path to related stream
    EST_VTPath *p;

    p = find_best_end();
    if(p == 0)
	return;

    for (; p != 0; p=p->from)
    {
	// Hmm need the original EST_Item
	if ((p->c != 0) && (p->f.present(n)))
	    p->c->s->set_val(n,p->f(n));
    }
}

EST_VTPath *EST_Viterbi_Decoder::find_best_end() const
{
    EST_VTPoint *t;
    double best,worst;
    EST_VTPath *p, *best_p=0;
    int i;
    // I'd like to use HUGE_VAL or FLT_MAX for this but its not portable
    // (on alphas)

    if (big_is_good)
	worst = -vit_a_big_number;	// worst possible;
    else
	worst = vit_a_big_number;	// worst possible;
    best = worst;		// hopefully we can find something better;

    for (i=0,t=timeline; t->next != 0; t=t->next,i++)
    {
	if ((t->num_states == 0) && (t->num_paths == 0))
	{
	    cerr << "No paths at frame " << i << " " << t->s->name() << endl;
	    return 0;
	}
    }

    if (num_states != 0)
	for (i=0; i<t->num_states; i++)
	{
	    if ((t->st_paths[i] != 0) &&
		(betterthan(t->st_paths[i]->score,best)))
	    {
		best = t->st_paths[i]->score;
		best_p = t->st_paths[i];
	    }
	}
    else
	for (p=t->paths; p!=0; p=p->next)
	{
	    if (betterthan(p->score,best))
	    {
		best = p->score;
		best_p = p;
	    }
	}


    if (debug)
    {
	if (best == worst)
	    cerr << "Failed to find path" << endl;
	cout << "Best score is " << best << endl;
    }

    return best_p;
}