speech-tools/scripts/pm.prl

#!__PERL__ -w


 ###########################################################################
 ##                                                                       ##
 ##                Centre for Speech Technology Research                  ##
 ##                     University of Edinburgh, UK                       ##
 ##                         Copyright (c) 1996                            ##
 ##                        All Rights Reserved.                           ##
 ##                                                                       ##
 ##  Permission is hereby granted, free of charge, to use and distribute  ##
 ##  this software and its documentation without restriction, including   ##
 ##  without limitation the rights to use, copy, modify, merge, publish,  ##
 ##  distribute, sublicense, and/or sell copies of this work, and to      ##
 ##  permit persons to whom this work is furnished to do so, subject to   ##
 ##  the following conditions:                                            ##
 ##   1. The code must retain the above copyright notice, this list of    ##
 ##      conditions and the following disclaimer.                         ##
 ##   2. Any modifications must be clearly marked as such.                ##
 ##   3. Original authors' names are not deleted.                         ##
 ##   4. The authors' names are not used to endorse or promote products   ##
 ##      derived from this software without specific prior written        ##
 ##      permission.                                                      ##
 ##                                                                       ##
 ##  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ##
 ##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
 ##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
 ##  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ##
 ##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
 ##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
 ##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
 ##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
 ##  THIS SOFTWARE.                                                       ##
 ##                                                                       ##
 ###########################################################################
 ##                                                                       ##
 ##                   Author: Richard caley (rjc@cstr.ed.ac.uk)           ##
 ##                     Date: June 1997                                   ##
 ## --------------------------------------------------------------------- ##
 ## Simple pitchmarking script. Not very clever, but produces             ##
 ## reasonable pitchmarks given good luck, a fair wind and an R in the    ##
 ## month.                                                                ##
 ##                                                                       ##
 ###########################################################################

sub useage
{
    print <<END;

    Useage: pm [-f] [-i] WAVE_FILE_NAME PM_FILE_NAME
	-f	Remake LPC analysis even if the files already exist.
	-i	Invert the waveform first.
	-d	Write intermediate results to help finding
		good parameter values

END
}

#__SHARED_SETUP__

while ($#ARGV>=0)
    {
    if ($ARGV[0] eq "-f")
	{
	$forcelpc=1;
	shift @ARGV;
	}
    elsif ($ARGV[0] eq "-i")
	{
	$invert=1;
	shift @ARGV;
	}
    elsif ($ARGV[0] eq "-d")
	{
	$debug=1;
	shift @ARGV;
	}
    else
	{
	last;
	}
    }

if ($#ARGV != 1)
    {
    useage();
    exit(1);
    }

$name=$ARGV[0];
$pmfile=$ARGV[1];

$name =~ m%([^/.]*)(\..*)?%;
$root = $1;

run("ch_wave $name -o $root.raw -otype raw");

open(W, "$root.raw") || die "can't open $ARGV[0]";
if ( $forcelpc || ! -f "${root}_res.raw")
    {
    run("lpc_analysis $name -o ${root}_slpc.esps -otype esps -r ${root}_res.nist -rtype nist -shift 0.005 -length 0.005 -window rectangle");
    run("ch_wave ${root}_res.nist  -o ${root}_res.raw -otype raw");
    }
open(R, "${root}_res.raw") || die "can't open ${root}_res.raw";
open(PM, ">$pmfile") || die "can't write $root.pm";

if ($debug)
    {
    open(RMS, ">${root}_rrms.ascii") || die "can't write ${root}_rrms.ascii";
    open(P, ">${root}_peak.ascii") || die "can't write ${root}_peak.ascii";
    }

print "reading\n";

while(1)
    {
    $n=sysread(W, $buf, 2000)/2;
    push(@samps, unpack("s$n", $buf));

    last
	if $n < 1000;
    }

if ($invert)
    {
    print "inverting\n";
    for($i=0; $i<= $#samps; $i++)
	{
	$samps[$i] = - $samps[$i];
	}
    }

while(1)
    {
    $n=sysread(R, $buf, 2000)/2;
    push(@resid, unpack("s$n", $buf));

    last
	if $n < 1000;
    }

print STDERR $#resid+1, " samples\n";

print "calculating\n";
for($i=0; $i <= $#samps; $i+=16)
    {
    $rsum = 0;
    $sum = 0;
    $max = 0;
    $zeroc = 0;
    $n = 0;
    for($j=$i-20; $j < $i+20; $j++)
	{
	next
	    if $j < 0 || $j >= $#samps;
	$max = $samps[$j]
	    if $samps[$j] > $max;
	$zeroc++
	    if $j > 0 && $samps[$j] * $samps[$j+1] < 0;
	$sum += $samps[$j] * $samps[$j];
	$n++;
	next
	    if $j < 0 || $j >= $#resid;
	$rsum += $resid[$j] * $resid[$j];
	}
    push(@max, $max);
    push(@rms, sqrt($sum/$n));
    push(@rrms, sqrt($rsum/$n));
    push(@zeroc, $zeroc);
    print RMS $rrms[$#rrms] * 10, "\n"
	if $debug;
    }

if ($debug)
    {
    close(RMS);
    run("ch_track ${root}_rrms.ascii -o ${root}_rrms.esps -otype esps -s 0.001");
    unlink "${root}_rrms.ascii";
    print "residual RMS track in ${root}_rrms.esps\n";
    }

print "finding\n";
for($i=0; $i <= $#rms; $i++)
    {
    if ($i>1 && $i < $#rms && $rrms[$i-1] < $rrms[$i] && $rrms[$i] > $rrms[$i+1] &&
	$zeroc[$i] < 15 &&
	$rms[$i] > 500)
	{
	push(@peaks, 2000);
	}
    else
	{
	push(@peaks, 0);
	}
    }

print "picking\n";
unshift(@pm, 0);

for($i=0; $i <= $#peaks; $i++)
    {
    print P "$peaks[$i]\n"
	if $debug;

    if ($peaks[$i] > 0)
	{
	$pm = find_peak(\@samps, ($i-1)*16, ($i+2)*16);

	push(@pm, $pm);
	}
    }

push(@pm, $#samps);

if ($debug)
    {
    close(P);
    run("ch_track ${root}_peak.ascii -o ${root}_peak.esps -otype esps -s 0.001");
    unlink "${root}_peak.ascii";
    print "potential peaks in ${root}_peak.esps\n";
    }

print "output\n";
$last_pm=0;
for($i=1; $i <= $#pm; $i++)
   {
   next
       if $pm[$i] - $last_pm < 50;
   if ($pm[$i] - $last_pm > 250)
       {
       $extra = ($pm[$i] - $last_pm) % 100;
       $num = int(($pm[$i] - $last_pm) / 100);
       $chunk = 160+$extra/$num;
       for($j=$last_pm+$chunk; $j < $pm[$i] - 50; $j += $chunk)
	   {
	   print PM ($j)/16.0, "\n";
	   }
       }
   print PM $pm[$i]/16.0, "\n";
   $last_pm = $pm[$i];
   }

sub find_peak
{
    my ($samps, $from, $to) = @_;
    my ($center) = ($to+$from)/2;
    my ($peak) = $from;
    my ($pwval) = $$samps[$peak]*(1-abs($from-$center)/32);
    my ($i);

    for($i=$from; $i < $to; $i++)
	{
	my ($wval) = $$samps[$i]*(1-abs($from-$center)/32);
	if ($wval > $pwval)
	    {
	    $peak = $i;
	    $pwval = $wval;
	    }
	}

    $peak;
}

sub run

{
    my ($command) = @_;

    print "\ndoing:\n    $command\n\n";
    system $command;
}