552 lines
17 KiB
Scheme
552 lines
17 KiB
Scheme
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; ;;
|
|
;;; Centre for Speech Technology Research ;;
|
|
;;; University of Edinburgh, UK ;;
|
|
;;; Copyright (c) 2002 ;;
|
|
;;; All Rights Reserved. ;;
|
|
;;; ;;
|
|
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
|
;;; this software and its documentation without restriction, including ;;
|
|
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
|
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
|
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
|
;;; the following conditions: ;;
|
|
;;; 1. The code must retain the above copyright notice, this list of ;;
|
|
;;; conditions and the following disclaimer. ;;
|
|
;;; 2. Any modifications must be clearly marked as such. ;;
|
|
;;; 3. Original authors' names are not deleted. ;;
|
|
;;; 4. The authors' names are not used to endorse or promote products ;;
|
|
;;; derived from this software without specific prior written ;;
|
|
;;; permission. ;;
|
|
;;; ;;
|
|
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
|
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
|
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
|
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
|
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
|
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
|
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
|
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
|
;;; THIS SOFTWARE. ;;
|
|
;;; ;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Author: Rob Clark
|
|
;;; Date: July 2002
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;
|
|
;; Sets up the current voice to synthesise from APML.
|
|
;;
|
|
;;
|
|
|
|
(require 'apml_f2bf0lr)
|
|
(require 'apml_kaldurtreeZ)
|
|
|
|
;; Default pitch settings (if unspecified in current voice.)
|
|
|
|
(defvar apml_default_pitch_mean 170 )
|
|
(defvar apml_default_pitch_standard_deviation 34 )
|
|
|
|
;; apml sythesis wrappers.
|
|
|
|
(define (apml_client_synth apml)
|
|
"(apml_client_synth apml)
|
|
Synthesise apml and return waveform(s) to client."
|
|
(utt.send.wave.client (apml_synth apml)))
|
|
|
|
(define (apml_synth apml)
|
|
"(apml_synth xml)
|
|
Synthesis an apml string."
|
|
(let ((tmpfile (make_tmp_filename))
|
|
utt)
|
|
(string_to_file tmpfile apml)
|
|
(set! utt (apml_file_synth tmpfile))
|
|
(delete-file tmpfile)
|
|
utt))
|
|
|
|
(define (apml_file_synth filename)
|
|
"(apml_file_synth filename)
|
|
Synthesis an apml file."
|
|
(let ((utt (Utterance Tokens nil)))
|
|
(utt.load utt filename)
|
|
(utt.synth utt)))
|
|
|
|
(define (string_to_file file s)
|
|
"(string_to_file file string)
|
|
Write string to file."
|
|
(let ((fd))
|
|
(set! fd (fopen file "wb"))
|
|
(format fd "%s" s)
|
|
(fclose fd)))
|
|
|
|
|
|
;;;
|
|
;;; Phrasing.
|
|
;;;
|
|
|
|
;; phrasing CART.
|
|
;
|
|
; It has been decided that by default, only punctuation should affect
|
|
; phrasing (and subsequently pauses)
|
|
;
|
|
(set! apml_phrase_tree
|
|
'
|
|
((lisp_apml_punc in ("?" "." ":")) ; big punctuation
|
|
((BB))
|
|
((lisp_apml_punc in ("'" "\"" "," ";")) ; else little punctuation
|
|
((B))
|
|
((lisp_apml_last_word is 1)
|
|
((BB)) ; need a BB at the end!
|
|
((NB)))))) ; else nothing
|
|
|
|
;; feature functions for phrasing
|
|
(define (apml_punc word)
|
|
(item.feat (item.relation.parent word 'Token) 'punc))
|
|
|
|
(define (apml_last_word word)
|
|
(if (item.next word)
|
|
"0" "1"))
|
|
|
|
|
|
;;;
|
|
;;; Pauses
|
|
;;;
|
|
|
|
;; feature functions for pauses
|
|
(define (apml_is_pause word)
|
|
(if (item.relation (item.relation.parent word 'Token) 'Pause)
|
|
t
|
|
nil))
|
|
|
|
(define (apml_pause word)
|
|
(if (item.relation word 'Pause)
|
|
(item.feat (item.relation.parent (item.relation.parent word 'Token) 'Pause) "sec")
|
|
0))
|
|
|
|
(define (Apml_Pauses utt)
|
|
"(Pauses UTT)
|
|
Predict pause insertion for apml."
|
|
(let ((words (utt.relation.items utt 'Word)) lastword tpname)
|
|
(if words
|
|
(begin
|
|
(insert_initial_pause utt) ;; always have a start pause
|
|
(set! lastword (car (last words)))
|
|
(mapcar
|
|
(lambda (w)
|
|
(let ((pbreak (item.feat w "pbreak"))
|
|
(emph (item.feat w "R:Token.parent.EMPH")))
|
|
(cond
|
|
((apml_is_pause w)
|
|
(insert_pause utt w))
|
|
((or (string-equal "B" pbreak)
|
|
(string-equal "BB" pbreak))
|
|
(insert_pause utt w))
|
|
((equal? w lastword)
|
|
(insert_pause utt w)))))
|
|
words)
|
|
;; The embarassing bit. Remove any words labelled as punc or fpunc
|
|
(mapcar
|
|
(lambda (w)
|
|
(let ((pos (item.feat w "pos")))
|
|
(if (or (string-equal "punc" pos)
|
|
(string-equal "fpunc" pos))
|
|
(let ((pbreak (item.feat w "pbreak"))
|
|
(wp (item.relation w 'Phrase)))
|
|
(if (and (string-matches pbreak "BB?")
|
|
(item.relation.prev w 'Word))
|
|
(item.set_feat
|
|
(item.relation.prev w 'Word) "pbreak" pbreak))
|
|
(item.relation.remove w 'Word)
|
|
;; can't refer to w as we've just deleted it
|
|
(item.relation.remove wp 'Phrase)))))
|
|
words)))
|
|
utt))
|
|
|
|
|
|
|
|
;;;
|
|
;;; Intonation.
|
|
;;;
|
|
|
|
;; Accent prediction (well transfer really).
|
|
;;
|
|
;; We treat L+H* L-H% on a single syllable as a special case.
|
|
|
|
(set! apml_accent_cart
|
|
'
|
|
((lisp_apml_accent is "Hstar")
|
|
((H*))
|
|
((lisp_apml_accent is "Lstar")
|
|
((L*))
|
|
((lisp_apml_LHLH is "LHLH")
|
|
((L+H*L-H%))
|
|
((lisp_apml_accent is "LplusHstar")
|
|
((L+H*))
|
|
((lisp_apml_accent is "LstarplusH")
|
|
((L*+H))
|
|
((NONE))))))))
|
|
|
|
(set! apml_boundary_cart
|
|
'
|
|
((lisp_apml_boundary is "LL")
|
|
((L-L%))
|
|
((lisp_apml_LHLH is "LHLH")
|
|
((NONE)) ; this is dealt with by the accent feature
|
|
((lisp_apml_boundary is "LH")
|
|
((L-H%))
|
|
((lisp_apml_boundary is "HH")
|
|
((H-H%))
|
|
((lisp_apml_boundary is "HL")
|
|
((H-L%))
|
|
((NONE))))))))
|
|
|
|
;; feature functions.
|
|
(define (apml_accent syl)
|
|
(let ((token (item.relation.parent (item.relation.parent syl 'SylStructure) 'Token)))
|
|
(if (and (eq (item.feat syl 'stress) 1)
|
|
(item.relation.parent token 'Emphasis))
|
|
(item.feat (item.relation.parent token 'Emphasis) 'x-pitchaccent)
|
|
0)))
|
|
|
|
(define (apml_boundary syl)
|
|
(let ((token (item.relation.parent (item.relation.parent syl 'SylStructure) 'Token)))
|
|
(if (and (> (item.feat syl 'syl_break) 0)
|
|
(item.relation.parent token 'Boundary))
|
|
(item.feat (item.relation.parent token 'Boundary) 'type)
|
|
0)))
|
|
|
|
(define (apml_LHLH syl)
|
|
(let ((accent (apml_accent syl))
|
|
(boundary (apml_boundary syl)))
|
|
(if (and (string-equal accent "LplusHstar")
|
|
(string-equal boundary "LH"))
|
|
"LHLH"
|
|
0)))
|
|
|
|
|
|
(define (apml_seg_is_LHLH_vowel seg)
|
|
(if (and (string-equal (apml_LHLH (item.relation.parent seg 'SylStructure))
|
|
"LHLH")
|
|
(string-equal (item.feat seg 'ph_vc) "+"))
|
|
"LHLH"
|
|
0))
|
|
|
|
|
|
;;;; feature functions:
|
|
|
|
(define (apml_tgtype syl)
|
|
(let ((l (apml_boundl (item.relation.parent syl 'SylStructure)))
|
|
(r (apml_boundr (item.relation.parent syl 'SylStructure))))
|
|
(if (eq (item.feat syl 'accented) 0)
|
|
0 ; this is a quirk related to the way the models were trained
|
|
(cond
|
|
((eq l 0)
|
|
1)
|
|
((eq r 1)
|
|
3)
|
|
(t 2)))))
|
|
|
|
|
|
(define (apml_iecount syl)
|
|
(if (eq (item.feat syl 'accented) 0)
|
|
0 ; this is a quirk related to the way the models were trained
|
|
(+ (item.feat syl 'asyl_in) 1)))
|
|
|
|
;; suport functions.
|
|
(define (apml_boundl word)
|
|
"(apml_boundl word)
|
|
Number of boundaries in this performative to the left of this word."
|
|
(let ((w (item.prev word))
|
|
(c 0))
|
|
(while (and w (apml_same_p w word))
|
|
(if (item.relation.parent (item.relation.parent w 'Token) 'Boundary)
|
|
(set! c (+ c 1)))
|
|
(set! w (item.prev w)))
|
|
c))
|
|
|
|
(define (apml_boundr word)
|
|
"(apml_boundr word)
|
|
Number of boundaries in this performative to the right of this word."
|
|
(let ((w word)
|
|
(c 0))
|
|
(while (and w (apml_same_p w word))
|
|
(if (item.relation.parent (item.relation.parent w 'Token) 'Boundary)
|
|
(set! c (+ c 1)))
|
|
(set! w (item.next w)))
|
|
c))
|
|
|
|
(define (apml_same_p w1 w2)
|
|
"(apml_same_p w1 w2)
|
|
Are these two words in the same performative?"
|
|
(let ((p1 (item.relation.parent (item.relation.parent w1 'Token) 'SemStructure))
|
|
(p2 (item.relation.parent (item.relation.parent w1 'Token) 'SemStructure)))
|
|
(if (and (item.parent p1) (item.parent p2)) ; not true if theme/rheme omitted.
|
|
(equal? (item.parent p1) (item.parent p2))
|
|
(equal? p1 p2))))
|
|
|
|
;;;
|
|
;;; segment timings
|
|
;;;
|
|
|
|
(define (apml_seg_times utt)
|
|
"(apml_seg_times utt)
|
|
Output the segment timings for an apml utterance."
|
|
(let ((segs (utt.relation.items utt 'Segment)))
|
|
(mapcar
|
|
(lambda (x)
|
|
(format t "%s %s\n" (item.name x) (item.feat x 'end)))
|
|
segs)
|
|
t))
|
|
|
|
;;;
|
|
;;; Additional functions for f0model.
|
|
;;;
|
|
|
|
|
|
(define (find_hstar_left syl)
|
|
"(find_hstar_left syl)
|
|
If the closest accent or boundary to the left is H* return how many syllables away it is. Returns 0 if nearest accent is not H*"
|
|
(let ((count 0))
|
|
;; if this syllable has a pitch event
|
|
(if (or (not (string-equal (item.feat syl 'tobi_accent) "NONE"))
|
|
(not (string-equal (item.feat syl 'tobi_endtone) "NONE")))
|
|
0)
|
|
(while (and syl
|
|
(string-equal (item.feat syl 'tobi_accent) "NONE")
|
|
(string-equal (item.feat syl 'tobi_endtone) "NONE"))
|
|
(set! count (+ count 1))
|
|
(set! syl (item.prev syl)))
|
|
(cond
|
|
;; run out of syllables before finding accent
|
|
((null syl)
|
|
0)
|
|
((string-equal (item.feat syl 'tobi_accent) "H*")
|
|
count)
|
|
(t 0))))
|
|
|
|
(define (find_ll_right syl)
|
|
"(find_ll_right syl)
|
|
If the closest accent or boundary to the right is L-L% return how many syllables away it is. Returns 0 if nearest is not L-L%."
|
|
(let ((count 0))
|
|
;; if this syllable has a pitch event
|
|
(if (or (not (string-equal (item.feat syl 'tobi_accent) "NONE"))
|
|
(not (string-equal (item.feat syl 'tobi_endtone) "NONE")))
|
|
0)
|
|
(while (and syl
|
|
(string-equal (item.feat syl 'tobi_accent) "NONE")
|
|
(string-equal (item.feat syl 'tobi_endtone) "NONE"))
|
|
(set! count (+ count 1))
|
|
(set! syl (item.next syl)))
|
|
(cond
|
|
;; run out of syllables before finding boundary
|
|
((null syl)
|
|
0)
|
|
((string-equal (item.feat syl 'tobi_endtone) "L-L%")
|
|
count)
|
|
(t 0))))
|
|
|
|
(define (l_spread syl)
|
|
"(l_spread syl)
|
|
Proportion of pitch lowering required due to L- spreading backwards."
|
|
(let ((l (find_hstar_left syl))
|
|
(r (find_ll_right syl)))
|
|
(cond
|
|
((or (eq l 0)
|
|
(eq r 0))
|
|
0)
|
|
(t
|
|
(/ r (- (+ l r) 1))))))
|
|
|
|
|
|
;;;
|
|
;;; Debuging and other useful stuff.
|
|
;;;
|
|
|
|
|
|
|
|
(define (apml_print_semstruct utt)
|
|
"(apml_print_semstruct utt)
|
|
Pretty print APML semantic structure."
|
|
(let ((i (utt.relation.first utt 'SemStructure)))
|
|
(while (not (null i))
|
|
(apml_pss_item 0 i)
|
|
(apml_pss_daughters 1 (item.daughters i))
|
|
(set! i (item.next i)))))
|
|
|
|
(define (apml_pss_daughters depth list)
|
|
(mapcar
|
|
(lambda (x)
|
|
(apml_pss_item depth x)
|
|
(apml_pss_daughters (+ depth 1) (item.daughters x))
|
|
)
|
|
list))
|
|
|
|
|
|
(define (apml_pss_item depth item)
|
|
(let ((c 0))
|
|
(while (< c depth)
|
|
(format t " ")
|
|
(set! c (+ c 1)))
|
|
(format t "%s\n" (item.name item))))
|
|
|
|
|
|
(define (apml_print_words utt)
|
|
"(apml_print_words utt)
|
|
Pretty print APML words with associated accents."
|
|
(mapcar
|
|
(lambda (x)
|
|
(format t "%s (" (item.name x))
|
|
(apml_pww_accent x)
|
|
(apml_pww_boundary x)
|
|
(apml_pww_pause x)
|
|
(format t ")\n"))
|
|
(utt.relation.items utt 'Word))
|
|
t)
|
|
|
|
(define (apml_pww_accent item)
|
|
(let ((p (item.relation.parent (item.relation.parent item 'Token) 'Emphasis)))
|
|
(if p (apml_ppw_list (item.features p)))))
|
|
|
|
(define (apml_pww_boundary item)
|
|
(let ((p (item.relation.parent (item.relation.parent item 'Token) 'Boundary)))
|
|
(if p (apml_ppw_list (item.features p)))))
|
|
|
|
(define (apml_pww_pause item)
|
|
(let ((p (item.relation.parent (item.relation.parent item 'Token) 'Pause)))
|
|
(if p (apml_ppw_list (item.features p)))))
|
|
|
|
(define (apml_ppw_list l)
|
|
(mapcar
|
|
(lambda (x)
|
|
(format t " %s" x))
|
|
(flatten l)))
|
|
|
|
|
|
(define (apml_print_sylstructure utt filename)
|
|
"(apml_print_sylstructure utt filename)
|
|
Pretty print APML syllable structure. Filename t for stdout"
|
|
(let (fd)
|
|
(if (not (eq? filename t))
|
|
(set! fd (fopen filename "wb"))
|
|
(set! fd t))
|
|
(mapcar
|
|
(lambda (x)
|
|
(format fd "%s\n" (item.name x))
|
|
(apml_psyl fd x))
|
|
(utt.relation.items utt 'Word))
|
|
t))
|
|
|
|
(define (apml_psyl fd word)
|
|
(mapcar
|
|
(lambda (x)
|
|
(apml_psegs fd x)
|
|
(if (eq (item.feat x 'stress) 1)
|
|
(format fd " (1)"))
|
|
(if (item.relation.daughter1 x 'Intonation)
|
|
(begin
|
|
(let ((ie (item.relation.daughter1 x 'Intonation)))
|
|
(format fd " [")
|
|
(while ie
|
|
(format fd "%s" (item.name ie))
|
|
(set! ie (item.next ie))
|
|
(if ie (format t " ")))
|
|
(format fd "]"))))
|
|
(format fd "\n"))
|
|
(item.daughters (item.relation word 'SylStructure))))
|
|
|
|
(define (apml_psegs fd syl)
|
|
(let ((segs (item.daughters syl)))
|
|
(format fd " ")
|
|
(while segs
|
|
(format fd "%s" (item.name (car segs)))
|
|
(if (cdr segs)
|
|
(format fd "."))
|
|
(set! segs (cdr segs)))))
|
|
|
|
|
|
(define (apml_get_lr_params)
|
|
(let ((m 0)
|
|
(s 0))
|
|
(if (or (equal? (Parameter.get 'Int_Target_Method) Int_Targets_LR)
|
|
(equal? (Parameter.get 'Int_Target_Method) Int_Targets_5_LR))
|
|
(begin
|
|
(set! m (car (cdr (car int_lr_params))))
|
|
(set! s (car (cdr (car (cdr int_lr_params))))))
|
|
(begin
|
|
(set! m apml_default_pitch_mean)
|
|
(set! s apml_default_pitch_standard_deviation)))
|
|
(list m s)))
|
|
|
|
|
|
|
|
|
|
(define (apml_initialise)
|
|
"(apml_initialise)
|
|
Set up the current voice for apml use."
|
|
(if (not (string-matches current-voice ".*multisyn.*")) ; nothing if multisyn
|
|
(cond
|
|
((or (string-equal (Parameter.get 'Language) "americanenglish")
|
|
(string-equal (Parameter.get 'Language) "britishenglish"))
|
|
(begin
|
|
(format t "Initialising APML for English.\n")
|
|
;; Phrasing.
|
|
(Parameter.set 'Phrase_Method 'cart_tree)
|
|
(set! phrase_cart_tree apml_phrase_tree)
|
|
;; Pauses.
|
|
;;(set! duration_cart_tree apml_kal_duration_cart_tree)
|
|
;;(set! duration_ph_info apml_kal_durs)
|
|
;;(Parameter.set 'Pause_Method Apml_Pauses)
|
|
;; Lexicon.
|
|
;;;; We now assume the lexicon you have already set is suitable,
|
|
;;;; You probably want to ensure this is "apmlcmu" or "unilex"
|
|
;;(if (not (member_string "apmlcmu" (lex.list)))
|
|
;; (load (path-append lexdir "apmlcmu/apmlcmulex.scm")))
|
|
;;(lex.select "apmlcmu")
|
|
;; Add other lex entries here:
|
|
;;(lex.add.entry '("minerals" nil (((m ih n) 1) ((er) 0) ((ax l z) 0))))
|
|
;;(lex.add.entry '("fibre" nil (((f ay b) 1) ((er) 0))))
|
|
;;(lex.add.entry '("dont" v (((d ow n t) 1))))
|
|
;;(lex.add.entry '("pectoris" nil (((p eh k) 2) ((t ao r) 1) ((ih s) 0))))
|
|
;;(lex.add.entry '("sideeffects" nil (((s ay d) 1) ((ax f) 0) ((eh k t s) 2))))
|
|
|
|
;; Intonation events.
|
|
(set! int_accent_cart_tree apml_accent_cart)
|
|
(set! int_tone_cart_tree apml_boundary_cart)
|
|
(Parameter.set 'Int_Method Intonation_Tree)
|
|
;; Intonation f0 contour.
|
|
(set! f0_lr_start apml_f2b_f0_lr_start)
|
|
(set! f0_lr_left apml_f2b_f0_lr_left)
|
|
(set! f0_lr_mid apml_f2b_f0_lr_mid)
|
|
(set! f0_lr_right apml_f2b_f0_lr_right)
|
|
(set! f0_lr_end apml_f2b_f0_lr_end)
|
|
(set! int_lr_params
|
|
(list (list 'target_f0_mean (car (apml_get_lr_params)))
|
|
(list 'target_f0_std (car (cdr (apml_get_lr_params))))
|
|
(list 'model_f0_mean 170)
|
|
(list 'model_f0_std 40)))
|
|
(Parameter.set 'Int_Target_Method Int_Targets_5_LR)
|
|
nil))
|
|
((string-equal (Parameter.get 'Language) "italian")
|
|
(begin
|
|
(format t "Initialising APML for Italian.\n")
|
|
;; Phrasing.
|
|
(Parameter.set 'Phrase_Method 'cart_tree)
|
|
(set! phrase_cart_tree apml_phrase_tree)
|
|
;; Intonation events.
|
|
(set! int_accent_cart_tree apml_accent_cart)
|
|
(set! int_tone_cart_tree apml_boundary_cart)
|
|
(Parameter.set 'Int_Method Intonation_Tree)
|
|
;; Intonation f0 contour.
|
|
(set! f0_lr_start apml_f2b_f0_lr_start)
|
|
(set! f0_lr_mid apml_f2b_f0_lr_mid)
|
|
(set! f0_lr_end apml_f2b_f0_lr_end)
|
|
(set! int_lr_params
|
|
(list (list 'target_f0_mean (car (apml_get_lr_params)))
|
|
(list 'target_f0_std (car (cdr (apml_get_lr_params))))
|
|
(list 'model_f0_mean 170)
|
|
(list 'model_f0_std 34)))
|
|
(Parameter.set 'Int_Target_Method Int_Targets_LR)
|
|
nil))
|
|
(t nil))))
|
|
|
|
(provide 'apml)
|