Blame view
egs/gp/s1/local/gp_make_questions.pl
2.55 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
#!/usr/bin/env perl
use warnings; #sed replacement for -w perl parameter

# Copyright 2012  Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# 'phonesets_mono' contains sets of phones that are shared when building the
# monophone system and when asking questions based on an automatic clustering
# of phones, for the triphone system.
# 'roots' contain the information about which phones share a common root in
# the phonetic decision tree and which have distinct pdfs. It also states
# whether the tree-building should split the roots or not.

# Overview of this script:
#   1. Read the phone list given with -i (one "<phone> <second-field>" pair per
#      line; only the first field is used).
#   2. Group the phones into sets. With -p, phones that differ only in a
#      trailing "_<char>" position marker (e.g. a_B, a_E) go into one set;
#      without -p every phone is its own set.
#   3. Write one set per line to the -m file and the same sets, prefixed with
#      "shared split", to the -r file. Both files start with a SIL/SPN line.

use strict;
use Getopt::Long;

my $usage =
    "Usage: gp_make_questions.pl [-p] -i phones -m phoneset_mono -r roots\n"
  . "Creates sharerd phonesets for monophone and context-dependent training.\n"
  . "Required arguments:\n"
  . "  -i\tInput list of phones (can contain stress/position markers)\n"
  . "  -m\tOutput shared phoneset for use in monophone training\n"
  . "  -r\tOutput sharing and splitting info for context-dependent training\n"
  . "Options:\n"
  . "  -p\tSignal that input phone list contains position markers\n";

my ($in_phones, $mono, $roots, $posdep, %phoneset);
GetOptions(
    "p"   => \$posdep,       # Using position-dependent phones
    "i=s" => \$in_phones,    # Input list of phones
    "m=s" => \$mono,         # Shared phone-set for monophone system
    "r=s" => \$roots,        # roots file for context-dependent systems
) or die $usage;             # unknown/malformed options: show usage and stop

die $usage
    unless defined($in_phones) && defined($mono) && defined($roots);

# Three-argument open with lexical handles: the file names come straight from
# the command line, so they must never be interpreted as part of the mode.
open(my $phones_fh, '<', $in_phones)
    or die "Cannot read from file '$in_phones': $!";
open(my $mono_fh, '>', $mono)
    or die "Cannot write to file '$mono': $!";
open(my $roots_fh, '>', $roots)
    or die "Cannot write to file '$roots': $!";

while (my $line = <$phones_fh>) {
    # NOTE(review): this is a substring match on the whole line, so any entry
    # that merely contains "eps", "SIL" or "SPN" is skipped as well.
    next if $line =~ m/eps|SIL|SPN/;
    chomp $line;

    # Exactly two whitespace-separated fields are accepted per line.
    $line =~ m/^(\S+)\s+\S+$/ or die "Bad line: $line\n";
    my $full_phone = $1;

    # Only strip a trailing "_<char>" marker when the caller said (-p) that the
    # list is position-dependent; otherwise an underscore may be part of the
    # phone name itself. The non-greedy \S+? keeps at least one character as
    # the root, so a phone that is nothing but a marker is left untouched.
    my $root = $full_phone;
    if ($posdep && $full_phone =~ m/^(\S+?)_.$/) {
        $root = $1;
    }
    push @{ $phoneset{$root} }, $full_phone;
}
close($phones_fh);

# NOTE(review): SIL and SPN are written on a single line in both files, as far
# as the original text shows -- confirm this matches the intended sharing.
print {$mono_fh} "SIL SPN\n";
print {$roots_fh} "not-shared not-split SIL SPN\n";

# Sorted by root so the output is stable across runs.
foreach my $root (sort keys %phoneset) {
    my $members = join(" ", @{ $phoneset{$root} });
    print {$mono_fh} "$members\n";
    print {$roots_fh} "shared split $members\n";
}

# Buffered write errors only surface at close, so check it.
close($mono_fh)  or die "Cannot write to file '$mono': $!";
close($roots_fh) or die "Cannot write to file '$roots': $!";