Blame view
LIA_kaldiUtils/generate_stm.pl
1.3 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
#!/usr/bin/perl
#
# generate_stm.pl -- for every .trs file found in an input directory, produce
# the matching .stm and .txt files in an output directory by chaining the
# helper programs under bin/ (relative to the current working directory).
#
# Usage: ./generate_stm.pl rep_in_trs rep_out_stm
#   rep_in_trs   directory that contains the .trs files
#   rep_out_stm  directory that receives the .stm and .txt files
#
# Exit status: 1 on a usage error, non-zero if the input directory cannot be
# read, 0 otherwise. A failing pipeline step only produces a warning.

use strict;
use warnings;

if (scalar(@ARGV) != 2) {
    print STDERR "USAGE: ./generate_stm.pl rep_in_trs rep_out_stm\n";
    exit 1;
}

my $REP_TRS     = $ARGV[0];
my $REP_STM_OUT = $ARGV[1];

# Example values:
# my $REP_TRS = "/local_disk/hera2/REPERE/Databases/Phase1/train/trs";
# my $REP_STM_OUT = "stm";
my $TOOLS_BDLEX = "/users/ferreira/TOOLS/OTMEDIA_TOOLS/lia_ltbox/lia_tagg/bin";

# Wrap a string in single quotes for the shell, so that spaces and shell
# metacharacters in directory or file names are passed through literally.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Run one shell command line. Returns 1 when the shell reports success,
# otherwise prints a warning and returns 0.
# For a pipeline the shell reports the status of its last command only.
sub _run_step {
    my ($label, $command) = @_;

    my $status = system($command);
    return 1 if $status == 0;

    if ($status == -1) {
        warn "generate_stm.pl: $label: could not run command: $!\n";
    }
    else {
        warn "generate_stm.pl: $label: command ended with wait status $status\n";
    }
    return 0;
}

# Collect the .trs files directly instead of parsing the output of `ls | grep`.
# Names starting with a dot are skipped, as a plain `ls` would skip them.
opendir(my $trs_dir, $REP_TRS)
    or die "generate_stm.pl: cannot read directory $REP_TRS: $!\n";
my @files = sort grep { !/^\./ && /\.trs$/ } readdir($trs_dir);
closedir($trs_dir);

foreach my $file (@files) {
    chomp($file);
    $file =~ s/\.trs$//;
    print "Generation du .stm pour le fichier $file\n";

    # Shell-quoted paths used by the command lines below.
    my $trs_in  = _shell_quote("$REP_TRS/$file.trs");
    my $trs_tmp = _shell_quote("$REP_STM_OUT/$file.trsTmp");
    my $raw_stm = _shell_quote("$REP_STM_OUT/$file.raw.stm");
    my $stm_out = _shell_quote("$REP_STM_OUT/$file.stm");
    my $txt_out = _shell_quote("$REP_STM_OUT/$file.txt");
    my $reacc   = _shell_quote("$TOOLS_BDLEX/reacc_win2bdlex");

    # First step: trs cleaning plus pronunciation replacement
    # (e.g. 1,5 milliards => 1 milliard 5)
    _run_step("$file: trs cleaning",
        "bin/preprocess_trs.pl < $trs_in | bin/replacePronTrs.pl > $trs_tmp");

    # Second step: generating a raw stm from the trs file
    _run_step("$file: trs2stm",
        "bin/trs2stm.pl -c -f stm $trs_tmp > $raw_stm");

    # Third step: normalizing the stm
    _run_step("$file: normalize",
        "bin/normalize.v0.55 -if stm -n 2 -l 1 -d bin/normalize.v2.0.dic -i $raw_stm -o $stm_out");

    # Last step: generate the .txt associated with the .stm (essentially for LM)
    _run_step("$file: stm2txt",
        "cat $stm_out | bin/stm2txt.pl | $reacc > $txt_out");

    # Removing the intermediate files; a file that is already missing is not
    # an error, which matches the previous `rm -f`.
    unlink("$REP_STM_OUT/$file.trsTmp", "$REP_STM_OUT/$file.raw.stm");
}