Blame view
Scripts/utils/stm2txt.pl~
665 Bytes
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
#!/usr/bin/perl -w
#
# stm2txt.pl - turn the segments of an STM file into "id text" lines.
#
# Usage: stm2txt.pl <stm file> [1]
#
# Every input line that is not marked "ignore_time_segment_in_scoring" and
# that has the expected STM layout is printed on standard output as
#
#     <field1>_<field4*100>-<field5*100> <text>
#
# When the optional second argument is exactly "1", {...} and <...> tags are
# stripped from the text before printing.
use strict;
use warnings;

# Convert one STM line into its output form.
#
#   $line        - one raw line of the STM file (trailing newline allowed)
#   $remove_tags - true to delete {...} and <...> tags from the text
#
# Returns the formatted string without a trailing newline, or nothing
# (undef in scalar context) when the line is flagged
# "ignore_time_segment_in_scoring" or does not match the expected layout.
sub stm_line_to_text {
    my ($line, $remove_tags) = @_;

    return if $line =~ /ignore_time_segment_in_scoring/;

    # Space-separated fields: 1 = utterance id, 2 and 3 are skipped,
    # 4 and 5 are numeric bounds (presumably start/end times -- TODO confirm
    # the unit), then a <...> label and the text.  Trailing " (...)" groups
    # are excluded from the text.
    my ($utterance_id, $start, $end, $sentence) =
        $line =~ /(.*?) .*? .*? (.*?) (.*?) \<.*?\> (.*?)( \(.*?\))*$/
        or return;

    # The two bounds are scaled by 100 before being embedded in the id.
    $start *= 100;
    $end   *= 100;

    if ($remove_tags) {
        $sentence =~ s/\{.*?\}//g;
        $sentence =~ s/\<.*?\>//g;
    }

    # Tag removal can leave runs of spaces and a leading space behind.
    $sentence =~ s/ +/ /g;
    $sentence =~ s/^ //;

    return sprintf "%s_%s-%s %s", $utterance_id, $start, $end, $sentence;
}

# Script entry point: validate the arguments, then stream the STM file
# through stm_line_to_text() and print each converted line.
#
# Dies with the usage message when no input file is given, and with the
# system error when the input file cannot be opened.
sub main {
    my @args = @_;

    if (@args < 1) {
        die "<input : stm file> <option : remove tags (1 if activated>\n";
    }

    my $stm_path = $args[0];

    # String comparison: the option arrives as text from the command line,
    # so a numeric "==" would warn on (or wrongly accept) non-numeric input.
    my $remove_tags = (@args > 1 && $args[1] eq '1') ? 1 : 0;

    # Three-argument open keeps the file name from being read as a mode,
    # and a failed open is reported instead of yielding empty output.
    open my $stm_fh, '<', $stm_path
        or die "Cannot open '$stm_path': $!\n";

    # Read line by line rather than loading the whole file into a list.
    while (my $line = <$stm_fh>) {
        my $text = stm_line_to_text($line, $remove_tags);
        print "$text\n" if defined $text;
    }

    close $stm_fh;
    return;
}

# Run only when executed as a script, so the helpers can be loaded alone.
main(@ARGV) unless caller;