Blame view
tools/scripts/srt2stm.pl
1.33 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
#!/usr/bin/perl -w
#
# srt2stm.pl - print an STM transcript built from a subtitle-derived text file.
#
# Usage: srt2stm.pl FILE.tmp2.txt > FILE.stm
#
# Standard output receives a fixed ";;" header followed by ONE segment line:
#
#   <basename> 1 UNK 0.0 3600.00 <o,f0,unknown> <cleaned text> <cleaned text> ...
#
# where <basename> is the input file name with any ".tmp2.txt" suffix removed.
# No newline is written after the segment line.
#
# The file can also be loaded with require/do without running anything: the
# main program only runs when the file is executed directly.

use strict;
use warnings;
use File::Basename;

# Index of the first input line the extraction loop visits, and the distance
# between visits. The text itself is taken from the line FOLLOWING each visited
# index (0-based lines 6, 10, 14, ...).
# NOTE(review): presumably every subtitle entry occupies exactly four lines and
# the first entry is skipped on purpose - confirm against a real .tmp2.txt file.
my $FIRST_INDEX = 5;
my $ENTRY_SPAN  = 4;

# Returns the fixed STM header, newline-terminated.
# NOTE(review): the layout assumes one ";;" entry per line - confirm against a
# reference STM file. The 0 before the second "Overall" is kept verbatim from
# the original text; verify whether it is intentional.
sub stm_header {
    return <<'END_HEADER';
;; Script export with encoding UTF-8
;; transcribed by LIA;;
;; CATEGORY "0" "" ""
;; LABEL "O" "Overall" 0"Overall"
;;
;; CATEGORY "1" "Hub4 Focus Conditions" ""
;; LABEL "F0" "Baseline//Broadcast//Speech" ""
;; LABEL "F1" "Spontaneous//Broadcast//Speech" ""
;; LABEL "F2" "Speech Over//Telephone//Channels" ""
;; LABEL "F3" "Speech in the//Presence of//Background Music" ""
;; LABEL "F4" "Speech Under//Degraded//Acoustic Conditions" ""
;; LABEL "F5" "Speech from//Non-Native//Speakers" ""
;; LABEL "FX" "All other speech" ""
;; CATEGORY "2" "Speaker Sex" ""
;; LABEL "female" "Female" ""
;; LABEL "male" "Male" ""
;; LABEL "unknown" "Unknown" ""
END_HEADER
}

# Strips markup from one subtitle text line and returns the cleaned string.
#
#   $text - one chomped line of subtitle text
#
# The substitutions run in this order (the order matters, e.g. leading blanks
# are only trimmed after the prefixes in front of them are gone):
#   1. "-", optional blank, a run of letters/blanks/dots, optional blank, ":"
#      (presumably speaker labels such as "- John:" - confirm)
#   2. a dash at the very start
#   3. doubled single quotes ('')
#   4. a dash followed by an uppercase letter - BOTH characters are removed
#   5. trailing dots together with any blanks after them
#   6. leading dots together with any blanks before them
#   7. leading blanks
#   8. trailing blanks
sub cleanText
{
    my ($text) = @_;

    $text =~ s/- ?[A-Za-z \.]+ ?://g;
    $text =~ s/^-//g;
    $text =~ s/''//g;
    $text =~ s/-[A-Z]//g;
    $text =~ s/\.+ *$//g;
    $text =~ s/^ *\.+//g;
    $text =~ s/^ +//g;
    $text =~ s/ +$//g;

    return $text;
}

# Builds the single STM segment line (without a trailing newline).
#
#   $basename - identifier placed at the start of the line
#   $lines    - array ref of chomped input lines
#
# Lines whose cleaned text is empty contribute nothing to the result.
sub build_segment {
    my ($basename, $lines) = @_;

    my @parts = ("$basename 1 UNK 0.0 3600.00 <o,f0,unknown>");

    # The "- 1" bound guarantees that $i + 1 is still a valid index.
    for (my $i = $FIRST_INDEX; $i < @{$lines} - 1; $i += $ENTRY_SPAN) {
        my $text = cleanText($lines->[$i + 1]);
        next if $text =~ /^$/;
        push @parts, $text;
    }

    return join(' ', @parts);
}

# Reads the file named by the first argument and prints the STM to STDOUT.
# Dies when the argument is missing or the file cannot be opened; the input is
# read completely before anything is printed, so a failure never leaves a
# partial STM on standard output.
sub main {
    my @args = @_;

    die "usage: $0 FILE.tmp2.txt\n" unless @args;
    my $path = $args[0];

    # Three-argument open: the name is always treated as a plain file to read,
    # never as a mode or a pipe.
    open(my $fh, '<', $path) or die "can't open $path file: $!";
    my @lines = <$fh>;
    close($fh);
    chomp(@lines);

    my $basename = basename($path, ".tmp2.txt");

    print stm_header();
    print build_segment($basename, \@lines);

    return;
}

main(@ARGV) unless caller;

1;