Blame view
egs/csj/s5/local/csj_make_trans/csjconnect.pl
1.8 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
#!/usr/bin/env perl
use strict;
use warnings;

# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
#           2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
# Apache 2.0
# Acknowledgement  This work was supported by JSPS KAKENHI Grant Number 26280055.

# Connects CSJ segments into moderate-length utterance units.
# (The original header says the segments come from csj2kaldi4m.pl.)
#
# Usage: csjconnect.pl gap maxlen file spk_id
#   gap     a new utterance starts when (segment start - previous end) exceeds this
#   maxlen  a new utterance starts when (previous end - utterance start) exceeds this
#   file    input file name, or '-' to read STDIN
#   spk_id  speaker id; when it begins with 'D', "-<c>" is appended, where <c>
#           is the single character found before ':' on the input line
#           (presumably the channel -- confirm against the producer script)
#
# Each input line must contain:
#   <sgid> <start>-<end> <c>:-<digits>-<digits> <text>
# Each output line is:
#   <spk_id>_<sgid> <start> <end> <s> <text> [<sp> <text> ...] </s>
# Utterances whose text contains the character '×' are not printed.

use utf8;
use open IO => ":utf8";
use open ":std";

# NOTE(review): nothing in this file ever assigns $opt_t, so the
# "times only" output branch is normally inactive.  It is kept as a
# package variable so that any external way of setting it still works.
our $opt_t;

# Print one finished utterance.
#   $spk_id, $sgid  identify the utterance (speaker id and first segment id)
#   $start, $end    utterance time span
#   $text           accumulated words, already terminated by a single space
sub emit_utterance {
    my ($spk_id, $sgid, $start, $end, $text) = @_;

    if ($opt_t) {
        print "$sgid $start $end\n";
        return;
    }

    # Utterances containing the '×' mark are dropped from the output.
    return if $text =~ /\×/;

    print "${spk_id}_${sgid} $start $end <s> $text</s>\n";
    return;
}

if (@ARGV != 4) {
    die "$0 gap maxlen file spk_id\n";
}
my ($gap, $maxlen, $file, $spk) = @ARGV;

my $in;
if ($file eq '-') {
    $in = \*STDIN;
}
else {
    # Three-argument open: the file name can never be read as a mode.
    open($in, '<', $file) or die "$! : $file\n";
}

# State of the previously read segment.
my $prev_sgid;              # undef until the first segment has been read
my $prev_spk_id = "";
my $prev_end    = 0;

# State of the utterance currently being assembled.
my ($utt_start, $utt_sgid, $utt_spk_id);
my $line = "";

while (my $record = <$in>) {
    chomp $record;
    if ($record !~ /(\d+) ([\d\.]+)-([\d\.]+) (.):-\d+-\d+ (.+)/) {
        die "Unexpected format: $record\n";
    }
    my ($sgid, $start, $end, $pch, $wpp) = ($1, $2, $3, $4, $5);

    # Speaker ids starting with 'D' get the captured character as a suffix.
    my $ch     = ($spk =~ /^D/) ? "-$pch" : "";
    my $spk_id = "$spk$ch";

    my $start_new = 0;
    if (!defined $prev_sgid) {
        # Very first segment: open the first utterance, nothing to emit.
        $start_new = 1;
    }
    elsif ($prev_sgid eq $sgid && $prev_spk_id eq $spk_id) {
        # Same segment id and speaker: plain continuation.
        $line .= "$wpp ";
    }
    elsif ($gap < $start - $prev_end
        || $maxlen < $prev_end - $utt_start
        || $utt_spk_id ne $spk_id)
    {
        # Pause too long, utterance too long, or speaker changed:
        # flush what we have and open a new utterance.
        emit_utterance($utt_spk_id, $utt_sgid, $utt_start, $prev_end, $line);
        $start_new = 1;
    }
    else {
        # Different segment but close enough: join with a short pause.
        $line .= "<sp> $wpp ";
    }

    if ($start_new) {
        $utt_start  = $start;
        $utt_sgid   = $sgid;
        $utt_spk_id = $spk_id;
        $line       = "$wpp ";
    }

    $prev_sgid   = $sgid;
    $prev_spk_id = $spk_id;
    $prev_end    = $end;
}

close($in) if $file ne '-';

# Flush the last utterance; $prev_end is the end time of the last segment read.
if ($line ne "") {
    emit_utterance($utt_spk_id, $utt_sgid, $utt_start, $prev_end, $line);
}