Blame view

egs/csj/s5/local/csj_make_trans/csjconnect.pl 1.8 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  #!/usr/bin/env perl
  use warnings;
  
  # Copyright  2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
  #            2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
  # Apache 2.0
  # Acknowledgement  This work was supported by JSPS KAKENHI Grant Number 26280055.
  
  # Connects the CSJ segments produced by csj2kaldi4m.pl into utterance units of moderate length.
  
  use utf8;
  use open IO => ":utf8";
  use open ":std";
  
  # Command line: exactly four positional arguments are required.
  #   gap     - compared against the pause between two consecutive segments
  #             (start of the current one minus end of the previous one)
  #   maxlen  - compared against the length of the utterance built so far
  #   file    - input path, or '-' to read standard input
  #   spk_id  - id used as the prefix of every utterance id that is printed
  if (@ARGV != 4) {
      # The message ends in "\n" so that Perl does not append "at ... line N"
      # and no stray whitespace is written after the usage text.
      die "$0 gap maxlen file spk_id\n";
  }

  # Kept as package variables (no "my"): the rest of the script reads them
  # and the file does not enable "use strict".
  ($gap, $maxlen, $file, $spk) = @ARGV;
  
  # Select the input stream: '-' means standard input, anything else is
  # treated as the path of a file to read.
  if ($file eq '-') {
      # A glob reference instead of the bareword STDIN, so the handle does
      # not rely on a symbolic reference being resolved at read time.
      $in = \*STDIN;
  } else {
      # Three-argument open: the path is never interpreted as a mode or a
      # pipe specification (e.g. a name starting with '>' or ending in '|').
      # The default layers set by the "use open" pragma still apply.
      open($in, '<', $file) or die "$! : $file\n";
  }
  # State carried from one input line to the next.
  #   $psgid, $pspk_id, $pend : segment id, speaker id and end time of the
  #                             previous line ($psgid == -1 means "no line yet")
  #   $line                   : words collected for the utterance being built
  # $ostart, $osgid and $ospk_id (set inside the loop) describe the first
  # segment of the utterance being built.
  $psgid = -1;
  $pspk_id = "";
  $pend = 0;
  $line = "";

  # Every input line has to contain
  #   <segment-id> <start>-<end> <channel>:-<digits>-<digits> <words...>
  while (<$in>) {
      chomp;
      unless (/(\d+) ([\d\.]+)-([\d\.]+) (.):-\d+-\d+ (.+)/) {
          # The trailing "\n" keeps Perl from appending "at ... line N".
          die "Unexpected format: $_\n";
      }
      my ($sgid, $start, $pch, $wpp) = ($1, $2, $4, $5);
      # Deliberately not lexical: the flush after the loop reads $end.
      $end = $3;

      # When the speaker id given on the command line starts with "D", the
      # channel character is appended to it as "-<channel>".
      my $spk_id = ($spk =~ /^D/) ? "$spk-$pch" : "$spk";

      if ($psgid == -1) {
          # Very first line: start the first utterance.
          ($ostart, $osgid, $ospk_id) = ($start, $sgid, $spk_id);
          $line = "$wpp ";
      } elsif ($psgid eq $sgid && $pspk_id eq $spk_id) {
          # Same segment id and speaker as the previous line: append the
          # words without a pause marker.
          $line .= "$wpp ";
      } elsif ($gap < $start - $pend
               || $maxlen < $pend - $ostart
               || $ospk_id ne $spk_id) {
          # The pause is longer than gap, the utterance is already longer
          # than maxlen, or the speaker changed: print the utterance built
          # so far and start a new one with the current line.
          # NOTE(review): $opt_t is never assigned in the visible file, so
          # unless it is set from outside only the transcript branch runs.
          if ($opt_t) {
              print "$osgid $ostart $pend\n";
          } elsif ($line !~ /\×/) {
              # Utterances whose text contains the character "×" are dropped.
              print "${ospk_id}_$osgid $ostart $pend <s> $line</s>\n";
          }
          ($ostart, $osgid, $ospk_id) = ($start, $sgid, $spk_id);
          $line = "$wpp ";
      } else {
          # New segment that is close enough: join it to the current
          # utterance, separated by a "<sp>" token.
          $line .= "<sp> $wpp ";
      }

      $psgid = $sgid;
      $pspk_id = $spk_id;
      $pend = $end;
  }
  
  # Print the utterance that is still being built when the input ends.
  # $line is empty only when no input line was read at all.
  if ($line ne "") {
      # $pend holds the end time of the last line read, the same value the
      # in-loop flush uses for the end of an utterance.
      # NOTE(review): $opt_t is never assigned in the visible file, so unless
      # it is set from outside only the transcript branch runs.
      if ($opt_t) {
          print "$osgid $ostart $pend\n";
      } elsif ($line !~ /\×/) {
          # Utterances whose text contains the character "×" are dropped.
          print "${ospk_id}_$osgid $ostart $pend <s> $line</s>\n";
      }
  }