Blame view

spkDiarization/scripts/gen_UBM_list.pl~ 1.5 KB
3f2992b2c   bostx   V1.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
  #!/usr/bin/perl
  
  # Perl program to train Universal Background Model
  # using all speech segments in spkDiarization/data/sph/
  #
  # Author: Xavier Bost
  # email: xavier.bost@univ-avignon.fr
  #
  # Synopsis:
  #
  # Source files: spkDiarization/data/sph/*.sph
  #
  # Retained informations are written in:
  #   spkDiarization/gmm/world.gmm
  
  use strict;
  use List::Util qw(min max);
  
  my $line;                  # current line
  my $dir;                   # directory containing label files
  my $file;                  # current file
  my $episode;               # episode name
  my $base_name;             # file base name
  my $start;                 # current segment beginning
  my $end;                   # current segment end
  my %starts = ();           # segments beginning
  my %ends = ();             # segments end
  my %index = ();
  
  $dir = "spkDiarization/data/sph/";
  opendir(REP, $dir);
  my @files = grep /.+_\d+_\d+\.sph/, readdir REP;
  closedir(REP);
  
  # looping over files, i.e. speech segments
  foreach $file (sort @files) {
  
      if ($file =~ /(.+)_(\d+)_(\d+)\.sph$/) {
  	$episode = $1;
  	$start = $2;
  	$end = $3;
      }
  
      $ends{$episode}{$start}{$end} = 1;
      $starts{$episode}{$end}{$start} = 1;
  }
  
  foreach $episode (sort keys %ends) {
      foreach $start (sort {$a<=>$b} keys %{$ends{$episode}}) {
  	$end = max(keys %{$ends{$episode}{$start}}), "
  ";
  	$index{$episode}{$start}{$end} = 1;
      }
  }
  
  foreach $episode (sort keys %starts) {
      foreach $end (sort {$a<=>$b} keys %{$starts{$episode}}) {
  	$start = min(keys %{$starts{$episode}{$end}}), "
  ";
      }
  }