gen_UBM_list.pl~
1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl
# Perl program to train Universal Background Model
# using all speech segments in spkDiarization/data/sph/
#
# Author: Xavier Bost
# email: xavier.bost@univ-avignon.fr
#
# Synopsis:
#
# Source files: spkDiarization/data/sph/*.sph
#
# Retained information is written in:
# spkDiarization/gmm/world.gmm
use strict;
use List::Util qw(min max);
# File-level working variables shared by the loops below.
my $line; # current line (not referenced in the visible part of the script)
my $dir; # directory containing the .sph speech segment files
my $file; # current file name
my $episode; # episode name (prefix of the file name)
my $base_name; # file base name (not referenced in the visible part of the script)
my $start; # current segment beginning
my $end; # current segment end
my %starts = (); # $starts{episode}{end}{start} = 1: segment starts seen for each end
my %ends = (); # $ends{episode}{start}{end} = 1: segment ends seen for each start
my %index = (); # $index{episode}{start}{end} = 1, where end is the largest end recorded for that start
# Directory holding one .sph file per speech segment, named
# <episode>_<start>_<end>.sph.
$dir = "spkDiarization/data/sph/";

# Fail loudly if the directory cannot be read: with an unchecked opendir the
# script would silently carry on with an empty file list.
opendir(my $dh, $dir) or die "Cannot open directory '$dir': $!";

# Keep only well-formed segment names. The pattern is anchored at the end so
# that it accepts exactly the names the parsing loop below can decompose
# (a name such as "x_1_2.sph~" is rejected here instead of slipping through).
my @files = grep { /.+_\d+_\d+\.sph$/ } readdir $dh;
closedir($dh);
# Loop over the files, i.e. the speech segments, and index each one by its
# boundaries. After this loop:
#   $ends{episode}{start}{end}   = 1   (every end seen for a given start)
#   $starts{episode}{end}{start} = 1   (every start seen for a given end)
foreach $file (sort @files) {
    # Skip any name that does not parse as <episode>_<start>_<end>.sph.
    # Without this guard the values captured from the previous file (or
    # undef on the first iteration) would be recorded a second time.
    next unless $file =~ /(.+)_(\d+)_(\d+)\.sph$/;

    # The greedy (.+) lets the episode name itself contain underscores; the
    # last two numeric fields are always taken as start and end.
    ($episode, $start, $end) = ($1, $2, $3);

    $ends{$episode}{$start}{$end} = 1;
    $starts{$episode}{$end}{$start} = 1;
}
# For every episode and every segment start, keep only the greatest end
# recorded for that start:
#   $index{episode}{start}{max end} = 1
# Episodes are visited in string order, starts in numeric order.
foreach $episode (sort keys %ends) {
    foreach $start (sort { $a <=> $b } keys %{ $ends{$episode} }) {
        # Plain assignment: a trailing `, "\n"` here would only be a
        # discarded constant evaluated by the comma operator.
        $end = max(keys %{ $ends{$episode}{$start} });
        $index{$episode}{$start}{$end} = 1;
    }
}
# For every episode and every segment end, compute the smallest start
# recorded for that end. Episodes are visited in string order, ends in
# numeric order.
# NOTE(review): the computed start is not stored or printed anywhere in the
# visible code; presumably it was meant to be recorded the way the previous
# loop fills %index -- confirm the intended use before relying on this loop.
foreach $episode (sort keys %starts) {
    foreach $end (sort { $a <=> $b } keys %{ $starts{$episode} }) {
        # Plain assignment: a trailing `, "\n"` here would only be a
        # discarded constant evaluated by the comma operator.
        $start = min(keys %{ $starts{$episode}{$end} });
    }
}