Blame view
spkDiarization/scripts/gen_UBM_list.pl~
1.5 KB
3f2992b2c V1.0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
#!/usr/bin/perl

# Perl program indexing the speech segments found in
# spkDiarization/data/sph/, grouped by episode.
#
# Author: Xavier Bost
# email: xavier.bost@univ-avignon.fr
#
# Synopsis:
#
# Source files: spkDiarization/data/sph/*.sph
#   Segment files must be named <episode>_<start>_<end>.sph, where <start>
#   and <end> are unsigned integers; any other directory entry is ignored.
#
# NOTE(review): the original header announced that this program trains the
# Universal Background Model and that retained information is written to
# spkDiarization/gmm/world.gmm. Nothing in this file writes any output yet;
# presumably the script is unfinished -- confirm before relying on it.
#
# Exit status: dies with a message on STDERR if the source directory
# cannot be opened.

use strict;
use warnings;
use List::Util qw(min max);

my $dir = "spkDiarization/data/sph/";   # directory containing segment files

my %starts = ();   # $starts{episode}{end}{start} = 1 for every segment seen
my %ends   = ();   # $ends{episode}{start}{end}   = 1 for every segment seen
my %index  = ();   # $index{episode}{start}{end}  = 1, longest segment only

# A missing or unreadable directory is a hard error: carrying on with an
# empty file list would hide the problem from the caller.
opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
my @files = grep { /.+_\d+_\d+\.sph/ } readdir $dh;
closedir($dh);

# looping over files, i.e. speech segments
foreach my $file (sort @files) {

    # The filter above is not anchored at the end, so names such as
    # "x_1_2.sph.bak" get this far; skip them instead of recording
    # whatever the previous iteration left behind.
    next unless $file =~ /(.+)_(\d+)_(\d+)\.sph$/;
    my ($episode, $start, $end) = ($1, $2, $3);

    $ends{$episode}{$start}{$end}   = 1;
    $starts{$episode}{$end}{$start} = 1;
}

# For every segment beginning, keep only the latest end.
foreach my $episode (sort keys %ends) {
    foreach my $start (sort { $a <=> $b } keys %{ $ends{$episode} }) {
        my $end = max(keys %{ $ends{$episode}{$start} });
        $index{$episode}{$start}{$end} = 1;
    }
}

# For every segment end, find the earliest beginning.
# NOTE(review): the value is computed but not stored or printed anywhere,
# and %index above is never read -- presumably placeholders for output code
# that has not been written yet.
foreach my $episode (sort keys %starts) {
    foreach my $end (sort { $a <=> $b } keys %{ $starts{$episode} }) {
        my $start = min(keys %{ $starts{$episode}{$end} });
    }
}