#!/usr/bin/perl

# Perl program to train Universal Background Model
# using all speech segments in spkDiarization/data/sph/
#
# Author: Xavier Bost
# email: xavier.bost@univ-avignon.fr
#
# Synopsis:
#
# Source files: spkDiarization/data/sph/*.sph
#
# Retained information is written to:
# spkDiarization/gmm/world.gmm
#
# This part of the script only indexes the speech segments found in the
# source directory.  Segment files are expected to be named
# "<episode>_<start>_<end>.sph", where <start> and <end> are digit strings.

use strict;
use warnings;
use List::Util qw(min max);

my $line;         # current line
my $dir;          # directory containing label files
my $file;         # current file
my $episode;      # episode name
my $base_name;    # file base name
my $start;        # current segment beginning
my $end;          # current segment end
my %starts = ();  # segments beginning: $starts{episode}{end}{start} = 1
my %ends   = ();  # segments end:       $ends{episode}{start}{end}   = 1
my %index  = ();  # for each start, largest end: $index{episode}{start}{end} = 1

# Single pattern used both to select segment files and to parse them, so the
# filter and the parser can never disagree about what a segment name is.
my $segment_name_re = qr/(.+)_(\d+)_(\d+)\.sph\z/;

$dir = "spkDiarization/data/sph/";

# Fail loudly if the source directory is missing instead of silently
# continuing with an empty list of segments.
opendir(my $dh, $dir) or die "Cannot open directory '$dir': $!";
my @files = grep { $_ =~ $segment_name_re } readdir $dh;
closedir($dh);

# looping over files, i.e. speech segments
foreach $file (sort @files) {
    # @files is already filtered, but never index a name that fails to
    # parse: the captures would otherwise be stale or undefined.
    next unless $file =~ $segment_name_re;
    ($episode, $start, $end) = ($1, $2, $3);

    $ends{$episode}{$start}{$end}   = 1;
    $starts{$episode}{$end}{$start} = 1;
}

# For every episode and every segment start, keep only the largest end.
foreach $episode (sort keys %ends) {
    foreach $start (sort { $a <=> $b } keys %{ $ends{$episode} }) {
        $end = max(keys %{ $ends{$episode}{$start} });
        $index{$episode}{$start}{$end} = 1;
    }
}

# For every episode and every segment end, find the smallest start.
# NOTE(review): the value computed here is not stored or used anywhere in the
# visible code -- confirm whether a matching update of %index is missing.
foreach $episode (sort keys %starts) {
    foreach $end (sort { $a <=> $b } keys %{ $starts{$episode} }) {
        $start = min(keys %{ $starts{$episode}{$end} });
    }
}