gen_UBM_list.pl~ 1.5 KB
#!/usr/bin/perl

# Perl program to train Universal Background Model
# using all speech segments in spkDiarization/data/sph/
#
# Author: Xavier Bost
# email: xavier.bost@univ-avignon.fr
#
# Synopsis:
#
# Source files: spkDiarization/data/sph/*.sph
#
# Retained information is written to:
#   spkDiarization/gmm/world.gmm

use strict;
use warnings;
use List::Util qw(min max);

# Directory scanned for speech segment files.
my $dir = "spkDiarization/data/sph/";

# Segment files are named <episode>_<start>_<end>.sph, where <start> and
# <end> are runs of digits.  The same anchored pattern is used both to
# select the files and to split their names, so a name can never pass the
# filter and then fail to parse.
my $segment_re = qr/(.+)_(\d+)_(\d+)\.sph$/;

# Segment boundaries, keyed by episode:
#   $ends{$episode}{$start}{$end}   = 1   every end seen for a given start
#   $starts{$episode}{$end}{$start} = 1   every start seen for a given end
#   $index{$episode}{$start}{$end}  = 1   only the largest end for each start
my %starts = ();
my %ends   = ();
my %index  = ();

# Fail loudly when the directory is missing or unreadable instead of
# silently continuing with an empty file list.
opendir(my $dh, $dir) or die "Cannot open directory '$dir': $!\n";
my @files = grep { $_ =~ $segment_re } readdir $dh;
closedir($dh);

# looping over files, i.e. speech segments
foreach my $file (sort @files) {

    # Defensive: never record a segment from a name that does not parse
    # (previously the values of the preceding file would have been reused).
    next unless $file =~ $segment_re;
    my ($episode, $start, $end) = ($1, $2, $3);

    $ends{$episode}{$start}{$end}   = 1;
    $starts{$episode}{$end}{$start} = 1;
}

# For every episode and every segment start, keep only the largest end.
foreach my $episode (sort keys %ends) {
    foreach my $start (sort { $a <=> $b } keys %{ $ends{$episode} }) {
        my $end = max(keys %{ $ends{$episode}{$start} });
        $index{$episode}{$start}{$end} = 1;
    }
}

# For every episode and every segment end, find the smallest start.
# NOTE(review): the value computed here is neither stored nor printed
# anywhere in this file -- this pass looks unfinished; confirm what it is
# meant to feed before relying on it.
foreach my $episode (sort keys %starts) {
    foreach my $end (sort { $a <=> $b } keys %{ $starts{$episode} }) {
        my $start = min(keys %{ $starts{$episode}{$end} });
    }
}