Blame view
egs/tunisian_msa/s5/local/answers_make_lists.pl
1.93 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
#!/usr/bin/env perl

# Copyright 2018 John Morgan
# Apache 2.0.

# answers_make_lists.pl - make acoustic model training lists
#
# Usage: answers_make_lists.pl PROMPTS_FILE...   (or prompts on STDIN)
#
# Input:
#   * Prompt lines read via <>, one per line: "<id>\t<sentence>".  The id is
#     split on '_' into five fields; fields 1, 2 and 5 are kept and the
#     utterance id is rebuilt as "<field1>_<field2>_a_<field5>".
#   * data/local/tmp/tunis/answers_wav.txt: one wav path per line.  Only
#     paths containing "Answers" and not containing "Recordings" are used.
#
# Output (all under data/local/tmp/tunis/answers, one record per line):
#   * text    : "<utt-id>\t<sentence>"
#   * wav.scp : "<utt-id> sox <wav path> -t wav - |"
#   * utt2spk : "<utt-id> <field1>_<field2>_a"
#
# Wav files with no matching prompt are reported on STDERR and skipped.

use strict;
use warnings;
use Carp;

use File::Spec;
use File::Copy;
use File::Basename;

my $tmpdir = 'data/local/tmp/tunis';
my $outdir = "$tmpdir/answers";

# List-form system avoids the shell; the exit status is checked so that a
# failed mkdir is reported here rather than as a confusing open() failure.
system( 'mkdir', '-p', $outdir ) == 0
    or croak "could not create $outdir (mkdir exit status $?)";

# input wav file list
my $wav_list = "$tmpdir/answers_wav.txt";

# output temporary wav.scp file
my $wav_scp = "$outdir/wav.scp";

# output temporary utt2spk file
my $utt2spk = "$outdir/utt2spk";

# output temporary text file
my $text = "$outdir/text";

# prompts keyed by utterance id
my %prompt = ();

# store prompts in hash
LINEA: while ( my $line = <> ) {
    chomp $line;
    my ( $num, $sent ) = split /\t/sxm, $line, 2;
    if ( !defined $num || !defined $sent ) {
        carp "skipping prompt line without an id and a sentence: $line";
        next LINEA;
    }

    # The third and fourth id fields are not part of the utterance name.
    my ( $machine, $s, undef, undef, $i ) = split /_/sxm, $num;
    if ( !defined $i ) {
        carp "skipping prompt with fewer than five id fields: $num";
        next LINEA;
    }

    # the utterance name
    $prompt{ join '_', $machine, $s, 'a', $i } = $sent;
}

# Write wav.scp, utt2spk and text files.
open my $wav_list_fh, '<', $wav_list
    or croak "problem with $wav_list $!";
open my $wav_scp_fh, '>', $wav_scp
    or croak "problem with $wav_scp $!";
open my $utt2spk_fh, '>', $utt2spk
    or croak "problem with $utt2spk $!";
open my $text_fh, '>', $text
    or croak "problem with $text $!";

LINE: while ( my $line = <$wav_list_fh> ) {
    chomp $line;
    next LINE if $line !~ /Answers/sxm;
    next LINE if $line =~ /Recordings/sxm;

    my ( undef, $directories ) = File::Spec->splitpath($line);
    my @dirs = split m{/}sxm, $directories;

    # The id is built from the third-from-last and last directory names,
    # so a shallower path cannot yield a usable id.
    if ( @dirs < 3 ) {
        carp "skipping wav path with fewer than three directories: $line";
        next LINE;
    }

    my $machine = $dirs[-3];
    my $s       = $dirs[-1];
    my $r       = basename $line, '.wav';
    my $spk     = join '_', $machine, $s, 'a';
    my $rid     = join '_', $spk, $r;

    # No transcript for this recording: report it and leave it out of all
    # three lists so they stay consistent with each other.
    if ( !exists $prompt{$rid} ) {
        print {*STDERR} "problem\t$rid\n"
            or croak "problem writing to STDERR $!";
        next LINE;
    }

    print {$text_fh} "$rid\t$prompt{$rid}\n"
        or croak "problem writing $text $!";
    print {$wav_scp_fh} "$rid sox $line -t wav - |\n"
        or croak "problem writing $wav_scp $!";
    print {$utt2spk_fh} "$rid $spk\n"
        or croak "problem writing $utt2spk $!";
}

# Buffered write errors surface at close, so every close is checked.
close $utt2spk_fh  or croak "problem closing $utt2spk $!";
close $text_fh     or croak "problem closing $text $!";
close $wav_list_fh or croak "problem closing $wav_list $!";
close $wav_scp_fh  or croak "problem closing $wav_scp $!";