Blame view
egs/heroico/s5/local/usma_native_make_lists.pl
1.76 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
#!/usr/bin/env perl # Copyright 2017 John Morgan # Apache 2.0. #usma_native_make_lists.pl - make acoustic model training lists use strict; use warnings; use Carp; use File::Spec; use File::Copy; use File::Basename; my $tmpdir = "data/local/tmp/usma/native"; # input wav file list my $w = "$tmpdir/wav_list.txt"; # output temporary wav.scp files my $o = "$tmpdir/wav.scp"; # output temporary utt2spk files my $u = "$tmpdir/utt2spk"; # output temporary text files my $t = "$tmpdir/text"; # initialize hash for prompts my %p = (); # store prompts in hash LINEA: while ( my $line = <> ) { chomp $line; my ($num,$sent) = split /\t/, $line, 2; $p{$num} = $sent; } open my $W, '<', $w or croak "problem with $w $!"; open my $O, '+>', $o or croak "problem with $o $!"; open my $U, '+>', $u or croak "problem with $u $!"; open my $T, '+>', $t or croak "problem with $t $!"; LINE: while ( my $line = <$W> ) { chomp $line; next LINE unless ( $line =~ /native/ ); next LINE if ( $line =~ /nonnative/ ); my ($volume,$directories,$file) = File::Spec->splitpath( $line ); my @dirs = split /\//, $directories; my $r = basename $line, ".wav"; next LINE unless ( $r =~ /^s/ ); my $s = $dirs[-1]; my ($nativeness,$gender,$country,$weight,$age,$height,$dlpt,$idx) = split /\-/, $s, 9; $s = $nativeness . '_' . $gender . '_' . $country . '_' . $weight . '_' . $age . '_' . $height . '_' . $dlpt . '_' . $idx; my $rid = $s . '_' . $r; if ( exists $p{$r} ) { print $T "$rid $p{$r} "; } elsif ( defined $rid ) { warn "problem\t$rid"; next LINE; } else { croak "$line"; } print $O "$rid sox -r 22050 -e signed -b 16 $line -r 16000 -t wav - | "; print $U "$rid $s "; } close $T; close $O; close $U; close $W; |