Blame view
egs/aspire/s5/local/fisher_fix_speakerid.pl
2.85 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
#!/usr/bin/env perl
use strict;
use warnings; # replacement for the -w perl parameter

# Author: Peng Qi (pengqi@cs.stanford.edu)
#
# Replaces the per-call channel IDs (fe_03_<call-id>-A / fe_03_<call-id>-B)
# that a data directory uses as speaker IDs with the speaker IDs listed in a
# comma-separated call-data table, and rewrites the utterance IDs to match.
# Expected to be run one level of directory above.
#
# Usage: <this-script> <fisher-calldata-tbl-file> <data-dir>
#
# Reads  <data-dir>/utt2spk, <data-dir>/segments, <data-dir>/text (required)
#        and <data-dir>/spk2gender (optional; silently skipped if it cannot
#        be opened).
# Writes <data-dir>/<name>.new for each file it read; the input files
#        themselves are never modified.

# Returns a copy of the argument with leading and trailing whitespace removed.
sub trim {
    (my $s = $_[0]) =~ s/^\s+|\s+$//g;
    return $s;
}

# Opens $path with the given mode ('<' or '>') and returns the file handle.
# Dies with the OS error message if the file cannot be opened.
sub open_or_die {
    my ($mode, $path) = @_;
    open(my $fh, $mode, $path) or die "Could not open '$path' $!\n";
    return $fh;
}

# Closes a handle that was opened for writing. Buffered write errors (e.g. a
# full disk) only surface at close time, so a failing close is fatal.
sub close_or_die {
    my ($fh, $path) = @_;
    close($fh) or die "Could not close '$path' $!\n";
    return;
}

if (@ARGV != 2) {
    print STDERR "Usage: $0 <fisher-calldata-tbl-file> <data-dir>\n";
    print STDERR "E.g.:  $0 data/local/train/combined-calldata.tbl data/train_all\n";
    # Stop here: continuing without both arguments can only fail later with a
    # less helpful error.
    exit(1);
}

my ($tab_file, $dir) = @ARGV;

# Build the map "fe_03_<call-id>-<A|B>" => speaker ID from the call-data table.
# Field 0 is the call ID (used untrimmed), field 5 the speaker on side A and
# field 10 the speaker on side B.
my %conv_to_spk;

my $conv_tab = open_or_die('<', $tab_file);
while (my $line = <$conv_tab>) {
    chomp $line;
    my @fields = split /,/, $line;

    foreach my $side (['A', 5], ['B', 10]) {
        my ($channel, $index) = @$side;

        # A missing or empty speaker field would map the channel to the bare
        # prefix "fe_03_"; leave such channels unmapped so they keep their old
        # IDs and are reported as missing below.
        next unless defined $fields[$index];
        my $spkid = trim($fields[$index]);
        next if $spkid eq '';

        $conv_to_spk{'fe_03_' . $fields[0] . '-' . $channel} = $spkid;
    }
}
close($conv_tab);

# fix utt2spk

my %missingconv;

my $utt2spk     = open_or_die('<', "$dir/utt2spk");
my $utt2spk_new = open_or_die('>', "$dir/utt2spk.new");

while (my $line = <$utt2spk>) {
    chomp $line;
    my @fields = split " ", $line;
    next unless @fields;    # ignore blank lines

    # The first 13 characters of the utterance ID are the channel ID:
    # "fe_03_" (6) + call ID (5) + "-A"/"-B" (2).
    my $convid = substr($fields[0], 0, 13);

    if (exists $conv_to_spk{$convid}) {
        my $spkid = "fe_03_" . $conv_to_spk{$convid};
        # Keep everything after the "fe_03_" prefix so the call ID, channel
        # and segment suffix of the old utterance ID are preserved.
        my $newuttid = $spkid . '-' . substr($fields[0], 6);

        print $utt2spk_new "$newuttid $spkid\n";
    }
    else {
        # Remember the bare call ID so it is reported once below.
        my $callid = substr($convid, 6, 5);
        $missingconv{$callid} = 1;

        print $utt2spk_new $fields[0] . " " . $fields[1] . "\n";
    }
}

close($utt2spk);
close_or_die($utt2spk_new, "$dir/utt2spk.new");

# Sorted so that the warnings come out in a reproducible order.
foreach my $conv (sort keys %missingconv) {
    print "Warning: Conversation ID '$conv' not found in conv.tab, retaining old speaker IDs\n";
}

# fix spk2gender (optional file)

if (open(my $spk2gender, '<', "$dir/spk2gender")) {
    my $spk2gender_new = open_or_die('>', "$dir/spk2gender.new");

    while (my $line = <$spk2gender>) {
        chomp $line;
        my @fields = split " ", $line;
        next unless @fields;    # ignore blank lines

        # NOTE: several channel IDs can map to the same speaker ID, so the
        # output may contain repeated speaker lines.
        my $spkid = exists $conv_to_spk{ $fields[0] }
                  ? "fe_03_" . $conv_to_spk{ $fields[0] }
                  : $fields[0];

        print $spk2gender_new $spkid . " " . $fields[1] . "\n";
    }

    close($spk2gender);
    close_or_die($spk2gender_new, "$dir/spk2gender.new");
}

# fix segments and text
#
# Only the ID at the start of each line is rewritten; the rest of the line is
# copied through unchanged.

foreach my $file ('segments', 'text') {
    my $oldfile = open_or_die('<', "$dir/$file");
    my $newfile = open_or_die('>', "$dir/$file.new");

    while (my $line = <$oldfile>) {
        chomp $line;

        my $convid = substr($line, 0, 13);
        if (exists $conv_to_spk{$convid}) {
            my $spkid = $conv_to_spk{$convid};
            print $newfile "fe_03_$spkid-" . substr($line, 6) . "\n";
        }
        else {
            print $newfile "$line\n";
        }
    }

    close($oldfile);
    close_or_die($newfile, "$dir/$file.new");
}