Blame view
egs/wsj/s5/utils/apply_map.pl
2.89 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
#!/usr/bin/env perl
use strict;
use warnings; #sed replacement for -w perl parameter
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.

# This program is a bit like ./sym2int.pl in that it applies a map
# to things in a file, but it's a bit more general in that it doesn't
# assume the things being mapped to are single tokens, they could
# be sequences of tokens.  See the usage message.
#
# Layout: the work is done by main() and a few small helpers.  The last
# statement of the file calls main(@ARGV) only when the file is executed
# directly, so the subs can also be loaded with require/do without running.


# Returns the usage message printed after an "Invalid usage" error.
sub usage_text {
  return <<'EOF';
Usage: apply_map.pl [options] map <input >output
 options: [-f <field-range> ] [--permissive]
   This applies a map to some specified fields of some input text:
   For each line in the map file: the first field is the thing wae map from,
   and the remaining fields are the sequence we map it to.
   The -f (field-range) option says which fields of the input file the map
   map should apply to.
   If the --permissive option is supplied, fields which are not present
   in the map will be left as they were.
 Applies the map 'map' to all input text, where each line of the map
 is interpreted as a map from the first field to the list of the other fields
 Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field
 range in the input to apply the map to.
 e.g.: echo A B | apply_map.pl a.txt
 where a.txt is:
 A a1 a2
 B b
 will produce:
 a1 a2 b
EOF
}

# Removes leading "-f <field-range>" and "--permissive" options from the
# array referenced by $args (the array is modified in place) and stops at
# the first argument that is neither.
#
# Returns ($field_begin, $field_end, $permissive).  The two bounds are
# zero-based and inclusive; an undefined bound means "no limit on that side".
# Dies if -f has no value or its value is not a usable field range.
sub parse_options {
  my ($args) = @_;
  my ($field_begin, $field_end);
  my $permissive = 0;

  while (@$args) {
    if ($args->[0] eq "-f") {
      shift @$args;
      my $field_spec = shift @$args;
      defined $field_spec
        or die "Bad argument to -f option: missing <field-range>\n";

      my ($begin, $end);
      if ($field_spec =~ m/^\d+$/) {
        # A single field number selects exactly that field.
        $begin = $end = $field_spec - 1;  # Change to zero-based indexing.
      } elsif ($field_spec =~ m/^(\d*)[-:](\d*)$/) {
        # accept e.g. 1:10 as a courtesy (properly, 1-10)
        $begin = $1 - 1 if $1 ne "";      # Change to zero-based indexing.
        $end   = $2 - 1 if $2 ne "";      # Change to zero-based indexing.
      }
      if (!defined $begin && !defined $end) {
        die "Bad argument to -f option: $field_spec";
      }
      # Only overwrite the bounds this spec actually provided, so that
      # repeated -f options combine the way they always have.
      $field_begin = $begin if defined $begin;
      $field_end   = $end   if defined $end;
    } elsif ($args->[0] eq '--permissive') {
      shift @$args;
      # Mapping is optional (missing key is printed to output)
      $permissive = 1;
    } else {
      last;
    }
  }
  return ($field_begin, $field_end, $permissive);
}

# Reads the map file $map_file and returns a reference to a hash that maps
# the first whitespace-separated field of each line to the remaining fields
# joined by single spaces (the empty string when there are none).
# Dies if the file cannot be opened or contains an empty line.
sub load_map {
  my ($map_file) = @_;
  # Three-argument open: the file name is never interpreted as a mode.
  open(my $map_fh, '<', $map_file)
    or die "Error opening map file $map_file: $!";

  my %map;
  while (my $line = <$map_fh>) {
    my @fields = split(" ", $line);
    @fields >= 1 || die "apply_map.pl: empty line.";
    my $key = shift @fields;
    $map{$key} = join(" ", @fields);
  }
  close($map_fh);
  return \%map;
}

# Entry point.  @args has the same layout as the command line:
#   [-f <field-range>] [--permissive] map
# Reads lines from STDIN, replaces every selected field by its entry in the
# map and prints the result to STDOUT, fields separated by single spaces.
# A field with no entry in the map is fatal, unless --permissive was given,
# in which case a warning goes to STDERR and the field is left unchanged.
#
# Returns the process exit status: 0 on success, 1 after a usage error.
sub main {
  my @args = @_;
  my ($field_begin, $field_end, $permissive) = parse_options(\@args);

  if (@args != 1) {
    print STDERR "Invalid usage: " . join(" ", @args) . "\n";
    print STDERR usage_text();
    return 1;
  }

  my ($map_file) = @args;
  my $map = load_map($map_file);

  while (my $line = <STDIN>) {
    my @fields = split(" ", $line);
    for my $idx (0 .. $#fields) {
      # Skip fields outside the requested (zero-based, inclusive) range.
      next if defined $field_begin && $idx < $field_begin;
      next if defined $field_end && $idx > $field_end;

      my $key = $fields[$idx];
      if (exists $map->{$key}) {
        $fields[$idx] = $map->{$key};
      } elsif ($permissive) {
        print STDERR "apply_map.pl: warning! missing key $key in $map_file\n";
      } else {
        die "apply_map.pl: undefined key $key in $map_file\n";
      }
    }
    print join(" ", @fields) . "\n";
  }
  return 0;
}

# Run only when executed as a script; stay passive when loaded by other code.
exit(main(@ARGV)) unless caller;

1;