#!/usr/bin/perl
# integerize.pl


# integerize.pl takes as its arguments one or more symbol-table files (like
# those used by OpenFst) and converts the symbols on the standard input into
# integers.  Anything it does not recognize is left unchanged.
#
# At least one symbol table is required; with none, print the usage line
# and exit.
@ARGV
    or die "Usage: integerize.pl symtab1 [symtab2].. < input > output\n";

# Symbol -> integer map built from the symbol tables.  Kept as a package
# variable because the rest of the script refers to it by name.
our %sym2int;

# Reads one symbol-table file into the hash referenced by $table.
#   $path  - name of the symbol-table file to read
#   $table - hash reference that receives symbol => value entries
# Every line must consist of exactly two whitespace-separated fields: the
# symbol and the value it maps to.
# Dies if the file cannot be opened, if a line does not have two fields, or
# if a symbol is already present in $table (e.g. from an earlier file).
sub _load_symtab {
    my ($path, $table) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # parsed for mode characters, and $. restarts for every file, so the
    # line numbers reported below are relative to the file being read.
    open(my $fh, '<', $path)
        or die "Error opening file $path: $!\n";

    while (my $line = <$fh>) {
        my @fields = split(" ", $line);
        if (@fields != 2) {
            chomp $line;    # keep the diagnostic on a single line
            die "Bad line in symtab file $path: line $.: $line\n";
        }
        my ($sym, $int) = @fields;
        if (defined $table->{$sym}) {
            # No trailing newline: Perl appends the script location and the
            # current input line number to the message.
            die "Multiply defined symbol $sym";
        }
        $table->{$sym} = $int;
    }
    close($fh);
    return;
}

# Load every symbol table named on the command line, consuming @ARGV.
while (@ARGV > 0) {
    _load_symtab(shift @ARGV, \%sym2int);
}

# Convert standard input line by line.  Each whitespace-separated token is
# replaced by its entry in %sym2int when it has one and echoed unchanged
# otherwise.  Every token, including the last on a line, is followed by a
# single space, and each input line yields exactly one output line.
while (defined(my $line = <STDIN>)) {
    my @converted =
        map { defined $sym2int{$_} ? $sym2int{$_} : $_ } split(" ", $line);
    print "$_ " for @converted;
    print "\n";
}