Blame view
egs/wsj/s5/utils/filt.py
353 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
#!/usr/bin/env python

# Apache 2.0

"""Replace out-of-vocabulary words in a text file with '<UNK>'.

Usage: filt.py <vocab-file> <text-file>

Every line of <vocab-file>, stripped of surrounding whitespace, becomes one
vocabulary entry.  Each line of <text-file> is split on whitespace, every
token that is not a vocabulary entry is replaced by '<UNK>', and the tokens
are written to standard output joined by single spaces (one output line per
input line).
"""

from __future__ import print_function

import sys

# Token printed in place of any word missing from the vocabulary.
UNK = '<UNK>'


def load_vocab(path):
    """Return the set of stripped lines read from the file at *path*."""
    with open(path) as vocabfile:
        return {line.strip() for line in vocabfile}


def filter_line(line, vocab):
    """Return *line* with every token not in *vocab* replaced by UNK.

    Tokens are separated on runs of whitespace; the result joins them with
    single spaces and carries no leading/trailing whitespace or newline.
    """
    # str.split() with no separator already discards leading and trailing
    # whitespace, so no separate strip() is required.
    return " ".join(word if word in vocab else UNK for word in line.split())


def main(argv=None):
    """Filter the text file named in *argv* and print the result.

    *argv* defaults to sys.argv and is laid out the same way: program name,
    vocabulary file, text file.  Additional arguments are ignored.  Returns
    the process exit status: 0 on success, 1 if arguments are missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        prog = args[0] if args else "filt.py"
        print("Usage: {0} <vocab-file> <text-file>".format(prog),
              file=sys.stderr)
        return 1

    vocab = load_vocab(args[1])
    with open(args[2]) as textfile:
        # Stream line by line so large text files are never held in memory.
        for line in textfile:
            print(filter_line(line, vocab))
    return 0


if __name__ == "__main__":
    sys.exit(main())