Blame view

egs/wsj/s5/utils/filt.py 353 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  #!/usr/bin/env python
  
  # Apache 2.0
  
  from __future__ import print_function
  import sys
  
  # Usage: filt.py <vocab-file> <text-file>
  #
  # Step 1: collect the vocabulary. Every line of the first file, with
  # surrounding whitespace removed, becomes one entry of the set.
  with open(sys.argv[1]) as vocab_handle:
      vocab = set(entry.strip() for entry in vocab_handle)

  # Step 2: stream the second file line by line. Each line is split on
  # whitespace; tokens that are not in the vocabulary are replaced by the
  # literal '<UNK>' marker, and the result is printed with the tokens joined
  # by single spaces (so runs of whitespace in the input are collapsed).
  with open(sys.argv[2]) as text_handle:
      for sentence in text_handle:
          filtered = []
          for token in sentence.strip().split():
              if token not in vocab:
                  token = '<UNK>'
              filtered.append(token)
          print(" ".join(filtered))