Blame view

egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh 521 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  #!/bin/bash
  
  # Prepares the text and vocabulary data for RNNLM training on PTB:
  #   1. copies the PTB training and dev text into data/text,
  #   2. validates that directory,
  #   3. makes sure the unigram counts are present,
  #   4. writes the vocabulary to data/vocab/words.txt.
  #
  # To be run from the directory egs/ptb/s5: every path below (data/...,
  # rnnlm/...) is relative to the current working directory.
  
  # . path.sh
  
  # Abort on the first failing command, on use of an unset variable, and on a
  # failure in any stage of a pipeline.
  set -euo pipefail
  
  
  # data/text is the text directory; it should contain things like
  # foo.txt, bar.txt, and dev.txt (dev.txt is a special filename that's obligatory).
  mkdir -p data/text
  cp data/ptb/ptb.txt  data/text/
  cp data/ptb/dev.txt  data/text/
  
  # validate data dir
  rnnlm/validate_text_dir.py data/text
  
  # get unigram counts; these are used by rnnlm/get_vocab.py.
  rnnlm/ensure_counts_present.sh data/text
  
  # get vocab
  mkdir -p data/vocab
  
  # Write the vocabulary to a temporary file first and only move it into place
  # once get_vocab.py has succeeded.  Redirecting straight to words.txt would
  # truncate the file before the command runs, so a failure would leave an
  # empty or partial vocabulary behind (and destroy any previous good copy).
  vocab_tmp=data/vocab/words.txt.tmp
  # Remove the temporary file on any exit path; after a successful mv it no
  # longer exists and 'rm -f' is a no-op.
  trap 'rm -f -- "$vocab_tmp"' EXIT
  rnnlm/get_vocab.py data/text > "$vocab_tmp"
  mv -- "$vocab_tmp" data/vocab/words.txt