Blame view
egs/ptb/s5/local/rnnlm/prepare_rnnlm_data.sh
521 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
#!/bin/bash

# Stages the PTB text files under data/text and writes the vocabulary to
# data/vocab/words.txt.
#
# To be run from the directory egs/ptb/s5.
#
# Inputs:  data/ptb/ptb.txt, data/ptb/dev.txt
# Outputs: data/text/ptb.txt, data/text/dev.txt, data/vocab/words.txt
#          (plus whatever rnnlm/ensure_counts_present.sh produces for data/text)

# Fail early with a clear message when started from the wrong directory.
# Without this check a failed '. path.sh' is ignored (errexit is not active
# yet) and the data/ directories get created in an unintended location.
if [ ! -f path.sh ]; then
  echo "$0: path.sh not found; run this script from egs/ptb/s5" >&2
  exit 1
fi

. path.sh
# Strict mode is enabled only after path.sh so that file is sourced exactly as
# before.
set -euo pipefail

text_dir=data/text
vocab_dir=data/vocab
vocab_file=$vocab_dir/words.txt

# data/text should contain things like
# foo.txt, bar.txt, and dev.txt (dev.txt is a special filename that's obligatory).
mkdir -p "$text_dir"
cp data/ptb/ptb.txt "$text_dir"/
cp data/ptb/dev.txt "$text_dir"/

# validate data dir
rnnlm/validate_text_dir.py "$text_dir"

# get unigram counts; these are used by rnnlm/get_vocab.py.
rnnlm/ensure_counts_present.sh "$text_dir"

# get vocab
mkdir -p "$vocab_dir"
# The redirection truncates words.txt before get_vocab.py runs. If generation
# fails, remove the partial file so a later step cannot mistake it for a
# finished vocabulary, and exit with the generator's own status.
rnnlm/get_vocab.py "$text_dir" > "$vocab_file" || {
  status=$?
  rm -f -- "$vocab_file"
  echo "$0: rnnlm/get_vocab.py failed; removed partial $vocab_file" >&2
  exit "$status"
}