Blame view
Scripts/.02_lexicon.sh
967 Bytes
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
#!/bin/sh
# Builds the lang/dict dictionary directory from $LEXICON and then runs
# utils/prepare_lang.sh on it.
#
# Run from the directory that holds 00_init_paths.sh and utils/; every path
# below is relative to the current working directory.
# LEXICON is expected to be defined by 00_init_paths.sh (TODO confirm).

# A slash-less operand to "." is looked up through PATH, and POSIX shells are
# not required to consult the current directory. Prefer the local copy
# explicitly and keep the PATH lookup as a fallback.
if [ -f ./00_init_paths.sh ]; then
  . ./00_init_paths.sh
else
  . 00_init_paths.sh
fi

# Fail fast with a clear message; an empty LEXICON would otherwise leave the
# lexicon step with no usable input.
: "${LEXICON:?LEXICON must be set (expected from 00_init_paths.sh)}"

# From here on, stop at the first failing step so that prepare_lang.sh never
# runs on a half-written dictionary directory.
set -e

# -p makes the script re-runnable when the directories already exist.
mkdir -p lang/dict

# nonsilence_phones.txt is NOT generated by this script; it was produced with
# the command below (kept for reference), followed by manually removing the
# last lines.
# NOTE(review): the second tr operand is presumably "\n" (one phone per
# line); confirm before re-enabling.
#cat $LEXICON | cut -f2- -d" " | tr " " "\n" | sort -u > lang/dict/nonsilence_phones.txt

# extra_questions.txt: only has to exist; touch keeps any existing content.
touch lang/dict/extra_questions.txt

# lexicon.txt: in a basic regular expression the parentheses are literal, so
# this removes the first "(<any single character>)" on each line, e.g.
# "WORD(2)" becomes "WORD" (presumably pronunciation-variant markers).
sed 's/(.)//' < "$LEXICON" > lang/dict/lexicon.txt
# Manual follow-up noted by the original author: add the silence entries to
# the lexicon and add <UNK> with a "garbage" phone.

printf '%s\n' "SIL" > lang/dict/optional_silence.txt
printf '%s\n' "<UNK>" > lang/oov.txt

# silence_phones.txt, one phone per line.
# UH is already a phoneme code, so the filler is spelled UHH instead.
printf '%s\n' \
  "BREATH" \
  "COUGH" \
  "NOISE" \
  "SMACK" \
  "UHH" \
  "UM" \
  "SPN" \
  "SIL" \
  > lang/dict/silence_phones.txt

# Arguments: dictionary dir, OOV word, temporary dir, output lang dir.
# Kept as the last command so its exit status is the script's exit status.
utils/prepare_lang.sh lang/dict/ "<UNK>" lang/tmp/ lang/