.02_lexicon.sh 967 Bytes
#!/bin/sh

# Load the path settings used by the rest of this script.
# The explicit "./" matters: for a name without a slash, POSIX "." searches
# only $PATH, so a strict /bin/sh would not pick up the file from the current
# directory.
. ./00_init_paths.sh

# Create the output directories. -p creates lang/ and lang/dict/ in one call
# and does not fail when they already exist, so the script can be re-run.
mkdir -p lang/dict


#nonsilence_phones.txt
#cat $LEXICON | cut -f2- -d" "  | tr " " "\n" | sort -u > lang/dict/nonsilence_phones.txt
#+remove last lines

#silence_phones.txt
#originally done manually (the file is now written further below).
#NOTE: UH is already a phoneme code, so the hesitation filler is spelled UHH instead.

#extra_questions.txt
# Make sure the file exists; this script never writes any content into it.
touch lang/dict/extra_questions.txt

#lexicon.txt
# Copy the source lexicon, deleting the first "(x)" group on each line: a
# literal "(", any single character, and a literal ")" (e.g. the "(2)" in
# "WORD(2)").
# LEXICON is presumably set by the sourced 00_init_paths.sh -- confirm. The
# ":?" guard aborts with a message when it is empty or unset, rather than
# letting the command silently read from stdin.
sed 's/(.)//' "${LEXICON:?LEXICON is not set}" > lang/dict/lexicon.txt
# Manual follow-up: add silence entries to the lexicon, and add <UNK> mapped
# to a "garbage" phone.

# optional_silence.txt: a single line holding the SIL phone.
printf '%s\n' "SIL" > lang/dict/optional_silence.txt

# oov.txt: a single line holding the <UNK> word.
printf '%s\n' "<UNK>" > lang/oov.txt

# silence_phones.txt: one phone per line, in this order. The hesitation
# filler is spelled UHH (not UH).
printf '%s\n' \
  "BREATH" \
  "COUGH" \
  "NOISE" \
  "SMACK" \
  "UHH" \
  "UM" \
  "SPN" \
  "SIL" > lang/dict/silence_phones.txt


# Run the lang-preparation helper on the dictionary directory.
# Arguments as passed here, in order: lang/dict/ (the dictionary files),
# the "<UNK>" word, lang/tmp/ and lang/.
# NOTE(review): presumably Kaldi's utils/prepare_lang.sh, i.e.
# <dict-src-dir> <oov-dict-entry> <tmp-dir> <lang-dir> -- confirm against the
# helper's usage text.
utils/prepare_lang.sh \
  lang/dict/ \
  "<UNK>" \
  lang/tmp/ \
  lang/