.02_lexicon.sh
967 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/sh
# Build the dictionary directory lang/dict and run utils/prepare_lang.sh on it.
#
# Reads:  $LEXICON -- path of the source lexicon (expected to be provided by
#         00_init_paths.sh; TODO confirm).
# Writes: lang/dict/extra_questions.txt, lang/dict/lexicon.txt,
#         lang/dict/optional_silence.txt, lang/dict/silence_phones.txt,
#         lang/oov.txt, plus whatever utils/prepare_lang.sh produces from the
#         lang/dict/, lang/tmp/ and lang/ arguments.
# Must be run from the directory that contains utils/ (all paths are relative).

# A POSIX sh resolves a slash-less `.` operand through PATH only, so prefer the
# copy in the current directory and fall back to the original lookup otherwise.
if [ -f ./00_init_paths.sh ]; then
  . ./00_init_paths.sh
else
  . 00_init_paths.sh
fi

# From here on, stop at the first failing step instead of handing a
# half-built lang/dict to prepare_lang.sh.
set -e

# Without this guard an unset/empty LEXICON would leave the lexicon step
# reading from stdin.
: "${LEXICON:?LEXICON must be set to the lexicon file path}"

# -p: re-running the script must not fail on already existing directories.
mkdir -p lang/dict

# nonsilence_phones.txt is NOT generated by this script. The disabled command
# below is kept for reference; its output needs the last lines removed by hand.
#cat $LEXICON | cut -f2- -d" " | tr " " "\n" | sort -u > lang/dict/nonsilence_phones.txt

# extra_questions.txt: created empty if it does not exist yet.
touch lang/dict/extra_questions.txt

# lexicon.txt: drop the first "(<one character>)" group on each line.
# NOTE: silence entries and an <UNK> entry mapped to a "garbage" phone still
# have to be added to lang/dict/lexicon.txt (not done by this script).
sed 's/(.)//' "$LEXICON" > lang/dict/lexicon.txt

# Out-of-vocabulary word; the same symbol is passed to prepare_lang.sh below.
echo "<UNK>" > lang/oov.txt

echo "SIL" > lang/dict/optional_silence.txt

# silence_phones.txt: one phone per line.
# UH is a phoneme code, so UHH is used here instead.
printf '%s\n' BREATH COUGH NOISE SMACK UHH UM SPN SIL \
  > lang/dict/silence_phones.txt

utils/prepare_lang.sh lang/dict/ "<UNK>" lang/tmp/ lang/