Blame view

Scripts/.02_lexicon.sh 967 Bytes
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
  #!/bin/sh
  #
  # Prepare the dictionary directory lang/dict and run utils/prepare_lang.sh.
  #
  # Reads:
  #   $LEXICON  path of the source lexicon (presumably defined by
  #             00_init_paths.sh -- TODO confirm).
  # Writes:
  #   lang/dict/extra_questions.txt   (created empty if missing)
  #   lang/dict/lexicon.txt
  #   lang/dict/optional_silence.txt
  #   lang/dict/silence_phones.txt
  #   lang/oov.txt
  # Not generated here (see the manual steps noted below):
  #   lang/dict/nonsilence_phones.txt
  
  . 00_init_paths.sh
  
  # Abort with a clear message instead of letting the lexicon step read an
  # empty file name.
  : "${LEXICON:?LEXICON is not set or empty}"
  
  # -p creates lang and lang/dict in one call and does not fail on a re-run.
  mkdir -p lang/dict || exit 1
  
  # nonsilence_phones.txt
  # Manual step, kept as a one-line recipe (the original comment was split
  # across two lines, which left a stray unterminated quote in the script):
  #   cat $LEXICON | cut -f2- -d" " | tr " " "\n" | sort -u > lang/dict/nonsilence_phones.txt
  #   + remove last lines
  
  # silence_phones.txt
  # Originally done manually. !!!!! UH is a phoneme code => use UHH instead !!!
  
  # extra_questions.txt
  touch lang/dict/extra_questions.txt
  
  # lexicon.txt
  # Drop the first parenthesised single character on each line, e.g. "(2)"
  # (presumably alternate-pronunciation markers -- TODO confirm).
  sed 's/(.)//' "$LEXICON" > lang/dict/lexicon.txt || exit 1
  # Manual step: add silence entries into the lexicon and add <UNK> with a
  # "garbage" phone.
  
  printf '%s\n' "SIL" > lang/dict/optional_silence.txt
  
  printf '%s\n' "<UNK>" > lang/oov.txt
  
  # One phone per line; the file is rewritten from scratch on every run.
  printf '%s\n' \
    "BREATH" \
    "COUGH" \
    "NOISE" \
    "SMACK" \
    "UHH" \
    "UM" \
    "SPN" \
    "SIL" \
    > lang/dict/silence_phones.txt
  
  # Arguments: dictionary dir, OOV word, temporary dir, output dir.
  # NOTE(review): presumably Kaldi's utils/prepare_lang.sh -- confirm.
  utils/prepare_lang.sh lang/dict/ "<UNK>" lang/tmp/ lang/