Blame view

egs/tunisian_msa/s5/local/prepare_dict.sh 871 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  #!/bin/bash -u

  # Copyright 2018 John Morgan
  # Apache 2.0.

  # Prepares the dictionary directory data/local/dict from a lexicon file.
  #
  # Usage: local/prepare_dict.sh <lexicon>
  #   <lexicon>  path to the lexicon file; the rest of the script reads it
  #              through the variable $l.

  # Abort on the first failing command.
  set -o errexit

  # Pick up the recipe environment when the current directory has a path.sh.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi

  # The "-u" on the shebang line is dropped when the script is started as
  # "bash prepare_dict.sh ...", so enable it explicitly.  It is switched on
  # only after path.sh has been sourced so that the sourced file runs under
  # the same options it did before this line was added.
  set -o nounset

  # Fail early with a readable message instead of an "unbound variable" error
  # or tools silently falling back to standard input.
  if [ "$#" -lt 1 ] || [ -z "${1:-}" ]; then
    echo "Usage: $0 <lexicon>" >&2
    exit 1
  fi

  l=$1
  if [ ! -r "$l" ]; then
    echo "$0: cannot read lexicon file: $l" >&2
    exit 1
  fi

  # mkdir -p succeeds when the directory already exists, so no test is needed.
  mkdir -p data/local/dict

  # Byte-wise, locale-independent ordering for the sort calls in this script.
  export LC_ALL=C
  
  # Build nonsilence_phones.txt: drop the first space-separated field of each
  # lexicon line, put every remaining token on its own line (runs of
  # whitespace collapse into a single newline), and discard lines that
  # contain SPN.
  # '[\n*]' must stay a backslash escape on one line: a literal line break
  # inside the quotes would add the next line's indentation to tr's
  # replacement set and stop it from being the "[c*]" repeat form.
  # NOTE(review): tail -n+2 removes the first entry of the sorted list --
  # presumably a blank or header token; confirm against the lexicon format.
  cut -f2- -d " " "$l" \
    | tr -s '[:space:]' '[\n*]' \
    | grep -v SPN \
    | sort -u \
    | tail -n+2 > data/local/dict/nonsilence_phones.txt
  
  # Write lexicon.txt: the input lexicon with every tab turned into a single
  # space (tab stops one column apart), sorted and de-duplicated, without the
  # first sorted line, followed by the entry for the unknown word.
  # "$l" is quoted so a path containing spaces or glob characters is passed
  # to expand as one argument.
  # NOTE(review): sed "1d" drops the first line of the sorted output --
  # presumably a blank or header line; confirm against the lexicon format.
  {
    expand -t 1 "$l" | sort -u | sed "1d"
    echo "<UNK> SPN"
  } > data/local/dict/lexicon.txt
  
  # Silence phone list: SIL and SPN, one per line.
  printf '%s\n' SIL SPN > data/local/dict/silence_phones.txt

  # The optional-silence list holds SIL only.
  printf '%s\n' SIL > data/local/dict/optional_silence.txt
  
  # Write extra_questions.txt with two lines: the silence phones joined by
  # spaces, then the non-silence phones joined by spaces.  Each newline of the
  # input lists becomes a space, and the bare echo ends the line.
  # '\n' is kept as a backslash escape: a literal line break inside the quotes
  # would drag the next line's indentation into tr's first set.
  {
    tr '\n' ' ' < data/local/dict/silence_phones.txt
    echo
    tr '\n' ' ' < data/local/dict/nonsilence_phones.txt
    echo
  } > data/local/dict/extra_questions.txt

  echo "$0: Finished dictionary preparation."