prepare_dict.sh 716 Bytes
#!/bin/bash

# Copyright      2017  Chun Chieh Chang
#                2017  Hossein Hadian

set -e
dir=data/local/dict

#######################################
# Turns a list of words into lexicon entries.
# Reads one word per line on stdin and writes, for every non-empty line,
# the word followed by each of its characters separated by single spaces,
# e.g. "a#b" -> "a#b a <HASH> b".
# Outputs: lexicon entries on stdout.
#######################################
words_to_lexicon() {
  # Each entry is assembled by string concatenation and emitted with print,
  # so input text is never used as a printf format string (a '%' inside a
  # word is therefore harmless).  substr() is used instead of split() with
  # an empty separator, which POSIX awk leaves undefined.
  # '#' is spelled as <HASH> in the character sequence only; the word
  # itself is kept verbatim.  NOTE(review): presumably because '#' is
  # reserved for other symbols downstream -- confirm.
  awk 'length($0) > 0 {
         entry = $0
         n = length($0)
         for (i = 1; i <= n; i++) {
           ch = substr($0, i, 1)
           if (ch == "#") ch = "<HASH>"
           entry = entry " " ch
         }
         print entry
       }'
}

#######################################
# Builds the dictionary directory from data/train/text.
# Globals: dir (read) - output directory.
# Outputs: writes lexicon.txt, nonsilence_phones.txt, silence_phones.txt,
#          optional_silence.txt and extra_questions.txt under $dir.
#######################################
main() {
  mkdir -p "$dir"

  # Drop the first space-separated field of every line (presumably the
  # utterance id -- confirm against the data format), then cut the rest
  # into tokens made of ASCII letters, digits and punctuation; every other
  # byte acts as a separator.  The literal '[' and ']' in the tr set are
  # punctuation themselves, so they do not change the set.
  # NOTE(review): pipefail is not enabled, so a failing early stage (e.g. a
  # missing input file) does not abort the script; only the tool's own
  # error message is printed.
  cut -d' ' -f2- data/train/text \
    | tr -cs '[a-z][A-Z][0-9][:punct:]' '\n' \
    | sort -u \
    | words_to_lexicon > "$dir/lexicon.txt"

  # The phone inventory is every distinct symbol used in the character
  # sequences.  It is taken before the SIL entries are appended so that SIL
  # does not end up among the non-silence phones.
  cut -d' ' -f2- "$dir/lexicon.txt" | tr ' ' '\n' | sort -u \
    > "$dir/nonsilence_phones.txt"

  echo '<sil> SIL' >> "$dir/lexicon.txt"
  echo '<unk> SIL' >> "$dir/lexicon.txt"

  echo SIL > "$dir/silence_phones.txt"

  echo SIL > "$dir/optional_silence.txt"

  # Create (or truncate to) an empty file.
  : > "$dir/extra_questions.txt"
}

main