chime1_prepare_dict.sh
932 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Kaldi scripts for preparing dictionary for the GRID corpus (or CHiME 1)
echo "Preparing dictionary"
. ./config.sh # Needed for REC_ROOT and WAV_ROOT
# Prepare relevant folders
dict="$REC_ROOT/data/local/dict"
mkdir -p $dict
utils="utils"
# Copy lexicon
lexicon="input/lexicon.txt" # phone models
cp $lexicon $dict/lexicon.txt
# Generate phone list
sil="SIL"
phone_list="$dict/phone.list"
awk '{for (n=2;n<=NF;n++)print $n;}' $lexicon | sort -u > $phone_list
echo $sil >> $phone_list
# Create phone lists
grep -v -w $sil $phone_list > $dict/nonsilence_phones.txt
echo $sil > $dict/silence_phones.txt
echo $sil > $dict/optional_silence.txt
# list of "extra questions"-- empty; we don't have things like tone or
# word-positions or stress markings.
touch $dict/extra_questions.txt
echo "-->Dictionary preparation succeeded"
exit 0