# fame_dict_prep.sh
# 1.12 KB
#!/bin/bash
# Copyright 2015-2016 Sarah Flora Juan
# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
# Copyright 2016 Radboud University (Author: Emre Yilmaz)
# Apache 2.0
# Build the dictionary directory data/local/dict from the FAME! corpus lexicon.
#
# Usage: <this script> <corpus-dir>
#   <corpus-dir>  directory containing lexicon/lex.asr and lexicon/lex.oov
#
# Files written (relative to the current working directory):
#   data/local/dict/lexicon.txt            both lexicon files plus the
#                                          "!SIL SIL" and "<UNK> SPN" entries,
#                                          sorted and de-duplicated (C locale)
#   data/local/dict/nonsilence_phones.txt  unique phone symbols from the lexicon,
#                                          minus those matching SIL
#   data/local/dict/silence_phones.txt     "SIL"
#   data/local/dict/optional_silence.txt   "SIL"
#   data/local/dict/extra_questions.txt    created if missing, otherwise untouched
#   data/local/dict/oov.txt                "<UNK>"
#
# Exit status: 0 on success; 1 if the argument is missing, the corpus directory
# does not exist, or the lexicon cannot be assembled.
corpus=$1
dict=data/local/dict

if [ -z "$corpus" ] ; then
  echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech corpus"
  exit 1
fi
if [ ! -d "$corpus" ] ; then
  echo >&2 "The directory $corpus does not exist"
  # Abort here: continuing would only produce an incomplete dictionary and
  # still report success at the end.
  exit 1
fi

mkdir -p data/lang "$dict" || exit 1

# Merge both lexicon files. A missing or unreadable file is fatal, because
# everything below is derived from lexicon.txt.
if ! cat "$corpus/lexicon/lex.asr" "$corpus/lexicon/lex.oov" > "$dict/lexicon.txt" ; then
  echo >&2 "Could not read the lexicon files in $corpus/lexicon"
  exit 1
fi
echo '!SIL SIL' >> "$dict/lexicon.txt"
echo '<UNK> SPN' >> "$dict/lexicon.txt"

# Sort in place with byte-wise collation so the result does not depend on the
# caller's locale.
LC_ALL=C sort -u -o "$dict/lexicon.txt" "$dict/lexicon.txt" || exit 1

# Every whitespace-separated field after the first one on a lexicon line is
# emitted as a phone symbol, one per line.
# NOTE(review): grep -v 'SIL' is a substring match, so any phone symbol that
# merely contains "SIL" is dropped as well -- confirm this is intended.
perl -ane 'print join("\n", @F[1..$#F]) . "\n"; ' "$dict/lexicon.txt" \
  | sort -u \
  | grep -v 'SIL' > "$dict/nonsilence_phones.txt"

# touch leaves any existing extra_questions.txt content in place.
touch "$dict/extra_questions.txt"
echo "SIL" > "$dict/optional_silence.txt"
echo "SIL" > "$dict/silence_phones.txt"
echo "<UNK>" > "$dict/oov.txt"

echo "Dictionary preparation succeeded"