prepare_dict.sh 884 Bytes
#!/bin/bash
# Copyright 2015-2016  Sarah Flora Juan
# Copyright 2016  Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0

corpus=$1
if [ -z "$corpus" ] ; then
    echo >&2 "The script $0 expects one parameter -- the location of the Iban corpus"
    exit 1
fi
if [ ! -d "$corpus" ] ; then
    echo >&2 "The directory $corpus does not exist"
fi

mkdir -p data/lang data/local/dict


cp $corpus/lang/dict/lexicon.txt data/local/dict/lexicon.txt
cat data/local/dict/lexicon.txt | \
    perl -ane 'print join("\n", @F[1..$#F]) . "\n"; '  | \
    sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt


touch data/local/dict/extra_questions.txt
touch data/local/dict/optional_silence.txt

echo "SIL"   > data/local/dict/optional_silence.txt
echo "SIL"   > data/local/dict/silence_phones.txt
echo "<UNK>" > data/local/dict/oov.txt

echo "Dictionary preparation succeeded"