Blame view
egs/aishell/s5/local/aishell_prepare_dict.sh
1.17 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
#!/bin/bash

# Copyright 2017 Xingyu Na
# Apache 2.0

# Prepare the dictionary resources for the AISHELL recipe.
#
# Usage: aishell_prepare_dict.sh <resource-path>
#   <resource-path>  directory that contains lexicon.txt
#
# Files written to data/local/dict:
#   lexicon.txt            copy of <resource-path>/lexicon.txt
#   nonsilence_phones.txt  every phone found in columns 2..NF of the lexicon
#                          except "sil"; phones that share the same non-digit
#                          prefix are placed on the same line
#   silence_phones.txt     the single phone "sil"
#   optional_silence.txt   the single phone "sil"
#   extra_questions.txt    one line holding the silence phones, followed by
#                          one line per distinct trailing-digit suffix of the
#                          nonsilence phones
#
# Exit status: 1 on a usage error or when any step fails, 0 otherwise.

. ./path.sh

# Without pipefail only the last stage of a pipeline is checked, so a failing
# awk or perl stage (e.g. "Bad phone") would still be reported as success.
set -o pipefail

if [ $# -ne 1 ]; then
  echo "Usage: $0 <resource-path>"
  exit 1
fi

res_dir=$1
dict_dir=data/local/dict

mkdir -p -- "$dict_dir" || exit 1
# Stop here if the lexicon is missing; otherwise every later step would
# quietly produce empty files.
cp -- "$res_dir/lexicon.txt" "$dict_dir" || exit 1

# Collect the distinct phones of the lexicon (column 1 is the word), drop
# "sil", and emit one line per non-digit prefix. The intermediate sort makes
# the order of phones within a line independent of awk's array iteration
# order; the final sort orders the lines themselves.
awk '{ for (n = 2; n <= NF; n++) { phones[$n] = 1; } }
     END { for (p in phones) print p; }' "$dict_dir/lexicon.txt" \
  | sort \
  | perl -e 'while(<>){ chomp($_); $phone = $_; next if ($phone eq "sil");
      m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$1} .= "$phone "; }
      foreach $l (values %q) {print "$l\n";}
    ' \
  | sort -k1 > "$dict_dir/nonsilence_phones.txt" || exit 1

echo sil > "$dict_dir/silence_phones.txt" || exit 1

echo sil > "$dict_dir/optional_silence.txt" || exit 1

# The input provides no "extra questions" file, so the questions are derived
# here: first a single line listing all silence phones ...
awk '{ printf("%s ", $1); } END { printf "\n"; }' \
  "$dict_dir/silence_phones.txt" > "$dict_dir/extra_questions.txt" || exit 1

# ... then one line per trailing-digit suffix of the nonsilence phones
# (presumably the tone marker -- confirm against the lexicon). Phones without
# a digit suffix share the empty-suffix line. Keys are sorted so the line
# order does not depend on Perl's hash iteration order.
perl -e 'while(<>){ foreach $p (split(" ", $_)) {
    $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } }
    foreach $k (sort keys %q) {print "$q{$k}\n";}' \
  "$dict_dir/nonsilence_phones.txt" >> "$dict_dir/extra_questions.txt" || exit 1

echo "$0: AISHELL dict preparation succeeded"
exit 0