Blame view
egs/vystadial_cz/s5/local/prepare_cs_transcription.sh
566 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
#!/bin/bash
# Build the Czech pronunciation lexicon and the list of OOV words.
#
# Usage: local/prepare_cs_transcription.sh <locdata> <locdict>
#   <locdata>  directory containing vocab-full.txt
#   <locdict>  output directory (created if missing); receives
#              cs_transcription.txt, vocab-oov.txt and lexicon.txt
#
# Run from the directory that contains local/: the Perl transcription
# script is referenced through a relative path.

set -euo pipefail

if [ $# -lt 2 ]; then
  echo "Usage: $0 <locdata> <locdict>" >&2
  exit 1
fi

locdata=$1; shift
locdict=$1; shift

vocab=$locdata/vocab-full.txt
transcription=$locdict/cs_transcription.txt

if [ ! -r "$vocab" ]; then
  echo "$0: cannot read vocabulary file '$vocab'" >&2
  exit 1
fi

mkdir -p "$locdict"

# The Perl script is given the vocabulary and the transcription path; the
# steps below read the transcription file, so it is expected to write it.
perl local/phonetic_transcription_cs.pl "$vocab" "$transcription"

echo "--- Searching for OOV words ..."

# In both awk programs the first file operand only fills the lookup table
# keyed on field 1. FILENAME == ARGV[1] is used instead of NR == FNR because
# the latter also holds for every line of the second file when the first
# file is empty. Lines matching <.?s> (e.g. <s>, </s>) are dropped inside
# awk; a separate `grep -v` stage would exit 1 on empty output and abort
# the script under `set -e -o pipefail`.

# OOV list: vocabulary lines whose first field has no transcription entry.
gawk 'FILENAME == ARGV[1] { words[$1]; next }
      !($1 in words) && !/<.?s>/' \
  "$transcription" "$vocab" > "$locdict/vocab-oov.txt"

# Lexicon: transcription lines whose first field is in the vocabulary.
gawk 'FILENAME == ARGV[1] { words[$1]; next }
      ($1 in words) && !/<.?s>/' \
  "$vocab" "$transcription" > "$locdict/lexicon.txt"

# Report the sizes of both outputs.
wc -l "$locdict/vocab-oov.txt"
wc -l "$locdict/lexicon.txt"