qcri_buckwalter2utf8.sh
423 Bytes
#!/bin/bash
# Copyright 2018 John Morgan
# Apache 2.0.
# Rewrite the word column of qcri.txt with the output of
# local/buckwalter2unicode.py and print the result to stdout.
#
# Input:  ./qcri.txt, one entry per line, fields separated by single spaces;
#         field 1 is the word, fields 2.. are the pronunciation.
# Output: one line per entry on stdout: converted word, a TAB (paste's
#         default delimiter), then the pronunciation fields unchanged.
# Must be run from the directory that holds qcri.txt and local/.
# NOTE(review): presumably the converter maps Buckwalter transliteration to
# UTF-8 text (per its name); confirm against local/buckwalter2unicode.py.

# Abort on the first failing step so a broken conversion is never pasted
# together with the pronunciations and reported as success.
set -euo pipefail

input=qcri.txt

# Keep intermediates in a private directory instead of fixed names in the
# current directory: nothing pre-existing is overwritten, concurrent runs do
# not collide, and the EXIT trap removes them on every exit path.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/qcri_buckwalter2utf8.XXXXXX")
trap 'rm -rf -- "${tmpdir:?}"' EXIT

words_buckwalter="${tmpdir}/qcri_words_buckwalter.txt"
words_utf8="${tmpdir}/qcri_words_utf8.txt"
prons="${tmpdir}/qcri_prons.txt"

# write separate files for word and pronunciation fields
cut -d " " -f 1 "$input" > "$words_buckwalter"
cut -d " " -f 2- "$input" > "$prons"

# convert words to utf8
local/buckwalter2unicode.py -i "$words_buckwalter" -o "$words_utf8"

# re-join the converted words with their pronunciations, line by line
paste "$words_utf8" "$prons"