Blame view
egs/callhome_egyptian/s5/local/callhome_prepare_script_dict.py
838 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
#!/usr/bin/env python
"""Rewrite a romanized word lexicon so that its head words are in script form.

Usage:
    callhome_prepare_script_dict.py <lexicon-dir> <word-lexicon> <out-lexicon>

<lexicon-dir> must contain ``callhome_arabic_lexicon_991012/ar_lex.v07``, an
ISO-8859-6 file whose first two tab-separated fields are the romanized form
and the script form of a word.  <word-lexicon> is a space-separated lexicon
whose first field is a romanized word.  <out-lexicon> receives the same
entries, UTF-8 encoded, one per line, with the first field replaced by its
script form whenever the romanized word is known; other entries are copied
through with their first field unchanged.
"""

import codecs
import os
import sys

# Path of the romanized-to-script lexicon, relative to <lexicon-dir>.
LEXICON_RELATIVE_PATH = "callhome_arabic_lexicon_991012/ar_lex.v07"
LEXICON_ENCODING = "iso-8859-6"

# NOTE(review): the word lexicon is assumed to be UTF-8 (which covers plain
# ASCII); confirm against whatever produces it.
WORD_LEXICON_ENCODING = "utf-8"
OUTPUT_ENCODING = "utf-8"


def load_roman_to_script(lexicon_location):
    """Return a dict mapping romanized words to their script form.

    lexicon_location is the directory that holds LEXICON_RELATIVE_PATH; it
    may be given with or without a trailing path separator.  Lines with
    fewer than two tab-separated fields (for example blank lines) are
    skipped.  If a romanized word occurs more than once, the last
    occurrence wins.
    """
    lexicon_path = os.path.join(lexicon_location, LEXICON_RELATIVE_PATH)
    roman_to_script = {}
    with codecs.open(lexicon_path, encoding=LEXICON_ENCODING) as lexicon:
        for line in lexicon:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                # Nothing to map; indexing fields[1] would raise IndexError.
                continue
            roman_to_script[fields[0]] = fields[1]
    return roman_to_script


def convert_word_lexicon(word_lexicon_loc, out_lexicon_loc, roman_to_script):
    """Copy the word lexicon to out_lexicon_loc with script-form head words.

    Each input line is stripped and split on single spaces.  The first
    field is replaced by roman_to_script[field] when present; all other
    fields are kept as they are.  Every entry is written on its own line.
    """
    with codecs.open(word_lexicon_loc,
                     encoding=WORD_LEXICON_ENCODING) as word_lexicon:
        with codecs.open(out_lexicon_loc, "w",
                         encoding=OUTPUT_ENCODING) as out_lexicon:
            for line in word_lexicon:
                fields = line.strip().split(" ")
                fields[0] = roman_to_script.get(fields[0], fields[0])
                # Newline-terminate each entry so the output stays a
                # line-oriented lexicon like the input.
                out_lexicon.write(" ".join(fields) + "\n")


def main(argv=None):
    """Run the conversion; return a process exit status (0 on success)."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write(
            "Usage: %s <lexicon-dir> <word-lexicon> <out-lexicon>\n"
            % (argv[0] if argv else "callhome_prepare_script_dict.py"))
        return 1

    lexicon_location, word_lexicon_loc, out_lexicon_loc = argv[1:4]

    # First create a map from the romanized to the script version.
    roman_to_script = load_roman_to_script(lexicon_location)

    # Now read the word lexicon and write out a script lexicon.
    convert_word_lexicon(word_lexicon_loc, out_lexicon_loc, roman_to_script)
    return 0


if __name__ == "__main__":
    sys.exit(main())