Blame view
egs/zeroth_korean/s5/local/update_segmentation.sh
975 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
#!/bin/bash
# Copyright 2017 Lucas Jo (Atlas Guide)
# Apache 2.0
#
# Re-segment the transcripts of a data directory with Morfessor.
# Do this when the segmentation rule is changed.
#
# Usage: update_segmentation.sh <data-dir> <lm-dir>
#   <data-dir>  directory containing the transcription file "text"; the first
#               whitespace-separated field of each line is kept unchanged and
#               the remainder of the line is re-segmented
#   <lm-dir>    directory containing the Morfessor model "zeroth_morfessor.seg"
#
# Side effects:
#   <data-dir>/text.old  copy of the transcripts as they were before this run
#                        (overwritten on every run)
#   <data-dir>/text      replaced by the re-segmented transcripts, but only
#                        after segmentation succeeded and was validated
#
# Exit status: 0 on success, 1 on any error (the original text is left intact).

# Without pipefail the pipeline status would be that of its last stage only,
# hiding a morfessor failure.
set -o pipefail

if [ $# -lt 2 ]; then
  echo "Usage: $0 <data-dir> <lm-dir>" >&2
  exit 1
fi

dataDir=$1
lmDir=$2

# Succeeds when the command named by $1 can be found by the shell.
exists(){
  command -v "$1" >/dev/null 2>&1
}

# check morfessor installation
if ! exists morfessor; then
  echo "You appear to not have Morfessor installed, either on your path." >&2
  echo "See tools/extras/install_morfessor.sh installation instructions." >&2
  exit 1
fi

trans=$dataDir/text
model=$lmDir/zeroth_morfessor.seg
echo "Re-segment transcripts: $trans --------------------------------------------"
if [ ! -f "$trans" ]; then
  echo "transcription file is not found in $dataDir" >&2
  exit 1
fi
# Check the model up front so a wrong <lm-dir> fails before anything is written.
if [ ! -f "$model" ]; then
  echo "morfessor model is not found: $model" >&2
  exit 1
fi

# Intermediate files live next to the transcript; remove them on every exit path.
index_tmp=${trans}_tmp_index
seg_tmp=${trans}_tmp_seg
trap 'rm -f -- "$index_tmp" "$seg_tmp"' EXIT

# Keep a backup, then split each line into its first field (index) and the rest.
cp -- "$trans" "$trans.old" || exit 1
awk '{print $1}' "$trans.old" > "$index_tmp" || exit 1

# Normalise whitespace in the text part and segment it. The result goes to a
# temporary file so that a failure cannot clobber the live transcript.
# NOTE(review): --nosplit-re is presumably what keeps digits, brackets, Latin
# letters and punctuation from being split apart -- confirm against the
# Morfessor documentation before changing the pattern.
if ! cut -d' ' -f2- "$trans.old" |
  sed -E 's/\s+/ /g; s/^\s//g; s/\s$//g' |
  morfessor -e 'utf-8' -l "$model" -T - -o - \
    --output-format '{analysis} ' --output-newlines \
    --nosplit-re '[0-9\[\]\(\){}a-zA-Z&.,\-]+' \
    > "$seg_tmp"; then
  echo "morfessor segmentation failed; $trans is left unchanged" >&2
  exit 1
fi

# paste joins the two files line by line, so differing line counts would pair
# indices with the wrong text without any error. Refuse to continue instead.
num_index=$(awk 'END { print NR }' "$index_tmp")
num_seg=$(awk 'END { print NR }' "$seg_tmp")
if [ "$num_index" -ne "$num_seg" ]; then
  echo "line count mismatch after segmentation ($num_index indices vs" \
    "$num_seg segmented lines); $trans is left unchanged" >&2
  exit 1
fi

paste -d" " "$index_tmp" "$seg_tmp" > "$trans" || exit 1