Blame view
egs/farsdat/s5/local/farsdat_norm_trans.sh
3.54 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
#!/bin/bash

# Copyright 2014  University of Tehran (Author: Bagher BabaAli)
# Apache 2.0.

# Normalizes a phonetic transcription file in which every line holds an
# utterance ID followed by the whitespace-separated transcript symbols:
#
#   <utt-id> <sym1> <sym2> ... <symN>
#
# Usage: farsdat_norm_trans.sh <transcription-file>
#   Exactly one argument is accepted and there are no options.  Passing "-"
#   reads the transcription from stdin (this is cat's handling of "-").
#
# For every input line the script writes one output line to stdout, with each
# field followed by a single space:
#   1. The utterance ID (first field) is copied unchanged.
#   2. Each marker symbol  \ ` - = @ * ! & ^ # $ (  is replaced by the phone
#      symbol paired with it in the table below; if the marker is directly
#      followed by that very phone, the marker is dropped instead, so the
#      phone is not emitted twice.
#   3. In every transcript symbol, "c" becomes "k", ";" becomes "g" and "j"
#      becomes "sil".
#
# Exit status: 1 on wrong argument count; non-zero if reading the input or
# the awk stage fails; 0 otherwise.

# pipefail: a failing `cat` (e.g. missing input file) must not be hidden by
# the exit status of the last pipeline stage.
set -euo pipefail

if [ $# -ne 1 ]; then
  # Diagnostics go to stderr: stdout is the data stream and is usually piped
  # straight into a file by the caller.
  echo "Argument should be a transcription file in a format where each line contains an utterance ID followed by the transcript." >&2
  exit 1
fi

# `cat` is kept on purpose: it preserves "-" as stdin and avoids awk treating
# a path that contains "=" as a var=value assignment operand.
cat -- "$1" | awk '
BEGIN {
  # marker symbol -> phone symbol emitted in its place
  phone_for["\\"] = "p"
  phone_for["`"]  = "b"
  phone_for["-"]  = "t"
  phone_for["="]  = "d"
  phone_for["@"]  = "c"
  phone_for["*"]  = "k"
  phone_for["!"]  = ";"
  phone_for["&"]  = "g"
  phone_for["^"]  = "q"
  phone_for["#"]  = ","
  phone_for["$"]  = "\047"   # \047 is the apostrophe character
  phone_for["("]  = "]"
}
{
  # Field 1 is the utterance ID: pass it through untouched so that the
  # c / ; / j rewrites below can never alter it.
  if (NF > 0) {
    printf("%s ", $1)
  }

  for (i = 2; i <= NF; ++i) {
    sym = $i

    if (sym in phone_for) {
      # Drop the marker when the next field already is its paired phone.
      # NOTE(review): because of the (i + 1 != NF) test, a marker in the
      # second-to-last field is always replaced, even when the last field is
      # its paired phone, so that phone then appears twice.  Possibly
      # (i != NF) was intended -- confirm before changing, as it alters the
      # emitted phone sequences.
      if ((i + 1 != NF) && ($(i + 1) == phone_for[sym])) {
        continue
      }
      sym = phone_for[sym]
    }

    # Merge symbols: c -> k, ; -> g, j -> sil (applied after the marker
    # replacement, so a "c" or ";" produced above is merged as well).
    gsub(/c/, "k", sym)
    gsub(/;/, "g", sym)
    gsub(/j/, "sil", sym)

    printf("%s ", sym)
  }

  # Terminate the utterance so the output keeps one utterance per line.
  printf("\n")
}'