Commit 1fd315c89e313c446c6f6ecf3a4ea98ac859ac88
1 parent
0bf609bcce
Exists in
master
add Extract audio and check results scripts
Showing 8 changed files with 247 additions and 43 deletions Inline Diff
main_tools/CheckResults.sh
File was created | 1 | #!/bin/bash | |
2 | |||
3 | ################################# | ||
4 | # File : CheckResults.sh # | ||
5 | # Brief : Check the results # | ||
6 | # Author : Jean-François Rey # | |
7 | # Date : 30/07/2013 # | ||
8 | # Version : 1.0 # | ||
9 | ################################# | ||
10 | |||
11 | |||
12 | # CheckDir( dir ) | ||
13 | # dir : a directory path | ||
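14 | # Brief : Print one tab-separated summary line for $dir: number of PLP/*.plp, res_p1/*.res, res_p2/*.treil and res_p3/*.treil files, then the status of the res_p2 and res_p3 usf files (NAN = file missing, ERR = more than 49% of confidence values are 0.600 or none found, OK otherwise) | |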
15 | function CheckDir() | ||
16 | { | ||
17 | if [ $# -eq 1 ] | ||
18 | then | ||
19 | plp=$(ls $1/PLP/*.plp 2> /dev/null | wc -l) | ||
20 | res_p1=$(ls $1/res_p1/*.res 2> /dev/null | wc -l) | ||
21 | res_p2=$(ls $1/res_p2/*.treil 2> /dev/null | wc -l) | ||
22 | res_p3=$(ls $1/res_p3/*.treil 2> /dev/null | wc -l) | ||
23 | usf_p2=$1/$(basename $1).res_p2.usf | ||
24 | usf_p3=$1/$(basename $1).res_p3.usf | ||
25 | |||
26 | if [ -e $usf_p2 ] | ||
27 | then | ||
28 | casix=`grep -c -E 'confidence="0.600"' ${usf_p2}` | ||
29 | call=`grep -c -E 'confidence=' ${usf_p2}` | ||
30 | if [ $call -eq 0 ] | ||
31 | then | ||
32 | pourcentage_p2=100 | ||
33 | else | ||
34 | pourcentage_p2=$((($casix*100)/$call)) | ||
35 | fi | ||
36 | if [ $pourcentage_p2 -gt 49 ] | ||
37 | then | ||
38 | usf2="ERR" | ||
39 | else | ||
40 | usf2="OK" | ||
41 | fi | ||
42 | else | ||
43 | usf2="NAN" | ||
44 | fi | ||
45 | if [ -e $usf_p3 ] | ||
46 | then | ||
47 | casix=`grep -c -E 'confidence="0.600"' ${usf_p3}` | ||
48 | call=`grep -c -E 'confidence=' ${usf_p3}` | ||
49 | if [ $call -eq 0 ] | ||
50 | then | ||
51 | pourcentage_p3=100 | ||
52 | else | ||
53 | pourcentage_p3=$((($casix*100)/$call)) | ||
54 | fi | ||
55 | if [ $pourcentage_p3 -gt 49 ] | ||
56 | then | ||
57 | usf3="ERR" | ||
58 | else | ||
59 | usf3="OK" | ||
60 | fi | ||
61 | else | ||
62 | usf3="NAN" | ||
63 | fi | ||
64 | fi | ||
65 | echo -e "$(basename $1)\t$plp\t$res_p1\t$res_p2\t\t$res_p3\t\t$usf2\t$usf3" | ||
66 | |||
67 | } | ||
68 | |||
69 | |||
70 | # Check OTMEDIA_HOME env var | ||
71 | if [ -z ${OTMEDIA_HOME} ] | ||
72 | then | ||
73 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | ||
74 | export OTMEDIA_HOME=$OTMEDIA_HOME | ||
75 | fi | ||
76 | |||
77 | # where is CheckResults.sh | ||
78 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | ||
79 | |||
80 | |||
81 | RECURSIVE=0 | ||
82 | |||
83 | #---------------# | ||
84 | # Parse Options # | ||
85 | #---------------# | ||
86 | while getopts ":hr" opt | ||
87 | do | ||
88 | case $opt in | ||
89 | h) | ||
90 | echo -e "$0 [OPTIONS] <DIRECTORY>\n" | ||
91 | echo -e "\t Options:" | ||
92 | echo -e "\t\t-h :\tprint this message" | ||
93 | echo -e "\t\t-r :\trecursive mode" | ||
94 | exit 1 | ||
95 | ;; | ||
96 | r) | ||
97 | RECURSIVE=1 | ||
98 | ;; | ||
99 | :) | ||
100 | echo "Option -$OPTARG requires an argument." >&2 | ||
101 | exit 1 | ||
102 | ;; | ||
103 | \?) | ||
104 | echo "BAD USAGE : unknow opton -$OPTARG" | ||
105 | exit 1 | ||
106 | ;; | ||
107 | esac | ||
108 | done | ||
109 | |||
110 | # Check USAGE by arguments number | ||
111 | if [ $(($#-($OPTIND-1))) -ne 1 ] | ||
112 | then | ||
113 | echo "BAD USAGE : FirstPass.sh [OPTIONS] <DIRECTORY>" | ||
114 | echo "$0 -h for more info" | ||
115 | exit 1 | ||
116 | fi | ||
117 | |||
118 | shift $((OPTIND-1)) | ||
119 | # check Directory - First argument | ||
120 | if [ -e $1 ] && [ -s $1 ] | ||
121 | then | ||
122 | DIR=$(readlink -e $1) | ||
123 | else | ||
124 | echo "ERROR : can't open directory $1" | ||
125 | exit 1 | ||
126 | fi | ||
127 | |||
128 | |||
129 | # Check directory results | ||
130 | echo -e "Directory name\t\t#plp\t#res_p1\t#treil_p2\t#treil_p3\tusf_p2\tusf_p3" | ||
131 | if [ $RECURSIVE -eq 0 ] | ||
132 | then | ||
133 | CheckDir "$DIR" | ||
134 | else | ||
135 | for d in `ls $DIR` | ||
136 | do | ||
137 | if [ -d "$DIR/$d" ]; then CheckDir "$DIR/$d"; fi | ||
138 | done | ||
139 | fi | ||
140 | |||
141 | |||
142 |
main_tools/ConfPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ConfPass.sh # | 4 | # File : ConfPass.sh # |
5 | # Brief : Process the ASR Confidence pass # | 5 | # Brief : Process the ASR Confidence pass # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (based on Emmanuel Ferreira # | 7 | # (based on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.0 # | 9 | # Version : 1.0 # |
10 | # Date : 17/06/13 # | 10 | # Date : 17/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
13 | #Check OTMEDIA_HOME env var | 13 | #Check OTMEDIA_HOME env var |
14 | if [ -z ${OTMEDIA_HOME} ] | 14 | if [ -z ${OTMEDIA_HOME} ] |
15 | then | 15 | then |
16 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 16 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
17 | export OTMEDIA_HOME=$OTMEDIA_HOME | 17 | export OTMEDIA_HOME=$OTMEDIA_HOME |
18 | fi | 18 | fi |
19 | 19 | ||
20 | 20 | ||
21 | # where is ConfPass.sh | 21 | # where is ConfPass.sh |
22 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 22 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
23 | 23 | ||
24 | # Scripts Path | 24 | # Scripts Path |
25 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 25 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
26 | 26 | ||
27 | # Include scripts | 27 | # Include scripts |
28 | . $SCRIPT_PATH"/Tools.sh" | 28 | . $SCRIPT_PATH"/Tools.sh" |
29 | . $SCRIPT_PATH"/CheckConfPass.sh" | 29 | . $SCRIPT_PATH"/CheckConfPass.sh" |
30 | 30 | ||
31 | # where is ConfPass.cfg | 31 | # where is ConfPass.cfg |
32 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" | 32 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" |
33 | if [ -e $CONFPASS_CONFIG_FILE ] | 33 | if [ -e $CONFPASS_CONFIG_FILE ] |
34 | then | 34 | then |
35 | . $CONFPASS_CONFIG_FILE | 35 | . $CONFPASS_CONFIG_FILE |
36 | else | 36 | else |
37 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2 | 37 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2 |
38 | exit 1 | 38 | exit 1 |
39 | fi | 39 | fi |
40 | 40 | ||
41 | #---------------# | 41 | #---------------# |
42 | # Parse Options # | 42 | # Parse Options # |
43 | #---------------# | 43 | #---------------# |
44 | while getopts ":hDv:cr" opt | 44 | while getopts ":hDv:cr" opt |
45 | do | 45 | do |
46 | case $opt in | 46 | case $opt in |
47 | h) | 47 | h) |
48 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" | 48 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" |
49 | echo -e "\t Options:" | 49 | echo -e "\t Options:" |
50 | echo -e "\t\t-h :\tprint this message" | 50 | echo -e "\t\t-h :\tprint this message" |
51 | echo -e "\t\t-D :\tDEBUG mode on" | 51 | echo -e "\t\t-D :\tDEBUG mode on" |
52 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 52 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
53 | echo -e "\t\t-c :\t Check process, stop if error detected" | 53 | echo -e "\t\t-c :\t Check process, stop if error detected" |
54 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" | 54 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" |
55 | exit 1 | 55 | exit 1 |
56 | ;; | 56 | ;; |
57 | D) | 57 | D) |
58 | DEBUG=1 | 58 | DEBUG=1 |
59 | ;; | 59 | ;; |
60 | v) | 60 | v) |
61 | VERBOSE=$OPTARG | 61 | VERBOSE=$OPTARG |
62 | ;; | 62 | ;; |
63 | c) | 63 | c) |
64 | CHECK=1 | 64 | CHECK=1 |
65 | ;; | 65 | ;; |
66 | r) | 66 | r) |
67 | RERUN=1 | 67 | RERUN=1 |
68 | ;; | 68 | ;; |
69 | :) | 69 | :) |
70 | echo "Option -$OPTARG requires an argument." >&2 | 70 | echo "Option -$OPTARG requires an argument." >&2 |
71 | exit 1 | 71 | exit 1 |
72 | ;; | 72 | ;; |
73 | \?) | 73 | \?) |
74 | echo "BAD USAGE : unknow opton -$OPTARG" | 74 | echo "BAD USAGE : unknow opton -$OPTARG" |
75 | #exit 1 | 75 | #exit 1 |
76 | ;; | 76 | ;; |
77 | esac | 77 | esac |
78 | done | 78 | done |
79 | 79 | ||
80 | # mode debug enable | 80 | # mode debug enable |
81 | if [ $DEBUG -eq 1 ] | 81 | if [ $DEBUG -eq 1 ] |
82 | then | 82 | then |
83 | set -x | 83 | set -x |
84 | echo -e "## Mode DEBUG ON ##" | 84 | echo -e "## Mode DEBUG ON ##" |
85 | REDIRECTION_OUTPUT="" | ||
86 | else | ||
87 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
85 | fi | 88 | fi |
86 | 89 | ||
87 | # mode verbose enable | 90 | # mode verbose enable |
88 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | 91 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
89 | 92 | ||
90 | # Check USAGE by arguments number | 93 | # Check USAGE by arguments number |
91 | if [ $(($#-($OPTIND-1))) -ne 2 ] | 94 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
92 | then | 95 | then |
93 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" | 96 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" |
94 | echo "$0 -h for more info" | 97 | echo "$0 -h for more info" |
95 | exit 1 | 98 | exit 1 |
96 | fi | 99 | fi |
97 | 100 | ||
98 | shift $((OPTIND-1)) | 101 | shift $((OPTIND-1)) |
99 | # check input directory - first argument | 102 | # check input directory - first argument |
100 | if [ ! -e $1 ] | 103 | if [ ! -e $1 ] |
101 | then | 104 | then |
102 | print_error "can't open $1" | 105 | print_error "can't open $1" |
103 | exit 1 | 106 | exit 1 |
104 | fi | 107 | fi |
105 | # check treil input directory - second argument | 108 | # check treil input directory - second argument |
106 | if [ ! -e $1/$2 ] | 109 | if [ ! -e $1/$2 ] |
107 | then | 110 | then |
108 | print_error "can't open $1/$2" | 111 | print_error "can't open $1/$2" |
109 | exit 1 | 112 | exit 1 |
110 | fi | 113 | fi |
111 | 114 | ||
112 | #-------------# | 115 | #-------------# |
113 | # GLOBAL VARS # | 116 | # GLOBAL VARS # |
114 | #-------------# | 117 | #-------------# |
115 | INPUT_DIR=$(readlink -e $1) | 118 | INPUT_DIR=$(readlink -e $1) |
116 | OUTPUT_DIR=$INPUT_DIR | 119 | OUTPUT_DIR=$INPUT_DIR |
117 | BASENAME=$(basename $OUTPUT_DIR) | 120 | BASENAME=$(basename $OUTPUT_DIR) |
118 | RES_NAME=$2 | 121 | RES_NAME=$2 |
119 | RES_P="${INPUT_DIR}/${RES_NAME}" | 122 | RES_P="${INPUT_DIR}/${RES_NAME}" |
120 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf | 123 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf |
121 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" | 124 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" |
122 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" | 125 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" |
123 | LOGFILE="$(dirname $OUTPUT_DIR)/info_conf.log" | 126 | LOGFILE="$(dirname $OUTPUT_DIR)/info_conf.log" |
124 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_conf.log" | 127 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_conf.log" |
125 | 128 | ||
126 | 129 | ||
127 | #------------------# | 130 | #------------------# |
128 | # Create Workspace # | 131 | # Create Workspace # |
129 | #------------------# | 132 | #------------------# |
130 | # Lock directory | 133 | # Lock directory |
131 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] | 134 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] |
132 | then | 135 | then |
133 | print_info "Confpass lock $INPUT_DIR -> exit" 1 | 136 | print_info "Confpass lock $INPUT_DIR -> exit" 1 |
134 | exit 1 | 137 | exit 1 |
135 | fi | 138 | fi |
136 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 | 139 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 |
137 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 | 140 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 |
138 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi | 141 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi |
139 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi | 142 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi |
140 | mkdir -p $CONF_DIR | 143 | mkdir -p $CONF_DIR > /dev/null 2>&1 |
141 | mkdir -p $RES_CONF_DIR | 144 | mkdir -p $RES_CONF_DIR > /dev/null 2>&1 |
142 | 145 | ||
143 | #---------------# | 146 | #---------------# |
144 | # Check Pass # | 147 | # Check Pass # |
145 | #---------------# | 148 | #---------------# |
146 | 149 | ||
147 | # if usf contains more than 40% of 0.600 confidence -> usf error | 150 | # if usf contains more than 40% of 0.600 confidence -> usf error |
148 | if [ -s $USF_FILE ] | 151 | if [ -s $USF_FILE ] |
149 | then | 152 | then |
150 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") | 153 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") |
151 | conftoother=$(grep -c -v -E 'confidence="0.600"' "${USF_FILE}") | 154 | conftoother=$(grep -c -v -E 'confidence="0.600"' "${USF_FILE}") |
152 | if [ $conftoother -gt 0 ] | 155 | if [ $conftoother -gt 0 ] |
153 | then | 156 | then |
154 | pourcentageofzerosix=$((($conftozerosix*100)/$conftoother)) | 157 | pourcentageofzerosix=$((($conftozerosix*100)/$conftoother)) |
155 | if [ $pourcentageofzerosix -gt 40 ] | 158 | if [ $pourcentageofzerosix -gt 40 ] |
156 | then | 159 | then |
157 | print_warn "${USF_FILE} got $pourcentageofzerosix% of 0.6 confidence" 1 | 160 | print_warn "${USF_FILE} got $pourcentageofzerosix% of 0.6 confidence" 1 |
158 | mv "${USF_FILE}" "${USF_FILE}.back" | 161 | mv "${USF_FILE}" "${USF_FILE}.back" |
159 | rm -r $CONF_DIR > /dev/null 2>&1 | 162 | rm -r $CONF_DIR > /dev/null 2>&1 |
160 | else | 163 | else |
161 | print_warn "${USF_FILE} already done, skipping it" 1 | 164 | print_warn "${USF_FILE} already done, skipping it" 1 |
162 | exit 0 | 165 | exit 0 |
163 | fi | 166 | fi |
164 | fi | 167 | fi |
165 | fi | 168 | fi |
166 | 169 | ||
167 | #------# | 170 | #------# |
168 | # Save # | 171 | # Save # |
169 | #------# | 172 | #------# |
170 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg | 173 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg |
171 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 174 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
172 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 175 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
173 | 176 | ||
174 | #--------------------# | 177 | #--------------------# |
175 | # CONFIDENCE MEASURE # | 178 | # CONFIDENCE MEASURE # |
176 | #--------------------# | 179 | #--------------------# |
177 | 180 | ||
178 | # Check the percentage of scored_ctm res files already done; if < 85%, run the confidence measure | 181 | # Check the percentage of scored_ctm res files already done; if < 85%, run the confidence measure |
179 | nbres_p=$(ls ${RES_P}/*.treil | wc -l) | 182 | nbres_p=$(ls ${RES_P}/*.treil | wc -l) |
180 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 183 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) |
181 | if [ $nbres_p -gt 0 ] | 184 | if [ $nbres_p -gt 0 ] |
182 | then | 185 | then |
183 | pourcentageres=$((($nbconf*100)/$nbres_p)) | 186 | pourcentageres=$((($nbconf*100)/$nbres_p)) |
184 | if [ $pourcentageres -lt 85 ] | 187 | if [ $pourcentageres -lt 85 ] |
185 | then | 188 | then |
186 | print_info "Calcul Confidence $INPUT_DIR $RES_NAME" 1 | 189 | print_info "Calcul Confidence $INPUT_DIR $RES_NAME" 1 |
187 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME | 190 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME $REDIRECTION_OUTPUT |
191 | |||
188 | else | 192 | else |
189 | print_info "skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 | 193 | print_info "skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 |
190 | fi | 194 | fi |
191 | fi | 195 | fi |
192 | 196 | ||
193 | ### Check scored_ctm number res files ! | 197 | ### Check scored_ctm number res files ! |
194 | if [ $CHECK -eq 1 ] | 198 | if [ $CHECK -eq 1 ] |
195 | then | 199 | then |
196 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 200 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) |
197 | if [ $nbres_p -ne $nbconf ];then echo "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" >> $LOGFILE;fi | 201 | if [ $nbres_p -ne $nbconf ];then echo "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" >> $LOGFILE;fi |
198 | fi | 202 | fi |
199 | 203 | ||
200 | #---------------------------# | 204 | #---------------------------# |
201 | # FROM RES WITH CONF => USF # | 205 | # FROM RES WITH CONF => USF # |
202 | #---------------------------# | 206 | #---------------------------# |
203 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done | 207 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done |
204 | # create USF configuration file | 208 | # create USF configuration file |
205 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg | 209 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg |
206 | # create USF file | 210 | # create USF file |
207 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | 211 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg $REDIRECTION_OUTPUT |
208 | rm $OUTPUT_DIR/$BASENAME.usf_cfg | 212 | rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 |
209 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE | 213 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE |
210 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf | 214 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf |
211 | rm $USF_FILE.tmp | 215 | rm $USF_FILE.tmp > /dev/null 2>&1 |
212 | 216 | ||
213 | #----------------# | 217 | #----------------# |
214 | # Check USF file # | 218 | # Check USF file # |
215 | #----------------# | 219 | #----------------# |
216 | if [ $CHECK -eq 1 ] | 220 | if [ $CHECK -eq 1 ] |
217 | then | 221 | then |
218 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" | 222 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" |
219 | if [ $? -eq 1 ] | 223 | if [ $? -eq 1 ] |
220 | then | 224 | then |
221 | echo -e "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" >> $ERRORFILE | 225 | echo -e "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" >> $ERRORFILE |
222 | exit 1 | 226 | exit 1 |
223 | fi | 227 | fi |
224 | fi | 228 | fi |
225 | 229 | ||
226 | #-------# | 230 | #-------# |
227 | # CLOSE # | 231 | # CLOSE # |
228 | #-------# | 232 | #-------# |
229 | # Seem OK | 233 | # Seem OK |
230 | print_info "<= End $BASENAME ConfPass | $(date +'%d/%m/%y %H:%M:%S')" 1 | 234 | print_info "<= End $BASENAME ConfPass | $(date +'%d/%m/%y %H:%M:%S')" 1 |
231 | echo -e "ConfPass $BASENAME OK" >> $LOGFILE | 235 | echo -e "ConfPass $BASENAME OK" >> $LOGFILE |
232 | 236 | ||
233 | # unlock directory | 237 | # unlock directory |
234 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" | 238 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" |
235 | 239 | ||
236 | 240 |
main_tools/ExploitConfidencePass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ExploitConfidencePass.sh # | 4 | # File : ExploitConfidencePass.sh # |
5 | # Brief : Exploit the ASR confidence pass to : # | 5 | # Brief : Exploit the ASR confidence pass to : # |
6 | # -> boost the confident zone # | 6 | # -> boost the confident zone # |
7 | # -> find alternative in non confident zone | 7 | # -> find alternative in non confident zone |
8 | # -> dynamically extend the lexicon # | 8 | # -> dynamically extend the lexicon # |
9 | # Author : Jean-François Rey # | 9 | # Author : Jean-François Rey # |
10 | # (based on Emmanuel Ferreira # | 10 | # (based on Emmanuel Ferreira # |
11 | # and Hugo Mauchrétien works) # | 11 | # and Hugo Mauchrétien works) # |
12 | # Version : 1.0 # | 12 | # Version : 1.0 # |
13 | # Date : 25/06/13 # | 13 | # Date : 25/06/13 # |
14 | ##################################################### | 14 | ##################################################### |
15 | 15 | ||
16 | # Check OTMEDIA_HOME env var | 16 | # Check OTMEDIA_HOME env var |
17 | if [ -z ${OTMEDIA_HOME} ] | 17 | if [ -z ${OTMEDIA_HOME} ] |
18 | then | 18 | then |
19 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 19 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
20 | export OTMEDIA_HOME=$OTMEDIA_HOME | 20 | export OTMEDIA_HOME=$OTMEDIA_HOME |
21 | fi | 21 | fi |
22 | 22 | ||
23 | # where is ExploitConfidencePass.sh | 23 | # where is ExploitConfidencePass.sh |
24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
25 | 25 | ||
26 | if [ -z ${SCRIPT_PATH} ] | 26 | if [ -z ${SCRIPT_PATH} ] |
27 | then | 27 | then |
28 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 28 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
29 | fi | 29 | fi |
30 | 30 | ||
31 | # Include scripts | 31 | # Include scripts |
32 | . $SCRIPT_PATH"/Tools.sh" | 32 | . $SCRIPT_PATH"/Tools.sh" |
33 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" | 33 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" |
34 | 34 | ||
35 | # where is ExploitConfidencePass.cfg | 35 | # where is ExploitConfidencePass.cfg |
36 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" | 36 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" |
37 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] | 37 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] |
38 | then | 38 | then |
39 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE | 39 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE |
40 | else | 40 | else |
41 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 | 41 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 |
42 | exit 1 | 42 | exit 1 |
43 | fi | 43 | fi |
44 | 44 | ||
45 | #---------------# | 45 | #---------------# |
46 | # Parse Options # | 46 | # Parse Options # |
47 | #---------------# | 47 | #---------------# |
48 | while getopts ":hDv:cf:r" opt | 48 | while getopts ":hDv:cf:r" opt |
49 | do | 49 | do |
50 | case $opt in | 50 | case $opt in |
51 | h) | 51 | h) |
52 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" | 52 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" |
53 | echo -e "\t Options:" | 53 | echo -e "\t Options:" |
54 | echo -e "\t\t-h :\tprint this message" | 54 | echo -e "\t\t-h :\tprint this message" |
55 | echo -e "\t\t-D :\tDEBUG mode on" | 55 | echo -e "\t\t-D :\tDEBUG mode on" |
56 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 56 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
57 | echo -e "\t\t-c :\tCheck process, stop if error detected" | 57 | echo -e "\t\t-c :\tCheck process, stop if error detected" |
58 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | 58 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" |
59 | echo -e "\t\t-r n :\tforce rerun without deleting files" | 59 | echo -e "\t\t-r n :\tforce rerun without deleting files" |
60 | exit 1 | 60 | exit 1 |
61 | ;; | 61 | ;; |
62 | D) | 62 | D) |
63 | DEBUG=1 | 63 | DEBUG=1 |
64 | ;; | 64 | ;; |
65 | v) | 65 | v) |
66 | VERBOSE=$OPTARG | 66 | VERBOSE=$OPTARG |
67 | ;; | 67 | ;; |
68 | c) | 68 | c) |
69 | CHECK=1 | 69 | CHECK=1 |
70 | ;; | 70 | ;; |
71 | f) | 71 | f) |
72 | FORKS="--forks $OPTARG" | 72 | FORKS="--forks $OPTARG" |
73 | ;; | 73 | ;; |
74 | r) | 74 | r) |
75 | RERUN=1 | 75 | RERUN=1 |
76 | ;; | 76 | ;; |
77 | :) | 77 | :) |
78 | echo "Option -$OPTARG requires an argument." >&2 | 78 | echo "Option -$OPTARG requires an argument." >&2 |
79 | exit 1 | 79 | exit 1 |
80 | ;; | 80 | ;; |
81 | \?) | 81 | \?) |
82 | echo "BAD USAGE : unknow opton -$OPTARG" | 82 | echo "BAD USAGE : unknow opton -$OPTARG" |
83 | #exit 1 | 83 | #exit 1 |
84 | ;; | 84 | ;; |
85 | esac | 85 | esac |
86 | done | 86 | done |
87 | 87 | ||
88 | # mode debug enable | 88 | # mode debug enable |
89 | if [ $DEBUG -eq 1 ] | 89 | if [ $DEBUG -eq 1 ] |
90 | then | 90 | then |
91 | set -x | 91 | set -x |
92 | echo -e "## Mode DEBUG ON ##" | 92 | echo -e "## Mode DEBUG ON ##" |
93 | REDIRECTION_OUTPUT="" | ||
94 | else | ||
95 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
93 | fi | 96 | fi |
94 | 97 | ||
95 | # mode verbose enable | 98 | # mode verbose enable |
96 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | 99 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
97 | 100 | ||
98 | # Check USAGE by arguments number | 101 | # Check USAGE by arguments number |
99 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 102 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
100 | then | 103 | then |
101 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" | 104 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" |
102 | echo "$0 -h for more info" | 105 | echo "$0 -h for more info" |
103 | exit 1 | 106 | exit 1 |
104 | fi | 107 | fi |
105 | 108 | ||
106 | shift $((OPTIND-1)) | 109 | shift $((OPTIND-1)) |
107 | # check input directory - first argument | 110 | # check input directory - first argument |
108 | if [ ! -e $1 ] | 111 | if [ ! -e $1 ] |
109 | then | 112 | then |
110 | print_error "can't open $1" | 113 | print_error "can't open $1" |
111 | exit 1 | 114 | exit 1 |
112 | fi | 115 | fi |
113 | 116 | ||
114 | #-------------# | 117 | #-------------# |
115 | # GLOBAL VARS # | 118 | # GLOBAL VARS # |
116 | #-------------# | 119 | #-------------# |
117 | INPUT_DIR=$(readlink -e $1) | 120 | INPUT_DIR=$(readlink -e $1) |
118 | OUTPUT_DIR=$INPUT_DIR | 121 | OUTPUT_DIR=$INPUT_DIR |
119 | BASENAME=$(basename $OUTPUT_DIR) | 122 | BASENAME=$(basename $OUTPUT_DIR) |
120 | SHOW_DIR="$OUTPUT_DIR/shows/" | 123 | SHOW_DIR="$OUTPUT_DIR/shows/" |
121 | SOLR_RES="$OUTPUT_DIR/solr/" | 124 | SOLR_RES="$OUTPUT_DIR/solr/" |
122 | EXT_LEX="$OUTPUT_DIR/LEX/" | 125 | EXT_LEX="$OUTPUT_DIR/LEX/" |
123 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" | 126 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" |
124 | LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" | 127 | LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" |
125 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" | 128 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" |
126 | 129 | ||
127 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" | 130 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" |
128 | if [ -e $CONFPASS_CONFIG_FILE ] | 131 | if [ -e $CONFPASS_CONFIG_FILE ] |
129 | then | 132 | then |
130 | { | 133 | { |
131 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") | 134 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") |
132 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") | 135 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") |
133 | print_warn "Use confidence measure from : $RES_CONF" 1 | 136 | print_warn "Use confidence measure from : $RES_CONF" 1 |
134 | } | 137 | } |
135 | else | 138 | else |
136 | { | 139 | { |
137 | print_error "Can't find $CONFPASS_CONFIG_FILE" 1 | 140 | print_error "Can't find $CONFPASS_CONFIG_FILE" 1 |
138 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" | 141 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" |
139 | RES_CONF="$INPUT_DIR/conf/res_p2" | 142 | RES_CONF="$INPUT_DIR/conf/res_p2" |
140 | } | 143 | } |
141 | fi | 144 | fi |
142 | 145 | ||
143 | mkdir -p $SHOW_DIR | 146 | mkdir -p $SHOW_DIR > /dev/null 2>&1 |
144 | mkdir -p $SOLR_RES | 147 | mkdir -p $SOLR_RES > /dev/null 2>&1 |
145 | mkdir -p $EXT_LEX | 148 | mkdir -p $EXT_LEX > /dev/null 2>&1 |
146 | mkdir -p $TRIGGER_CONFZONE | 149 | mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 |
147 | 150 | ||
148 | #------------------# | 151 | #------------------# |
149 | # Create Workspace # | 152 | # Create Workspace # |
150 | #------------------# | 153 | #------------------# |
151 | # Lock directory | 154 | # Lock directory |
152 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1; fi | 155 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1; fi |
153 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 | 156 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 |
154 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 | 157 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 |
155 | 158 | ||
156 | #------# | 159 | #------# |
157 | # Save # | 160 | # Save # |
158 | #------# | 161 | #------# |
159 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg | 162 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg |
160 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg | 163 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg |
161 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg | 164 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg |
162 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg | 165 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg |
163 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg | 166 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg |
164 | 167 | ||
165 | 168 | ||
166 | #-----------------------# | 169 | #-----------------------# |
167 | # Segmentation by show # | 170 | # Segmentation by show # |
168 | #-----------------------# | 171 | #-----------------------# |
# Steps of this section:
#   - create a txt file from the scored res files
#   - tag POS and lemmatize the txt file
#   - merge the scored res and the taglem file
#   - segment using the last generated file
#   - create one ctm file per show

print_info "Segmentation by show" 1

# REDIRECTION_OUTPUT holds redirection text, but the shell parses redirections
# before it expands variables: putting $REDIRECTION_OUTPUT on a command line only
# hands "2>" and "/dev/null" to the command as extra arguments.
# Turn it into a real stderr target instead (non-empty => quiet mode).
# NOTE(review): assumes the variable only asks to silence stderr, as in
# FirstPass.sh (" 2> /dev/null") -- confirm for this script.
if [ -n "$REDIRECTION_OUTPUT" ]
then
    _stderr_sink=/dev/null
else
    _stderr_sink=/dev/stderr
fi

# -> to txt
print_info "Create txt from scored res" 2
cat "${RES_CONF_DIR}"/*.res > "$INPUT_DIR/$BASENAME.sctm"
cat "$INPUT_DIR/$BASENAME.seg" | "$SIGMUND_BIN/myConvert.pl" "$INPUT_DIR/$BASENAME.sctm" "$INPUT_DIR/$BASENAME.tmp"
cat "$INPUT_DIR/$BASENAME.tmp" \
    | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -f \
    | sed -e "s/_/ /g" \
    | sort -nt 'n' -k '2' > "$INPUT_DIR/$BASENAME.txt"

# -> to tagger + lemme
print_info "Tag pos and lem in txt file" 2
# The .tmp file is reused here: it now holds the ISO_8859-1 version of the txt file
iconv -t ISO_8859-1 "$INPUT_DIR/$BASENAME.txt" > "$INPUT_DIR/$BASENAME.tmp"
"$SIGMUND_BIN/txt2lem.sh" "$INPUT_DIR/$BASENAME.tmp" "$INPUT_DIR/$BASENAME.taglem"

# merge sctm and taglem
print_info "Merge scored ctm with tag pos and lem file" 2
cat "$INPUT_DIR/$BASENAME.sctm" \
    | "$SCRIPT_PATH/BdlexUC.pl" "${RULES}/basic" -f \
    | iconv -t ISO_8859-1 \
    | "$SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl" "$INPUT_DIR/$BASENAME.taglem" > "$INPUT_DIR/$BASENAME.ctl"

# -> new seg
print_info "Create xml file and run Topic Seg" 2
"$SIGMUND_BIN/tagLem2xml.pl" "$INPUT_DIR/$BASENAME.taglem" "$INPUT_DIR/$BASENAME.doc.xml"
rm "$INPUT_DIR/$BASENAME.tmp" #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg : bring together sentences into show
# The xml document is copied to 0.xml in the current working directory before
# the java program runs, and removed afterwards.
cp "$INPUT_DIR/$BASENAME.doc.xml" 0.xml
java -cp "$LIATOPICSEG/bin" Test > "$INPUT_DIR/show.seg"
cat "$INPUT_DIR/show.seg" | "$SIGMUND_BIN/toSegEmiss.pl" "$INPUT_DIR/$BASENAME.show.seg"
rm 0.xml "$INPUT_DIR/show.seg"

if [ "$CHECK" -eq 1 ]
then
    # An empty or missing show segmentation is logged but does not stop the run
    if [ ! -s "$INPUT_DIR/$BASENAME.show.seg" ]; then echo -e "ERROR : no Topic segmentation" >> "$ERRORFILE"; fi
fi

# Segment ctm into several show files and create a seg list by show
print_info "Segment ctm into show files and a seg list by show" 2
"$SCRIPT_PATH/ctm2show.pl" "$INPUT_DIR/$BASENAME.ctl" "$INPUT_DIR/$BASENAME.show.seg" "$SHOW_DIR" 2>> "$_stderr_sink"
211 | 214 | ||
#-----------------------------------------------------------#
# SOLR QUERIES                                              #
# -> Create Confidente Word                                 #
#    Keep conf words and use Tags                           #
# -> Query SOLR (document & multimedia)                     #
#    concat word + add date 2 day before and after the show #
#    query document & multimedia                            #
#-----------------------------------------------------------#
print_info "Create SOLR queries and ASK SOLR" 1

# REDIRECTION_OUTPUT cannot redirect when expanded on a command line (the shell
# parses redirections before expanding variables), so map it to a real stderr
# target: non-empty means "quiet".
# NOTE(review): assumes the variable only asks to silence stderr -- confirm.
if [ -n "$REDIRECTION_OUTPUT" ]
then
    _stderr_sink=/dev/null
else
    _stderr_sink=/dev/stderr
fi

# Iterate with a glob rather than parsing the output of ls
for show in "$SHOW_DIR"/*.ctm
do
    # When nothing matches, the glob stays literal: skip it
    [ -e "$show" ] || continue
    bn=$(basename "$show" .ctm)

    # Remove words with low confidence and keep useful tagger words.
    # Extended regex is required: in a basic regex "{3,5}" is matched literally
    # instead of acting as a repetition count.
    cat "$show" \
        | "$SCRIPT_PATH/KeepConfZone.pl" \
        | grep -E "MOTINC|NMS|NMP|NFS|NFP|X[A-Z]{3,5}" \
        | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"

    # Get date 2 day before and after the show
    # (the first 6 characters of BASENAME are passed to the helper)
    datePattern=$("$SCRIPT_PATH/daybefore2after.sh" "$(echo "$BASENAME" | cut -c1-6)")

    # Create SOLR queries
    cat "$SHOW_DIR/$bn.confzone" \
        | "$SCRIPT_PATH/GenerateSOLRQueries.pl" \
        | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"

    # Ask SOLR DB, only when the generated query is not empty
    if [ "$(wc -w < "$SHOW_DIR/$bn.queries")" -gt 0 ]; then
        query=$(cat "$SHOW_DIR/$bn.queries")"&fq=docDate:[$datePattern]"
        # Left unquoted as in the original: word splitting puts the whole query
        # on a single line.
        # NOTE(review): this also exposes the [..] range to pathname expansion.
        echo $query > "$SHOW_DIR/$bn.queries"
        python "$SCRIPT_PATH/ProcessSOLRQueries.py" "$SHOW_DIR/$bn.queries" "$SOLR_RES/$bn.keywords.tmp" "$SOLR_RES/$bn.txt.tmp" 2>> "$_stderr_sink"
        cat "$SOLR_RES/$bn.keywords.tmp" | sort -u > "$SOLR_RES/$bn.keywords"
        cat "$SOLR_RES/$bn.txt.tmp" | sort -u > "$SOLR_RES/$bn.txt"
        rm "$SOLR_RES"/*.tmp > /dev/null 2>&1
    fi

    if [ "$CHECK" -eq 1 ]
    then
        # The test is on existence: the files are only created inside the branch above
        if [ ! -e "$SOLR_RES/$bn.keywords" ] || [ ! -e "$SOLR_RES/$bn.txt" ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1
        fi
    fi

done
249 | 252 | ||
#-----------------------------------------------------------------------------------------------
# Build trigger file
# 1) keywords are automatically boosted in the non confident zone of the current res
#    confident zone are boosted
#    previous words in sensible zone are penalized
# 2) OOVs are extracted + phonetized
# 3) Try to find OOVs acousticly in the current segment
# 4) Generate the .trigg file
#------------------------------------------------------------------------------------------------
print_info "Build trigger files" 1

# REDIRECTION_OUTPUT cannot redirect when expanded on a command line (the shell
# parses redirections before expanding variables), so map it to a real stderr
# target: non-empty means "quiet".
# NOTE(review): assumes the variable only asks to silence stderr -- confirm.
if [ -n "$REDIRECTION_OUTPUT" ]
then
    _stderr_sink=/dev/null
else
    _stderr_sink=/dev/stderr
fi

# One iteration per show that got keywords back from SOLR
for i in "$SOLR_RES"/*.keywords
do
    # When nothing matches, the glob stays literal: skip it
    [ -e "$i" ] || continue
    basename=$(basename "$i" .keywords)

    #
    # Tokenize & produce coverage report
    # Use filter you need
    #
    print_info "keywords filtering and produce coverage report" 2
    # Default filter
    cat "$i" \
        | "$SCRIPT_PATH/CleanFilter.sh" \
        | "${SCRIPT_PATH}/ApplyCorrectionRules.pl" "${LEXICON}.regex" \
        | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t \
        | "$SCRIPT_PATH/CoverageReportMaker.pl" --out "$SOLR_RES/${basename}_tmp_report" "$LEXICON.bdlex_tok"
    # do less filter
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok


    #
    # Extract "real" OOV and phonetize them
    # -> small custom filtering to avoid too much noise:
    #    drops entries containing '#', all-uppercase entries, entries starting
    #    with a digit and entries of 1 to 3 lowercase letters/apostrophes
    #
    print_info "Extract OOV and phonetize them" 2
    "${SCRIPT_PATH}/FindNormRules.pl" "$SOLR_RES/${basename}_tmp_report/report.oov" "$LEXICON.bdlex_tok" \
        | cut -f3 \
        | grep -v "#" \
        | grep -v "^[A-Z]\+$" \
        | grep -v "^[0-9]" \
        | grep --perl-regex -v "^([a-z']){1,3}$" \
        | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -f \
        | iconv -t ISO_8859-1 -f UTF-8 \
        | "${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante" \
        | grep -v "core dumped" \
        | cut -d"[" -f1 \
        | sort -u \
        | "${SCRIPT_PATH}/PhonFormatter.pl" \
        | iconv -f ISO_8859-1 -t UTF-8 \
        | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$SOLR_RES/${basename}.phon_oov"

    #
    # Search INVOC & OOV in the current lattice
    #
    print_info "Search INVOC and OOV in the current lattice" 2
    # In-vocabulary words first (the stray duplicated -v of the original is dropped) ...
    cat "$SOLR_RES/${basename}_tmp_report/report.invoc" \
        | grep -v "\b0" \
        | cut -f1 \
        | grep --perl-regex -v "^[a-zA-Z']{1,3}$" \
        | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" \
        | grep -v "<s>" \
        | grep -v "</s>" \
        | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$TRIGGER_CONFZONE/$basename.tosearch"
    # ... then the phonetized OOV words are appended to the same search list
    cat "$SOLR_RES/${basename}.phon_oov" | cut -f1 >> "$TRIGGER_CONFZONE/$basename.tosearch"

    # For each treil
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        # The original line ended with $OUTPUT_REDIRECTION, a misspelling of
        # REDIRECTION_OUTPUT; stderr is now really redirected in quiet mode.
        # NOTE(review): this reads $RES_CONF/wlat while the .res files below come
        # from $RES_CONF_DIR -- confirm both variables are intended.
        "$OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder" "${LEXICON}.speer_phon" "$RES_CONF/wlat/$baseseg.wlat" "$TRIGGER_CONFZONE/${basename}.tosearch" "$SOLR_RES/$basename.phon_oov" > "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" 2>> "$_stderr_sink"
        #
        # Produce the boost file for the next decoding pass
        #
        print_info "Produce trigg file : $baseseg " 3
        cat "$RES_CONF_DIR/$baseseg.res" | "$SCRIPT_PATH/ScoreCtm2trigg.pl" "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" > "$TRIGGER_CONFZONE/$baseseg.trigg"
    done

done
302 | 305 | ||
#-----------------------------------------------------------------------------------------------
# Build the extended SPEERAL Lexicon
# 1) Merge OOVs + LEXICON
# 1) Related text are collected in order to find the invoc word with maximizing the ppl (LM proba)
# 2) The current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------------------------
print_info "Build extended Speeral Lexicon" 1
mkdir -p "$EXT_LEX/final"
mkdir -p "$EXT_LEX/tmp"
mkdir -p "$EXT_LEX/tmp/txt"

# REDIRECTION_OUTPUT cannot redirect when expanded on a command line (the shell
# parses redirections before expanding variables): in the original, "2>" and
# "/dev/null" were handed to intersec.pl and MergeLexicon.pl as extra arguments.
# Map it to a real stderr target instead: non-empty means "quiet".
# NOTE(review): assumes the variable only asks to silence stderr -- confirm.
if [ -n "$REDIRECTION_OUTPUT" ]
then
    _stderr_sink=/dev/null
else
    _stderr_sink=/dev/stderr
fi

#
# Collect the acousticly found oov and their phonetisation
#
print_info "Get all OOV and retrieve all phonetisation" 2
for i in "$SOLR_RES"/*.phon_oov
do
    # When nothing matches, the glob stays literal: skip it
    [ -e "$i" ] || continue
    basename=$(basename "$i" .phon_oov)

    # Start from a clean per-show list (it is filled by appending below)
    rm "$EXT_LEX/$basename.acousticlyfound" 2> /dev/null
    # list acousticly found for the show
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        # keep the first tab-separated column, then what follows the first '='
        cat "$TRIGGER_CONFZONE/$baseseg.acousticlyfound" | cut -f1 | cut -f2 -d"=" >> "$EXT_LEX/$basename.acousticlyfound"
    done
    # De-duplicate through a temporary file, then replace the list
    cat "$EXT_LEX/$basename.acousticlyfound" | sort -u > "$EXT_LEX/.tmp"
    mv "$EXT_LEX/.tmp" "$EXT_LEX/$basename.acousticlyfound"

    #
    # Extract OOV really added
    #
    cat "$SOLR_RES/$basename.phon_oov" | cut -f1 | sort -u > "$EXT_LEX/$basename.oov"
    "$SCRIPT_PATH/intersec.pl" "$EXT_LEX/$basename.oov" "$EXT_LEX/$basename.acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound" 2>> "$_stderr_sink"
    #
    # Retrieve all phonetisation
    #
    cat "$SOLR_RES/${basename}.phon_oov" | "$SCRIPT_PATH/LexPhonFilter.pl" "$EXT_LEX/$basename.oov_acousticlyfound" > "$EXT_LEX/$basename.oov_acousticlyfound_phon"
done

#
# Merge OOVs and their phonetisation
#
print_info "Merge OOV and their phonetisation" 2
lexname=$(basename "$LEXICON")
cat "$EXT_LEX"/*.oov_acousticlyfound_phon | sort -u > "$EXT_LEX/final/all.oov_acousticlyfound_phon"
# entries made of exactly 3 lowercase letters/apostrophes are dropped from the merged list
cat "$EXT_LEX"/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > "$EXT_LEX/final/all.oov_acousticlyfound"
"$SCRIPT_PATH/MergeLexicon.pl" "$EXT_LEX/final/all.oov_acousticlyfound_phon" > "$EXT_LEX/final/${lexname}_ext.phon" 2>> "$_stderr_sink"

#
# Collect + clean retrieved txt
#
print_info "Collect and clean SOLR txt answers" 2
# choose filter
# default
cat "$SOLR_RES"/*.txt \
    | "$SCRIPT_PATH/CleanFilter.sh" \
    | "$SCRIPT_PATH/ApplyCorrectionRules.pl" "${LEXICON}.regex" \
    | "$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -t > "$EXT_LEX/final/all.bdlex_txt"
# low filter
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
359 | 362 | ||
#
# Construct the map file
#
# Notes:
# - Expected format :
#   <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
#
# For every acoustically found OOV, either map lines are appended to
# ${lexname}_ext.map or the word is appended to ${lexname}.unvalid_oov.
#
print_info "Construct map file" 2
# Both outputs are filled by appending: remove leftovers first
rm -f "$EXT_LEX/final/${lexname}_ext.map" 2>/dev/null
rm -f "$EXT_LEX/final/${lexname}.unvalid_oov" 2>/dev/null

# -r keeps backslashes of the input lines untouched
while read -r oov
do
    # The sed expression works line by line and never sees a newline; the
    # unquoted echo is what normalizes the whitespace of the word here.
    oov=$(echo $oov | sed "s/\n//g")
    #
    # Obtain the oov's tag
    #
    #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
    #
    # Try to collect text containing the oov word
    #
    cat "$EXT_LEX/final/all.bdlex_txt" | grep --perl-regex " $oov " | "$SCRIPT_PATH/NbMaxWordsFilter.pl" 40 | uniq > "$EXT_LEX/tmp/txt/$oov.bdlex_txt"
    # The redirection above creates the file whenever it succeeds, so the final
    # else branch is only reached when the file could not be created.
    if [ -f "$EXT_LEX/tmp/txt/$oov.bdlex_txt" ]; then
        # number of collected lines
        nbWords=$(wc -l "$EXT_LEX/tmp/txt/$oov.bdlex_txt" | cut -f1 -d" ")
        if [ "$nbWords" -eq 0 ]; then
            echo "UNVALID OOV: $oov => $nbWords occurrences"
            echo "$oov" >> "$EXT_LEX/final/${lexname}.unvalid_oov"
        else
            #
            # Find a candidate in a filtred invoc lexicon => a candidate which maximize the ppl in the overall txt collected
            #
            #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
            candidate=$("$SPEERAL_PATH/bin/getCandidate" "$SPEER_LM_PATH" "$SPEER_LM_BASENAME" "$oov" "$CANDIDATE_LEXICON" "$EXT_LEX/tmp/txt/$oov.bdlex_txt" | cut -f1 -d" ")
            if [ -n "$candidate" ]; then
                # all the phonetisations recorded for this oov
                grep --perl-regex "^$oov\t" "$EXT_LEX/final/all.oov_acousticlyfound_phon" > "$EXT_LEX/tmp/$oov.phon"
                while read -r phonLine
                do
                    #<word> <phon> => <word> <candidate> <phon>
                    # (the candidate is inserted after the first tab of the line)
                    echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> "$EXT_LEX/final/${lexname}_ext.map"
                done < "$EXT_LEX/tmp/$oov.phon"
            else
                echo "UNVALID OOV: $oov => no availaible Candidate word in LM"
                echo "$oov" >> "$EXT_LEX/final/${lexname}.unvalid_oov"
            fi
        fi
    else
        echo "UNVALID OOV: $oov"
        echo "$oov" >> "$EXT_LEX/final/${lexname}.unvalid_oov"
    fi
done < "$EXT_LEX/final/all.oov_acousticlyfound"
410 | 413 | ||
#
### Speeral
#

lexname=$(basename "$LEXICON")

# REDIRECTION_OUTPUT cannot redirect when expanded on a command line (the shell
# parses redirections before expanding variables), so map it to a real stderr
# target: non-empty means "quiet".
# NOTE(review): assumes the variable only asks to silence stderr -- confirm.
if [ -n "$REDIRECTION_OUTPUT" ]
then
    _stderr_sink=/dev/null
else
    _stderr_sink=/dev/stderr
fi

#
# Build the final trigger file
#
print_info "Clean trigg files" 2
mkdir -p "$TRIGGER_CONFZONE/speeral/" 2> /dev/null
mkdir -p "$EXT_LEX/speeral/" 2> /dev/null
for i in "$TRIGGER_CONFZONE"/*.trigg
do
    # When nothing matches, the glob stays literal: skip it
    [ -e "$i" ] || continue
    basename=$(basename "$i" .trigg)
    # The rejected OOV list is written under $EXT_LEX/final/ by the map-file
    # step of this script; the original read it from $EXT_LEX/ directly.
    # NOTE(review): confirm no other stage produces $EXT_LEX/$lexname.unvalid_oov.
    cat "$i" | "$SCRIPT_PATH/RemoveLineContaining.pl" "$EXT_LEX/final/$lexname.unvalid_oov" > "$TRIGGER_CONFZONE/speeral/$basename.trigg"
done
#
# Compile the speeral extended lexicon
#
print_info "Compile Speeral extended lexicon" 2
"$SPEERAL_PATH/bin/buildmappedbinode" "$LEXICON.bdlex_phon" "$EXT_LEX/final/${lexname}_ext.map" "$AM_SKL" "$EXT_LEX/speeral/${lexname}_ext" 2>> "$_stderr_sink"

if [ "$CHECK" -eq 1 ]
then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    # A status of 1 from the check is fatal: log it and stop the script
    if [ $? -eq 1 ]
    then
        echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> "$ERRORFILE"
        exit 1;
    fi
fi
442 | 445 | ||
443 | 446 | ||
#-------#
# CLOSE #
#-------#
# Reaching this point means the pass went through: report the end of the
# run with a timestamp, then record the show in the log file.
end_stamp=$(date +'%d/%m/%y %H:%M:%S')
print_info "<= End $BASENAME Solr | $end_stamp" 1
echo -e "#Solr $BASENAME " >> $LOGFILE

# Release the directory: the lock marker is renamed into an unlock marker
lock_marker="$OUTPUT_DIR/EXPLOITCONFPASS.lock"
unlock_marker="$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
mv "$lock_marker" "$unlock_marker"
453 | 456 | ||
454 | 457 | ||
455 | 458 |
main_tools/FirstPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : FirstPass.sh # | 4 | # File : FirstPass.sh # |
5 | # Brief : ASR first pass and speaker diarization # | 5 | # Brief : ASR first pass and speaker diarization # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (base on Emmanuel Ferreira # | 7 | # (base on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.1 # | 9 | # Version : 1.1 # |
10 | # Date : 18/06/13 # | 10 | # Date : 18/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
# Check OTMEDIA_HOME env var
# When it is not set, it defaults to the parent of the directory holding this
# script. Expansions are quoted so an install path containing spaces still works.
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is FirstPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# scripts path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckFirstPass.sh"

# where is FirstPass.cfg
# The configuration file is mandatory: the script stops when it is missing.
FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg"
if [ -e "$FIRSTPASS_CONFIG_FILE" ]
then
    . "$FIRSTPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2
    exit 1
fi
39 | 39 | ||
#---------------#
# Parse Options #
#---------------#
# The leading ':' in the option string selects silent error reporting: a missing
# argument is delivered as ':' and an unknown option as '?', both with the
# offending letter in OPTARG.
while getopts ":hDv:cf:r" opt
do
    case $opt in
        h)
            # Usage message; note that the help path exits with status 1
            echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
            echo -e "\t\t-r :\tforce rerun the wav file"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            # Stored as a ready-to-use "--forks n" option string
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            # Diagnostics go to stderr, like the missing-argument case above
            echo "BAD USAGE : unknown option -$OPTARG" >&2
            exit 1
            ;;
    esac
done
82 | 82 | ||
# mode debug enable
# DEBUG and VERBOSE are treated as 0 when unset or empty, so the tests below do
# not fail with a "unary operator expected" error.
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
    REDIRECTION_OUTPUT=""
else
    # Outside debug mode the variable carries the text of a stderr redirection.
    # NOTE(review): the shell parses redirections before expanding variables, so
    # a bare $REDIRECTION_OUTPUT on a command line is passed as arguments and
    # redirects nothing -- check how the rest of the script consumes it.
    REDIRECTION_OUTPUT=" 2> /dev/null"
fi

# mode verbose enable
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
95 | 95 | ||
# Check USAGE by arguments number
# Exactly two positional arguments must remain once the options are removed
if [ $(($#-($OPTIND-1))) -ne 2 ]
then
    echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
    echo "$0 -h for more info"
    exit 1
fi

# Drop the parsed options: $1 is now the wav file and $2 the output directory
shift $((OPTIND-1))
# check audio file - First argument
# The file must exist and be non-empty. Arguments are quoted so paths that
# contain spaces are handled as a single word.
if [ -e "$1" ] && [ -s "$1" ]
then
    # absolute path to wav file
    WAV_FILE=$(readlink -e "$1")
    # wav filename
    FILENAME=$(basename "$WAV_FILE")
    # wav filename without extension
    BASENAME=${FILENAME%.*}

    print_info "=> $BASENAME P1 | $(date +'%d/%m/%y %H:%M:%S')" 1
    print_info "$WAV_FILE OK" 1
else
    print_error "can't find $1 OR file is empty"
    exit 1
fi

# check output directory - Second argument
# It is created (with its parents) when it does not exist yet
if [ ! -e "$2" ]
then
    mkdir -p "$2"
    print_info "Make directory $2" 1
fi
128 | 128 | ||
129 | 129 | ||
#-------------#
# GLOBAL VARS #
#-------------#
OUTPUT_DIR=$(readlink -e "$2")                      # Output directory absolute path
# readlink -e prints nothing when the path cannot be resolved; without this
# guard every path below would be rooted at "/".
if [ -z "$OUTPUT_DIR" ]
then
    print_error "can't resolve output directory $2"
    exit 1
fi
OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/"        # New OUTPUT_DIR with BASENAME
PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp"       # Global PLP file
PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/"                 # Segmented PLP files directory
SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg"       # Global Seg file
LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl"       # Global LBL file
RES_DIR="$OUTPUT_DIR_BASENAME/res_p1"               # First pass decoder results
LOGFILE="$OUTPUT_DIR/info_p1.log"                   # Log shared by every file of OUTPUT_DIR
ERRORFILE="$OUTPUT_DIR/error_p1.log"                # Error log shared by every file of OUTPUT_DIR
142 | 142 | ||
#------------------#
# Create WORKSPACE #
#------------------#
if [ ! -e "$OUTPUT_DIR_BASENAME" ]
then
    mkdir -p "$OUTPUT_DIR_BASENAME"
    print_info "Make directory $OUTPUT_DIR_BASENAME" 1
fi

# Lock directory
# An existing FIRSTPASS.lock stops the run (silently) unless RERUN is non-zero.
if [ -e "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" ] && [ "$RERUN" -eq 0 ]; then exit 1; fi
rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1

# Segmented PLP files are always rebuilt from scratch.
# ${VAR:?} aborts the script rather than letting rm -r run on an empty path.
rm -r -- "${PLP_DIR:?}" > /dev/null 2>&1
mkdir -p "$PLP_DIR"
print_info "Make directory $PLP_DIR" 1
if [ "$RERUN" -eq 0 ]
then
    # Fresh run: drop every previous result.
    rm -r -- "${RES_DIR:?}" > /dev/null 2>&1
else
    # Rerun: keep the results, only clear the leftover lock files.
    rm -- "${RES_DIR:?}"/*.lock > /dev/null 2>&1
fi
mkdir -p "$RES_DIR" > /dev/null 2>&1
print_info "Make directory $RES_DIR" 1
168 | 168 | ||
#--------------------#
# Save configuration #
#--------------------#
# Start from a copy of the first pass configuration file, then append the
# values computed for this run, one NAME=value per line.
cp "$FIRSTPASS_CONFIG_FILE" "$OUTPUT_DIR_BASENAME/FirstPass.cfg"
{
    echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH"
    echo "WAV_FILE=$WAV_FILE"
    echo "BASENAME=$BASENAME"
    echo "OUTPUT_DIR=$OUTPUT_DIR"
    echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME"
    echo "PLP_FILE=$PLP_FILE"
    echo "PLP_DIR=$PLP_DIR"
    echo "SEG_FILE=$SEG_FILE"
    echo "LBL_FILE=$LBL_FILE"
    echo "RES_DIR=$RES_DIR"
} >> "$OUTPUT_DIR_BASENAME/FirstPass.cfg"
print_info "save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1
184 | 184 | ||
#-------------------------#
# Check Audio File Format #
#-------------------------#
# Directory of the audio file as received; captured before any conversion
# because WAV_FILE is repointed to the workspace below.
wav_source_dir=$(dirname "$WAV_FILE")

# Probe the file once (stdout and stderr together) and look for each required
# property in the report: 16000 Hz, 1 channel, s16 samples.
error=0
wav_info=$(avconv -i "$WAV_FILE" 2>&1)
for wanted in "16000 Hz" "1 channels" "s16"
do
    if ! grep -q -- "$wanted" <<< "$wav_info"; then error=1; fi
done

if [ "$error" -eq 1 ]
then
    convert_cmd=(avconv -i "$WAV_FILE" -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 "$OUTPUT_DIR_BASENAME/$BASENAME.wav")
    print_message $WARNING 2 "$WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert"
    print_message $INFO 3 "${convert_cmd[*]}"
    # A redirection stored in a variable is expanded as plain arguments
    # ("2>" and "/dev/null"), so it is applied explicitly here; a non-empty
    # REDIRECTION_OUTPUT means "discard stderr".
    if [ -n "$REDIRECTION_OUTPUT" ]
    then
        "${convert_cmd[@]}" 2> /dev/null
    else
        "${convert_cmd[@]}"
    fi
    WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav
    FILENAME=$BASENAME.wav
    print_message $INFO 1 "new wav file : $WAV_FILE"
fi

#---------------#
# Get SRT file  #
#---------------#
# The SRT file is looked up next to the audio file that was given, not next
# to the converted copy (which lives in the workspace itself).
if [ -s "$wav_source_dir/$BASENAME.SRT" ]
then
    cp "$wav_source_dir/$BASENAME.SRT" "$OUTPUT_DIR_BASENAME/$BASENAME.SRT"
    print_info "copy $BASENAME.SRT file into workingspace" 1
fi
214 | 214 | ||
#------------#
# WAV -> PLP #
#------------#
print_info "convert WAV -> PLP" 1
# The tool reads the names of the files to convert from a list file.
echo "$FILENAME" > "$OUTPUT_DIR_BASENAME/list.tmp"

# Build the command once so that the logged command is the one really run
# (binary suffix taken from ARCH).
plp_cmd=("$BIN_PATH/lia_plp_mt$ARCH" --lst "$OUTPUT_DIR_BASENAME/list.tmp" --input_dir "$(dirname "$WAV_FILE")" --output_dir "$OUTPUT_DIR_BASENAME" --input_type WAV --output_type HTK --nb_coef 12 --cms)
print_info "${plp_cmd[*]}" 2

# A redirection stored in a variable is expanded as plain arguments, so it is
# applied explicitly; a non-empty REDIRECTION_OUTPUT means "discard stderr".
if [ -n "$REDIRECTION_OUTPUT" ]
then
    "${plp_cmd[@]}" 2> /dev/null
else
    "${plp_cmd[@]}"
fi

if [ "$CHECK" -eq 1 ]
then
    check_first_pass_plp "$PLP_FILE"
    # Only status 1 is treated as an error, as before.
    if [ $? -eq 1 ]
    then
        echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP_FILE" >> "$ERRORFILE"
        exit 1
    fi
fi

rm "$OUTPUT_DIR_BASENAME/list.tmp"
236 | 236 | ||
#------------------------------#
# S/NS + SPEAKERS SEGMENTATION #
#------------------------------#
print_info "Launch speakers diarization" 1
# Compute the seg file
diar_cmd=(java -Xmx4096m -jar "$BIN_PATH/LIUM_SpkDiarization-4.2.jar" "--fInputMask=${WAV_FILE}" "--sOutputMask=${SEG_FILE}" "$BASENAME")
print_info "${diar_cmd[*]}" 2
#java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME
# A redirection stored in a variable is expanded as plain arguments, so it is
# applied explicitly; a non-empty REDIRECTION_OUTPUT means "discard stderr".
if [ -n "$REDIRECTION_OUTPUT" ]
then
    "${diar_cmd[@]}" 2> /dev/null      #–doCEClustering
else
    "${diar_cmd[@]}"                   #–doCEClustering
fi

# ! -s is true when the file is missing OR empty (-z would only test whether
# the path string itself is empty).
if [ "$CHECK" -eq 1 ] && [ ! -s "$SEG_FILE" ]
then
    echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $SEG_FILE" >> "$ERRORFILE"
    exit 1
fi


# Create LBL file
print_info "Extract LBL file from SEG file" 2

# Drop the ";;" lines, keep space-separated fields 3,4,5,8 and sort
# numerically; spaces are swapped for "#" during the sort so that each line
# is handled as a single field.
grep -v ";;" "$SEG_FILE" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > "$LBL_FILE"

# The redirection above always creates the file, so emptiness is what matters.
if [ "$CHECK" -eq 1 ] && [ ! -s "$LBL_FILE" ]
then
    echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $LBL_FILE" >> "$ERRORFILE"
    exit 1
fi
263 | 263 | ||
264 | 264 | ||
#----------------------------------------------------#
# Cut global PLP file depending to LBL segmentations #
#----------------------------------------------------#
print_info "Cut PLP file depending to LBL segmentations" 1
# Build the command once so that the logged command is the one really run.
gcep_cmd=("$SPEERAL_TOOLS/gcep" "$PLP_FILE" "$LBL_FILE" 500 "$PLP_DIR" -FSEG)
print_info "${gcep_cmd[*]}" 2

# A redirection stored in a variable is expanded as plain arguments, so it is
# applied explicitly; a non-empty REDIRECTION_OUTPUT means "discard stderr".
if [ -n "$REDIRECTION_OUTPUT" ]
then
    "${gcep_cmd[@]}" 2> /dev/null
else
    "${gcep_cmd[@]}"
fi

if [ "$CHECK" -eq 1 ]
then
    check_first_pass_plps_lbl "$PLP_DIR" "$LBL_FILE"
    # Only status 1 is treated as an error, as before.
    if [ $? -eq 1 ]
    then
        # NOTE(review): the message used $PLP, which is not set anywhere in the
        # visible script; PLP_DIR is the directory that was just checked.
        echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP_DIR wrong .plp files number" >> "$ERRORFILE"
        exit 1
    fi
fi

# change plp files names
# Every "_" becomes "#", then the first "#" goes back to "_": only the first
# underscore of each name is kept. Done in a subshell so the caller's working
# directory is untouched, and nothing is renamed if PLP_DIR cannot be entered.
(
    cd "$PLP_DIR" || exit 1
    rename -f 's/_/#/g' *plp
    rename -f 's/#/_/' *plp
)
288 | 288 | ||
#---------------------------------------------#
# PLP files list depending to acoustic models #
#---------------------------------------------#
print_info "Create PLP list depending of the model" 1
# Create a list of plp files (names without the .plp extension)
find "$PLP_DIR" -type f -exec basename "{}" .plp \; | sort > "$OUTPUT_DIR_BASENAME/plp.lst"

# Remove the per-model lists of a previous run.
rm "$OUTPUT_DIR_BASENAME"/plp_*.lst > /dev/null 2>&1
# MTAG[i] is the pattern selecting the segments of model MODS[i]; a list is
# written only for the models that match at least one segment.
for (( i=0; i<${#MTAG[@]}; i++ ))
do
    matches=$(grep -e "${MTAG[$i]}" "$OUTPUT_DIR_BASENAME/plp.lst")
    if [ -n "$matches" ]
    then
        print_info "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3
        printf '%s\n' "$matches" | sort > "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
    fi
done
305 | 305 | ||
#-----------------------#
# First Pass (DECODING) #
#-----------------------#
#
# For every acoustic model that has a PLP list, run the decoder.
# With CHECK=1 the decoder output is verified: on error (status 1) "redo" is
# decremented and the segments still missing are written to todo.lst; the
# loop runs while redo > 0. redo=-5 marks a normal end of the loop.
#
print_info "Launch decoding" 1
for (( i=0; i<${#MTAG[@]}; i++ ))
do
    redo=1      # nb of try if not all segs is decoded
    model_list="$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
    if [ -e "$model_list" ]
    then
        todo=$model_list
        while [ "$redo" -gt 0 ]
        do
            rm "$RES_DIR"/*.lock > /dev/null 2>&1
            print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 2
            # Run speeral
            # SPEERAL_BIN, SPEERAL_CFG[i] and FORKS stay unquoted as before:
            # presumably they may hold several words or nothing — TODO confirm.
            speeral_cmd=($SPEERAL_BIN "$todo" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR" -m "$SPEERAL_AM/${MODS[$i]}.hmm" -c "$SPEERAL_AM/${MODS[$i]}.cls" $FORKS --lock)
            # A redirection stored in a variable is expanded as plain
            # arguments, so it is applied explicitly; a non-empty
            # REDIRECTION_OUTPUT means "discard stderr".
            if [ -n "$REDIRECTION_OUTPUT" ]
            then
                "${speeral_cmd[@]}" 2> /dev/null
            else
                "${speeral_cmd[@]}"
            fi

            # Check if error
            if [ "$CHECK" -eq 1 ]
            then
                check_first_pass_output_speeral "$model_list" "$RES_DIR"
                # if error
                if [ $? -eq 1 ]
                then
                    # rerun
                    redo=$((redo - 1))
                    echo -e "WARN : Speeral output ERROR $model_list" >> "$ERRORFILE"
                    # new plp list
                    # list .seg done and compare to list of seg to do
                    ls "$RES_DIR"/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
                    diff "$model_list" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" > "${OUTPUT_DIR_BASENAME}/todo.lst"
                    rm "${OUTPUT_DIR_BASENAME}/.tmp"
                    # log seg to do
                    cat "${OUTPUT_DIR_BASENAME}/todo.lst" >> "$ERRORFILE"
                    todo=${OUTPUT_DIR_BASENAME}/todo.lst
                    echo -e "WARN : Try $redo" >> "$ERRORFILE"
                else
                    # Output verified: leave the loop. Without this, redo kept
                    # its value and the decoder was run again forever.
                    redo=-5
                fi
            else
                # No verification requested: a single run.
                redo=-5
            fi
        done
        # redo reaches exactly 0 only when the tries were used up on errors.
        if [ "$redo" -eq 0 ]
        then
            echo -e "ERROR : Speeral $todo" >> "$ERRORFILE"
            cat "${OUTPUT_DIR_BASENAME}/todo.lst" >> "$ERRORFILE"
            #exit 1
        fi
        rm "${OUTPUT_DIR_BASENAME}/todo.lst" > /dev/null 2>&1
        #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
        rm "$RES_DIR"/*.lock > /dev/null 2>&1
    fi
done
361 | 361 | ||
print_info "<= End P1 $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" 1

## Check missing seg and log it
# Names (without directory and .seg extension) of the segments that were decoded.
ls "$RES_DIR"/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
echo -e "$BASENAME P1 END\n[" >> "$LOGFILE"
# Lines only present in plp.lst are the segments that were not decoded.
diff "${OUTPUT_DIR_BASENAME}/plp.lst" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" >> "$LOGFILE"
todo=$(wc -l < "${OUTPUT_DIR_BASENAME}/plp.lst")
notdone=$(( todo - $(wc -l < "${OUTPUT_DIR_BASENAME}/.tmp") ))
# Percentage of segments not decoded. An empty plp.lst would divide by zero;
# it is reported as 100% so that the log flags the run as failed.
if (( todo > 0 ))
then
    pourcentage=$(( (notdone * 100) / todo ))
else
    pourcentage=100
fi
echo -e "] $pourcentage% $BASENAME" >> "$LOGFILE"
rm "${OUTPUT_DIR_BASENAME}/.tmp"
373 | 373 | ||
374 | 374 | ||
#---------------#
#  Convert res  #
#---------------#

# run_res2out <res2out.pl arguments...>
# Runs res2out.pl with the given arguments. A redirection stored in a
# variable is expanded as plain arguments, so it is applied explicitly here:
# a non-empty REDIRECTION_OUTPUT means "discard stderr".
run_res2out()
{
    if [ -n "$REDIRECTION_OUTPUT" ]
    then
        "$SCRIPT_PATH/res2out.pl" "$@" 2> /dev/null
    else
        "$SCRIPT_PATH/res2out.pl" "$@"
    fi
}

# .res => .ctm
run_res2out --dir "$RES_DIR" --format CTM --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm"
# .res => .trs
# The TRS export reads its settings from a temporary configuration file.
echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg"
run_res2out --dir "$RES_DIR" --format TRS --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs" --trs_config "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg"
rm "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg" 2> /dev/null
# .res => .txt
run_res2out --dir "$RES_DIR" --format TXT --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt"

# unlock directory
mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock"
390 | 390 |
main_tools/OneScriptToRuleThemAll.sh
#!/bin/bash

#####################################
# File: OneScriptToRuleThemAll.sh   #
# Brief : Script to launch OTMEDIA  #
# Version : 1.0                     #
# Date : 23/07/2013                 #
# Author : Jean-François Rey        #
#####################################
#
# Usage : OneScriptToRuleThemAll.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>
# Runs every pass script located next to this one, in order, on one audio
# file. The options are forwarded unchanged to each pass.

echo -e "OneScriptToRuleThemAll :"
# Banner printed before the passes are started.
LORD=" Three::rings
for:::the::Elven-King
under:the:sky,:Seven:for:the
Dwarf-Lords::in::their::halls:of
stone,:Nine for:Mortal
:::Men::: ________ doomed::to
die.:One _,-'...:... \`-. for:::the
::Dark:: ,- .:::::::::::. \`. Lord::on
his:dark ,' .:::::zzz:::::. \`. :throne:
In:::the/ ::::OTMEDIA:::: \ Land::of
:Mordor:\ ::::SCRIPTS:::: / :where::
::the::: '. '::::YEEEP::::' ,' Shadows:
lie.::One \`. \`\`:::::::::'' ,' Ring::to
::rule:: \`-._\`\`\`:'''_,-' ::them::
all,::One \`-----' ring::to
::find::: them,:One
Ring:::::to bring::them
all::and::in:the:darkness:bind
them:In:the:Land:of:Mordor
where:::the::Shadows
:::lie.:::"


# Check OTMEDIA_HOME env var
# When unset, it defaults to the parent of the directory holding this script.
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is OneScriptToRuleThemAll.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Options forwarded to every pass; an array keeps each option and its value
# as separate, intact words.
RING=()

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cf:r" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\tCheck process, stop if error detected"
            echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
            echo -e "\t\t-r :\tforce rerun the wav file"
            exit 1
            ;;
        D)
            RING+=(-D)
            ;;
        v)
            RING+=(-v "$OPTARG")
            ;;
        c)
            RING+=(-c)
            ;;
        f)
            RING+=(-f "$OPTARG")
            ;;
        r)
            RING+=(-r)
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknow opton -$OPTARG"
            exit 1
            ;;
    esac
done

# Check USAGE by arguments number
# Exactly two positional arguments must remain after the parsed options.
if [ $(( $# - (OPTIND - 1) )) -ne 2 ]
then
    echo "BAD USAGE : $0 [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# check audio file - First argument
# -s is true only for an existing, non-empty file.
if [ -s "$1" ]
then
    echo -e "$LORD\n"
    # FirstPass.sh works in <OUTPUT_DIR>/<BASENAME>, BASENAME being the
    # resolved file name without directory and extension; the same name is
    # derived here so the later passes receive that directory even when the
    # audio file is given with a path.
    wav_name=$(basename "$(readlink -e "$1")")
    REP_OUT=$2/${wav_name%.*}
    "${MAIN_SCRIPT_PATH}/FirstPass.sh" "${RING[@]}" "$1" "$2"
    "${MAIN_SCRIPT_PATH}/SecondPass.sh" "${RING[@]}" "${REP_OUT}"
    "${MAIN_SCRIPT_PATH}/ConfPass.sh" "${RING[@]}" "${REP_OUT}" "res_p2"
    "${MAIN_SCRIPT_PATH}/ExploitConfidencePass.sh" "${RING[@]}" "${REP_OUT}"
    "${MAIN_SCRIPT_PATH}/ThirdPass.sh" "${RING[@]}" "${REP_OUT}"
    "${MAIN_SCRIPT_PATH}/ConfPass.sh" "${RING[@]}" "${REP_OUT}" "res_p3"
    "${MAIN_SCRIPT_PATH}/RecomposePass.sh" "${RING[@]}" "${REP_OUT}"
    "${MAIN_SCRIPT_PATH}/ScoringRes.sh" "${RING[@]}" "${REP_OUT}"
else
    echo "can't find $1 OR file is empty"
    exit 1
fi
116 | 116 | ||
117 | 117 | ||
118 | 118 | ||
119 | 119 |
main_tools/SecondPass.sh
#!/bin/bash

#####################################################
# File : SecondPass.sh                              #
# Brief : Speaker adaptation + ASR second pass      #
# Author : Jean-François Rey                        #
#          (base on Emmanuel Ferreira               #
#          and Hugo Mauchrétien works)              #
# Version : 1.1                                     #
# Date : 18/06/13                                   #
#####################################################

# Check OTMEDIA_HOME env var
# When unset, it defaults to the parent of the directory holding this script.
if [ -z "${OTMEDIA_HOME}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# where is SecondPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckSecondPass.sh"

# where is SecondPass.cfg
# The configuration file is sourced; the script stops when it is missing.
SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg"
if [ -e "$SECONDPASS_CONFIG_FILE" ]
then
    . "$SECONDPASS_CONFIG_FILE"
else
    echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2
    exit 1
fi
39 | 39 | ||
40 | #---------------# | 40 | #---------------# |
41 | # Parse Options # | 41 | # Parse Options # |
42 | #---------------# | 42 | #---------------# |
43 | while getopts ":hDv:crf:" opt | 43 | while getopts ":hDv:crf:" opt |
44 | do | 44 | do |
45 | case $opt in | 45 | case $opt in |
46 | h) | 46 | h) |
47 | echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" | 47 | echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" |
48 | echo -e "\t Options:" | 48 | echo -e "\t Options:" |
49 | echo -e "\t\t-h :\tprint this message" | 49 | echo -e "\t\t-h :\tprint this message" |
50 | echo -e "\t\t-D :\tDEBUG mode on" | 50 | echo -e "\t\t-D :\tDEBUG mode on" |
51 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 51 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
52 | echo -e "\t\t-c :\t Check process, stop if error detected" | 52 | echo -e "\t\t-c :\t Check process, stop if error detected" |
53 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" | 53 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" |
54 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" | 54 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" |
55 | exit 1 | 55 | exit 1 |
56 | ;; | 56 | ;; |
57 | D) | 57 | D) |
58 | DEBUG=1 | 58 | DEBUG=1 |
59 | ;; | 59 | ;; |
60 | v) | 60 | v) |
61 | VERBOSE=$OPTARG | 61 | VERBOSE=$OPTARG |
62 | ;; | 62 | ;; |
63 | c) | 63 | c) |
64 | CHECK=1 | 64 | CHECK=1 |
65 | ;; | 65 | ;; |
66 | f) | 66 | f) |
67 | FORKS="--forks $OPTARG" | 67 | FORKS="--forks $OPTARG" |
68 | ;; | 68 | ;; |
69 | r) | 69 | r) |
70 | RERUN=1 | 70 | RERUN=1 |
71 | ;; | 71 | ;; |
72 | :) | 72 | :) |
73 | echo "Option -$OPTARG requires an argument." >&2 | 73 | echo "Option -$OPTARG requires an argument." >&2 |
74 | exit 1 | 74 | exit 1 |
75 | ;; | 75 | ;; |
76 | \?) | 76 | \?) |
77 | echo "BAD USAGE : unknow opton -$OPTARG" | 77 | echo "BAD USAGE : unknow opton -$OPTARG" |
78 | exit 1 | 78 | exit 1 |
79 | ;; | 79 | ;; |
80 | esac | 80 | esac |
81 | done | 81 | done |
82 | 82 | ||
83 | # mode debug enable | 83 | # mode debug enable |
84 | if [ $DEBUG -eq 1 ] | 84 | if [ $DEBUG -eq 1 ] |
85 | then | 85 | then |
86 | set -x | 86 | set -x |
87 | echo -e "## Mode DEBUG ON ##" | 87 | echo -e "## Mode DEBUG ON ##" |
88 | REDIRECTION_OUTPUT="" | ||
89 | else | ||
90 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
88 | fi | 91 | fi |
89 | 92 | ||
90 | # mode verbose enable | 93 | # mode verbose enable |
91 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | 94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
92 | 95 | ||
93 | # Check USAGE by arguments number | 96 | # Check USAGE by arguments number |
94 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 97 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
95 | then | 98 | then |
96 | echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" | 99 | echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" |
97 | echo "$0 -h for more info" | 100 | echo "$0 -h for more info" |
98 | exit 1 | 101 | exit 1 |
99 | fi | 102 | fi |
100 | 103 | ||
101 | shift $((OPTIND-1)) | 104 | shift $((OPTIND-1)) |
102 | # check FirstPass directory - First argument | 105 | # check FirstPass directory - First argument |
103 | if [ -e $1 ] && [ -d $1 ] | 106 | if [ -e $1 ] && [ -d $1 ] |
104 | then | 107 | then |
105 | FIRSTPASS_DIR=$(readlink -e $1) | 108 | FIRSTPASS_DIR=$(readlink -e $1) |
106 | else | 109 | else |
107 | print_error "can't find $1 directory" | 110 | print_error "can't find $1 directory" |
108 | exit 1 | 111 | exit 1 |
109 | fi | 112 | fi |
110 | 113 | ||
111 | #-------------# | 114 | #-------------# |
112 | # GLOBAL VARS # | 115 | # GLOBAL VARS # |
113 | #-------------# | 116 | #-------------# |
114 | FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" | 117 | FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" |
115 | if [ -e $FIRSTPASS_CONFIG_FILE ] | 118 | if [ -e $FIRSTPASS_CONFIG_FILE ] |
116 | then | 119 | then |
117 | WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") | 120 | WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") |
118 | BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") | 121 | BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") |
119 | OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") | 122 | OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") |
120 | OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR | 123 | OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR |
121 | PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") | 124 | PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") |
122 | PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") | 125 | PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") |
123 | SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") | 126 | SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") |
124 | LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") | 127 | LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") |
125 | RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") | 128 | RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") |
126 | else | 129 | else |
127 | print_error "can't find $FIRSTPASS_CONFIG_FILE file" | 130 | print_error "can't find $FIRSTPASS_CONFIG_FILE file" |
128 | exit 1 | 131 | exit 1 |
129 | fi | 132 | fi |
130 | LST=$OUTPUT_DIR_BASENAME"/lists" | 133 | LST=$OUTPUT_DIR_BASENAME"/lists" |
131 | HMM=$OUTPUT_DIR_BASENAME"/hmm/" | 134 | HMM=$OUTPUT_DIR_BASENAME"/hmm/" |
132 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" | 135 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" |
133 | LOGFILE="$OUTPUT_DIR/info_p2.log" | 136 | LOGFILE="$OUTPUT_DIR/info_p2.log" |
134 | ERRORFILE="$OUTPUT_DIR/error_p2.log" | 137 | ERRORFILE="$OUTPUT_DIR/error_p2.log" |
135 | 138 | ||
136 | #------------------# | 139 | #------------------# |
137 | # Create WORKSPACE # | 140 | # Create WORKSPACE # |
138 | #------------------# | 141 | #------------------# |
139 | 142 | ||
140 | # Lock directory | 143 | # Lock directory |
141 | if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi | 144 | if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi |
142 | rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 | 145 | rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 |
143 | touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 | 146 | touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 |
144 | 147 | ||
145 | rm -r $LST > /dev/null 2>&1 | 148 | rm -r $LST > /dev/null 2>&1 |
146 | mkdir -p $LST | 149 | mkdir -p $LST |
147 | print_info "Make directory $LST" 1 | 150 | print_info "Make directory $LST" 1 |
148 | if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi | 151 | if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi |
149 | mkdir -p $HMM | 152 | mkdir -p $HMM |
150 | print_info "Make directory $HMM" 1 | 153 | print_info "Make directory $HMM" 1 |
151 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi | 154 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi |
152 | mkdir -p $RES_DIR | 155 | mkdir -p $RES_DIR > /dev/null 2>&1 |
153 | print_info "Make directory $RES_DIR" 1 | 156 | print_info "Make directory $RES_DIR" 1 |
154 | 157 | ||
155 | #-------------------# | 158 | #-------------------# |
156 | # Check Pass # | 159 | # Check Pass # |
157 | #-------------------# | 160 | #-------------------# |
158 | print_info "Check Pass 2 directory" 1 | 161 | print_info "Check Pass 2 directory" 1 |
159 | for treil in $(ls $RES_DIR/ | grep treil) | 162 | for treil in $(ls $RES_DIR/ | grep treil) |
160 | do | 163 | do |
161 | if [ ! -s $RES_DIR/$treil ] | 164 | if [ ! -s $RES_DIR/$treil ] |
162 | then | 165 | then |
163 | bn = $(basename $treil ".treil") | 166 | bn = $(basename $treil ".treil") |
164 | rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null | 167 | rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null |
165 | print_info "$RES_DIR/$bn.* files deleted.." 2 | 168 | print_info "$RES_DIR/$bn.* files deleted.." 2 |
166 | fi | 169 | fi |
167 | done | 170 | done |
168 | 171 | ||
169 | # Check if more than 89% of the treil files are done | 172 | # Check if more than 89% of the treil files are done |
170 | nbres_p1=$(ls $RES_DIR_P1/*.res | wc -l) | 173 | nbres_p1=$(ls $RES_DIR_P1/*.res | wc -l) |
171 | nbtreil_p2=$(ls $RES_DIR/*.treil | wc -l) | 174 | nbtreil_p2=$(ls $RES_DIR/*.treil | wc -l) |
172 | if [ $nbres_p1 -gt 0 ] | 175 | if [ $nbres_p1 -gt 0 ] |
173 | then | 176 | then |
174 | pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) | 177 | pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) |
175 | if [ $pourcentage -gt 89 ] | 178 | if [ $pourcentage -gt 89 ] |
176 | then | 179 | then |
177 | echo "Lattice already done, skipping $BASENAME" | 180 | echo "Lattice already done, skipping $BASENAME" |
178 | exit 0 | 181 | exit 0 |
179 | fi | 182 | fi |
180 | fi | 183 | fi |
181 | 184 | ||
182 | #--------------------# | 185 | #--------------------# |
183 | # Save configuration # | 186 | # Save configuration # |
184 | #--------------------# | 187 | #--------------------# |
185 | cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg | 188 | cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg |
186 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 189 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
187 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 190 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
188 | echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 191 | echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
189 | echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 192 | echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
190 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 193 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
191 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 194 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
192 | echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 195 | echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
193 | echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 196 | echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
194 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 197 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
195 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 | 198 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 |
196 | 199 | ||
197 | 200 | ||
198 | #--------------------------------------------------# | 201 | #--------------------------------------------------# |
199 | # Speaker Adaptation (AM) + Second pass (DECODING) # | 202 | # Speaker Adaptation (AM) + Second pass (DECODING) # |
200 | #--------------------------------------------------# | 203 | #--------------------------------------------------# |
201 | print_info "Launch Second Pass" 2 | 204 | print_info "Launch Second Pass" 2 |
202 | 205 | ||
203 | # for all speaker | 206 | # for all speaker |
204 | for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq) | 207 | for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq) |
205 | do | 208 | do |
206 | ## get seg file from P1 containing the speaker | 209 | ## get seg file from P1 containing the speaker |
207 | find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst | 210 | find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst |
208 | print_info "file for $speaker in $LST/$speaker.lst" 3 | 211 | print_info "file for $speaker in $LST/$speaker.lst" 3 |
209 | if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi | 212 | if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi |
210 | 213 | ||
211 | 214 | ||
212 | # for all AM | 215 | # for all AM |
213 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 216 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
214 | do | 217 | do |
215 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then | 218 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then |
216 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") | 219 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") |
217 | ## if this is the right AM for the speaker | 220 | ## if this is the right AM for the speaker |
218 | if [ -n "$type" ] | 221 | if [ -n "$type" ] |
219 | then | 222 | then |
220 | ## HMM adaptation | 223 | ## HMM adaptation |
221 | if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) | 224 | if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) |
222 | then | 225 | then |
223 | print_info "$SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3 | 226 | print_info "$SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3 |
224 | $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/ | 227 | $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/ $REDIRECTION_OUTPUT |
225 | fi | 228 | fi |
226 | 229 | ||
227 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) | 230 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) |
228 | then | 231 | then |
229 | echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.hmm" >> $ERRORFILE | 232 | echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.hmm" >> $ERRORFILE |
230 | #exit 1 | 233 | #exit 1 |
231 | fi | 234 | fi |
232 | 235 | ||
233 | ## cp map files | 236 | ## cp map files |
234 | cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map | 237 | cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map |
235 | 238 | ||
236 | ## class clustering | 239 | ## class clustering |
237 | if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )) | 240 | if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )) |
238 | then | 241 | then |
239 | print_info "$SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3 | 242 | print_info "$SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3 |
240 | $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls | 243 | $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls $REDIRECTION_OUTPUT |
241 | fi | 244 | fi |
242 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ) | 245 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ) |
243 | then | 246 | then |
244 | echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.cls" >> $ERRORFILE | 247 | echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.cls" >> $ERRORFILE |
245 | #exit 1 | 248 | #exit 1 |
246 | fi | 249 | fi |
247 | 250 | ||
248 | ## Speeral decoding | 251 | ## Speeral decoding |
249 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] | 252 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] |
250 | then | 253 | then |
251 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3 | 254 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 |
252 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock | 255 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT |
253 | else | 256 | else |
254 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 | 257 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 |
255 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock | 258 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT |
256 | fi | 259 | fi |
257 | 260 | ||
258 | if [ $CHECK -eq 1 ] | 261 | if [ $CHECK -eq 1 ] |
259 | then | 262 | then |
260 | check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" | 263 | check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" |
261 | if [ $? -eq 1 ] | 264 | if [ $? -eq 1 ] |
262 | then | 265 | then |
263 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE | 266 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE |
264 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 267 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
265 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE | 268 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE |
266 | echo -e "] " >> $ERRORFILE | 269 | echo -e "] " >> $ERRORFILE |
267 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 270 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
268 | #exit 1 | 271 | #exit 1 |
269 | fi | 272 | fi |
270 | fi | 273 | fi |
271 | break | 274 | break |
272 | fi | 275 | fi |
273 | fi | 276 | fi |
274 | done | 277 | done |
275 | #rm "$HMM/$speaker.*" > /dev/null 2>&1 | 278 | #rm "$HMM/$speaker.*" > /dev/null 2>&1 |
276 | #rm "$LST/$speaker.lst" > /dev/null 2>&1 | 279 | #rm "$LST/$speaker.lst" > /dev/null 2>&1 |
277 | done | 280 | done |
278 | 281 | ||
279 | print_info "<= End $BASENAME P2 | $(date +'%d/%m/%y %H:%M:%S')" 1 | 282 | print_info "<= End $BASENAME P2 | $(date +'%d/%m/%y %H:%M:%S')" 1 |
280 | 283 | ||
281 | ## Check missing seg and log it | 284 | ## Check missing seg and log it |
282 | ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 285 | ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
283 | echo -e "$BASENAME P2 END\n[" >> $LOGFILE | 286 | echo -e "$BASENAME P2 END\n[" >> $LOGFILE |
284 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE | 287 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE |
285 | echo -e "] $BASENAME" >> $LOGFILE | 288 | echo -e "] $BASENAME" >> $LOGFILE |
286 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 289 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 |
287 | 290 | ||
288 | #---------------# | 291 | #---------------# |
289 | # Convert res # | 292 | # Convert res # |
290 | #---------------# | 293 | #---------------# |
291 | 294 | ||
292 | # .res => .ctm | 295 | # .res => .ctm |
293 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm | 296 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm $REDIRECTION_OUTPUT |
294 | # .res => .trs | 297 | # .res => .trs |
295 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 298 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
296 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 299 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT |
297 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 300 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 |
298 | # .res => .txt | 301 | # .res => .txt |
299 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt | 302 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt $REDIRECTION_OUTPUT |
300 | 303 | ||
301 | 304 | ||
302 | # unlock directory | 305 | # unlock directory |
303 | mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" | 306 | mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" |
304 | 307 | ||
305 | 308 |
main_tools/ThirdPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ThirdPass.sh # | 4 | # File : ThirdPass.sh # |
5 | # Brief : ASR third pass using trigg files # | 5 | # Brief : ASR third pass using trigg files # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # Version : 1.0 # | 7 | # Version : 1.0 # |
8 | # Date : 18/07/13 # | 8 | # Date : 18/07/13 # |
9 | ##################################################### | 9 | ##################################################### |
10 | 10 | ||
11 | # Check OTMEDIA_HOME env var | 11 | # Check OTMEDIA_HOME env var |
12 | if [ -z ${OTMEDIA_HOME} ] | 12 | if [ -z ${OTMEDIA_HOME} ] |
13 | then | 13 | then |
14 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 14 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
15 | export OTMEDIA_HOME=$OTMEDIA_HOME | 15 | export OTMEDIA_HOME=$OTMEDIA_HOME |
16 | fi | 16 | fi |
17 | 17 | ||
18 | # where is ThirdPass.sh | 18 | # where is ThirdPass.sh |
19 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 19 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
20 | 20 | ||
21 | # Scripts Path | 21 | # Scripts Path |
22 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 22 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
23 | 23 | ||
24 | # Include scripts | 24 | # Include scripts |
25 | . $SCRIPT_PATH"/Tools.sh" | 25 | . $SCRIPT_PATH"/Tools.sh" |
26 | . $SCRIPT_PATH"/CheckThirdPass.sh" | 26 | . $SCRIPT_PATH"/CheckThirdPass.sh" |
27 | 27 | ||
28 | # where is ThirdPass.cfg | 28 | # where is ThirdPass.cfg |
29 | THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg" | 29 | THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg" |
30 | if [ -e $THIRDPASS_CONFIG_FILE ] | 30 | if [ -e $THIRDPASS_CONFIG_FILE ] |
31 | then | 31 | then |
32 | . $THIRDPASS_CONFIG_FILE | 32 | . $THIRDPASS_CONFIG_FILE |
33 | else | 33 | else |
34 | echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2 | 34 | echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2 |
35 | exit 1 | 35 | exit 1 |
36 | fi | 36 | fi |
37 | 37 | ||
38 | #---------------# | 38 | #---------------# |
39 | # Parse Options # | 39 | # Parse Options # |
40 | #---------------# | 40 | #---------------# |
41 | while getopts ":hDv:crf:" opt | 41 | while getopts ":hDv:crf:" opt |
42 | do | 42 | do |
43 | case $opt in | 43 | case $opt in |
44 | h) | 44 | h) |
45 | echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n" | 45 | echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n" |
46 | echo -e "\t Options:" | 46 | echo -e "\t Options:" |
47 | echo -e "\t\t-h :\tprint this message" | 47 | echo -e "\t\t-h :\tprint this message" |
48 | echo -e "\t\t-D :\tDEBUG mode on" | 48 | echo -e "\t\t-D :\tDEBUG mode on" |
49 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 49 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
50 | echo -e "\t\t-c :\t Check process, stop if error detected" | 50 | echo -e "\t\t-c :\t Check process, stop if error detected" |
51 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" | 51 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" |
52 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" | 52 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" |
53 | exit 1 | 53 | exit 1 |
54 | ;; | 54 | ;; |
55 | D) | 55 | D) |
56 | DEBUG=1 | 56 | DEBUG=1 |
57 | ;; | 57 | ;; |
58 | v) | 58 | v) |
59 | VERBOSE=$OPTARG | 59 | VERBOSE=$OPTARG |
60 | ;; | 60 | ;; |
61 | c) | 61 | c) |
62 | CHECK=1 | 62 | CHECK=1 |
63 | ;; | 63 | ;; |
64 | f) | 64 | f) |
65 | FORKS="--forks $OPTARG" | 65 | FORKS="--forks $OPTARG" |
66 | ;; | 66 | ;; |
67 | r) | 67 | r) |
68 | RERUN=1 | 68 | RERUN=1 |
69 | ;; | 69 | ;; |
70 | :) | 70 | :) |
71 | echo "Option -$OPTARG requires an argument." >&2 | 71 | echo "Option -$OPTARG requires an argument." >&2 |
72 | exit 1 | 72 | exit 1 |
73 | ;; | 73 | ;; |
74 | \?) | 74 | \?) |
75 | echo "BAD USAGE : unknow opton -$OPTARG" | 75 | echo "BAD USAGE : unknow opton -$OPTARG" |
76 | #exit 1 | 76 | #exit 1 |
77 | ;; | 77 | ;; |
78 | esac | 78 | esac |
79 | done | 79 | done |
80 | 80 | ||
81 | # mode debug enable | 81 | # mode debug enable |
82 | if [ $DEBUG -eq 1 ] | 82 | if [ $DEBUG -eq 1 ] |
83 | then | 83 | then |
84 | set -x | 84 | set -x |
85 | echo -e "## Mode DEBUG ON ##" | 85 | echo -e "## Mode DEBUG ON ##" |
86 | REDIRECTION_OUTPUT="" | ||
87 | else | ||
88 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
86 | fi | 89 | fi |
87 | 90 | ||
88 | # mode verbose enable | 91 | # mode verbose enable |
89 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | 92 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi |
90 | 93 | ||
91 | # Check USAGE by arguments number | 94 | # Check USAGE by arguments number |
92 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 95 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
93 | then | 96 | then |
94 | echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>" | 97 | echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>" |
95 | echo "$0 -h for more info" | 98 | echo "$0 -h for more info" |
96 | exit 1 | 99 | exit 1 |
97 | fi | 100 | fi |
98 | 101 | ||
99 | shift $((OPTIND-1)) | 102 | shift $((OPTIND-1)) |
100 | # check Pass directory - First argument | 103 | # check Pass directory - First argument |
101 | if [ -e $1 ] && [ -d $1 ] | 104 | if [ -e $1 ] && [ -d $1 ] |
102 | then | 105 | then |
103 | PASS_DIR=$(readlink -e $1) | 106 | PASS_DIR=$(readlink -e $1) |
104 | else | 107 | else |
105 | print_error "can't find $1 directory" | 108 | print_error "can't find $1 directory" |
106 | exit 1 | 109 | exit 1 |
107 | fi | 110 | fi |
108 | 111 | ||
109 | #-------------# | 112 | #-------------# |
110 | # GLOBAL VARS # | 113 | # GLOBAL VARS # |
111 | #-------------# | 114 | #-------------# |
112 | EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg" | 115 | EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg" |
113 | if [ -e $EXPLOITCONFPASS_CONFIG_FILE ] | 116 | if [ -e $EXPLOITCONFPASS_CONFIG_FILE ] |
114 | then | 117 | then |
115 | TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=") | 118 | TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=") |
116 | LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=") | 119 | LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=") |
117 | LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=") | 120 | LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=") |
118 | LST="" | 121 | LST="" |
119 | PLP_DIR_P1="" | 122 | PLP_DIR_P1="" |
120 | HMM="" | 123 | HMM="" |
121 | else | 124 | else |
122 | print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file" | 125 | print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file" |
123 | #exit 1 | 126 | #exit 1 |
124 | TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/ | 127 | TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/ |
125 | LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext | 128 | LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext |
126 | LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin | 129 | LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin |
127 | fi | 130 | fi |
128 | SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg" | 131 | SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg" |
129 | if [ -e $SECONDPASS_CONFIG_FILE ] | 132 | if [ -e $SECONDPASS_CONFIG_FILE ] |
130 | then | 133 | then |
131 | LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=") | 134 | LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=") |
132 | HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=") | 135 | HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=") |
133 | PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=") | 136 | PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=") |
134 | else | 137 | else |
135 | print_error "can't find $SECONDPASS_CONFIG_FILE file" | 138 | print_error "can't find $SECONDPASS_CONFIG_FILE file" |
136 | #exit 1 | 139 | #exit 1 |
137 | LST=$PASS_DIR/lists | 140 | LST=$PASS_DIR/lists |
138 | HMM=$PASS_DIR/hmm | 141 | HMM=$PASS_DIR/hmm |
139 | PLP_DIR_P1=$PASS_DIR/PLP | 142 | PLP_DIR_P1=$PASS_DIR/PLP |
140 | fi | 143 | fi |
141 | 144 | ||
142 | BASENAME=$(basename $PASS_DIR) | 145 | BASENAME=$(basename $PASS_DIR) |
143 | OUTPUT_DIR_BASENAME=$PASS_DIR | 146 | OUTPUT_DIR_BASENAME=$PASS_DIR |
144 | RES_DIR="$PASS_DIR/res_p3" | 147 | RES_DIR="$PASS_DIR/res_p3" |
145 | LOGFILE=$(dirname $PASS_DIR)"/info_p3.log" | 148 | LOGFILE=$(dirname $PASS_DIR)"/info_p3.log" |
146 | ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log" | 149 | ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log" |
147 | 150 | ||
148 | #------------------# | 151 | #------------------# |
149 | # Create WORKSPACE # | 152 | # Create WORKSPACE # |
150 | #------------------# | 153 | #------------------# |
151 | 154 | ||
152 | # Lock directory | 155 | # Lock directory |
153 | if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi | 156 | if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi |
154 | rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1 | 157 | rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1 |
155 | touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1 | 158 | touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1 |
156 | 159 | ||
157 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi | 160 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi |
158 | mkdir -p $RES_DIR | 161 | mkdir -p $RES_DIR |
159 | print_info "Make directory $RES_DIR" 1 | 162 | print_info "Make directory $RES_DIR" 1 |
160 | 163 | ||
161 | #--------------------# | 164 | #--------------------# |
162 | # Save configuration # | 165 | # Save configuration # |
163 | #--------------------# | 166 | #--------------------# |
164 | cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg | 167 | cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg |
165 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg | 168 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg |
166 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 | 169 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 |
167 | 170 | ||
168 | #--------------------------------------------------# | 171 | #--------------------------------------------------# |
169 | # Third Pass using trigger file (DECODING) # | 172 | # Third Pass using trigger file (DECODING) # |
170 | #--------------------------------------------------# | 173 | #--------------------------------------------------# |
171 | print_info "Launch Third Pass" 1 | 174 | print_info "Launch Third Pass" 1 |
172 | 175 | ||
173 | ## Generate the Speeral config file: set the lexicon paths and the trigger directory | 176 | ## Generate the Speeral config file: set the lexicon paths and the trigger directory |
174 | cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \ | 177 | cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \ |
175 | | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \ | 178 | | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \ |
176 | | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml | 179 | | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml |
177 | SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml | 180 | SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml |
178 | 181 | ||
179 | # for each speaker | 182 | # for each speaker |
180 | for lspeaker in $(ls $LST/*.lst) | 183 | for lspeaker in $(ls $LST/*.lst) |
181 | do | 184 | do |
182 | speaker=$(basename $lspeaker ".lst") | 185 | speaker=$(basename $lspeaker ".lst") |
183 | # for each AM | 186 | # for each AM |
184 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 187 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
185 | do | 188 | do |
186 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then | 189 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then |
187 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") | 190 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") |
188 | ## if this is the right AM for the speaker | 191 | ## if this is the right AM for the speaker |
189 | if [ -n "$type" ] | 192 | if [ -n "$type" ] |
190 | then | 193 | then |
191 | ## Speeral decoding | 194 | ## Speeral decoding |
192 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] | 195 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] |
193 | then | 196 | then |
194 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3 | 197 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 |
195 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock | 198 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT |
196 | else | 199 | else |
197 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 | 200 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 |
198 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock | 201 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT |
199 | fi | 202 | fi |
200 | 203 | ||
201 | if [ $CHECK -eq 1 ] | 204 | if [ $CHECK -eq 1 ] |
202 | then | 205 | then |
203 | check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" | 206 | check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" |
204 | if [ $? -eq 1 ] | 207 | if [ $? -eq 1 ] |
205 | then | 208 | then |
206 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE | 209 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE |
207 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 210 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
208 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE | 211 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE |
209 | echo -e "] " >> $ERRORFILE | 212 | echo -e "] " >> $ERRORFILE |
210 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 213 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
211 | #exit 1 | 214 | #exit 1 |
212 | fi | 215 | fi |
213 | fi | 216 | fi |
214 | break | 217 | break |
215 | fi | 218 | fi |
216 | fi | 219 | fi |
217 | done | 220 | done |
218 | done | 221 | done |
219 | 222 | ||
220 | ## Check for missing segments and log them | 223 | ## Check for missing segments and log them |
221 | ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 224 | ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
222 | echo -e "$BASENAME P3 END\n[" >> $LOGFILE | 225 | echo -e "$BASENAME P3 END\n[" >> $LOGFILE |
223 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE | 226 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE |
224 | echo -e "] $BASENAME" >> $LOGFILE | 227 | echo -e "] $BASENAME" >> $LOGFILE |
225 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 228 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 |
226 | 229 | ||
227 | #---------------# | 230 | #---------------# |
228 | # Convert res # | 231 | # Convert res # |
229 | #---------------# | 232 | #---------------# |
230 | 233 | ||
231 | # .res => .ctm | 234 | # .res => .ctm |
232 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm | 235 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm $REDIRECTION_OUTPUT |
233 | # .res => .trs | 236 | # .res => .trs |
234 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 237 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
235 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 238 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT |
236 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 239 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 |
237 | # .res => .txt | 240 | # .res => .txt |
238 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt | 241 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt $REDIRECTION_OUTPUT |
239 | 242 | ||
240 | 243 | ||
241 | print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1 | 244 | print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1 |
242 | 245 | ||
243 | # unlock directory | 246 | # unlock directory |
244 | mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" | 247 | mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" |
245 | 248 | ||
246 | 249 |
tools/scripts/ExtractAudioFromTV.sh
#!/bin/bash

# File    : ExtractAudioFromTV.sh
# Brief   : extract audio tracks and subtitles from a TV corpus
# Version : 1.0
#
# Usage   : ExtractAudioFromTV.sh <INPUT_DIRECTORY> <OUTPUT_DIRECTORY>
# Example : ExtractAudioFromTV.sh INA-TV2 MYTV
#
# Input layout  : <INPUT_DIRECTORY>/<channel dir>/<sub dir>/<name>.MP4
#                 with an optional <name>.SRT next to each video.
# Output layout : <OUTPUT_DIRECTORY>/<channel>/<sub dir>_<name>.wav
#                 plus <sub dir>_<name>.SRT when the subtitle file exists.
#                 <channel> is the channel dir name with "NAS_" removed.
# Exit status   : 1 on bad usage or when one of the two directories does
#                 not exist; conversion failures are reported on stderr
#                 and the script carries on with the next file.

# Exactly two positional arguments are required.
if [ $# -ne 2 ]
then
    echo "BAD USAGE : $0 <INPUT_DIRECTORY> <OUTPUT_DIRECTORY>" >&2
    exit 1
fi

# Resolve both directories to absolute paths so that nothing below
# depends on the current working directory.
if [ -e "$1" ]
then
    IN=$(readlink -e -- "$1")
else
    echo "ERROR : Can't read input $1" >&2
    exit 1
fi

if [ -e "$2" ]
then
    OUT=$(readlink -e -- "$2")
else
    echo "ERROR : Can't read OUTPUT $2" >&2
    exit 1
fi

# Walk <IN>/<channel dir>/<sub dir>/*.MP4 with globs instead of parsing
# `ls`, so names containing spaces are handled and every path is rooted
# at $IN (the previous relative `cd`/`ls` never reached the videos).
for channel_dir in "$IN"/*
do
    [ -d "$channel_dir" ] || continue
    channel_name=${channel_dir##*/}
    # Drop the first "NAS_" occurrence from the channel directory name.
    channel=${channel_name/NAS_/}

    # -p : do not fail when the channel directory is already there (rerun).
    if ! mkdir -p -- "$OUT/$channel"
    then
        echo "ERROR : Can't create $OUT/$channel" >&2
        continue
    fi

    for sub_dir in "$channel_dir"/*
    do
        [ -d "$sub_dir" ] || continue
        d=${sub_dir##*/}

        for video in "$sub_dir"/*.MP4
        do
            # An unmatched glob stays literal: skip it.
            [ -e "$video" ] || continue
            name=$(basename -- "$video" .MP4)

            # -vn drops the video stream; -f wav -ac 1 -ar 16000 request a
            # one-channel WAV file sampled at 16000 Hz.
            avconv -i "$video" -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 \
                "$OUT/$channel/${d}_${name}.wav" \
                || echo "ERROR : avconv failed on $video" >&2

            # Copy the subtitle file sitting next to the video, if any.
            if [ -e "$sub_dir/$name.SRT" ]
            then
                cp -- "$sub_dir/$name.SRT" "$OUT/$channel/${d}_${name}.SRT"
            fi
        done
    done
done