Commit 561670accdc5aa799e7d2ba3ce3fa2f825a656fa
1 parent
7e99f07935
Exists in
master
remove output redirections
Showing 5 changed files with 66 additions and 73 deletions Inline Diff
main_tools/ConfPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ConfPass.sh # | 4 | # File : ConfPass.sh # |
5 | # Brief : Process the ASR Confidence pass # | 5 | # Brief : Process the ASR Confidence pass # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (based on Emmanuel Ferreira # | 7 | # (based on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.0 # | 9 | # Version : 1.0 # |
10 | # Date : 17/06/13 # | 10 | # Date : 17/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
13 | echo "### ConfPass.sh ###" | 13 | echo "### ConfPass.sh ###" |
14 | 14 | ||
15 | #Check OTMEDIA_HOME env var | 15 | #Check OTMEDIA_HOME env var |
16 | if [ -z ${OTMEDIA_HOME} ] | 16 | if [ -z ${OTMEDIA_HOME} ] |
17 | then | 17 | then |
18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
19 | export OTMEDIA_HOME=$OTMEDIA_HOME | 19 | export OTMEDIA_HOME=$OTMEDIA_HOME |
20 | fi | 20 | fi |
21 | 21 | ||
22 | 22 | ||
23 | # where is ConfPass.sh | 23 | # where is ConfPass.sh |
24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
25 | 25 | ||
26 | # Scripts Path | 26 | # Scripts Path |
27 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 27 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
28 | 28 | ||
29 | # Include scripts | 29 | # Include scripts |
30 | . $SCRIPT_PATH"/Tools.sh" | 30 | . $SCRIPT_PATH"/Tools.sh" |
31 | . $SCRIPT_PATH"/CheckConfPass.sh" | 31 | . $SCRIPT_PATH"/CheckConfPass.sh" |
32 | 32 | ||
33 | # where is ConfPass.cfg | 33 | # where is ConfPass.cfg |
34 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" | 34 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" |
35 | if [ -e $CONFPASS_CONFIG_FILE ] | 35 | if [ -e $CONFPASS_CONFIG_FILE ] |
36 | then | 36 | then |
37 | . $CONFPASS_CONFIG_FILE | 37 | . $CONFPASS_CONFIG_FILE |
38 | else | 38 | else |
39 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" > /dev/stderr | 39 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" > /dev/stderr |
40 | exit 1 | 40 | exit 1 |
41 | fi | 41 | fi |
42 | 42 | ||
43 | #---------------# | 43 | #---------------# |
44 | # Parse Options # | 44 | # Parse Options # |
45 | #---------------# | 45 | #---------------# |
46 | while getopts ":hDv:cr" opt | 46 | while getopts ":hDv:cr" opt |
47 | do | 47 | do |
48 | case $opt in | 48 | case $opt in |
49 | h) | 49 | h) |
50 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" | 50 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" |
51 | echo -e "\t Options:" | 51 | echo -e "\t Options:" |
52 | echo -e "\t\t-h :\tprint this message" | 52 | echo -e "\t\t-h :\tprint this message" |
53 | echo -e "\t\t-D :\tDEBUG mode on" | 53 | echo -e "\t\t-D :\tDEBUG mode on" |
54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
55 | echo -e "\t\t-c :\t Check process, stop if error detected" | 55 | echo -e "\t\t-c :\t Check process, stop if error detected" |
56 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" | 56 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" |
57 | exit 1 | 57 | exit 1 |
58 | ;; | 58 | ;; |
59 | D) | 59 | D) |
60 | DEBUG=1 | 60 | DEBUG=1 |
61 | ;; | 61 | ;; |
62 | v) | 62 | v) |
63 | VERBOSE=$OPTARG | 63 | VERBOSE=$OPTARG |
64 | ;; | 64 | ;; |
65 | c) | 65 | c) |
66 | CHECK=1 | 66 | CHECK=1 |
67 | ;; | 67 | ;; |
68 | r) | 68 | r) |
69 | RERUN=1 | 69 | RERUN=1 |
70 | ;; | 70 | ;; |
71 | :) | 71 | :) |
72 | echo "Option -$OPTARG requires an argument." > /dev/stderr | 72 | echo "Option -$OPTARG requires an argument." > /dev/stderr |
73 | exit 1 | 73 | exit 1 |
74 | ;; | 74 | ;; |
75 | \?) | 75 | \?) |
76 | echo "BAD USAGE : unknow opton -$OPTARG" | 76 | echo "BAD USAGE : unknow opton -$OPTARG" |
77 | #exit 1 | 77 | #exit 1 |
78 | ;; | 78 | ;; |
79 | esac | 79 | esac |
80 | done | 80 | done |
81 | 81 | ||
82 | # mode debug enable | 82 | # mode debug enable |
83 | if [ $DEBUG -eq 1 ] | 83 | if [ $DEBUG -eq 1 ] |
84 | then | 84 | then |
85 | set -x | 85 | set -x |
86 | echo -e "## Mode DEBUG ON ##" | 86 | echo -e "## Mode DEBUG ON ##" |
87 | REDIRECTION_OUTPUT="" | ||
88 | else | ||
89 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
90 | fi | 87 | fi |
91 | 88 | ||
92 | # mode verbose enable | 89 | # mode verbose enable |
93 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 90 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
94 | 91 | ||
95 | # Check USAGE by arguments number | 92 | # Check USAGE by arguments number |
96 | if [ $(($#-($OPTIND-1))) -ne 2 ] | 93 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
97 | then | 94 | then |
98 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" | 95 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" |
99 | echo "$0 -h for more info" | 96 | echo "$0 -h for more info" |
100 | exit 1 | 97 | exit 1 |
101 | fi | 98 | fi |
102 | 99 | ||
103 | shift $((OPTIND-1)) | 100 | shift $((OPTIND-1)) |
104 | # check input directory - first argument | 101 | # check input directory - first argument |
105 | if [ ! -e $1 ] | 102 | if [ ! -e $1 ] |
106 | then | 103 | then |
107 | print_error "can't open $1" | 104 | print_error "can't open $1" |
108 | exit 1 | 105 | exit 1 |
109 | fi | 106 | fi |
110 | # check treil input directory - second argument | 107 | # check treil input directory - second argument |
111 | if [ ! -e $1/$2 ] | 108 | if [ ! -e $1/$2 ] |
112 | then | 109 | then |
113 | print_error "can't open $1/$2" | 110 | print_error "can't open $1/$2" |
114 | exit 1 | 111 | exit 1 |
115 | fi | 112 | fi |
116 | 113 | ||
114 | print_info "[${BASENAME}] => Conf Pass start | $(date +'%d/%m/%y %H:%M:%S')" 1 | ||
115 | |||
117 | #-------------# | 116 | #-------------# |
118 | # GLOBAL VARS # | 117 | # GLOBAL VARS # |
119 | #-------------# | 118 | #-------------# |
120 | INPUT_DIR=$(readlink -e $1) | 119 | INPUT_DIR=$(readlink -e $1) |
121 | OUTPUT_DIR=$INPUT_DIR | 120 | OUTPUT_DIR=$INPUT_DIR |
122 | BASENAME=$(basename $OUTPUT_DIR) | 121 | BASENAME=$(basename $OUTPUT_DIR) |
123 | RES_NAME=$2 | 122 | RES_NAME=$2 |
124 | RES_P="${INPUT_DIR}/${RES_NAME}" | 123 | RES_P="${INPUT_DIR}/${RES_NAME}" |
125 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf | 124 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf |
126 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" | 125 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" |
127 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" | 126 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" |
128 | LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log" | 127 | LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log" |
129 | ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log" | 128 | ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log" |
130 | 129 | ||
131 | #------------------# | 130 | #------------------# |
132 | # Create Workspace # | 131 | # Create Workspace # |
133 | #------------------# | 132 | #------------------# |
134 | # Lock directory | 133 | # Lock directory |
135 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] | 134 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] |
136 | then | 135 | then |
137 | print_warn "[${BASENAME}] Confpass is locked -> exit" 2 | 136 | print_warn "[${BASENAME}] Confpass is locked -> exit" 2 |
138 | exit 1 | 137 | exit 1 |
139 | fi | 138 | fi |
140 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 | 139 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 |
141 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 | 140 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 |
142 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi | 141 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi |
143 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi | 142 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi |
144 | mkdir -p $CONF_DIR > /dev/null 2>&1 | 143 | mkdir -p $CONF_DIR > /dev/null 2>&1 |
145 | mkdir -p $RES_CONF_DIR > /dev/null 2>&1 | 144 | mkdir -p $RES_CONF_DIR > /dev/null 2>&1 |
146 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 | 145 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 |
147 | 146 | ||
148 | #---------------# | 147 | #---------------# |
149 | # Check Pass # | 148 | # Check Pass # |
150 | #---------------# | 149 | #---------------# |
151 | print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1 | 150 | print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1 |
152 | # if usf contains more than 49% of 0.600 confidence -> usf error | 151 | # if usf contains more than 49% of 0.600 confidence -> usf error |
153 | if [ -s $USF_FILE ] | 152 | if [ -s $USF_FILE ] |
154 | then | 153 | then |
155 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") | 154 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") |
156 | confall=$(grep -c -E 'confidence=' "${USF_FILE}") | 155 | confall=$(grep -c -E 'confidence=' "${USF_FILE}") |
157 | if [ $confall -gt 0 ] | 156 | if [ $confall -gt 0 ] |
158 | then | 157 | then |
159 | pourcentageofzerosix=$((($conftozerosix*100)/$confall)) | 158 | pourcentageofzerosix=$((($conftozerosix*100)/$confall)) |
160 | if [ $pourcentageofzerosix -gt 49 ] | 159 | if [ $pourcentageofzerosix -gt 49 ] |
161 | then | 160 | then |
162 | print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2 | 161 | print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2 |
163 | print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1 | 162 | print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1 |
164 | mv "${USF_FILE}" "${USF_FILE}.back" | 163 | mv "${USF_FILE}" "${USF_FILE}.back" |
165 | rm -r $CONF_DIR > /dev/null 2>&1 | 164 | rm -r $CONF_DIR > /dev/null 2>&1 |
166 | else | 165 | else |
167 | print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1 | 166 | print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1 |
168 | exit 0 | 167 | exit 0 |
169 | fi | 168 | fi |
170 | fi | 169 | fi |
171 | else | 170 | else |
172 | print_info "[${BASENAME}] No USF file already done, continue..." 1 | 171 | print_info "[${BASENAME}] No USF file already done, continue..." 1 |
173 | fi | 172 | fi |
174 | 173 | ||
175 | # Check if treil are here | 174 | # Check if treil are here |
176 | nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l) | 175 | nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l) |
177 | nbtreil_p=$(ls $RES_P/*.treil 2> /dev/null | wc -l) | 176 | nbtreil_p=$(ls $RES_P/*.treil 2> /dev/null | wc -l) |
178 | if [ $nbtreil_p -eq 0 ] | 177 | if [ $nbtreil_p -eq 0 ] |
179 | then | 178 | then |
180 | print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass" | 179 | print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass" |
181 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi | 180 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi |
182 | exit 1 | 181 | exit 1 |
183 | else | 182 | else |
184 | #Check if more than 89% of treil are done | 183 | #Check if more than 89% of treil are done |
185 | if [ $nbres_p1 -gt 0 ] | 184 | if [ $nbres_p1 -gt 0 ] |
186 | then | 185 | then |
187 | pourcentage=$((($nbtreil_p*100)/$nbres_p1)) | 186 | pourcentage=$((($nbtreil_p*100)/$nbres_p1)) |
188 | if [ $pourcentage -gt 89 ] | 187 | if [ $pourcentage -gt 89 ] |
189 | then | 188 | then |
190 | print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1 | 189 | print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1 |
191 | else | 190 | else |
192 | print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2 | 191 | print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2 |
193 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil " | 192 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil " |
194 | fi | 193 | fi |
195 | fi | 194 | fi |
196 | fi | 195 | fi |
197 | 196 | ||
198 | #------# | 197 | #------# |
199 | # Save # | 198 | # Save # |
200 | #------# | 199 | #------# |
201 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg | 200 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg |
202 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 201 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
203 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 202 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
204 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1 | 203 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1 |
205 | 204 | ||
206 | #--------------------# | 205 | #--------------------# |
207 | # CONFIDENCE MEASURE # | 206 | # CONFIDENCE MEASURE # |
208 | #--------------------# | 207 | #--------------------# |
209 | 208 | ||
210 | # Check percentage of scored_ctm already done, if < 85% do confidence measure | 209 | # Check percentage of scored_ctm already done, if < 85% do confidence measure |
211 | nbres_p=$(ls ${RES_P}/*.treil | wc -l) | 210 | nbres_p=$(ls ${RES_P}/*.treil | wc -l) |
212 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 211 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) |
213 | if [ $nbres_p -gt 0 ] | 212 | if [ $nbres_p -gt 0 ] |
214 | then | 213 | then |
215 | pourcentageres=$((($nbconf*100)/$nbres_p)) | 214 | pourcentageres=$((($nbconf*100)/$nbres_p)) |
216 | if [ $pourcentageres -lt 85 ] | 215 | if [ $pourcentageres -lt 85 ] |
217 | then | 216 | then |
218 | print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1 | 217 | print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1 |
219 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME $REDIRECTION_OUTPUT | 218 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME |
220 | 219 | ||
221 | else | 220 | else |
222 | print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 | 221 | print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 |
223 | fi | 222 | fi |
224 | fi | 223 | fi |
225 | 224 | ||
226 | ### Check scored_ctm number res files ! | 225 | ### Check scored_ctm number res files ! |
227 | if [ $CHECK -eq 1 ] | 226 | if [ $CHECK -eq 1 ] |
228 | then | 227 | then |
229 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 228 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) |
230 | if [ $nbres_p -ne $nbconf ] | 229 | if [ $nbres_p -ne $nbconf ] |
231 | then | 230 | then |
232 | print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2 | 231 | print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2 |
233 | print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" | 232 | print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" |
234 | fi | 233 | fi |
235 | fi | 234 | fi |
236 | 235 | ||
237 | #---------------------------# | 236 | #---------------------------# |
238 | # FROM RES WITH CONF => USF # | 237 | # FROM RES WITH CONF => USF # |
239 | #---------------------------# | 238 | #---------------------------# |
240 | print_info "[${BASENAME}] Create USF file for $RES_P" 1 | 239 | print_info "[${BASENAME}] Create USF file for $RES_P" 1 |
241 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done | 240 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done |
242 | # create USF configuration file | 241 | # create USF configuration file |
243 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg | 242 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg |
244 | # create USF file | 243 | # create USF file |
245 | print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg $REDIRECTION_OUTPUT" 3 | 244 | print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg" 3 |
246 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg $REDIRECTION_OUTPUT | 245 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg |
247 | rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 | 246 | rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 |
248 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE | 247 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE |
249 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf | 248 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf |
250 | rm $USF_FILE.tmp > /dev/null 2>&1 | 249 | rm $USF_FILE.tmp > /dev/null 2>&1 |
251 | 250 | ||
252 | #----------------# | 251 | #----------------# |
253 | # Check USF file # | 252 | # Check USF file # |
254 | #----------------# | 253 | #----------------# |
255 | if [ $CHECK -eq 1 ] | 254 | if [ $CHECK -eq 1 ] |
256 | then | 255 | then |
257 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" | 256 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" |
258 | if [ $? -eq 1 ] | 257 | if [ $? -eq 1 ] |
259 | then | 258 | then |
260 | print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" | 259 | print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" |
261 | print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" | 260 | print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" |
262 | exit 1 | 261 | exit 1 |
263 | fi | 262 | fi |
264 | fi | 263 | fi |
265 | 264 | ||
266 | #-------# | 265 | #-------# |
267 | # CLOSE # | 266 | # CLOSE # |
268 | #-------# | 267 | #-------# |
269 | # Seem OK | 268 | # Seem OK |
270 | print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1 | 269 | print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1 |
271 | 270 | ||
272 | # unlock directory | 271 | # unlock directory |
273 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" | 272 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" |
main_tools/ExploitConfidencePass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ExploitConfidencePass.sh # | 4 | # File : ExploitConfidencePass.sh # |
5 | # Brief : Exploit the ASR confidence pass to : # | 5 | # Brief : Exploit the ASR confidence pass to : # |
6 | # -> boost the confident zone # | 6 | # -> boost the confident zone # |
7 | # -> find alternative in non confident zone | 7 | # -> find alternative in non confident zone |
8 | # -> dynamically extend the lexicon # | 8 | # -> dynamically extend the lexicon # |
9 | # Author : Jean-François Rey # | 9 | # Author : Jean-François Rey # |
10 | # (based on Emmanuel Ferreira # | 10 | # (based on Emmanuel Ferreira # |
11 | # and Hugo Mauchrétien works) # | 11 | # and Hugo Mauchrétien works) # |
12 | # Version : 1.0 # | 12 | # Version : 1.0 # |
13 | # Date : 25/06/13 # | 13 | # Date : 25/06/13 # |
14 | ##################################################### | 14 | ##################################################### |
15 | 15 | ||
16 | echo "### ExploitConfidencePass.sh ###" | 16 | echo "### ExploitConfidencePass.sh ###" |
17 | 17 | ||
18 | # Check OTMEDIA_HOME env var | 18 | # Check OTMEDIA_HOME env var |
19 | if [ -z ${OTMEDIA_HOME} ] | 19 | if [ -z ${OTMEDIA_HOME} ] |
20 | then | 20 | then |
21 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 21 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
22 | export OTMEDIA_HOME=$OTMEDIA_HOME | 22 | export OTMEDIA_HOME=$OTMEDIA_HOME |
23 | fi | 23 | fi |
24 | 24 | ||
25 | # where is ExploitConfidencePass.sh | 25 | # where is ExploitConfidencePass.sh |
26 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 26 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
27 | 27 | ||
28 | if [ -z ${SCRIPT_PATH} ] | 28 | if [ -z ${SCRIPT_PATH} ] |
29 | then | 29 | then |
30 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 30 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
31 | fi | 31 | fi |
32 | 32 | ||
33 | # Include scripts | 33 | # Include scripts |
34 | . $SCRIPT_PATH"/Tools.sh" | 34 | . $SCRIPT_PATH"/Tools.sh" |
35 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" | 35 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" |
36 | 36 | ||
37 | # where is ExploitConfidencePass.cfg | 37 | # where is ExploitConfidencePass.cfg |
38 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" | 38 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" |
39 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] | 39 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] |
40 | then | 40 | then |
41 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE | 41 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE |
42 | else | 42 | else |
43 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 | 43 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 |
44 | exit 1 | 44 | exit 1 |
45 | fi | 45 | fi |
46 | 46 | ||
47 | #---------------# | 47 | #---------------# |
48 | # Parse Options # | 48 | # Parse Options # |
49 | #---------------# | 49 | #---------------# |
50 | while getopts ":hDv:cf:r" opt | 50 | while getopts ":hDv:cf:r" opt |
51 | do | 51 | do |
52 | case $opt in | 52 | case $opt in |
53 | h) | 53 | h) |
54 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" | 54 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" |
55 | echo -e "\t Options:" | 55 | echo -e "\t Options:" |
56 | echo -e "\t\t-h :\tprint this message" | 56 | echo -e "\t\t-h :\tprint this message" |
57 | echo -e "\t\t-D :\tDEBUG mode on" | 57 | echo -e "\t\t-D :\tDEBUG mode on" |
58 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 58 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
59 | echo -e "\t\t-c :\tCheck process, stop if error detected" | 59 | echo -e "\t\t-c :\tCheck process, stop if error detected" |
60 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | 60 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" |
61 | echo -e "\t\t-r n :\tforce rerun without deleting files" | 61 | echo -e "\t\t-r n :\tforce rerun without deleting files" |
62 | exit 1 | 62 | exit 1 |
63 | ;; | 63 | ;; |
64 | D) | 64 | D) |
65 | DEBUG=1 | 65 | DEBUG=1 |
66 | ;; | 66 | ;; |
67 | v) | 67 | v) |
68 | VERBOSE=$OPTARG | 68 | VERBOSE=$OPTARG |
69 | ;; | 69 | ;; |
70 | c) | 70 | c) |
71 | CHECK=1 | 71 | CHECK=1 |
72 | ;; | 72 | ;; |
73 | f) | 73 | f) |
74 | FORKS="--forks $OPTARG" | 74 | FORKS="--forks $OPTARG" |
75 | ;; | 75 | ;; |
76 | r) | 76 | r) |
77 | RERUN=1 | 77 | RERUN=1 |
78 | ;; | 78 | ;; |
79 | :) | 79 | :) |
80 | echo "Option -$OPTARG requires an argument." > /dev/stderr | 80 | echo "Option -$OPTARG requires an argument." > /dev/stderr |
81 | exit 1 | 81 | exit 1 |
82 | ;; | 82 | ;; |
83 | \?) | 83 | \?) |
84 | echo "BAD USAGE : unknow opton -$OPTARG" | 84 | echo "BAD USAGE : unknow opton -$OPTARG" |
85 | #exit 1 | 85 | #exit 1 |
86 | ;; | 86 | ;; |
87 | esac | 87 | esac |
88 | done | 88 | done |
89 | 89 | ||
90 | # mode debug enable | 90 | # mode debug enable |
91 | if [ $DEBUG -eq 1 ] | 91 | if [ $DEBUG -eq 1 ] |
92 | then | 92 | then |
93 | set -x | 93 | set -x |
94 | echo -e "## Mode DEBUG ON ##" | 94 | echo -e "## Mode DEBUG ON ##" |
95 | REDIRECTION_OUTPUT="" | ||
96 | else | ||
97 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
98 | fi | 95 | fi |
99 | 96 | ||
100 | # mode verbose enable | 97 | # mode verbose enable |
101 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 98 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
102 | 99 | ||
103 | # Check USAGE by arguments number | 100 | # Check USAGE by arguments number |
104 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 101 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
105 | then | 102 | then |
106 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" | 103 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" |
107 | echo "$0 -h for more info" | 104 | echo "$0 -h for more info" |
108 | exit 1 | 105 | exit 1 |
109 | fi | 106 | fi |
110 | 107 | ||
111 | shift $((OPTIND-1)) | 108 | shift $((OPTIND-1)) |
112 | # check input directory - first argument | 109 | # check input directory - first argument |
113 | if [ ! -e $1 ] | 110 | if [ ! -e $1 ] |
114 | then | 111 | then |
115 | print_error "can't open $1" | 112 | print_error "can't open $1" |
116 | exit 1 | 113 | exit 1 |
117 | fi | 114 | fi |
118 | 115 | ||
116 | print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1 | ||
117 | |||
119 | #-------------# | 118 | #-------------# |
120 | # GLOBAL VARS # | 119 | # GLOBAL VARS # |
121 | #-------------# | 120 | #-------------# |
122 | INPUT_DIR=$(readlink -e $1) | 121 | INPUT_DIR=$(readlink -e $1) |
123 | OUTPUT_DIR=$INPUT_DIR | 122 | OUTPUT_DIR=$INPUT_DIR |
124 | BASENAME=$(basename $OUTPUT_DIR) | 123 | BASENAME=$(basename $OUTPUT_DIR) |
125 | SHOW_DIR="$OUTPUT_DIR/shows/" | 124 | SHOW_DIR="$OUTPUT_DIR/shows/" |
126 | SOLR_RES="$OUTPUT_DIR/solr/" | 125 | SOLR_RES="$OUTPUT_DIR/solr/" |
127 | EXT_LEX="$OUTPUT_DIR/LEX/" | 126 | EXT_LEX="$OUTPUT_DIR/LEX/" |
128 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" | 127 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" |
129 | LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" | 128 | LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" |
130 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" | 129 | ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" |
131 | 130 | ||
132 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" | 131 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" |
133 | if [ -e $CONFPASS_CONFIG_FILE ] | 132 | if [ -e $CONFPASS_CONFIG_FILE ] |
134 | then | 133 | then |
135 | { | 134 | { |
136 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") | 135 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") |
137 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") | 136 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") |
138 | print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2 | 137 | print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2 |
139 | } | 138 | } |
140 | else | 139 | else |
141 | { | 140 | { |
142 | print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE" | 141 | print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE" |
143 | print_error "[${BASENAME}] -> use res_p2" | 142 | print_error "[${BASENAME}] -> use res_p2" |
144 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" | 143 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" |
145 | RES_CONF="$INPUT_DIR/conf/res_p2" | 144 | RES_CONF="$INPUT_DIR/conf/res_p2" |
146 | } | 145 | } |
147 | fi | 146 | fi |
148 | 147 | ||
149 | mkdir -p $SHOW_DIR > /dev/null 2>&1 | 148 | mkdir -p $SHOW_DIR > /dev/null 2>&1 |
150 | mkdir -p $SOLR_RES > /dev/null 2>&1 | 149 | mkdir -p $SOLR_RES > /dev/null 2>&1 |
151 | mkdir -p $EXT_LEX > /dev/null 2>&1 | 150 | mkdir -p $EXT_LEX > /dev/null 2>&1 |
152 | mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 | 151 | mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 |
153 | 152 | ||
154 | #------------------# | 153 | #------------------# |
155 | # Create Workspace # | 154 | # Create Workspace # |
156 | #------------------# | 155 | #------------------# |
157 | # Lock directory | 156 | # Lock directory |
158 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ] | 157 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ] |
159 | then | 158 | then |
160 | print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2 | 159 | print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2 |
161 | exit 1 | 160 | exit 1 |
162 | fi | 161 | fi |
163 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 | 162 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 |
164 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 | 163 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 |
165 | 164 | ||
166 | #------# | 165 | #------# |
167 | # Save # | 166 | # Save # |
168 | #------# | 167 | #------# |
169 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg | 168 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg |
170 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg | 169 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg |
171 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg | 170 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg |
172 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg | 171 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg |
173 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg | 172 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg |
174 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1 | 173 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1 |
175 | 174 | ||
175 | #---------------# | ||
176 | # Check Pass # | ||
177 | #---------------# | ||
178 | |||
176 | #-----------------------# | 179 | #-----------------------# |
177 | # Segmentation by show # | 180 | # Segmentation by show # |
178 | #-----------------------# | 181 | #-----------------------# |
179 | # create txt file from scored res | 182 | # create txt file from scored res |
180 | # tag pos and lemmatization of the txt file | 183 | # tag pos and lemmatization of the txt file |
181 | # merge the scored res and taglem file | 184 | # merge the scored res and taglem file |
182 | # segment using the last generated file | 185 | # segment using the last generated file |
183 | # and create a ctm file by show | 186 | # and create a ctm file by show |
184 | 187 | ||
185 | print_info "Segmentation by show" 1 | 188 | print_info "[${BASENAME}] Segmentation by show" 1 |
186 | 189 | ||
187 | # -> to txt | 190 | # -> to txt |
188 | print_info "Create txt from scored res" 2 | 191 | print_info "[${BASENAME}] Create txt from scored res" 2 |
189 | cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm | 192 | cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm |
190 | cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp | 193 | cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp |
191 | cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt | 194 | cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt |
192 | 195 | ||
193 | # -> to tagger + lemme | 196 | # -> to tagger + lemme |
194 | print_info "Tag pos and lem in txt file" 2 | 197 | print_info "[${BASENAME}] Tag pos and lem in txt file" 2 |
195 | iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp | 198 | iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp |
196 | $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem | 199 | $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem |
197 | 200 | ||
198 | # merge sctm and taglem | 201 | # merge sctm and taglem |
199 | print_info "Merge scored ctm with tag pos and lem file" 2 | 202 | print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 2 |
200 | cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl | 203 | cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl |
201 | 204 | ||
202 | # -> new seg | 205 | # -> new seg |
203 | print_info "Create xml file and run Topic Seg" 2 | 206 | print_info "[${BASENAME}] Create xml file and run Topic Seg" 2 |
204 | $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml | 207 | $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml |
205 | rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem | 208 | rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem |
206 | 209 | ||
207 | # Lia_topic_seg : bring together sentences into show | 210 | # Lia_topic_seg : bring together sentences into show |
208 | cp $INPUT_DIR/$BASENAME.doc.xml 0.xml | 211 | cp $INPUT_DIR/$BASENAME.doc.xml 0.xml |
209 | java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg | 212 | java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg |
210 | cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg | 213 | cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg |
211 | rm 0.xml $INPUT_DIR/show.seg | 214 | rm 0.xml $INPUT_DIR/show.seg |
212 | 215 | ||
213 | if [ $CHECK -eq 1 ] | 216 | if [ $CHECK -eq 1 ] |
214 | then | 217 | then |
215 | if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi | 218 | if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "[${BASENAME}] ERROR : no Topic segmentation" >> $ERRORFILE; fi |
216 | fi | 219 | fi |
217 | 220 | ||
218 | # Segment ctm into several show files and create a seg list by show | 221 | # Segment ctm into several show files and create a seg list by show |
219 | print_info "Segment ctm into show files and a seg list by show" 2 | 222 | print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 2 |
220 | $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR $REDIRECTION_OUTPUT | 223 | $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR |
221 | 224 | ||
222 | #-----------------------------------------------------------# | 225 | #-----------------------------------------------------------# |
223 | # SOLR QUERIES # | 226 | # SOLR QUERIES # |
224 | # -> Create Confident Words # | 227 | # -> Create Confident Words # |
225 | # Keep conf words and use Tags # | 228 | # Keep conf words and use Tags # |
226 | # -> Query SOLR (document & multimedia) # | 229 | # -> Query SOLR (document & multimedia) # |
227 | # concat words + add date range from 2 days before to 2 days after the show # | 230 | # concat words + add date range from 2 days before to 2 days after the show # |
228 | # query document & multimedia # | 231 | # query document & multimedia # |
229 | #-----------------------------------------------------------# | 232 | #-----------------------------------------------------------# |
230 | print_info "Create SOLR queries and ASK SOLR" 1 | 233 | print_info "[${BASENAME}] Create SOLR queries and ASK SOLR" 1 |
231 | for show in $(ls $SHOW_DIR/*.ctm) | 234 | for show in $(ls $SHOW_DIR/*.ctm) |
232 | do | 235 | do |
233 | bn=$(basename $show .ctm) | 236 | bn=$(basename $show .ctm) |
234 | # Remove words with low confidence and keep useful tagger words | 237 | # Remove words with low confidence and keep useful tagger words |
235 | cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone" | 238 | cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone" |
236 | # Get date 2 day before and after the show | 239 | # Get date 2 day before and after the show |
237 | datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)` | 240 | datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)` |
238 | # Create SOLR queries | 241 | # Create SOLR queries |
239 | cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries" | 242 | cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries" |
240 | # Ask SOLR DB | 243 | # Ask SOLR DB |
241 | if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then | 244 | if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then |
242 | query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]" | 245 | query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]" |
243 | echo $query > $SHOW_DIR/$bn.queries | 246 | echo $query > $SHOW_DIR/$bn.queries |
244 | python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp $REDIRECTION_OUTPUT | 247 | python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp |
245 | cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords | 248 | cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords |
246 | cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt | 249 | cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt |
247 | rm $SOLR_RES/*.tmp > /dev/null 2>&1 | 250 | rm $SOLR_RES/*.tmp > /dev/null 2>&1 |
248 | fi | 251 | fi |
249 | 252 | ||
250 | if [ $CHECK -eq 1 ] | 253 | if [ $CHECK -eq 1 ] |
251 | then | 254 | then |
252 | if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ] | 255 | if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ] |
253 | then | 256 | then |
254 | print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1 | 257 | print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1 |
255 | fi | 258 | fi |
256 | fi | 259 | fi |
257 | 260 | ||
258 | done | 261 | done |
259 | 262 | ||
260 | #----------------------------------------------------------------------------------------------- | 263 | #----------------------------------------------------------------------------------------------- |
261 | # Build trigger file | 264 | # Build trigger file |
262 | # 1) keywords are automatically boosted in the non confident zone of the current res | 265 | # 1) keywords are automatically boosted in the non confident zone of the current res |
263 | # confident zones are boosted | 266 | # confident zones are boosted |
264 | # previous words in sensitive zones are penalized | 267 | # previous words in sensitive zones are penalized |
265 | # 2) OOVs are extracted + phonetized | 268 | # 2) OOVs are extracted + phonetized |
266 | # 3) Try to find OOVs acoustically in the current segment | 269 | # 3) Try to find OOVs acoustically in the current segment |
267 | # 4) Generate the .trigg file | 270 | # 4) Generate the .trigg file |
268 | #------------------------------------------------------------------------------------------------ | 271 | #------------------------------------------------------------------------------------------------ |
269 | print_info "Build trigger files" 1 | 272 | print_info "[${BASENAME}] Build trigger files" 1 |
270 | for i in `ls $SOLR_RES/*.keywords` | 273 | for i in `ls $SOLR_RES/*.keywords` |
271 | do | 274 | do |
272 | basename=`basename $i .keywords` | 275 | basename=`basename $i .keywords` |
273 | 276 | ||
274 | # | 277 | # |
275 | # Tokenize & produce coverage report | 278 | # Tokenize & produce coverage report |
276 | # Use filter you need | 279 | # Use filter you need |
277 | # | 280 | # |
278 | print_info "keywords filtering and produce coverage report" 2 | 281 | print_info "[${BASENAME}] keywords filtering and produce coverage report" 2 |
279 | # Default filter | 282 | # Default filter |
280 | cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\ | 283 | cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\ |
281 | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok | 284 | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok |
282 | # do less filter | 285 | # do less filter |
283 | #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok | 286 | #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok |
284 | 287 | ||
285 | 288 | ||
286 | # | 289 | # |
287 | # Extract "real" OOV and phonetize them | 290 | # Extract "real" OOV and phonetize them |
288 | # -> small custom filtering to avoid too much noise | 291 | # -> small custom filtering to avoid too much noise |
289 | # | 292 | # |
290 | print_info "Extract OOV and phonetize them" 2 | 293 | print_info "[${BASENAME}] Extract OOV and phonetize them" 2 |
291 | ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov | 294 | ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov |
292 | 295 | ||
293 | # | 296 | # |
294 | # Search INVOC & OOV in the current lattice | 297 | # Search INVOC & OOV in the current lattice |
295 | # | 298 | # |
296 | print_info "Search INVOC and OOV in the current lattice" 2 | 299 | print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 2 |
297 | cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch | 300 | cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch |
298 | cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch | 301 | cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch |
299 | 302 | ||
300 | # For each lattice (one per segment listed for the show) | 303 | # For each lattice (one per segment listed for the show) |
301 | for baseseg in $(cat "$SHOW_DIR/$basename.lst") | 304 | for baseseg in $(cat "$SHOW_DIR/$basename.lst") |
302 | do | 305 | do |
303 | $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION | 306 | $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION |
304 | # | 307 | # |
305 | # Produce the boost file for the next decoding pass | 308 | # Produce the boost file for the next decoding pass |
306 | # | 309 | # |
307 | print_info "Produce trigg file : $baseseg " 3 | 310 | print_info "[${BASENAME}] Produce trigg file : $baseseg " 3 |
308 | cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg | 311 | cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg |
309 | done | 312 | done |
310 | 313 | ||
311 | done | 314 | done |
312 | 315 | ||
313 | #----------------------------------------------------------------------------------------------- | 316 | #----------------------------------------------------------------------------------------------- |
314 | # Build the extended SPEERAL Lexicon | 317 | # Build the extended SPEERAL Lexicon |
315 | # 1) Merge OOVs + LEXICON | 318 | # 1) Merge OOVs + LEXICON |
316 | # 2) Related texts are collected in order to find the invoc word that maximizes the ppl (LM proba) | 319 | # 2) Related texts are collected in order to find the invoc word that maximizes the ppl (LM proba) |
317 | # 3) The current lexicon is extended with all the valid OOVs | 320 | # 3) The current lexicon is extended with all the valid OOVs |
318 | #----------------------------------------------------------------------------------------------- | 321 | #----------------------------------------------------------------------------------------------- |
319 | print_info "Build extended Speeral Lexicon" 1 | 322 | print_info "[${BASENAME}] Build extended Speeral Lexicon" 1 |
320 | mkdir -p $EXT_LEX/final | 323 | mkdir -p $EXT_LEX/final |
321 | mkdir -p $EXT_LEX/tmp | 324 | mkdir -p $EXT_LEX/tmp |
322 | mkdir -p $EXT_LEX/tmp/txt | 325 | mkdir -p $EXT_LEX/tmp/txt |
323 | # | 326 | # |
324 | # Collect the acoustically found OOVs and their phonetisation | 327 | # Collect the acoustically found OOVs and their phonetisation |
325 | # | 328 | # |
326 | print_info "Get all OOV and retrieve all phonetisation" 2 | 329 | print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 2 |
327 | for i in `ls $SOLR_RES/*.phon_oov` | 330 | for i in `ls $SOLR_RES/*.phon_oov` |
328 | do | 331 | do |
329 | basename=`basename $i .phon_oov` | 332 | basename=`basename $i .phon_oov` |
330 | 333 | ||
331 | rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null | 334 | rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null |
332 | # list acousticly found for the show | 335 | # list acousticly found for the show |
333 | for baseseg in $(cat "$SHOW_DIR/$basename.lst") | 336 | for baseseg in $(cat "$SHOW_DIR/$basename.lst") |
334 | do | 337 | do |
335 | cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound | 338 | cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound |
336 | done | 339 | done |
337 | cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp | 340 | cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp |
338 | mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound | 341 | mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound |
339 | 342 | ||
340 | # | 343 | # |
341 | # Extract OOV really added | 344 | # Extract OOV really added |
342 | # | 345 | # |
343 | cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov | 346 | cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov |
344 | $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound $REDIRECTION_OUTPUT | 347 | $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound |
345 | # | 348 | # |
346 | # Retrieve all phonetisation | 349 | # Retrieve all phonetisation |
347 | # | 350 | # |
348 | cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon | 351 | cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon |
349 | done | 352 | done |
350 | 353 | ||
351 | # | 354 | # |
352 | # Merge OOVs and their phonetisation | 355 | # Merge OOVs and their phonetisation |
353 | # | 356 | # |
354 | print_info "Merge OOV and their phonetisation" 2 | 357 | print_info "[${BASENAME}] Merge OOV and their phonetisation" 2 |
355 | lexname=$(basename $LEXICON) | 358 | lexname=$(basename $LEXICON) |
356 | cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon | 359 | cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon |
357 | cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound | 360 | cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound |
358 | $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon $REDIRECTION_OUTPUT | 361 | $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon |
359 | 362 | ||
360 | # | 363 | # |
361 | # Collect + clean retrieved txt | 364 | # Collect + clean retrieved txt |
362 | # | 365 | # |
363 | print_info "Collect and clean SOLR txt answers" 2 | 366 | print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2 |
364 | # choose filter | 367 | # choose filter |
365 | # default | 368 | # default |
366 | cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt | 369 | cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt |
367 | # low filter | 370 | # low filter |
368 | #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt | 371 | #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt |
369 | 372 | ||
370 | # | 373 | # |
371 | # Construct the map file | 374 | # Construct the map file |
372 | # | 375 | # |
373 | # Notes: | 376 | # Notes: |
374 | # - Expected format : | 377 | # - Expected format : |
375 | # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1> | 378 | # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1> |
376 | # | 379 | # |
377 | print_info "Construct map file" 2 | 380 | print_info "[${BASENAME}] Construct map file" 2 |
378 | rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null | 381 | rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null |
379 | rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null | 382 | rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null |
380 | 383 | ||
381 | while read oov | 384 | while read oov |
382 | do | 385 | do |
383 | oov=`echo $oov | sed "s/\n//g"` | 386 | oov=`echo $oov | sed "s/\n//g"` |
384 | # | 387 | # |
385 | # Obtain the oov's tag | 388 | # Obtain the oov's tag |
386 | # | 389 | # |
387 | #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` | 390 | #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` |
388 | # | 391 | # |
389 | # Try to collect text containing the oov word | 392 | # Try to collect text containing the oov word |
390 | # | 393 | # |
391 | cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt | 394 | cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt |
392 | if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then | 395 | if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then |
393 | nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` | 396 | nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` |
394 | if [ $nbWords -eq 0 ]; then | 397 | if [ $nbWords -eq 0 ]; then |
395 | echo "UNVALID OOV: $oov => $nbWords occurrences" | 398 | echo "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences" |
396 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 399 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
397 | else | 400 | else |
398 | # | 401 | # |
399 | # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl in the overall txt collected | 402 | # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl in the overall txt collected |
400 | # | 403 | # |
401 | #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" | 404 | #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" |
402 | candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` | 405 | candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` |
403 | if [ ! x$candidate = "x" ]; then | 406 | if [ ! x$candidate = "x" ]; then |
404 | grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon | 407 | grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon |
405 | while read phonLine | 408 | while read phonLine |
406 | do | 409 | do |
407 | #<word> <phon> => <word> <candidate> <phon> | 410 | #<word> <phon> => <word> <candidate> <phon> |
408 | echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map | 411 | echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map |
409 | done < $EXT_LEX/tmp/$oov.phon | 412 | done < $EXT_LEX/tmp/$oov.phon |
410 | else | 413 | else |
411 | echo "UNVALID OOV: $oov => no availaible Candidate word in LM" | 414 | echo "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM" |
412 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 415 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
413 | fi | 416 | fi |
414 | fi | 417 | fi |
415 | else | 418 | else |
416 | echo "UNVALID OOV: $oov" | 419 | echo "[${BASENAME}] UNVALID OOV: $oov" |
417 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov | 420 | echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov |
418 | fi | 421 | fi |
419 | done < $EXT_LEX/final/all.oov_acousticlyfound | 422 | done < $EXT_LEX/final/all.oov_acousticlyfound |
420 | 423 | ||
421 | # | 424 | # |
422 | ### Speeral | 425 | ### Speeral |
423 | # | 426 | # |
424 | 427 | ||
425 | lexname=`basename $LEXICON` | 428 | lexname=`basename $LEXICON` |
426 | # | 429 | # |
427 | # Build the final trigger file | 430 | # Build the final trigger file |
428 | # | 431 | # |
429 | print_info "Clean trigg files" 2 | 432 | print_info "[${BASENAME}] Clean trigg files" 2 |
430 | mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null | 433 | mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null |
431 | mkdir -p $EXT_LEX/speeral/ 2> /dev/null | 434 | mkdir -p $EXT_LEX/speeral/ 2> /dev/null |
432 | for i in `ls $TRIGGER_CONFZONE/*.trigg` | 435 | for i in `ls $TRIGGER_CONFZONE/*.trigg` |
433 | do | 436 | do |
434 | basename=`basename $i .trigg` | 437 | basename=`basename $i .trigg` |
435 | cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg | 438 | cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg |
436 | done | 439 | done |
437 | # | 440 | # |
438 | # Compile the speeral extended lexicon | 441 | # Compile the speeral extended lexicon |
439 | # | 442 | # |
440 | print_info "Compile Speeral extended lexicon" 2 | 443 | print_info "[${BASENAME}] Compile Speeral extended lexicon" 2 |
441 | $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext $REDIRECTION_OUTPUT | 444 | $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext |
442 | 445 | ||
443 | if [ $CHECK -eq 1 ] | 446 | if [ $CHECK -eq 1 ] |
444 | then | 447 | then |
445 | check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext" | 448 | check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext" |
446 | if [ $? -eq 1 ] | 449 | if [ $? -eq 1 ] |
447 | then | 450 | then |
448 | echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE | 451 | echo -e "[${BASENAME}] ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE |
449 | exit 1; | 452 | exit 1; |
450 | fi | 453 | fi |
451 | fi | 454 | fi |
452 | 455 | ||
453 | 456 | ||
454 | #-------# | 457 | #-------# |
455 | # CLOSE # | 458 | # CLOSE # |
456 | #-------# | 459 | #-------# |
457 | # Seem OK | 460 | # Seem OK |
458 | print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1 | 461 | print_info "[${BASENAME}] <= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1 |
459 | echo -e "#Solr $BASENAME " >> $LOGFILE | 462 | echo -e "[${BASENAME}] #Solr $BASENAME " >> $LOGFILE |
460 | 463 | ||
461 | # unlock directory | 464 | # unlock directory |
462 | mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" | 465 | mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" |
main_tools/FirstPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : FirstPass.sh # | 4 | # File : FirstPass.sh # |
5 | # Brief : ASR first pass and speaker diarization # | 5 | # Brief : ASR first pass and speaker diarization # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (base on Emmanuel Ferreira # | 7 | # (base on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.1 # | 9 | # Version : 1.1 # |
10 | # Date : 18/06/13 # | 10 | # Date : 18/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
13 | echo "### FirstPass.sh ###" | 13 | echo "### FirstPass.sh ###" |
14 | 14 | ||
15 | # Check OTMEDIA_HOME env var | 15 | # Check OTMEDIA_HOME env var |
16 | if [ -z ${OTMEDIA_HOME} ] | 16 | if [ -z ${OTMEDIA_HOME} ] |
17 | then | 17 | then |
18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
19 | export OTMEDIA_HOME=$OTMEDIA_HOME | 19 | export OTMEDIA_HOME=$OTMEDIA_HOME |
20 | fi | 20 | fi |
21 | 21 | ||
22 | # where is FirstPass.sh | 22 | # where is FirstPass.sh |
23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
24 | 24 | ||
25 | # scripts path | 25 | # scripts path |
26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
27 | 27 | ||
28 | # Include scripts | 28 | # Include scripts |
29 | . $SCRIPT_PATH"/Tools.sh" | 29 | . $SCRIPT_PATH"/Tools.sh" |
30 | . $SCRIPT_PATH"/CheckFirstPass.sh" | 30 | . $SCRIPT_PATH"/CheckFirstPass.sh" |
31 | 31 | ||
32 | # where is FirstPass.cfg | 32 | # where is FirstPass.cfg |
33 | FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" | 33 | FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" |
34 | if [ -e $FIRSTPASS_CONFIG_FILE ] | 34 | if [ -e $FIRSTPASS_CONFIG_FILE ] |
35 | then | 35 | then |
36 | . $FIRSTPASS_CONFIG_FILE | 36 | . $FIRSTPASS_CONFIG_FILE |
37 | else | 37 | else |
38 | echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" > /dev/stderr | 38 | echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" > /dev/stderr |
39 | echo "exit" > /dev/stderr | 39 | echo "exit" > /dev/stderr |
40 | exit 1 | 40 | exit 1 |
41 | fi | 41 | fi |
42 | 42 | ||
43 | #---------------# | 43 | #---------------# |
44 | # Parse Options # | 44 | # Parse Options # |
45 | #---------------# | 45 | #---------------# |
46 | while getopts ":hDv:cf:r" opt | 46 | while getopts ":hDv:cf:r" opt |
47 | do | 47 | do |
48 | case $opt in | 48 | case $opt in |
49 | h) | 49 | h) |
50 | echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" | 50 | echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" |
51 | echo -e "\t Options:" | 51 | echo -e "\t Options:" |
52 | echo -e "\t\t-h :\tprint this message" | 52 | echo -e "\t\t-h :\tprint this message" |
53 | echo -e "\t\t-D :\tDEBUG mode on" | 53 | echo -e "\t\t-D :\tDEBUG mode on" |
54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
55 | echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected" | 55 | echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected" |
56 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | 56 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" |
57 | echo -e "\t\t-r :\tforce rerun the wav file" | 57 | echo -e "\t\t-r :\tforce rerun the wav file" |
58 | exit 1 | 58 | exit 1 |
59 | ;; | 59 | ;; |
60 | D) | 60 | D) |
61 | DEBUG=1 | 61 | DEBUG=1 |
62 | ;; | 62 | ;; |
63 | v) | 63 | v) |
64 | VERBOSE=$OPTARG | 64 | VERBOSE=$OPTARG |
65 | ;; | 65 | ;; |
66 | c) | 66 | c) |
67 | CHECK=1 | 67 | CHECK=1 |
68 | ;; | 68 | ;; |
69 | f) | 69 | f) |
70 | FORKS="--forks $OPTARG" | 70 | FORKS="--forks $OPTARG" |
71 | ;; | 71 | ;; |
72 | r) | 72 | r) |
73 | RERUN=1 | 73 | RERUN=1 |
74 | ;; | 74 | ;; |
75 | :) | 75 | :) |
76 | echo "Option -$OPTARG requires an argument." > /dev/stderr | 76 | echo "Option -$OPTARG requires an argument." > /dev/stderr |
77 | exit 1 | 77 | exit 1 |
78 | ;; | 78 | ;; |
79 | \?) | 79 | \?) |
80 | echo "BAD USAGE : unknow opton -$OPTARG" | 80 | echo "BAD USAGE : unknow opton -$OPTARG" |
81 | exit 1 | 81 | exit 1 |
82 | ;; | 82 | ;; |
83 | esac | 83 | esac |
84 | done | 84 | done |
85 | 85 | ||
86 | # mode debug enable | 86 | # mode debug enable |
87 | if [ $DEBUG -eq 1 ] | 87 | if [ $DEBUG -eq 1 ] |
88 | then | 88 | then |
89 | set -x | 89 | set -x |
90 | echo -e "## Mode DEBUG ON ##" | 90 | echo -e "## Mode DEBUG ON ##" |
91 | REDIRECTION_OUTPUT="" | ||
92 | else | ||
93 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
94 | fi | 91 | fi |
95 | 92 | ||
96 | # mode verbose enable | 93 | # mode verbose enable |
97 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
98 | 95 | ||
99 | # Check usage by number of arguments | 96 | # Check usage by number of arguments |
100 | if [ $(($#-($OPTIND-1))) -ne 2 ] | 97 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
101 | then | 98 | then |
102 | echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" | 99 | echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" |
103 | echo "$0 -h for more info" | 100 | echo "$0 -h for more info" |
104 | exit 1 | 101 | exit 1 |
105 | fi | 102 | fi |
106 | 103 | ||
107 | shift $((OPTIND-1)) | 104 | shift $((OPTIND-1)) |
108 | # check audio file - First argument | 105 | # check audio file - First argument |
109 | if [ -e $1 ] && [ -s $1 ] | 106 | if [ -e $1 ] && [ -s $1 ] |
110 | then | 107 | then |
111 | # absolute path to wav file | 108 | # absolute path to wav file |
112 | WAV_FILE=$(readlink -e $1) | 109 | WAV_FILE=$(readlink -e $1) |
113 | # wav filename | 110 | # wav filename |
114 | FILENAME=$(basename $WAV_FILE) | 111 | FILENAME=$(basename $WAV_FILE) |
115 | # wav filename without extension | 112 | # wav filename without extension |
116 | BASENAME=${FILENAME%.*} | 113 | BASENAME=${FILENAME%.*} |
117 | 114 | ||
118 | print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1 | 115 | print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1 |
119 | print_info "[${BASENAME}] $WAV_FILE OK" 2 | 116 | print_info "[${BASENAME}] $WAV_FILE OK" 2 |
120 | else | 117 | else |
121 | print_error "can't find $1 OR file is empty" | 118 | print_error "can't find $1 OR file is empty" |
122 | exit 1 | 119 | exit 1 |
123 | fi | 120 | fi |
124 | 121 | ||
125 | # check output directory - Second argument | 122 | # check output directory - Second argument |
126 | if [ ! -e $2 ] | 123 | if [ ! -e $2 ] |
127 | then | 124 | then |
128 | mkdir -p $2 | 125 | mkdir -p $2 |
129 | print_info "[${BASENAME}] Make directory $2" 2 | 126 | print_info "[${BASENAME}] Make directory $2" 2 |
130 | fi | 127 | fi |
131 | 128 | ||
132 | 129 | ||
133 | #-------------# | 130 | #-------------# |
134 | # GLOBAL VARS # | 131 | # GLOBAL VARS # |
135 | #-------------# | 132 | #-------------# |
136 | OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path | 133 | OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path |
137 | OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME | 134 | OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME |
138 | PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file | 135 | PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file |
139 | PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory | 136 | PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory |
140 | SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file | 137 | SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file |
141 | LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file | 138 | LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file |
142 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" | 139 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" |
143 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log" | 140 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log" |
144 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log" | 141 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log" |
145 | 142 | ||
146 | #------------------# | 143 | #------------------# |
147 | # Create WORKSPACE # | 144 | # Create WORKSPACE # |
148 | #------------------# | 145 | #------------------# |
149 | if [ ! -e $OUTPUT_DIR_BASENAME ] | 146 | if [ ! -e $OUTPUT_DIR_BASENAME ] |
150 | then | 147 | then |
151 | mkdir -p $OUTPUT_DIR_BASENAME | 148 | mkdir -p $OUTPUT_DIR_BASENAME |
152 | print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2 | 149 | print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2 |
153 | fi | 150 | fi |
154 | 151 | ||
155 | # Lock directory | 152 | # Lock directory |
156 | if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi | 153 | if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi |
157 | rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 | 154 | rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 |
158 | touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 | 155 | touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 |
159 | 156 | ||
160 | rm -r $PLP_DIR > /dev/null 2>&1; | 157 | rm -r $PLP_DIR > /dev/null 2>&1; |
161 | mkdir -p $PLP_DIR | 158 | mkdir -p $PLP_DIR |
162 | print_info "[${BASENAME}] Make directory $PLP_DIR" 2 | 159 | print_info "[${BASENAME}] Make directory $PLP_DIR" 2 |
163 | if [ $RERUN -eq 0 ]; | 160 | if [ $RERUN -eq 0 ]; |
164 | then | 161 | then |
165 | rm -r $RES_DIR > /dev/null 2>&1; | 162 | rm -r $RES_DIR > /dev/null 2>&1; |
166 | else | 163 | else |
167 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 164 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
168 | fi | 165 | fi |
169 | mkdir -p $RES_DIR > /dev/null 2>&1 | 166 | mkdir -p $RES_DIR > /dev/null 2>&1 |
170 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 | 167 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 |
171 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 | 168 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 |
172 | 169 | ||
173 | #--------------------# | 170 | #--------------------# |
174 | # Save configuration # | 171 | # Save configuration # |
175 | #--------------------# | 172 | #--------------------# |
176 | cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg | 173 | cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg |
177 | echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 174 | echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
178 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 175 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
179 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 176 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
180 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 177 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
181 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 178 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
182 | echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 179 | echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
183 | echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 180 | echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
184 | echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 181 | echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
185 | echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 182 | echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
186 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 183 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
187 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 | 184 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 |
188 | 185 | ||
189 | #-------------------------# | 186 | #-------------------------# |
190 | # Check Audio File Format # | 187 | # Check Audio File Format # |
191 | #-------------------------# | 188 | #-------------------------# |
192 | error=0 | 189 | error=0 |
193 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") | 190 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") |
194 | if [ -z "$temp" ]; then error=1; fi | 191 | if [ -z "$temp" ]; then error=1; fi |
195 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") | 192 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") |
196 | if [ -z "$temp" ]; then error=1; fi | 193 | if [ -z "$temp" ]; then error=1; fi |
197 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") | 194 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") |
198 | if [ -z "$temp" ]; then error=1; fi | 195 | if [ -z "$temp" ]; then error=1; fi |
199 | 196 | ||
200 | if [ $error -eq 1 ] | 197 | if [ $error -eq 1 ] |
201 | then | 198 | then |
202 | print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" | 199 | print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" |
203 | print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" | 200 | print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" |
204 | avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT | 201 | avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav |
205 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav | 202 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav |
206 | FILENAME=$BASENAME.wav | 203 | FILENAME=$BASENAME.wav |
207 | print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE" | 204 | print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE" |
208 | fi | 205 | fi |
209 | 206 | ||
210 | #---------------# | 207 | #---------------# |
211 | # Get SRT file # | 208 | # Get SRT file # |
212 | #---------------# | 209 | #---------------# |
213 | if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] | 210 | if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] |
214 | then | 211 | then |
215 | cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT | 212 | cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT |
216 | print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3 | 213 | print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3 |
217 | fi | 214 | fi |
218 | 215 | ||
219 | #------------# | 216 | #------------# |
220 | # WAV -> PLP # | 217 | # WAV -> PLP # |
221 | #------------# | 218 | #------------# |
222 | print_info "[${BASENAME}] convert WAV -> PLP" 1 | 219 | print_info "[${BASENAME}] convert WAV -> PLP" 1 |
223 | echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp | 220 | echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp |
224 | print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3 | 221 | print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3 |
225 | 222 | ||
226 | $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT | 223 | $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms |
227 | 224 | ||
228 | if [ $CHECK -eq 1 ] | 225 | if [ $CHECK -eq 1 ] |
229 | then | 226 | then |
230 | check_first_pass_plp "$PLP_FILE" | 227 | check_first_pass_plp "$PLP_FILE" |
231 | if [ $? -eq 1 ] | 228 | if [ $? -eq 1 ] |
232 | then | 229 | then |
233 | print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE" | 230 | print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE" |
234 | echo "[${BASENAME}] ERROR : ${BASENAME} check $ERRORFILE file" > /dev/stderr | 231 | echo "[${BASENAME}] ERROR : ${BASENAME} check $ERRORFILE file" > /dev/stderr |
235 | exit 1 | 232 | exit 1 |
236 | fi | 233 | fi |
237 | fi | 234 | fi |
238 | 235 | ||
239 | rm $OUTPUT_DIR_BASENAME/list.tmp | 236 | rm $OUTPUT_DIR_BASENAME/list.tmp |
240 | 237 | ||
241 | #------------------------------# | 238 | #------------------------------# |
242 | # S/NS + SPEAKERS SEGMENTATION # | 239 | # S/NS + SPEAKERS SEGMENTATION # |
243 | #------------------------------# | 240 | #------------------------------# |
244 | print_info "[${BASENAME}] Launch speakers diarization" 1 | 241 | print_info "[${BASENAME}] Launch speakers diarization" 1 |
245 | # Compute the seg file | 242 | # Compute the seg file |
246 | print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3 | 243 | print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3 |
247 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME | 244 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME |
248 | java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering | 245 | java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering |
249 | 246 | ||
250 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) | 247 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) |
251 | then | 248 | then |
252 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE" | 249 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE" |
253 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" | 250 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" |
254 | exit 1 | 251 | exit 1 |
255 | fi | 252 | fi |
256 | 253 | ||
257 | 254 | ||
258 | # Create LBL file | 255 | # Create LBL file |
259 | print_info "Extract LBL file from SEG file" 1 | 256 | print_info "Extract LBL file from SEG file" 1 |
260 | 257 | ||
261 | cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE | 258 | cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE |
262 | 259 | ||
263 | if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) | 260 | if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) |
264 | then | 261 | then |
265 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE" | 262 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE" |
266 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" | 263 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" |
267 | exit 1 | 264 | exit 1 |
268 | fi | 265 | fi |
269 | 266 | ||
270 | 267 | ||
271 | #----------------------------------------------------# | 268 | #----------------------------------------------------# |
272 | # Cut global PLP file according to LBL segmentations # | 269 | # Cut global PLP file according to LBL segmentations # |
273 | #----------------------------------------------------# | 270 | #----------------------------------------------------# |
274 | print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1 | 271 | print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1 |
275 | print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3 | 272 | print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3 |
276 | 273 | ||
277 | $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT | 274 | $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG |
278 | 275 | ||
279 | if [ $CHECK -eq 1 ] | 276 | if [ $CHECK -eq 1 ] |
280 | then | 277 | then |
281 | check_first_pass_plps_lbl $PLP_DIR $LBL_FILE | 278 | check_first_pass_plps_lbl $PLP_DIR $LBL_FILE |
282 | if [ $? -eq 1 ] | 279 | if [ $? -eq 1 ] |
283 | then | 280 | then |
284 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files" | 281 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files" |
285 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" | 282 | print_error "[${BASENAME}] ERROR : check $ERRORFILE file" |
286 | exit 1 | 283 | exit 1 |
287 | fi | 284 | fi |
288 | fi | 285 | fi |
289 | 286 | ||
290 | # rename the plp files | 287 | # rename the plp files |
291 | cd $PLP_DIR; | 288 | cd $PLP_DIR; |
292 | rename -f s/_/#/g *plp | 289 | rename -f s/_/#/g *plp |
293 | rename -f s/#/_/ *plp | 290 | rename -f s/#/_/ *plp |
294 | cd $OLDPWD | 291 | cd $OLDPWD |
295 | 292 | ||
296 | #---------------------------------------------# | 293 | #---------------------------------------------# |
297 | # PLP files list according to acoustic models # | 294 | # PLP files list according to acoustic models # |
298 | #---------------------------------------------# | 295 | #---------------------------------------------# |
299 | print_info "[${BASENAME}] Create PLP list depending of the model" 1 | 296 | print_info "[${BASENAME}] Create PLP list depending of the model" 1 |
300 | # Create a list of plp files | 297 | # Create a list of plp files |
301 | find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst | 298 | find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst |
302 | 299 | ||
303 | rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 | 300 | rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 |
304 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 301 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
305 | do | 302 | do |
306 | a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` | 303 | a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` |
307 | if [ -n "$a" ]; then | 304 | if [ -n "$a" ]; then |
308 | print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 | 305 | print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 |
309 | grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 306 | grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
310 | fi | 307 | fi |
311 | done | 308 | done |
312 | 309 | ||
313 | #-----------------------# | 310 | #-----------------------# |
314 | # First Pass (DECODING) # | 311 | # First Pass (DECODING) # |
315 | #-----------------------# | 312 | #-----------------------# |
316 | # | 313 | # |
317 | # For all AM do decoding | 314 | # For all AM do decoding |
318 | # if the check fails -> retry the undone decodings (at most once) | 315 | # if the check fails -> retry the undone decodings (at most once) |
319 | # | 316 | # |
320 | print_info "[${BASENAME}] Launch decoding" 1 | 317 | print_info "[${BASENAME}] Launch decoding" 1 |
321 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 318 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
322 | do | 319 | do |
323 | redo=1; # nb of try if not all segs is decoded | 320 | redo=1; # nb of try if not all segs is decoded |
324 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] | 321 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] |
325 | then | 322 | then |
326 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 323 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
327 | while [ $redo -gt 0 ]; do | 324 | while [ $redo -gt 0 ]; do |
328 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 325 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
329 | print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 | 326 | print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3 |
330 | # Run speeral | 327 | # Run speeral |
331 | $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT | 328 | $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock |
332 | 329 | ||
333 | # Check if error | 330 | # Check if error |
334 | if [ $CHECK -eq 1 ] | 331 | if [ $CHECK -eq 1 ] |
335 | then | 332 | then |
336 | check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" | 333 | check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" |
337 | # if error | 334 | # if error |
338 | if [ $? -eq 1 ] | 335 | if [ $? -eq 1 ] |
339 | then | 336 | then |
340 | # rerun | 337 | # rerun |
341 | redo=$(($redo - 1)); | 338 | redo=$(($redo - 1)); |
342 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 | 339 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 |
343 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" | 340 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" |
344 | # new plp list | 341 | # new plp list |
345 | # list .seg done and compare to list of seg to do | 342 | # list .seg done and compare to list of seg to do |
346 | ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 343 | ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
347 | diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst | 344 | diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst |
348 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 345 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
349 | # log seg to do | 346 | # log seg to do |
350 | print_log_file $LOGFILE "Segs not done :" | 347 | print_log_file $LOGFILE "Segs not done :" |
351 | cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE | 348 | cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE |
352 | todo=${OUTPUT_DIR_BASENAME}/todo.lst | 349 | todo=${OUTPUT_DIR_BASENAME}/todo.lst |
353 | print_warn "[${BASENAME}] Try $redo" 3 | 350 | print_warn "[${BASENAME}] Try $redo" 3 |
354 | fi | 351 | fi |
355 | fi | 352 | fi |
356 | done | 353 | done |
357 | rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 | 354 | rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 |
358 | #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 355 | #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
359 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 356 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
360 | fi | 357 | fi |
361 | done | 358 | done |
362 | 359 | ||
363 | ## Check missing seg and log it | 360 | ## Check missing seg and log it |
364 | if [ $CHECK -eq 1 ] | 361 | if [ $CHECK -eq 1 ] |
365 | then | 362 | then |
366 | ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 363 | ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
367 | todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l) | 364 | todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l) |
368 | if [ $todo -eq 0 ]; then todo=1;fi | 365 | if [ $todo -eq 0 ]; then todo=1;fi |
369 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) | 366 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) |
370 | pourcentage=$((($notdone*100)/$todo)) | 367 | pourcentage=$((($notdone*100)/$todo)) |
371 | 368 | ||
372 | if [ $notdone -ne 0 ] | 369 | if [ $notdone -ne 0 ] |
373 | then | 370 | then |
374 | print_error "[${BASENAME}] ERROR : check $ERRORFILE" | 371 | print_error "[${BASENAME}] ERROR : check $ERRORFILE" |
375 | print_log_file "$ERRORFILE" "ERROR : Segs not done [" | 372 | print_log_file "$ERRORFILE" "ERROR : Segs not done [" |
376 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE | 373 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE |
377 | print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" | 374 | print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" |
378 | else | 375 | else |
379 | print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" | 376 | print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" |
380 | fi | 377 | fi |
381 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 378 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
382 | fi | 379 | fi |
383 | 380 | ||
384 | #---------------# | 381 | #---------------# |
385 | # Convert res # | 382 | # Convert res # |
386 | #---------------# | 383 | #---------------# |
387 | print_info "[${BASENAME}] Convert .res into .ctm" 1 | 384 | print_info "[${BASENAME}] Convert .res into .ctm" 1 |
388 | # .res => .ctm | 385 | # .res => .ctm |
389 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT | 386 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm |
390 | print_info "[${BASENAME}] Convert .res into .trs" 1 | 387 | print_info "[${BASENAME}] Convert .res into .trs" 1 |
391 | # .res => .trs | 388 | # .res => .trs |
392 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | 389 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
393 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT | 390 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
394 | rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null | 391 | rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null |
395 | print_info "[${BASENAME}] Convert .res into .txt" 1 | 392 | print_info "[${BASENAME}] Convert .res into .txt" 1 |
396 | # .res => .txt | 393 | # .res => .txt |
397 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT | 394 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt |
398 | 395 | ||
399 | print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1 | 396 | print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1 |
400 | # unlock directory | 397 | # unlock directory |
401 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" | 398 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" |
402 | 399 |
main_tools/SecondPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : SecondPass.sh # | 4 | # File : SecondPass.sh # |
5 | # Brief : Speaker adaptation + ASR second pass # | 5 | # Brief : Speaker adaptation + ASR second pass # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (base on Emmanuel Ferreira # | 7 | # (base on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.1 # | 9 | # Version : 1.1 # |
10 | # Date : 18/06/13 # | 10 | # Date : 18/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
13 | echo "### SecondPass.sh ###" | 13 | echo "### SecondPass.sh ###" |
14 | 14 | ||
15 | # Check OTMEDIA_HOME env var | 15 | # Check OTMEDIA_HOME env var |
16 | if [ -z ${OTMEDIA_HOME} ] | 16 | if [ -z ${OTMEDIA_HOME} ] |
17 | then | 17 | then |
18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
19 | export OTMEDIA_HOME=$OTMEDIA_HOME | 19 | export OTMEDIA_HOME=$OTMEDIA_HOME |
20 | fi | 20 | fi |
21 | 21 | ||
22 | # where is SecondPass.sh | 22 | # where is SecondPass.sh |
23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
24 | 24 | ||
25 | # Scripts Path | 25 | # Scripts Path |
26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
27 | 27 | ||
28 | # Include scripts | 28 | # Include scripts |
29 | . $SCRIPT_PATH"/Tools.sh" | 29 | . $SCRIPT_PATH"/Tools.sh" |
30 | . $SCRIPT_PATH"/CheckSecondPass.sh" | 30 | . $SCRIPT_PATH"/CheckSecondPass.sh" |
31 | 31 | ||
32 | # where is SecondPass.cfg | 32 | # where is SecondPass.cfg |
33 | SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg" | 33 | SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg" |
34 | if [ -e $SECONDPASS_CONFIG_FILE ] | 34 | if [ -e $SECONDPASS_CONFIG_FILE ] |
35 | then | 35 | then |
36 | . $SECONDPASS_CONFIG_FILE | 36 | . $SECONDPASS_CONFIG_FILE |
37 | else | 37 | else |
38 | echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" > /dev/stderr | 38 | echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" > /dev/stderr |
39 | echo "exit" > /dev/stderr | 39 | echo "exit" > /dev/stderr |
40 | exit 1 | 40 | exit 1 |
41 | fi | 41 | fi |
42 | 42 | ||
43 | #---------------# | 43 | #---------------# |
44 | # Parse Options # | 44 | # Parse Options # |
45 | #---------------# | 45 | #---------------# |
46 | while getopts ":hDv:crf:" opt | 46 | while getopts ":hDv:crf:" opt |
47 | do | 47 | do |
48 | case $opt in | 48 | case $opt in |
49 | h) | 49 | h) |
50 | echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" | 50 | echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" |
51 | echo -e "\t Options:" | 51 | echo -e "\t Options:" |
52 | echo -e "\t\t-h :\tprint this message" | 52 | echo -e "\t\t-h :\tprint this message" |
53 | echo -e "\t\t-D :\tDEBUG mode on" | 53 | echo -e "\t\t-D :\tDEBUG mode on" |
54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
55 | echo -e "\t\t-c :\t Check process, stop if error detected" | 55 | echo -e "\t\t-c :\t Check process, stop if error detected" |
56 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" | 56 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" |
57 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" | 57 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" |
58 | exit 1 | 58 | exit 1 |
59 | ;; | 59 | ;; |
60 | D) | 60 | D) |
61 | DEBUG=1 | 61 | DEBUG=1 |
62 | ;; | 62 | ;; |
63 | v) | 63 | v) |
64 | VERBOSE=$OPTARG | 64 | VERBOSE=$OPTARG |
65 | ;; | 65 | ;; |
66 | c) | 66 | c) |
67 | CHECK=1 | 67 | CHECK=1 |
68 | ;; | 68 | ;; |
69 | f) | 69 | f) |
70 | FORKS="--forks $OPTARG" | 70 | FORKS="--forks $OPTARG" |
71 | ;; | 71 | ;; |
72 | r) | 72 | r) |
73 | RERUN=1 | 73 | RERUN=1 |
74 | ;; | 74 | ;; |
75 | :) | 75 | :) |
76 | echo "Option -$OPTARG requires an argument." > /dev/stderr | 76 | echo "Option -$OPTARG requires an argument." > /dev/stderr |
77 | exit 1 | 77 | exit 1 |
78 | ;; | 78 | ;; |
79 | \?) | 79 | \?) |
80 | echo "BAD USAGE : unknow opton -$OPTARG" > /dev/stderr | 80 | echo "BAD USAGE : unknow opton -$OPTARG" > /dev/stderr |
81 | exit 1 | 81 | exit 1 |
82 | ;; | 82 | ;; |
83 | esac | 83 | esac |
84 | done | 84 | done |
85 | 85 | ||
86 | # mode debug enable | 86 | # mode debug enable |
87 | if [ $DEBUG -eq 1 ] | 87 | if [ $DEBUG -eq 1 ] |
88 | then | 88 | then |
89 | set -x | 89 | set -x |
90 | echo -e "## Mode DEBUG ON ##" | 90 | echo -e "## Mode DEBUG ON ##" |
91 | REDIRECTION_OUTPUT="" | ||
92 | else | ||
93 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
94 | fi | 91 | fi |
95 | 92 | ||
96 | # mode verbose enable | 93 | # mode verbose enable |
97 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
98 | 95 | ||
99 | # Check USAGE by arguments number | 96 | # Check USAGE by arguments number |
100 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 97 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
101 | then | 98 | then |
102 | echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" | 99 | echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" |
103 | echo "$0 -h for more info" | 100 | echo "$0 -h for more info" |
104 | exit 1 | 101 | exit 1 |
105 | fi | 102 | fi |
106 | 103 | ||
107 | shift $((OPTIND-1)) | 104 | shift $((OPTIND-1)) |
108 | # check FirstPass directory - First argument | 105 | # check FirstPass directory - First argument |
109 | if [ -e $1 ] && [ -d $1 ] | 106 | if [ -e $1 ] && [ -d $1 ] |
110 | then | 107 | then |
111 | FIRSTPASS_DIR=$(readlink -e $1) | 108 | FIRSTPASS_DIR=$(readlink -e $1) |
112 | else | 109 | else |
113 | print_error "can't find $1 directory" | 110 | print_error "can't find $1 directory" |
114 | exit 1 | 111 | exit 1 |
115 | fi | 112 | fi |
116 | 113 | ||
117 | print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1 | 114 | print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1 |
118 | 115 | ||
119 | #-------------# | 116 | #-------------# |
120 | # GLOBAL VARS # | 117 | # GLOBAL VARS # |
121 | #-------------# | 118 | #-------------# |
122 | FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" | 119 | FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" |
123 | if [ -e $FIRSTPASS_CONFIG_FILE ] | 120 | if [ -e $FIRSTPASS_CONFIG_FILE ] |
124 | then | 121 | then |
125 | WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") | 122 | WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") |
126 | BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") | 123 | BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") |
127 | OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") | 124 | OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") |
128 | OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR | 125 | OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR |
129 | PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") | 126 | PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") |
130 | PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") | 127 | PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") |
131 | SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") | 128 | SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") |
132 | LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") | 129 | LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") |
133 | RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") | 130 | RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") |
134 | else | 131 | else |
135 | print_error "can't find $FIRSTPASS_CONFIG_FILE file" | 132 | print_error "can't find $FIRSTPASS_CONFIG_FILE file" |
136 | print_error "exit" | 133 | print_error "exit" |
137 | exit 1 | 134 | exit 1 |
138 | fi | 135 | fi |
139 | LST=$OUTPUT_DIR_BASENAME"/lists" | 136 | LST=$OUTPUT_DIR_BASENAME"/lists" |
140 | HMM=$OUTPUT_DIR_BASENAME"/hmm/" | 137 | HMM=$OUTPUT_DIR_BASENAME"/hmm/" |
141 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" | 138 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" |
142 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log" | 139 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log" |
143 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log" | 140 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log" |
144 | 141 | ||
145 | #------------------# | 142 | #------------------# |
146 | # Create WORKSPACE # | 143 | # Create WORKSPACE # |
147 | #------------------# | 144 | #------------------# |
148 | 145 | ||
149 | # Lock directory | 146 | # Lock directory |
150 | if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2; exit 1;fi | 147 | if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2; exit 1;fi |
151 | rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 | 148 | rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 |
152 | touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 | 149 | touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 |
153 | 150 | ||
154 | rm -r $LST > /dev/null 2>&1 | 151 | rm -r $LST > /dev/null 2>&1 |
155 | mkdir -p $LST | 152 | mkdir -p $LST |
156 | print_info "[${BASENAME}] Make directory $LST" 2 | 153 | print_info "[${BASENAME}] Make directory $LST" 2 |
157 | if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi | 154 | if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi |
158 | mkdir -p $HMM | 155 | mkdir -p $HMM |
159 | print_info "[${BASENAME}] Make directory $HMM" 2 | 156 | print_info "[${BASENAME}] Make directory $HMM" 2 |
160 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi | 157 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi |
161 | mkdir -p $RES_DIR > /dev/null 2>&1 | 158 | mkdir -p $RES_DIR > /dev/null 2>&1 |
162 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 | 159 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 |
163 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 | 160 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 |
164 | 161 | ||
165 | #-------------------# | 162 | #-------------------# |
166 | # Check Pass # | 163 | # Check Pass # |
167 | #-------------------# | 164 | #-------------------# |
168 | print_info "[${BASENAME}] Check Pass 2 directory" 1 | 165 | print_info "[${BASENAME}] Check Pass 2 directory" 1 |
169 | for treil in $(ls $RES_DIR/ | grep treil) | 166 | for treil in $(ls $RES_DIR/ | grep treil) |
170 | do | 167 | do |
171 | if [ ! -s $RES_DIR/$treil ] | 168 | if [ ! -s $RES_DIR/$treil ] |
172 | then | 169 | then |
173 | bn = $(basename $treil ".treil") | 170 | bn = $(basename $treil ".treil") |
174 | rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null | 171 | rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null |
175 | print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2 | 172 | print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2 |
176 | fi | 173 | fi |
177 | done | 174 | done |
178 | 175 | ||
179 | # Check if more then 89% of treil are done | 176 | # Check if more then 89% of treil are done |
180 | nbres_p1=$(ls $RES_DIR_P1/*.res 2> /dev/null | wc -l) | 177 | nbres_p1=$(ls $RES_DIR_P1/*.res 2> /dev/null | wc -l) |
181 | nbtreil_p2=$(ls $RES_DIR/*.treil 2> /dev/null | wc -l) | 178 | nbtreil_p2=$(ls $RES_DIR/*.treil 2> /dev/null | wc -l) |
182 | if [ $nbres_p1 -gt 0 ] | 179 | if [ $nbres_p1 -gt 0 ] |
183 | then | 180 | then |
184 | pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) | 181 | pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) |
185 | if [ $pourcentage -gt 89 ] | 182 | if [ $pourcentage -gt 89 ] |
186 | then | 183 | then |
187 | print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1 | 184 | print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1 |
188 | exit 0 | 185 | exit 0 |
189 | fi | 186 | fi |
190 | else | 187 | else |
191 | print_error "[${BASENAME}] No First Pass, No .res -> exit P2" | 188 | print_error "[${BASENAME}] No First Pass, No .res -> exit P2" |
192 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No First Pass, No .res -> exit P2" ;fi | 189 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No First Pass, No .res -> exit P2" ;fi |
193 | exit 1 | 190 | exit 1 |
194 | fi | 191 | fi |
195 | 192 | ||
196 | #--------------------# | 193 | #--------------------# |
197 | # Save configuration # | 194 | # Save configuration # |
198 | #--------------------# | 195 | #--------------------# |
199 | cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg | 196 | cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg |
200 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 197 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
201 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 198 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
202 | echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 199 | echo "FIRSTPASS_DIR=$FIRSTPASS_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
203 | echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 200 | echo "PLP_DIR_P1=$PLP_DIR_P1" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
204 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 201 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
205 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 202 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
206 | echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 203 | echo "LST=$LST" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
207 | echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 204 | echo "HMM=$HMM" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
208 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg | 205 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/SecondPass.cfg |
209 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 | 206 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 |
210 | 207 | ||
211 | 208 | ||
212 | #--------------------------------------------------# | 209 | #--------------------------------------------------# |
213 | # Speaker Adaptation (AM) + Second pass (DECODING) # | 210 | # Speaker Adaptation (AM) + Second pass (DECODING) # |
214 | #--------------------------------------------------# | 211 | #--------------------------------------------------# |
215 | print_info "[${BASENAME}] Launch Second Pass" 1 | 212 | print_info "[${BASENAME}] Launch Second Pass" 1 |
216 | 213 | ||
217 | # for all speaker | 214 | # for all speaker |
218 | for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq) | 215 | for speaker in $(cat $LBL_FILE_P1 | cut -f4 -d" " | sort | uniq) |
219 | do | 216 | do |
220 | ## get seg file from P1 containing the speaker | 217 | ## get seg file from P1 containing the speaker |
221 | find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst | 218 | find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst |
222 | print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3 | 219 | print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3 |
223 | if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi | 220 | if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi |
224 | 221 | ||
225 | 222 | ||
226 | # for all AM | 223 | # for all AM |
227 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 224 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
228 | do | 225 | do |
229 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then | 226 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then |
230 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") | 227 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") |
231 | ## if is the good AM for the speaker | 228 | ## if is the good AM for the speaker |
232 | if [ -n "$type" ] | 229 | if [ -n "$type" ] |
233 | then | 230 | then |
234 | ## HMM adaptation | 231 | ## HMM adaptation |
235 | if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) | 232 | if [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) |
236 | then | 233 | then |
237 | print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3 | 234 | print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3 |
238 | $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/ $REDIRECTION_OUTPUT | 235 | $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/ |
239 | fi | 236 | fi |
240 | 237 | ||
241 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) | 238 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.hmm ] || [ -z $HMM/$speaker.hmm ] ) |
242 | then | 239 | then |
243 | print_warn "[${BASENAME}] No hmm files created for $speaker" 2 | 240 | print_warn "[${BASENAME}] No hmm files created for $speaker" 2 |
244 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker" | 241 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker" |
245 | print_error "[${BASENAME}] Check $ERRORFILE" | 242 | print_error "[${BASENAME}] Check $ERRORFILE" |
246 | #exit 1 | 243 | #exit 1 |
247 | fi | 244 | fi |
248 | 245 | ||
249 | ## cp map files | 246 | ## cp map files |
250 | cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map | 247 | cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map |
251 | 248 | ||
252 | ## class clustering | 249 | ## class clustering |
253 | if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )) | 250 | if [ -s $HMM/$speaker.hmm ] && ( [ $RERUN -eq 0 ] || ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] )) |
254 | then | 251 | then |
255 | print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3 | 252 | print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3 |
256 | $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls $REDIRECTION_OUTPUT | 253 | $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls |
257 | fi | 254 | fi |
258 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ) | 255 | if [ $CHECK -eq 1 ] && ( [ ! -e $HMM/$speaker.cls ] || [ -z $HMM/$speaker.cls ] ) |
259 | then | 256 | then |
260 | print_warn "[${BASENAME}] No cls file created for $speaker" 2 | 257 | print_warn "[${BASENAME}] No cls file created for $speaker" 2 |
261 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speakers" | 258 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speakers" |
262 | print_error "[${BASENAME}] Check $ERRORFILE" | 259 | print_error "[${BASENAME}] Check $ERRORFILE" |
263 | #exit 1 | 260 | #exit 1 |
264 | fi | 261 | fi |
265 | 262 | ||
266 | ## Speeral decoding | 263 | ## Speeral decoding |
267 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] | 264 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] |
268 | then | 265 | then |
269 | print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 | 266 | print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3 |
270 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT | 267 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock |
271 | else | 268 | else |
272 | print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 | 269 | print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 |
273 | print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 | 270 | print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3 |
274 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT | 271 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock |
275 | fi | 272 | fi |
276 | 273 | ||
277 | if [ $CHECK -eq 1 ] | 274 | if [ $CHECK -eq 1 ] |
278 | then | 275 | then |
279 | check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" | 276 | check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" |
280 | if [ $? -eq 1 ] | 277 | if [ $? -eq 1 ] |
281 | then | 278 | then |
282 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 | 279 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 |
283 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR $LST/$speaker.lst" | 280 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR $LST/$speaker.lst" |
284 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 281 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
285 | print_log_file $LOGFILE "Segs (and treil) not done :\n[" | 282 | print_log_file $LOGFILE "Segs (and treil) not done :\n[" |
286 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $LOGFILE | 283 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $LOGFILE |
287 | print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]" | 284 | print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]" |
288 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 285 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
289 | #exit 1 | 286 | #exit 1 |
290 | fi | 287 | fi |
291 | fi | 288 | fi |
292 | break | 289 | break |
293 | fi | 290 | fi |
294 | fi | 291 | fi |
295 | done | 292 | done |
296 | #rm "$HMM/$speaker.*" > /dev/null 2>&1 | 293 | #rm "$HMM/$speaker.*" > /dev/null 2>&1 |
297 | #rm "$LST/$speaker.lst" > /dev/null 2>&1 | 294 | #rm "$LST/$speaker.lst" > /dev/null 2>&1 |
298 | done | 295 | done |
299 | 296 | ||
300 | ## Check missing seg and log it | 297 | ## Check missing seg and log it |
301 | if [ $CHECK -eq 1 ] | 298 | if [ $CHECK -eq 1 ] |
302 | then | 299 | then |
303 | ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 300 | ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
304 | todo=$(cat ${PLP_FILE_P1} | wc -l) | 301 | todo=$(cat ${PLP_FILE_P1} | wc -l) |
305 | if [ $todo -eq 0 ]; then todo=1;fi | 302 | if [ $todo -eq 0 ]; then todo=1;fi |
306 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) | 303 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) |
307 | pourcentage=$((($notdone*100)/$todo)) | 304 | pourcentage=$((($notdone*100)/$todo)) |
308 | if [ $notdone -ne 0 ] | 305 | if [ $notdone -ne 0 ] |
309 | then | 306 | then |
310 | print_error "[${BASENAME}] ERROR : check $ERRORFILE" | 307 | print_error "[${BASENAME}] ERROR : check $ERRORFILE" |
311 | print_log_file "$ERRORFILE" "ERROR : Treil not done [" | 308 | print_log_file "$ERRORFILE" "ERROR : Treil not done [" |
312 | diff ${PLP_FILE_P1} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE | 309 | diff ${PLP_FILE_P1} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $ERRORFILE |
313 | print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" | 310 | print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME" |
314 | else | 311 | else |
315 | print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" | 312 | print_log_file "$LOGFILE" "P1 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" |
316 | fi | 313 | fi |
317 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 | 314 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 |
318 | fi | 315 | fi |
319 | 316 | ||
320 | #---------------# | 317 | #---------------# |
321 | # Convert res # | 318 | # Convert res # |
322 | #---------------# | 319 | #---------------# |
323 | 320 | ||
324 | print_info "[${BASENAME}] Convert .res into .ctm" 1 | 321 | print_info "[${BASENAME}] Convert .res into .ctm" 1 |
325 | # .res => .ctm | 322 | # .res => .ctm |
326 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm $REDIRECTION_OUTPUT | 323 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm |
327 | print_info "[${BASENAME}] Convert .res into .trs" 1 | 324 | print_info "[${BASENAME}] Convert .res into .trs" 1 |
328 | # .res => .trs | 325 | # .res => .trs |
329 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 326 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
330 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT | 327 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
331 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 | 328 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 |
332 | print_info "[${BASENAME}] Convert .res into .txt" 1 | 329 | print_info "[${BASENAME}] Convert .res into .txt" 1 |
333 | # .res => .txt | 330 | # .res => .txt |
334 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt $REDIRECTION_OUTPUT | 331 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt |
335 | 332 | ||
336 | print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1 | 333 | print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1 |
337 | # unlock directory | 334 | # unlock directory |
338 | mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" | 335 | mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" |
339 | 336 | ||
340 | 337 |
main_tools/ThirdPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : ThirdPass.sh # | 4 | # File : ThirdPass.sh # |
5 | # Brief : ASR third pass using trigg files # | 5 | # Brief : ASR third pass using trigg files # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # Version : 1.0 # | 7 | # Version : 1.0 # |
8 | # Date : 18/07/13 # | 8 | # Date : 18/07/13 # |
9 | ##################################################### | 9 | ##################################################### |
10 | 10 | ||
11 | echo "### ThirdPass.sh ###" | 11 | echo "### ThirdPass.sh ###" |
12 | 12 | ||
13 | # Check OTMEDIA_HOME env var | 13 | # Check OTMEDIA_HOME env var |
14 | if [ -z ${OTMEDIA_HOME} ] | 14 | if [ -z ${OTMEDIA_HOME} ] |
15 | then | 15 | then |
16 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 16 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
17 | export OTMEDIA_HOME=$OTMEDIA_HOME | 17 | export OTMEDIA_HOME=$OTMEDIA_HOME |
18 | fi | 18 | fi |
19 | 19 | ||
20 | # where is SecondPass.sh | 20 | # where is SecondPass.sh |
21 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 21 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
22 | 22 | ||
23 | # Scripts Path | 23 | # Scripts Path |
24 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 24 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
25 | 25 | ||
26 | # Include scripts | 26 | # Include scripts |
27 | . $SCRIPT_PATH"/Tools.sh" | 27 | . $SCRIPT_PATH"/Tools.sh" |
28 | . $SCRIPT_PATH"/CheckThirdPass.sh" | 28 | . $SCRIPT_PATH"/CheckThirdPass.sh" |
29 | 29 | ||
30 | # where is ThirdPass.cfg | 30 | # where is ThirdPass.cfg |
31 | THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg" | 31 | THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ThirdPass.cfg" |
32 | if [ -e $THIRDPASS_CONFIG_FILE ] | 32 | if [ -e $THIRDPASS_CONFIG_FILE ] |
33 | then | 33 | then |
34 | . $THIRDPASS_CONFIG_FILE | 34 | . $THIRDPASS_CONFIG_FILE |
35 | else | 35 | else |
36 | echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2 | 36 | echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2 |
37 | exit 1 | 37 | exit 1 |
38 | fi | 38 | fi |
39 | 39 | ||
40 | #---------------# | 40 | #---------------# |
41 | # Parse Options # | 41 | # Parse Options # |
42 | #---------------# | 42 | #---------------# |
43 | while getopts ":hDv:crf:" opt | 43 | while getopts ":hDv:crf:" opt |
44 | do | 44 | do |
45 | case $opt in | 45 | case $opt in |
46 | h) | 46 | h) |
47 | echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n" | 47 | echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n" |
48 | echo -e "\t Options:" | 48 | echo -e "\t Options:" |
49 | echo -e "\t\t-h :\tprint this message" | 49 | echo -e "\t\t-h :\tprint this message" |
50 | echo -e "\t\t-D :\tDEBUG mode on" | 50 | echo -e "\t\t-D :\tDEBUG mode on" |
51 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 51 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
52 | echo -e "\t\t-c :\t Check process, stop if error detected" | 52 | echo -e "\t\t-c :\t Check process, stop if error detected" |
53 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" | 53 | echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" |
54 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" | 54 | echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" |
55 | exit 1 | 55 | exit 1 |
56 | ;; | 56 | ;; |
57 | D) | 57 | D) |
58 | DEBUG=1 | 58 | DEBUG=1 |
59 | ;; | 59 | ;; |
60 | v) | 60 | v) |
61 | VERBOSE=$OPTARG | 61 | VERBOSE=$OPTARG |
62 | ;; | 62 | ;; |
63 | c) | 63 | c) |
64 | CHECK=1 | 64 | CHECK=1 |
65 | ;; | 65 | ;; |
66 | f) | 66 | f) |
67 | FORKS="--forks $OPTARG" | 67 | FORKS="--forks $OPTARG" |
68 | ;; | 68 | ;; |
69 | r) | 69 | r) |
70 | RERUN=1 | 70 | RERUN=1 |
71 | ;; | 71 | ;; |
72 | :) | 72 | :) |
73 | echo "Option -$OPTARG requires an argument." >&2 | 73 | echo "Option -$OPTARG requires an argument." >&2 |
74 | exit 1 | 74 | exit 1 |
75 | ;; | 75 | ;; |
76 | \?) | 76 | \?) |
77 | echo "BAD USAGE : unknow opton -$OPTARG" | 77 | echo "BAD USAGE : unknow opton -$OPTARG" |
78 | #exit 1 | 78 | #exit 1 |
79 | ;; | 79 | ;; |
80 | esac | 80 | esac |
81 | done | 81 | done |
82 | 82 | ||
83 | # mode debug enable | 83 | # mode debug enable |
84 | if [ $DEBUG -eq 1 ] | 84 | if [ $DEBUG -eq 1 ] |
85 | then | 85 | then |
86 | set -x | 86 | set -x |
87 | echo -e "## Mode DEBUG ON ##" | 87 | echo -e "## Mode DEBUG ON ##" |
88 | REDIRECTION_OUTPUT="" | ||
89 | else | ||
90 | REDIRECTION_OUTPUT=" 2> /dev/null" | ||
91 | fi | 88 | fi |
92 | 89 | ||
93 | # mode verbose enable | 90 | # mode verbose enable |
94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi | 91 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi |
95 | 92 | ||
96 | # Check USAGE by arguments number | 93 | # Check USAGE by arguments number |
97 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 94 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
98 | then | 95 | then |
99 | echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>" | 96 | echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>" |
100 | echo "$0 -h for more info" | 97 | echo "$0 -h for more info" |
101 | exit 1 | 98 | exit 1 |
102 | fi | 99 | fi |
103 | 100 | ||
104 | shift $((OPTIND-1)) | 101 | shift $((OPTIND-1)) |
105 | # check Pass directory - First argument | 102 | # check Pass directory - First argument |
106 | if [ -e $1 ] && [ -d $1 ] | 103 | if [ -e $1 ] && [ -d $1 ] |
107 | then | 104 | then |
108 | PASS_DIR=$(readlink -e $1) | 105 | PASS_DIR=$(readlink -e $1) |
109 | else | 106 | else |
110 | print_error "can't find $1 directory" | 107 | print_error "can't find $1 directory" |
111 | exit 1 | 108 | exit 1 |
112 | fi | 109 | fi |
113 | 110 | ||
114 | #-------------# | 111 | #-------------# |
115 | # GLOBAL VARS # | 112 | # GLOBAL VARS # |
116 | #-------------# | 113 | #-------------# |
117 | EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg" | 114 | EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg" |
118 | if [ -e $EXPLOITCONFPASS_CONFIG_FILE ] | 115 | if [ -e $EXPLOITCONFPASS_CONFIG_FILE ] |
119 | then | 116 | then |
120 | TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=") | 117 | TRIGGER_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "TRIGGER_SPEERAL=" | cut -f2 -d"=") |
121 | LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=") | 118 | LEX_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_SPEERAL=" | cut -f2 -d"=") |
122 | LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=") | 119 | LEX_BINODE_SPEERAL=$(cat $EXPLOITCONFPASS_CONFIG_FILE | grep "LEX_BINODE_SPEERAL=" | cut -f2 -d"=") |
123 | LST="" | 120 | LST="" |
124 | PLP_DIR_P1="" | 121 | PLP_DIR_P1="" |
125 | HMM="" | 122 | HMM="" |
126 | else | 123 | else |
127 | print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file" | 124 | print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file" |
128 | #exit 1 | 125 | #exit 1 |
129 | TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/ | 126 | TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/ |
130 | LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext | 127 | LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext |
131 | LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin | 128 | LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin |
132 | fi | 129 | fi |
133 | SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg" | 130 | SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg" |
134 | if [ -e $SECONDPASS_CONFIG_FILE ] | 131 | if [ -e $SECONDPASS_CONFIG_FILE ] |
135 | then | 132 | then |
136 | LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=") | 133 | LST=$(cat $SECONDPASS_CONFIG_FILE | grep "^LST=" | cut -f2 -d"=") |
137 | HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=") | 134 | HMM=$(cat $SECONDPASS_CONFIG_FILE | grep "^HMM=" | cut -f2 -d"=") |
138 | PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=") | 135 | PLP_DIR_P1=$(cat $SECONDPASS_CONFIG_FILE | grep "^PLP_DIR_P1=" | cut -f2 -d"=") |
139 | else | 136 | else |
140 | print_error "can't find $SECONDPASS_CONFIG_FILE file" | 137 | print_error "can't find $SECONDPASS_CONFIG_FILE file" |
141 | #exit 1 | 138 | #exit 1 |
142 | LST=$PASS_DIR/lists | 139 | LST=$PASS_DIR/lists |
143 | HMM=$PASS_DIR/hmm | 140 | HMM=$PASS_DIR/hmm |
144 | PLP_DIR_P1=$PASS_DIR/PLP | 141 | PLP_DIR_P1=$PASS_DIR/PLP |
145 | fi | 142 | fi |
146 | 143 | ||
147 | BASENAME=$(basename $PASS_DIR) | 144 | BASENAME=$(basename $PASS_DIR) |
148 | OUTPUT_DIR_BASENAME=$PASS_DIR | 145 | OUTPUT_DIR_BASENAME=$PASS_DIR |
149 | RES_DIR="$PASS_DIR/res_p3" | 146 | RES_DIR="$PASS_DIR/res_p3" |
150 | LOGFILE=$(dirname $PASS_DIR)"/info_p3.log" | 147 | LOGFILE=$(dirname $PASS_DIR)"/info_p3.log" |
151 | ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log" | 148 | ERRORFILE=$(dirname $PASS_DIR)"/error_p3.log" |
152 | 149 | ||
153 | #------------------# | 150 | #------------------# |
154 | # Create WORKSPACE # | 151 | # Create WORKSPACE # |
155 | #------------------# | 152 | #------------------# |
156 | 153 | ||
157 | # Lock directory | 154 | # Lock directory |
158 | if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi | 155 | if [ -e $OUTPUT_DIR_BASENAME/THIRDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi |
159 | rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1 | 156 | rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1 |
160 | touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1 | 157 | touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1 |
161 | 158 | ||
162 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi | 159 | if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi |
163 | mkdir -p $RES_DIR | 160 | mkdir -p $RES_DIR |
164 | print_info "Make directory $RES_DIR" 1 | 161 | print_info "Make directory $RES_DIR" 1 |
165 | 162 | ||
166 | #--------------------# | 163 | #--------------------# |
167 | # Save configuration # | 164 | # Save configuration # |
168 | #--------------------# | 165 | #--------------------# |
169 | cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg | 166 | cp $THIRDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/ThirdPass.cfg |
170 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg | 167 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/ThirdPass.cfg |
171 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 | 168 | print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1 |
172 | 169 | ||
173 | #--------------------------------------------------# | 170 | #--------------------------------------------------# |
174 | # Third Pass using trigger file (DECODING) # | 171 | # Third Pass using trigger file (DECODING) # |
175 | #--------------------------------------------------# | 172 | #--------------------------------------------------# |
176 | print_info "Launch Third Pass" 1 | 173 | print_info "Launch Third Pass" 1 |
177 | 174 | ||
178 | ## Generate speeral config file adding trigger rep | 175 | ## Generate speeral config file adding trigger rep |
179 | cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \ | 176 | cat $SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE | sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \ |
180 | | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \ | 177 | | sed -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \ |
181 | | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml | 178 | | sed -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" > $OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml |
182 | SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml | 179 | SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml |
183 | 180 | ||
184 | # for all speaker | 181 | # for all speaker |
185 | for lspeaker in $(ls $LST/*.lst) | 182 | for lspeaker in $(ls $LST/*.lst) |
186 | do | 183 | do |
187 | speaker=$(basename $lspeaker ".lst") | 184 | speaker=$(basename $lspeaker ".lst") |
188 | # for all AM | 185 | # for all AM |
189 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 186 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
190 | do | 187 | do |
191 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then | 188 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then |
192 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") | 189 | type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst") |
193 | ## if is the good AM for the speaker | 190 | ## if is the good AM for the speaker |
194 | if [ -n "$type" ] | 191 | if [ -n "$type" ] |
195 | then | 192 | then |
196 | ## Speeral decoding | 193 | ## Speeral decoding |
197 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] | 194 | if [ -s $HMM/$speaker.hmm ] && [ -s $HMM/$speaker.cls ] |
198 | then | 195 | then |
199 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3 | 196 | print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3 |
200 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT | 197 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock |
201 | else | 198 | else |
202 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 | 199 | print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2 |
203 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT | 200 | $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock |
204 | fi | 201 | fi |
205 | 202 | ||
206 | if [ $CHECK -eq 1 ] | 203 | if [ $CHECK -eq 1 ] |
207 | then | 204 | then |
208 | check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" | 205 | check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR" |
209 | if [ $? -eq 1 ] | 206 | if [ $? -eq 1 ] |
210 | then | 207 | then |
211 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE | 208 | echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE |
212 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 209 | ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
213 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE | 210 | diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE |
214 | echo -e "] " >> $ERRORFILE | 211 | echo -e "] " >> $ERRORFILE |
215 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 212 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
216 | #exit 1 | 213 | #exit 1 |
217 | fi | 214 | fi |
218 | fi | 215 | fi |
219 | break | 216 | break |
220 | fi | 217 | fi |
221 | fi | 218 | fi |
222 | done | 219 | done |
223 | done | 220 | done |
224 | 221 | ||
225 | ## Check missing seg and log it | 222 | ## Check missing seg and log it |
226 | ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 223 | ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
227 | echo -e "$BASENAME P3 END\n[" >> $LOGFILE | 224 | echo -e "$BASENAME P3 END\n[" >> $LOGFILE |
228 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE | 225 | diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE |
229 | echo -e "] $BASENAME" >> $LOGFILE | 226 | echo -e "] $BASENAME" >> $LOGFILE |
230 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 | 227 | rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1 |
231 | 228 | ||
232 | #---------------# | 229 | #---------------# |
233 | # Convert res # | 230 | # Convert res # |
234 | #---------------# | 231 | #---------------# |
235 | 232 | ||
236 | # .res => .ctm | 233 | # .res => .ctm |
237 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm $REDIRECTION_OUTPUT | 234 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm |
238 | # .res => .trs | 235 | # .res => .trs |
239 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg | 236 | echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
240 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg $REDIRECTION_OUTPUT | 237 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg |
241 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 | 238 | rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1 |
242 | # .res => .txt | 239 | # .res => .txt |
243 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt $REDIRECTION_OUTPUT | 240 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt |
244 | 241 | ||
245 | 242 | ||
246 | print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1 | 243 | print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1 |
247 | 244 | ||
248 | # unlock directory | 245 | # unlock directory |
249 | mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" | 246 | mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" |
250 | 247 | ||
251 | 248 |