Commit 1fd315c89e313c446c6f6ecf3a4ea98ac859ac88

Authored by Jean-François Rey
1 parent 0bf609bcce
Exists in master

add Extract audio and check results scripts

Showing 8 changed files with 247 additions and 43 deletions Inline Diff

main_tools/CheckResults.sh
File was created 1 #!/bin/bash
2
3 #################################
4 # File : CheckResults.sh #
5 # Brief : Check the results #
6 # Author : Jean-François Rey #
7 # Date : 30/07/2013 #
8 # Version : 1.0 #
9 #################################
10
11
# _CheckDir_count( dir, ext )
# dir : a directory path
# ext : a file extension, without the leading dot
# Brief : print how many entries of $dir are named "*.$ext"
_CheckDir_count() {
  local dir=$1 ext=$2 entry total=0
  for entry in "$dir"/*."$ext"; do
    # An unmatched glob is left as the literal pattern, so only count paths
    # that really exist (dangling symlinks included).
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      total=$((total + 1))
    fi
  done
  printf '%d\n' "$total"
}

# _CheckDir_usf_status( usf )
# usf : path of a USF file
# Brief : print the status of $usf
#   NAN : the file does not exist
#   ERR : more than 49% of the lines holding a 'confidence=' attribute hold
#         'confidence="0.600"', or no line holds a confidence at all
#   OK  : otherwise
_CheckDir_usf_status() {
  local usf=$1 low total percent
  if [ ! -e "$usf" ]; then
    printf 'NAN\n'
    return 0
  fi
  # grep -c counts matching lines; -F keeps the dot of 0.600 literal.
  low=$(grep -c -F 'confidence="0.600"' -- "$usf" 2> /dev/null)
  total=$(grep -c -F 'confidence=' -- "$usf" 2> /dev/null)
  # grep prints nothing when the file cannot be read: treat that as zero.
  low=${low:-0}
  total=${total:-0}
  if [ "$total" -eq 0 ]; then
    percent=100
  else
    percent=$((low * 100 / total))
  fi
  if [ "$percent" -gt 49 ]; then
    printf 'ERR\n'
  else
    printf 'OK\n'
  fi
}

# CheckDir( dir )
# dir : a directory path
# Brief : Check $dir results
# Output : one tab-separated line on stdout :
#   name, #plp (PLP/*.plp), #res_p1 (res_p1/*.res), #treil_p2 (res_p2/*.treil),
#   #treil_p3 (res_p3/*.treil), usf_p2 status, usf_p3 status
#   where the USF files are $dir/<name>.res_p2.usf and $dir/<name>.res_p3.usf
# Return : 0 on success, 1 (nothing printed on stdout) on a wrong call
CheckDir() {
  if [ $# -ne 1 ]; then
    echo "ERROR : CheckDir expects exactly one directory argument" >&2
    return 1
  fi

  local dir=$1
  local name plp res_p1 res_p2 res_p3 usf2 usf3
  name=$(basename -- "$dir")

  plp=$(_CheckDir_count "$dir/PLP" plp)
  res_p1=$(_CheckDir_count "$dir/res_p1" res)
  res_p2=$(_CheckDir_count "$dir/res_p2" treil)
  res_p3=$(_CheckDir_count "$dir/res_p3" treil)
  usf2=$(_CheckDir_usf_status "$dir/$name.res_p2.usf")
  usf3=$(_CheckDir_usf_status "$dir/$name.res_p3.usf")

  # The doubled tabs keep the values under the wider #treil_p2/#treil_p3 headers.
  printf '%s\t%s\t%s\t%s\t\t%s\t\t%s\t%s\n' \
    "$name" "$plp" "$res_p1" "$res_p2" "$res_p3" "$usf2" "$usf3"
}
68
69
# Check OTMEDIA_HOME env var : when it is not set, use the parent of the
# directory holding this script
if [ -z "${OTMEDIA_HOME}" ]; then
  OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
  export OTMEDIA_HOME
fi

# where is CheckResults.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# 1 : check every sub-directory of <DIRECTORY>, 0 : check <DIRECTORY> itself
RECURSIVE=0

#---------------#
# Parse Options #
#---------------#
while getopts ":hr" opt; do
  case "$opt" in
    h)
      echo -e "$0 [OPTIONS] <DIRECTORY>\n"
      echo -e "\t Options:"
      echo -e "\t\t-h :\tprint this message"
      echo -e "\t\t-r :\trecursive mode"
      exit 1
      ;;
    r)
      RECURSIVE=1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    \?)
      echo "BAD USAGE : unknow opton -$OPTARG"
      exit 1
      ;;
  esac
done

# Check USAGE by arguments number : exactly one argument must remain once the
# options are consumed
if [ $(($# - (OPTIND - 1))) -ne 1 ]; then
  echo "BAD USAGE : CheckResults.sh [OPTIONS] <DIRECTORY>"
  echo "$0 -h for more info"
  exit 1
fi

shift $((OPTIND - 1))
# check Directory - First argument (it must be an existing directory)
if [ -d "$1" ]; then
  DIR=$(readlink -e "$1")
else
  echo "ERROR : can't open directory $1"
  exit 1
fi

# Check directory results
echo -e "Directory name\t\t#plp\t#res_p1\t#treil_p2\t#treil_p3\tusf_p2\tusf_p3"
if [ "$RECURSIVE" -eq 0 ]; then
  CheckDir "$DIR"
else
  # Glob instead of parsing "ls" so that names holding whitespace stay intact
  for d in "$DIR"/*; do
    if [ -d "$d" ]; then CheckDir "$d"; fi
  done
fi
140
141
142
main_tools/ConfPass.sh
#!/bin/bash

#####################################################
# File    : ConfPass.sh                             #
# Brief   : Process the ASR Confidence pass         #
# Author  : Jean-François Rey                       #
#           (based on Emmanuel Ferreira             #
#            and Hugo Mauchrétien works)            #
# Version : 1.0                                     #
# Date    : 17/06/13                                #
#####################################################
#
# Usage : ConfPass.sh [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>
# Runs the confidence measure on <INPUT_DIRECTORY>/<TREIL_DIRECTORY_NAME>
# and writes <INPUT_DIRECTORY>/<basename>.<TREIL_DIRECTORY_NAME>.usf.

# Check OTMEDIA_HOME env var : when it is not set, use the parent of the
# directory holding this script
if [ -z "${OTMEDIA_HOME}" ]; then
  OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
  export OTMEDIA_HOME
fi

# where is ConfPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
# NOTE(review): print_info / print_warn / print_error and check_conf_pass_usf
# are presumably defined by these two files - confirm.
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckConfPass.sh"

# where is ConfPass.cfg
CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg"
if [ -e "$CONFPASS_CONFIG_FILE" ]; then
  . "$CONFPASS_CONFIG_FILE"
else
  echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2
  exit 1
fi

# Switches tested numerically below; default them to "off" in case neither
# the sourced scripts nor the configuration file defined them.
DEBUG=${DEBUG:-0}
VERBOSE=${VERBOSE:-0}
CHECK=${CHECK:-0}
RERUN=${RERUN:-0}

# confpass_run( cmd, args... )
# Brief : run an external tool; its stderr is discarded unless DEBUG mode is on.
# A redirection stored in a variable (" 2> /dev/null") is never interpreted
# by the shell when the variable is expanded: the tool would receive "2>" and
# "/dev/null" as two extra arguments. Hence this wrapper.
confpass_run() {
  if [ "$DEBUG" -eq 1 ]; then
    "$@"
  else
    "$@" 2> /dev/null
  fi
}

# confpass_count_files( dir, ext )
# Brief : print how many entries of $dir are named "*.$ext"
confpass_count_files() {
  local dir=$1 ext=$2 entry total=0
  for entry in "$dir"/*."$ext"; do
    # an unmatched glob stays literal: count existing paths only
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      total=$((total + 1))
    fi
  done
  printf '%d\n' "$total"
}

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cr" opt; do
  case "$opt" in
    h)
      echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n"
      echo -e "\t Options:"
      echo -e "\t\t-h :\tprint this message"
      echo -e "\t\t-D :\tDEBUG mode on"
      echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
      echo -e "\t\t-c :\t Check process, stop if error detected"
      echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files"
      exit 1
      ;;
    D)
      DEBUG=1
      ;;
    v)
      VERBOSE=$OPTARG
      ;;
    c)
      CHECK=1
      ;;
    r)
      RERUN=1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    \?)
      # unknown options are reported but do not stop the script
      echo "BAD USAGE : unknow opton -$OPTARG"
      ;;
  esac
done

# mode debug enable
if [ "$DEBUG" -eq 1 ]; then
  set -x
  echo -e "## Mode DEBUG ON ##"
fi

# mode verbose enable
if [ "$VERBOSE" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##"; fi

# Check USAGE by arguments number : exactly two arguments must remain once
# the options are consumed
if [ $(($# - (OPTIND - 1))) -ne 2 ]; then
  echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>"
  echo "$0 -h for more info"
  exit 1
fi

shift $((OPTIND - 1))
# check input directory - first argument
if [ ! -e "$1" ]; then
  print_error "can't open $1"
  exit 1
fi
# check treil input directory - second argument
if [ ! -e "$1/$2" ]; then
  print_error "can't open $1/$2"
  exit 1
fi

#-------------#
# GLOBAL VARS #
#-------------#
INPUT_DIR=$(readlink -e "$1")
OUTPUT_DIR=$INPUT_DIR
BASENAME=$(basename "$OUTPUT_DIR")
RES_NAME=$2
RES_P="${INPUT_DIR}/${RES_NAME}"
USF_FILE="${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf"
CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME"
RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm"
LOGFILE="$(dirname "$OUTPUT_DIR")/info_conf.log"
ERRORFILE="$(dirname "$OUTPUT_DIR")/error_conf.log"

#------------------#
# Create Workspace #
#------------------#
# Lock directory : refuse to run on a locked directory unless -r was given
if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ "$RERUN" -eq 0 ]; then
  print_info "Confpass lock $INPUT_DIR -> exit" 1
  exit 1
fi
rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1
# a normal run starts from a clean conf directory; a rerun keeps it and only
# drops the previous USF file
if [ "$RERUN" -eq 0 ]; then rm -r "$CONF_DIR" > /dev/null 2>&1; fi
if [ "$RERUN" -eq 1 ]; then rm "$USF_FILE" > /dev/null 2>&1; fi
mkdir -p "$CONF_DIR" > /dev/null 2>&1
mkdir -p "$RES_CONF_DIR" > /dev/null 2>&1

#---------------#
#  Check Pass   #
#---------------#

# if usf contains more than 40% of 0.600 confidence -> usf error
# NOTE(review): the ratio is computed against the lines that do NOT hold
# confidence="0.600" (grep -v), not against the total - confirm the intent.
if [ -s "$USF_FILE" ]; then
  conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}")
  conftoother=$(grep -c -v -E 'confidence="0.600"' "${USF_FILE}")
  if [ "$conftoother" -gt 0 ]; then
    pourcentageofzerosix=$(((conftozerosix * 100) / conftoother))
    if [ "$pourcentageofzerosix" -gt 40 ]; then
      # suspicious USF : keep a backup and compute everything again
      print_warn "${USF_FILE} got $pourcentageofzerosix% of 0.6 confidence" 1
      mv "${USF_FILE}" "${USF_FILE}.back"
      rm -r "$CONF_DIR" > /dev/null 2>&1
    else
      # NOTE(review): this exit leaves CONFPASS.lock in place - confirm.
      print_warn "${USF_FILE} already done, skipping it" 1
      exit 0
    fi
  fi
fi

#------#
# Save #
#------#
# keep a copy of the configuration used, plus the directories of this run
cp "$CONFPASS_CONFIG_FILE" "$OUTPUT_DIR/ConfPass.cfg"
echo "RES_CONF_DIR=$RES_CONF_DIR" >> "$OUTPUT_DIR/ConfPass.cfg"
echo "CONF_DIR=$CONF_DIR" >> "$OUTPUT_DIR/ConfPass.cfg"

#--------------------#
# CONFIDENCE MEASURE #
#--------------------#

# Check pourcentage of scored_ctm already done, if < 85% done confidence measure
nbres_p=$(confpass_count_files "$RES_P" treil)
nbconf=$(confpass_count_files "$RES_CONF_DIR" res)
if [ "$nbres_p" -gt 0 ]; then
  pourcentageres=$(((nbconf * 100) / nbres_p))
  if [ "$pourcentageres" -lt 85 ]; then
    print_info "Calcul Confidence $INPUT_DIR $RES_NAME" 1
    confpass_run "$MAIN_SCRIPT_PATH/ConfidenceMeasure.sh" "$INPUT_DIR" "$RES_NAME"
  else
    print_info "skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1
  fi
fi

### Check scored_ctm number res files !
if [ "$CHECK" -eq 1 ]; then
  nbconf=$(confpass_count_files "$RES_CONF_DIR" res)
  if [ "$nbres_p" -ne "$nbconf" ]; then
    echo "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" >> "$LOGFILE"
  fi
fi

#---------------------------#
# FROM RES WITH CONF => USF #
#---------------------------#
# format every entry of the scored_ctm directory
for f in "$RES_CONF_DIR"/*; do
  if [ -e "$f" ]; then "$SCRIPT_PATH/formatRES.pl" "$f"; fi
done
# create USF configuration file
# NOTE(review): AUTHOR and RULES are presumably set by ConfPass.cfg - confirm.
echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > "$OUTPUT_DIR/$BASENAME.usf_cfg"
# create USF file
confpass_run "$SCRIPT_PATH/res2out.pl" --dir "$RES_CONF_DIR" --format USF \
  --ignore "$RULES/asupp" --out "$USF_FILE.tmp" \
  --usf_config "$OUTPUT_DIR/$BASENAME.usf_cfg"
rm "$OUTPUT_DIR/$BASENAME.usf_cfg" > /dev/null 2>&1
"$SCRIPT_PATH/BdlexUC.pl" "$RULES/basic" -f < "$USF_FILE.tmp" > "$USF_FILE"
cp "$USF_FILE" "${OUTPUT_DIR}/${BASENAME}.usf"
rm "$USF_FILE.tmp" > /dev/null 2>&1

#----------------#
# Check USF file #
#----------------#
if [ "$CHECK" -eq 1 ]; then
  check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf"
  if [ $? -eq 1 ]; then
    echo -e "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" >> "$ERRORFILE"
    exit 1
  fi
fi

#-------#
# CLOSE #
#-------#
# Seem OK
print_info "<= End $BASENAME ConfPass | $(date +'%d/%m/%y %H:%M:%S')" 1
echo -e "ConfPass $BASENAME OK" >> "$LOGFILE"

# unlock directory
mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock"
main_tools/ExploitConfidencePass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ExploitConfidencePass.sh # 4 # File : ExploitConfidencePass.sh #
5 # Brief : Exploit the ASR confidence pass to : # 5 # Brief : Exploit the ASR confidence pass to : #
6 # -> boost the confident zone # 6 # -> boost the confident zone #
7 # -> find alternative in non confident zone 7 # -> find alternative in non confident zone
8 # -> dynamically extend the lexicon # 8 # -> dynamically extend the lexicon #
9 # Author : Jean-François Rey # 9 # Author : Jean-François Rey #
10 # (base on Emmanuel Ferreira # 10 # (base on Emmanuel Ferreira #
11 # and Hugo Mauchrétien works) # 11 # and Hugo Mauchrétien works) #
12 # Version : 1.0 # 12 # Version : 1.0 #
13 # Date : 25/06/13 # 13 # Date : 25/06/13 #
14 ##################################################### 14 #####################################################
15 15
16 # Check OTMEDIA_HOME env var 16 # Check OTMEDIA_HOME env var
17 if [ -z ${OTMEDIA_HOME} ] 17 if [ -z ${OTMEDIA_HOME} ]
18 then 18 then
19 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 19 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
20 export OTMEDIA_HOME=$OTMEDIA_HOME 20 export OTMEDIA_HOME=$OTMEDIA_HOME
21 fi 21 fi
22 22
23 # where is ExploitConfidencePass.sh 23 # where is ExploitConfidencePass.sh
24 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 24 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
25 25
26 if [ -z ${SCRIPT_PATH} ] 26 if [ -z ${SCRIPT_PATH} ]
27 then 27 then
28 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 28 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
29 fi 29 fi
30 30
31 # Include scripts 31 # Include scripts
32 . $SCRIPT_PATH"/Tools.sh" 32 . $SCRIPT_PATH"/Tools.sh"
33 . $SCRIPT_PATH"/CheckExploitConfPass.sh" 33 . $SCRIPT_PATH"/CheckExploitConfPass.sh"
34 34
35 # where is ExploitConfidencePass.cfg 35 # where is ExploitConfidencePass.cfg
36 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" 36 EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg"
37 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] 37 if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ]
38 then 38 then
39 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE 39 . $EXPLOITCONFIDENCEPASS_CONFIG_FILE
40 else 40 else
41 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 41 echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2
42 exit 1 42 exit 1
43 fi 43 fi
44 44
45 #---------------# 45 #---------------#
46 # Parse Options # 46 # Parse Options #
47 #---------------# 47 #---------------#
48 while getopts ":hDv:cf:r" opt 48 while getopts ":hDv:cf:r" opt
49 do 49 do
50 case $opt in 50 case $opt in
51 h) 51 h)
52 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" 52 echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n"
53 echo -e "\t Options:" 53 echo -e "\t Options:"
54 echo -e "\t\t-h :\tprint this message" 54 echo -e "\t\t-h :\tprint this message"
55 echo -e "\t\t-D :\tDEBUG mode on" 55 echo -e "\t\t-D :\tDEBUG mode on"
56 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 56 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
57 echo -e "\t\t-c :\tCheck process, stop if error detected" 57 echo -e "\t\t-c :\tCheck process, stop if error detected"
58 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 58 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
59 echo -e "\t\t-r n :\tforce rerun without deleting files" 59 echo -e "\t\t-r n :\tforce rerun without deleting files"
60 exit 1 60 exit 1
61 ;; 61 ;;
62 D) 62 D)
63 DEBUG=1 63 DEBUG=1
64 ;; 64 ;;
65 v) 65 v)
66 VERBOSE=$OPTARG 66 VERBOSE=$OPTARG
67 ;; 67 ;;
68 c) 68 c)
69 CHECK=1 69 CHECK=1
70 ;; 70 ;;
71 f) 71 f)
72 FORKS="--forks $OPTARG" 72 FORKS="--forks $OPTARG"
73 ;; 73 ;;
74 r) 74 r)
75 RERUN=1 75 RERUN=1
76 ;; 76 ;;
77 :) 77 :)
78 echo "Option -$OPTARG requires an argument." >&2 78 echo "Option -$OPTARG requires an argument." >&2
79 exit 1 79 exit 1
80 ;; 80 ;;
81 \?) 81 \?)
82 echo "BAD USAGE : unknow opton -$OPTARG" 82 echo "BAD USAGE : unknow opton -$OPTARG"
83 #exit 1 83 #exit 1
84 ;; 84 ;;
85 esac 85 esac
86 done 86 done
87 87
88 # mode debug enable 88 # mode debug enable
89 if [ $DEBUG -eq 1 ] 89 if [ $DEBUG -eq 1 ]
90 then 90 then
91 set -x 91 set -x
92 echo -e "## Mode DEBUG ON ##" 92 echo -e "## Mode DEBUG ON ##"
93 REDIRECTION_OUTPUT=""
94 else
95 REDIRECTION_OUTPUT=" 2> /dev/null"
93 fi 96 fi
94 97
95 # mode verbose enable 98 # mode verbose enable
96 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi 99 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
97 100
98 # Check USAGE by arguments number 101 # Check USAGE by arguments number
99 if [ $(($#-($OPTIND-1))) -ne 1 ] 102 if [ $(($#-($OPTIND-1))) -ne 1 ]
100 then 103 then
101 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" 104 echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>"
102 echo "$0 -h for more info" 105 echo "$0 -h for more info"
103 exit 1 106 exit 1
104 fi 107 fi
105 108
106 shift $((OPTIND-1)) 109 shift $((OPTIND-1))
107 # check input directory - first argument 110 # check input directory - first argument
108 if [ ! -e $1 ] 111 if [ ! -e $1 ]
109 then 112 then
110 print_error "can't open $1" 113 print_error "can't open $1"
111 exit 1 114 exit 1
112 fi 115 fi
113 116
114 #-------------# 117 #-------------#
115 # GLOBAL VARS # 118 # GLOBAL VARS #
116 #-------------# 119 #-------------#
117 INPUT_DIR=$(readlink -e $1) 120 INPUT_DIR=$(readlink -e $1)
118 OUTPUT_DIR=$INPUT_DIR 121 OUTPUT_DIR=$INPUT_DIR
119 BASENAME=$(basename $OUTPUT_DIR) 122 BASENAME=$(basename $OUTPUT_DIR)
120 SHOW_DIR="$OUTPUT_DIR/shows/" 123 SHOW_DIR="$OUTPUT_DIR/shows/"
121 SOLR_RES="$OUTPUT_DIR/solr/" 124 SOLR_RES="$OUTPUT_DIR/solr/"
122 EXT_LEX="$OUTPUT_DIR/LEX/" 125 EXT_LEX="$OUTPUT_DIR/LEX/"
123 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" 126 TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/"
124 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log" 127 LOGFILE="$(dirname $OUTPUT_DIR)/info_exploitconf.log"
125 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log" 128 ERRORFILE="$(dirname $OUTPUT_DIR)/error_exploitconf.log"
126 129
127 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" 130 CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg"
128 if [ -e $CONFPASS_CONFIG_FILE ] 131 if [ -e $CONFPASS_CONFIG_FILE ]
129 then 132 then
130 { 133 {
131 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") 134 RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=")
132 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") 135 RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=")
133 print_warn "Use confidence measure from : $RES_CONF" 1 136 print_warn "Use confidence measure from : $RES_CONF" 1
134 } 137 }
135 else 138 else
136 { 139 {
137 print_error "Can't find $CONFPASS_CONFIG_FILE" 1 140 print_error "Can't find $CONFPASS_CONFIG_FILE" 1
138 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" 141 RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm"
139 RES_CONF="$INPUT_DIR/conf/res_p2" 142 RES_CONF="$INPUT_DIR/conf/res_p2"
140 } 143 }
141 fi 144 fi
142 145
143 mkdir -p $SHOW_DIR 146 mkdir -p $SHOW_DIR > /dev/null 2>&1
144 mkdir -p $SOLR_RES 147 mkdir -p $SOLR_RES > /dev/null 2>&1
145 mkdir -p $EXT_LEX 148 mkdir -p $EXT_LEX > /dev/null 2>&1
146 mkdir -p $TRIGGER_CONFZONE 149 mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1
147 150
148 #------------------# 151 #------------------#
149 # Create Workspace # 152 # Create Workspace #
150 #------------------# 153 #------------------#
151 # Lock directory 154 # Lock directory
152 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1; fi 155 if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1; fi
153 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 156 rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1
154 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 157 touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1
155 158
156 #------# 159 #------#
157 # Save # 160 # Save #
158 #------# 161 #------#
159 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg 162 cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg
160 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg 163 echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg
161 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg 164 echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg
162 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg 165 echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg
163 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg 166 echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg
164 167
165 168
166 #-----------------------# 169 #-----------------------#
167 # Segmentation by show # 170 # Segmentation by show #
168 #-----------------------# 171 #-----------------------#
169 # create txt file from scored res 172 # create txt file from scored res
170 # tag pos and lemmatization of the txt file 173 # tag pos and lemmatization of the txt file
171 # merge the scored res and taglem file 174 # merge the scored res and taglem file
172 # segment using the last generated file 175 # segment using the last generated file
173 # and create a ctm file by show 176 # and create a ctm file by show
174 177
175 print_info "Segmentation by show" 1 178 print_info "Segmentation by show" 1
176 179
177 # -> to txt 180 # -> to txt
178 print_info "Create txt from scored res" 2 181 print_info "Create txt from scored res" 2
179 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm 182 cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
180 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp 183 cat $INPUT_DIR/$BASENAME.seg | $SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp
181 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt 184 cat $INPUT_DIR/$BASENAME.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt
182 185
183 # -> to tagger + lemme 186 # -> to tagger + lemme
184 print_info "Tag pos and lem in txt file" 2 187 print_info "Tag pos and lem in txt file" 2
185 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp 188 iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
186 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem 189 $SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem
187 190
188 # merge sctm and taglem 191 # merge sctm and taglem
189 print_info "Merge scored ctm with tag pos and lem file" 2 192 print_info "Merge scored ctm with tag pos and lem file" 2
190 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl 193 cat $INPUT_DIR/$BASENAME.sctm | $SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl
191 194
192 # -> new seg 195 # -> new seg
193 print_info "Create xml file and run Topic Seg" 2 196 print_info "Create xml file and run Topic Seg" 2
194 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml 197 $SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
195 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem 198 rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem
196 199
197 # Lia_topic_seg : bring together sentences into show 200 # Lia_topic_seg : bring together sentences into show
198 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml 201 cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
199 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg 202 java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
200 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg 203 cat $INPUT_DIR/show.seg | $SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg
201 rm 0.xml $INPUT_DIR/show.seg 204 rm 0.xml $INPUT_DIR/show.seg
202 205
203 if [ $CHECK -eq 1 ] 206 if [ $CHECK -eq 1 ]
204 then 207 then
205 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi 208 if [ ! -s $INPUT_DIR/$BASENAME.show.seg ];then echo -e "ERROR : no Topic segmentation" >> $ERRORFILE; fi
206 fi 209 fi
207 210
208 # Segment ctm into several show files and create a seg list by show 211 # Segment ctm into several show files and create a seg list by show
209 print_info "Segment ctm into show files and a seg list by show" 2 212 print_info "Segment ctm into show files and a seg list by show" 2
210 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR 213 $SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR $REDIRECTION_OUTPUT
211 214
212 #-----------------------------------------------------------# 215 #-----------------------------------------------------------#
213 # SOLR QUERIES # 216 # SOLR QUERIES #
214 # -> Create Confidente Word # 217 # -> Create Confidente Word #
215 # Keep conf words and use Tags # 218 # Keep conf words and use Tags #
216 # -> Query SOLR (document & multimedia) # 219 # -> Query SOLR (document & multimedia) #
217 # concat word + add date 2 day before and after the show # 220 # concat word + add date 2 day before and after the show #
218 # query document & multimedia # 221 # query document & multimedia #
219 #-----------------------------------------------------------# 222 #-----------------------------------------------------------#
220 print_info "Create SOLR queries and ASK SOLR" 1 223 print_info "Create SOLR queries and ASK SOLR" 1
221 for show in $(ls $SHOW_DIR/*.ctm) 224 for show in $(ls $SHOW_DIR/*.ctm)
222 do 225 do
223 bn=$(basename $show .ctm) 226 bn=$(basename $show .ctm)
224 # Remove words with low confidence and keep useful tagger words 227 # Remove words with low confidence and keep useful tagger words
225 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone" 228 cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]{3,5}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
226 # Get date 2 day before and after the show 229 # Get date 2 day before and after the show
227 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)` 230 datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
228 # Create SOLR queries 231 # Create SOLR queries
229 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries" 232 cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
230 # Ask SOLR DB 233 # Ask SOLR DB
231 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then 234 if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
232 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]" 235 query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
233 echo $query > $SHOW_DIR/$bn.queries 236 echo $query > $SHOW_DIR/$bn.queries
234 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp 237 python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp $REDIRECTION_OUTPUT
235 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords 238 cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
236 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt 239 cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
237 rm $SOLR_RES/*.tmp 240 rm $SOLR_RES/*.tmp > /dev/null 2>&1
238 fi 241 fi
239 242
240 if [ $CHECK -eq 1 ] 243 if [ $CHECK -eq 1 ]
241 then 244 then
242 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ] 245 if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
243 then 246 then
244 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1 247 print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 1
245 fi 248 fi
246 fi 249 fi
247 250
248 done 251 done
249 252
250 #----------------------------------------------------------------------------------------------- 253 #-----------------------------------------------------------------------------------------------
251 # Build trigger file 254 # Build trigger file
252 # 1) keywords are automatically boosted in the non-confident zones of the current res 255 # 1) keywords are automatically boosted in the non-confident zones of the current res
253 # confident zones are boosted 256 # confident zones are boosted
254 # previous words in sensitive zones are penalized 257 # previous words in sensitive zones are penalized
255 # 2) OOVs are extracted + phonetized 258 # 2) OOVs are extracted + phonetized
256 # 3) Try to find OOVs acoustically in the current segment 259 # 3) Try to find OOVs acoustically in the current segment
257 # 4) Generate the .trigg file 260 # 4) Generate the .trigg file
258 #------------------------------------------------------------------------------------------------ 261 #------------------------------------------------------------------------------------------------
259 print_info "Build trigger files" 1 262 print_info "Build trigger files" 1
260 for i in `ls $SOLR_RES/*.keywords` 263 for i in `ls $SOLR_RES/*.keywords`
261 do 264 do
262 basename=`basename $i .keywords` 265 basename=`basename $i .keywords`
263 266
264 # 267 #
265 # Tokenize & produce coverage report 268 # Tokenize & produce coverage report
266 # Use filter you need 269 # Use filter you need
267 # 270 #
268 print_info "keywords filtering and produce coverage report" 2 271 print_info "keywords filtering and produce coverage report" 2
269 # Default filter 272 # Default filter
270 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\ 273 cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
271 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 274 $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
272 # do less filter 275 # do less filter
273 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok 276 #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
274 277
275 278
276 # 279 #
277 # Extract "real" OOV and phonetize them 280 # Extract "real" OOV and phonetize them
278 # -> small custom filtering to avoid too much noise 281 # -> small custom filtering to avoid too much noise
279 # 282 #
280 print_info "Extract OOV and phonetize them" 2 283 print_info "Extract OOV and phonetize them" 2
281 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov 284 ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov
282 285
283 # 286 #
284 # Search INVOC & OOV in the current lattice 287 # Search INVOC & OOV in the current lattice
285 # 288 #
286 print_info "Search INVOC and OOV in the current lattice" 2 289 print_info "Search INVOC and OOV in the current lattice" 2
287 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch 290 cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep -v --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
288 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch 291 cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch
289 292
290 # For each treil 293 # For each treil
291 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 294 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
292 do 295 do
293 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound 296 $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
294 # 297 #
295 # Produce the boost file for the next decoding pass 298 # Produce the boost file for the next decoding pass
296 # 299 #
297 print_info "Produce trigg file : $baseseg " 3 300 print_info "Produce trigg file : $baseseg " 3
298 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg 301 cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
299 done 302 done
300 303
301 done 304 done
302 305
303 #----------------------------------------------------------------------------------------------- 306 #-----------------------------------------------------------------------------------------------
304 # Build the extended SPEERAL Lexicon 307 # Build the extended SPEERAL Lexicon
305 # 1) Merge OOVs + LEXICON 308 # 1) Merge OOVs + LEXICON
306 # 2) Related texts are collected in order to find the invoc word that maximizes the ppl (LM proba) 309 # 2) Related texts are collected in order to find the invoc word that maximizes the ppl (LM proba)
307 # 3) The current lexicon is extended with all the valid OOVs 310 # 3) The current lexicon is extended with all the valid OOVs
308 #----------------------------------------------------------------------------------------------- 311 #-----------------------------------------------------------------------------------------------
309 print_info "Build extended Speeral Lexicon" 1 312 print_info "Build extended Speeral Lexicon" 1
310 mkdir -p $EXT_LEX/final 313 mkdir -p $EXT_LEX/final
311 mkdir -p $EXT_LEX/tmp 314 mkdir -p $EXT_LEX/tmp
312 mkdir -p $EXT_LEX/tmp/txt 315 mkdir -p $EXT_LEX/tmp/txt
313 # 316 #
314 # Collect the acoustically found OOVs and their phonetisations 317 # Collect the acoustically found OOVs and their phonetisations
315 # 318 #
316 print_info "Get all OOV and retrieve all phonetisation" 2 319 print_info "Get all OOV and retrieve all phonetisation" 2
317 for i in `ls $SOLR_RES/*.phon_oov` 320 for i in `ls $SOLR_RES/*.phon_oov`
318 do 321 do
319 basename=`basename $i .phon_oov` 322 basename=`basename $i .phon_oov`
320 323
321 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null 324 rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
322 # list the acoustically found entries for the show 325 # list the acoustically found entries for the show
323 for baseseg in $(cat "$SHOW_DIR/$basename.lst") 326 for baseseg in $(cat "$SHOW_DIR/$basename.lst")
324 do 327 do
325 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound 328 cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
326 done 329 done
327 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp 330 cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
328 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound 331 mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound
329 332
330 # 333 #
331 # Extract OOV really added 334 # Extract OOV really added
332 # 335 #
333 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov 336 cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
334 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound 337 $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound $REDIRECTION_OUTPUT
335 # 338 #
336 # Retrieve all phonetisation 339 # Retrieve all phonetisation
337 # 340 #
338 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon 341 cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
339 done 342 done
340 343
341 # 344 #
342 # Merge OOVs and their phonetisation 345 # Merge OOVs and their phonetisation
343 # 346 #
344 print_info "Merge OOV and their phonetisation" 2 347 print_info "Merge OOV and their phonetisation" 2
345 lexname=$(basename $LEXICON) 348 lexname=$(basename $LEXICON)
346 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon 349 cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
347 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound 350 cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
348 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon 351 $SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon $REDIRECTION_OUTPUT
349 352
350 # 353 #
351 # Collect + clean retrieved txt 354 # Collect + clean retrieved txt
352 # 355 #
353 print_info "Collect and clean SOLR txt answers" 2 356 print_info "Collect and clean SOLR txt answers" 2
354 # choose filter 357 # choose filter
355 # default 358 # default
356 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt 359 cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
357 # low filter 360 # low filter
358 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt 361 #cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt
359 362
360 # 363 #
361 # Construct the map file 364 # Construct the map file
362 # 365 #
363 # Notes: 366 # Notes:
364 # - Expected format : 367 # - Expected format :
365 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1> 368 # <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
366 # 369 #
367 print_info "Construct map file" 2 370 print_info "Construct map file" 2
368 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null 371 rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
369 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null 372 rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null
370 373
371 while read oov 374 while read oov
372 do 375 do
373 oov=`echo $oov | sed "s/\n//g"` 376 oov=`echo $oov | sed "s/\n//g"`
374 # 377 #
375 # Obtain the oov's tag 378 # Obtain the oov's tag
376 # 379 #
377 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2` 380 #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`
378 # 381 #
379 # Try to collect text containing the oov word 382 # Try to collect text containing the oov word
380 # 383 #
381 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt 384 cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
382 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then 385 if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
383 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 386 nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
384 if [ $nbWords -eq 0 ]; then 387 if [ $nbWords -eq 0 ]; then
385 echo "UNVALID OOV: $oov => $nbWords occurrences" 388 echo "UNVALID OOV: $oov => $nbWords occurrences"
386 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 389 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
387 else 390 else
388 # 391 #
389 # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl over all the collected txt 392 # Find a candidate in a filtered invoc lexicon => a candidate which maximizes the ppl over all the collected txt
390 # 393 #
391 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt" 394 #echo "$/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $LEXICON.bdlex_tok $EXT_LEX/tmp/txt/$oov.bdlex_txt"
392 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 395 candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
393 if [ ! x$candidate = "x" ]; then 396 if [ ! x$candidate = "x" ]; then
394 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon 397 grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
395 while read phonLine 398 while read phonLine
396 do 399 do
397 #<word> <phon> => <word> <candidate> <phon> 400 #<word> <phon> => <word> <candidate> <phon>
398 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map 401 echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
399 done < $EXT_LEX/tmp/$oov.phon 402 done < $EXT_LEX/tmp/$oov.phon
400 else 403 else
401 echo "UNVALID OOV: $oov => no availaible Candidate word in LM" 404 echo "UNVALID OOV: $oov => no availaible Candidate word in LM"
402 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 405 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
403 fi 406 fi
404 fi 407 fi
405 else 408 else
406 echo "UNVALID OOV: $oov" 409 echo "UNVALID OOV: $oov"
407 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov 410 echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
408 fi 411 fi
409 done < $EXT_LEX/final/all.oov_acousticlyfound 412 done < $EXT_LEX/final/all.oov_acousticlyfound
410 413
411 # 414 #
412 ### Speeral 415 ### Speeral
413 # 416 #
414 417
415 lexname=`basename $LEXICON` 418 lexname=`basename $LEXICON`
416 # 419 #
417 # Build the final trigger file 420 # Build the final trigger file
418 # 421 #
419 print_info "Clean trigg files" 2 422 print_info "Clean trigg files" 2
420 mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null 423 mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
421 mkdir -p $EXT_LEX/speeral/ 2> /dev/null 424 mkdir -p $EXT_LEX/speeral/ 2> /dev/null
422 for i in `ls $TRIGGER_CONFZONE/*.trigg` 425 for i in `ls $TRIGGER_CONFZONE/*.trigg`
423 do 426 do
424 basename=`basename $i .trigg` 427 basename=`basename $i .trigg`
425 cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg 428 cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg
426 done 429 done
427 # 430 #
428 # Compile the speeral extended lexicon 431 # Compile the speeral extended lexicon
429 # 432 #
430 print_info "Compile Speeral extended lexicon" 2 433 print_info "Compile Speeral extended lexicon" 2
431 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext 434 $SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext $REDIRECTION_OUTPUT
432 435
433 if [ $CHECK -eq 1 ] 436 if [ $CHECK -eq 1 ]
434 then 437 then
435 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext" 438 check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
436 if [ $? -eq 1 ] 439 if [ $? -eq 1 ]
437 then 440 then
438 echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE 441 echo -e "ERROR : Building Speeral Lexicon $INPUT_DIR " >> $ERRORFILE
439 exit 1; 442 exit 1;
440 fi 443 fi
441 fi 444 fi
442 445
443 446
444 #-------# 447 #-------#
445 # CLOSE # 448 # CLOSE #
446 #-------# 449 #-------#
447 # Seems OK 450 # Seems OK
448 print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1 451 print_info "<= End $BASENAME Solr | $(date +'%d/%m/%y %H:%M:%S')" 1
449 echo -e "#Solr $BASENAME " >> $LOGFILE 452 echo -e "#Solr $BASENAME " >> $LOGFILE
450 453
451 # unlock directory 454 # unlock directory
452 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" 455 mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
453 456
454 457
455 458
main_tools/FirstPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : FirstPass.sh # 4 # File : FirstPass.sh #
5 # Brief : ASR first pass and speaker diarization # 5 # Brief : ASR first pass and speaker diarization #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # (based on Emmanuel Ferreira # 7 # (based on Emmanuel Ferreira #
8 # and Hugo Mauchrétien's work) # 8 # and Hugo Mauchrétien's work) #
9 # Version : 1.1 # 9 # Version : 1.1 #
10 # Date : 18/06/13 # 10 # Date : 18/06/13 #
11 ##################################################### 11 #####################################################
12 12
13 # Check OTMEDIA_HOME env var 13 # Check OTMEDIA_HOME env var
14 if [ -z ${OTMEDIA_HOME} ] 14 if [ -z ${OTMEDIA_HOME} ]
15 then 15 then
16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
17 export OTMEDIA_HOME=$OTMEDIA_HOME 17 export OTMEDIA_HOME=$OTMEDIA_HOME
18 fi 18 fi
19 19
20 # where is FirstPass.sh 20 # where is FirstPass.sh
21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
22 22
23 # scripts path 23 # scripts path
24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
25 25
26 # Include scripts 26 # Include scripts
27 . $SCRIPT_PATH"/Tools.sh" 27 . $SCRIPT_PATH"/Tools.sh"
28 . $SCRIPT_PATH"/CheckFirstPass.sh" 28 . $SCRIPT_PATH"/CheckFirstPass.sh"
29 29
30 # where is FirstPass.cfg 30 # where is FirstPass.cfg
31 FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" 31 FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg"
32 if [ -e $FIRSTPASS_CONFIG_FILE ] 32 if [ -e $FIRSTPASS_CONFIG_FILE ]
33 then 33 then
34 . $FIRSTPASS_CONFIG_FILE 34 . $FIRSTPASS_CONFIG_FILE
35 else 35 else
36 echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2 36 echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2
37 exit 1 37 exit 1
38 fi 38 fi
39 39
40 #---------------# 40 #---------------#
41 # Parse Options # 41 # Parse Options #
42 #---------------# 42 #---------------#
43 while getopts ":hDv:cf:r" opt 43 while getopts ":hDv:cf:r" opt
44 do 44 do
45 case $opt in 45 case $opt in
46 h) 46 h)
47 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" 47 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
48 echo -e "\t Options:" 48 echo -e "\t Options:"
49 echo -e "\t\t-h :\tprint this message" 49 echo -e "\t\t-h :\tprint this message"
50 echo -e "\t\t-D :\tDEBUG mode on" 50 echo -e "\t\t-D :\tDEBUG mode on"
51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
52 echo -e "\t\t-c :\tCheck process, stop if error detected" 52 echo -e "\t\t-c :\tCheck process, stop if error detected"
53 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 53 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
54 echo -e "\t\t-r :\tforce rerun the wav file" 54 echo -e "\t\t-r :\tforce rerun the wav file"
55 exit 1 55 exit 1
56 ;; 56 ;;
57 D) 57 D)
58 DEBUG=1 58 DEBUG=1
59 ;; 59 ;;
60 v) 60 v)
61 VERBOSE=$OPTARG 61 VERBOSE=$OPTARG
62 ;; 62 ;;
63 c) 63 c)
64 CHECK=1 64 CHECK=1
65 ;; 65 ;;
66 f) 66 f)
67 FORKS="--forks $OPTARG" 67 FORKS="--forks $OPTARG"
68 ;; 68 ;;
69 r) 69 r)
70 RERUN=1 70 RERUN=1
71 ;; 71 ;;
72 :) 72 :)
73 echo "Option -$OPTARG requires an argument." >&2 73 echo "Option -$OPTARG requires an argument." >&2
74 exit 1 74 exit 1
75 ;; 75 ;;
76 \?) 76 \?)
77 echo "BAD USAGE : unknow opton -$OPTARG" 77 echo "BAD USAGE : unknow opton -$OPTARG"
78 exit 1 78 exit 1
79 ;; 79 ;;
80 esac 80 esac
81 done 81 done
82 82
83 # debug mode enabled 83 # debug mode enabled
84 if [ $DEBUG -eq 1 ] 84 if [ $DEBUG -eq 1 ]
85 then 85 then
86 set -x 86 set -x
87 echo -e "## Mode DEBUG ON ##" 87 echo -e "## Mode DEBUG ON ##"
88 REDIRECTION_OUTPUT="" 88 REDIRECTION_OUTPUT=""
89 else 89 else
90 REDIRECTION_OUTPUT=" > /dev/null 2>&1" 90 REDIRECTION_OUTPUT=" 2> /dev/null"
91 fi 91 fi
92 92
93 # verbose mode enabled 93 # verbose mode enabled
94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; REDIRECTION_OUTPUT=" 2> /dev/null"; fi 94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
95 95
96 # Check usage by number of arguments 96 # Check usage by number of arguments
97 if [ $(($#-($OPTIND-1))) -ne 2 ] 97 if [ $(($#-($OPTIND-1))) -ne 2 ]
98 then 98 then
99 echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" 99 echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
100 echo "$0 -h for more info" 100 echo "$0 -h for more info"
101 exit 1 101 exit 1
102 fi 102 fi
103 103
104 shift $((OPTIND-1)) 104 shift $((OPTIND-1))
105 # check audio file - First argument 105 # check audio file - First argument
106 if [ -e $1 ] && [ -s $1 ] 106 if [ -e $1 ] && [ -s $1 ]
107 then 107 then
108 # absolute path to wav file 108 # absolute path to wav file
109 WAV_FILE=$(readlink -e $1) 109 WAV_FILE=$(readlink -e $1)
110 # wav filename 110 # wav filename
111 FILENAME=$(basename $WAV_FILE) 111 FILENAME=$(basename $WAV_FILE)
112 # wav filename without extension 112 # wav filename without extension
113 BASENAME=${FILENAME%.*} 113 BASENAME=${FILENAME%.*}
114 114
115 print_info "=> $BASENAME P1 | $(date +'%d/%m/%y %H:%M:%S')" 1 115 print_info "=> $BASENAME P1 | $(date +'%d/%m/%y %H:%M:%S')" 1
116 print_info "$WAV_FILE OK" 1 116 print_info "$WAV_FILE OK" 1
117 else 117 else
118 print_error "can't find $1 OR file is empty" 118 print_error "can't find $1 OR file is empty"
119 exit 1 119 exit 1
120 fi 120 fi
121 121
122 # check output directory - Second argument 122 # check output directory - Second argument
123 if [ ! -e $2 ] 123 if [ ! -e $2 ]
124 then 124 then
125 mkdir -p $2 125 mkdir -p $2
126 print_info "Make directory $2" 1 126 print_info "Make directory $2" 1
127 fi 127 fi
128 128
129 129
130 #-------------# 130 #-------------#
131 # GLOBAL VARS # 131 # GLOBAL VARS #
132 #-------------# 132 #-------------#
133 OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path 133 OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path
134 OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME 134 OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME
135 PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file 135 PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file
136 PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory 136 PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory
137 SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file 137 SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file
138 LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file 138 LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file
139 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" 139 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1"
140 LOGFILE="$OUTPUT_DIR/info_p1.log" 140 LOGFILE="$OUTPUT_DIR/info_p1.log"
141 ERRORFILE="$OUTPUT_DIR/error_p1.log" 141 ERRORFILE="$OUTPUT_DIR/error_p1.log"
142 142
143 #------------------# 143 #------------------#
144 # Create WORKSPACE # 144 # Create WORKSPACE #
145 #------------------# 145 #------------------#
146 if [ ! -e $OUTPUT_DIR_BASENAME ] 146 if [ ! -e $OUTPUT_DIR_BASENAME ]
147 then 147 then
148 mkdir -p $OUTPUT_DIR_BASENAME 148 mkdir -p $OUTPUT_DIR_BASENAME
149 print_info "Make directory $OUTPUT_DIR_BASENAME" 1 149 print_info "Make directory $OUTPUT_DIR_BASENAME" 1
150 fi 150 fi
151 151
152 # Lock directory 152 # Lock directory
153 if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi 153 if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi
154 rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 154 rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1
155 touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 155 touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1
156 156
157 rm -r $PLP_DIR > /dev/null 2>&1; 157 rm -r $PLP_DIR > /dev/null 2>&1;
158 mkdir -p $PLP_DIR 158 mkdir -p $PLP_DIR
159 print_info "Make directory $PLP_DIR" 1 159 print_info "Make directory $PLP_DIR" 1
160 if [ $RERUN -eq 0 ]; 160 if [ $RERUN -eq 0 ];
161 then 161 then
162 rm -r $RES_DIR > /dev/null 2>&1; 162 rm -r $RES_DIR > /dev/null 2>&1;
163 else 163 else
164 rm $RES_DIR/*.lock > /dev/null 2>&1 164 rm $RES_DIR/*.lock > /dev/null 2>&1
165 fi 165 fi
166 mkdir -p $RES_DIR $REDIRECTION_OUTPUT 166 mkdir -p $RES_DIR > /dev/null 2>&1
167 print_info "Make directory $RES_DIR" 1 167 print_info "Make directory $RES_DIR" 1
168 168
169 #--------------------# 169 #--------------------#
170 # Save configuration # 170 # Save configuration #
171 #--------------------# 171 #--------------------#
172 cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg 172 cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg
173 echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 173 echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
174 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 174 echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
175 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 175 echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
176 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 176 echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
177 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 177 echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
178 echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 178 echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
179 echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 179 echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
180 echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 180 echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
181 echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 181 echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
182 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg 182 echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg
183 print_info "save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 183 print_info "save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1
184 184
185 #-------------------------# 185 #-------------------------#
186 # Check Audio File Format # 186 # Check Audio File Format #
187 #-------------------------# 187 #-------------------------#
188 error=0 188 error=0
189 temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") 189 temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz")
190 if [ -z "$temp" ]; then error=1; fi 190 if [ -z "$temp" ]; then error=1; fi
191 temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") 191 temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels")
192 if [ -z "$temp" ]; then error=1; fi 192 if [ -z "$temp" ]; then error=1; fi
193 temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") 193 temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16")
194 if [ -z "$temp" ]; then error=1; fi 194 if [ -z "$temp" ]; then error=1; fi
195 195
196 if [ $error -eq 1 ] 196 if [ $error -eq 1 ]
197 then 197 then
198 print_message $WARNING 2 "$WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" 198 print_message $WARNING 2 "$WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert"
199 print_message $INFO 3 "avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" 199 print_message $INFO 3 "avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav"
200 avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT 200 avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav $REDIRECTION_OUTPUT
201 WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav 201 WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav
202 FILENAME=$BASENAME.wav 202 FILENAME=$BASENAME.wav
203 print_message $INFO 1 "new wav file : $WAV_FILE" 203 print_message $INFO 1 "new wav file : $WAV_FILE"
204 fi 204 fi
205 205
206 #---------------# 206 #---------------#
207 # Get SRT file # 207 # Get SRT file #
208 #---------------# 208 #---------------#
209 if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] 209 if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ]
210 then 210 then
211 cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT 211 cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT
212 print_info "copy $BASENAME.SRT file into workingspace" 1 212 print_info "copy $BASENAME.SRT file into workingspace" 1
213 fi 213 fi
214 214
215 #------------# 215 #------------#
216 # WAV -> PLP # 216 # WAV -> PLP #
217 #------------# 217 #------------#
218 print_info "convert WAV -> PLP" 1 218 print_info "convert WAV -> PLP" 1
219 echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp 219 echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp
220 print_info "$BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms 220 print_info "$BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms
221 " 2 221 " 2
222 222
223 $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT 223 $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms $REDIRECTION_OUTPUT
224 224
225 if [ $CHECK -eq 1 ] 225 if [ $CHECK -eq 1 ]
226 then 226 then
227 check_first_pass_plp "$PLP_FILE" 227 check_first_pass_plp "$PLP_FILE"
228 if [ $? -eq 1 ] 228 if [ $? -eq 1 ]
229 then 229 then
230 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP_FILE" >> $ERRORFILE 230 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP_FILE" >> $ERRORFILE
231 exit 1 231 exit 1
232 fi 232 fi
233 fi 233 fi
234 234
235 rm $OUTPUT_DIR_BASENAME/list.tmp 235 rm $OUTPUT_DIR_BASENAME/list.tmp
236 236
237 #------------------------------# 237 #------------------------------#
238 # S/NS + SPEAKERS SEGMENTATION # 238 # S/NS + SPEAKERS SEGMENTATION #
239 #------------------------------# 239 #------------------------------#
240 print_info "Launch speakers diarization" 1 240 print_info "Launch speakers diarization" 1
241 # Calcul seg file 241 # Calcul seg file
242 print_info "java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 2 242 print_info "java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 2
243 #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME 243 #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME
244 java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering 244 java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME $REDIRECTION_OUTPUT #–doCEClustering
245 245
246 if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) 246 if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] )
247 then 247 then
248 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $SEG_FILE" >> $ERRORFILE 248 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $SEG_FILE" >> $ERRORFILE
249 exit 1 249 exit 1
250 fi 250 fi
251 251
252 252
253 # Create LBL file 253 # Create LBL file
254 print_info "Extract LBL file from SEG file" 2 254 print_info "Extract LBL file from SEG file" 2
255 255
256 cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE 256 cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE
257 257
258 if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) 258 if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] )
259 then 259 then
260 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $LBL_FILE" >> $ERRORFILE 260 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $LBL_FILE" >> $ERRORFILE
261 exit 1 261 exit 1
262 fi 262 fi
263 263
264 264
265 #----------------------------------------------------# 265 #----------------------------------------------------#
266 # Cut global PLP file depending to LBL segmentations # 266 # Cut global PLP file depending to LBL segmentations #
267 #----------------------------------------------------# 267 #----------------------------------------------------#
268 print_info "Cut PLP file depending to LBL segmentations" 1 268 print_info "Cut PLP file depending to LBL segmentations" 1
269 print_info "$BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 2 269 print_info "$BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 2
270 270
271 $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT 271 $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG $REDIRECTION_OUTPUT
272 272
273 if [ $CHECK -eq 1 ] 273 if [ $CHECK -eq 1 ]
274 then 274 then
275 check_first_pass_plps_lbl $PLP_DIR $LBL_FILE 275 check_first_pass_plps_lbl $PLP_DIR $LBL_FILE
276 if [ $? -eq 1 ] 276 if [ $? -eq 1 ]
277 then 277 then
278 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong .plp files number" >> $ERRORFILE 278 echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong .plp files number" >> $ERRORFILE
279 exit 1 279 exit 1
280 fi 280 fi
281 fi 281 fi
282 282
283 # change plp files names 283 # change plp files names
284 cd $PLP_DIR; 284 cd $PLP_DIR;
285 rename -f s/_/#/g *plp 285 rename -f s/_/#/g *plp
286 rename -f s/#/_/ *plp 286 rename -f s/#/_/ *plp
287 cd $OLDPWD 287 cd $OLDPWD
288 288
289 #---------------------------------------------# 289 #---------------------------------------------#
290 # PLP files list depending to acoustic models # 290 # PLP files list depending to acoustic models #
291 #---------------------------------------------# 291 #---------------------------------------------#
292 print_info "Create PLP list depending of the model" 1 292 print_info "Create PLP list depending of the model" 1
293 # Create a list of plp files 293 # Create a list of plp files
294 find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst 294 find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $OUTPUT_DIR_BASENAME/plp.lst
295 295
296 rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 296 rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1
297 for (( i=0; $i<${#MTAG[@]} ; i++ )) 297 for (( i=0; $i<${#MTAG[@]} ; i++ ))
298 do 298 do
299 a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` 299 a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst`
300 if [ -n "$a" ]; then 300 if [ -n "$a" ]; then
301 print_info "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 301 print_info "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3
302 grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 302 grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
303 fi 303 fi
304 done 304 done
305 305
306 #-----------------------# 306 #-----------------------#
307 # First Pass (DECODING) # 307 # First Pass (DECODING) #
308 #-----------------------# 308 #-----------------------#
309 # 309 #
310 # For all AM do decoding 310 # For all AM do decoding
311 # if Check error -> iter on undone decoding (max 1 times) 311 # if Check error -> iter on undone decoding (max 1 times)
312 # 312 #
313 print_info "Launch decoding" 1 313 print_info "Launch decoding" 1
314 for (( i=0; $i<${#MTAG[@]} ; i++ )) 314 for (( i=0; $i<${#MTAG[@]} ; i++ ))
315 do 315 do
316 redo=1; # nb of try if not all segs is decoded 316 redo=1; # nb of try if not all segs is decoded
317 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] 317 if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]
318 then 318 then
319 todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 319 todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
320 while [ $redo -gt 0 ]; do 320 while [ $redo -gt 0 ]; do
321 rm $RES_DIR/*.lock > /dev/null 2>&1 321 rm $RES_DIR/*.lock > /dev/null 2>&1
322 print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 2 322 print_info "$SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT" 2
323 # Run speeral 323 # Run speeral
324 $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT 324 $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock $REDIRECTION_OUTPUT
325 325
326 # Check if error 326 # Check if error
327 if [ $CHECK -eq 1 ] 327 if [ $CHECK -eq 1 ]
328 then 328 then
329 check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" 329 check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR"
330 # if error 330 # if error
331 if [ $? -eq 1 ] 331 if [ $? -eq 1 ]
332 then 332 then
333 # rerun 333 # rerun
334 redo=$(($redo - 1)); 334 redo=$(($redo - 1));
335 echo -e "WARN : Speeral output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" >> $ERRORFILE 335 echo -e "WARN : Speeral output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" >> $ERRORFILE
336 # new plp list 336 # new plp list
337 # list .seg done and compare to list of seg to do 337 # list .seg done and compare to list of seg to do
338 ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 338 ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
339 diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst 339 diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst
340 rm ${OUTPUT_DIR_BASENAME}/.tmp 340 rm ${OUTPUT_DIR_BASENAME}/.tmp
341 # log seg to do 341 # log seg to do
342 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $ERRORFILE 342 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $ERRORFILE
343 todo=${OUTPUT_DIR_BASENAME}/todo.lst 343 todo=${OUTPUT_DIR_BASENAME}/todo.lst
344 echo -e "WARN : Try $redo" >> $ERRORFILE 344 echo -e "WARN : Try $redo" >> $ERRORFILE
345 fi 345 fi
346 else 346 else
347 redo=-5; 347 redo=-5;
348 fi 348 fi
349 done 349 done
350 if [ $redo -eq 0 ] 350 if [ $redo -eq 0 ]
351 then 351 then
352 echo -e "ERROR : Speeral $todo" >> $ERRORFILE 352 echo -e "ERROR : Speeral $todo" >> $ERRORFILE
353 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $ERRORFILE 353 cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $ERRORFILE
354 #exit 1 354 #exit 1
355 fi 355 fi
356 rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 356 rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1
357 #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst 357 #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
358 rm $RES_DIR/*.lock > /dev/null 2>&1 358 rm $RES_DIR/*.lock > /dev/null 2>&1
359 fi 359 fi
360 done 360 done
361 361
362 print_info "<= End P1 $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" 1 362 print_info "<= End P1 $BASENAME | $(date +'%d/%m/%y %H:%M:%S')" 1
363 363
364 ## Check missing seg and log it 364 ## Check missing seg and log it
365 ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp 365 ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
366 echo -e "$BASENAME P1 END\n[" >> $LOGFILE 366 echo -e "$BASENAME P1 END\n[" >> $LOGFILE
367 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE 367 diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE
368 todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l) 368 todo=$(cat ${OUTPUT_DIR_BASENAME}/plp.lst | wc -l)
369 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) 369 notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l)))
370 pourcentage=$((($notdone*100)/$todo)) 370 pourcentage=$((($notdone*100)/$todo))
371 echo -e "] $pourcentage% $BASENAME" >> $LOGFILE 371 echo -e "] $pourcentage% $BASENAME" >> $LOGFILE
372 rm ${OUTPUT_DIR_BASENAME}/.tmp 372 rm ${OUTPUT_DIR_BASENAME}/.tmp
373 373
374 374
375 #---------------# 375 #---------------#
376 # Convert res # 376 # Convert res #
377 #---------------# 377 #---------------#
378 378
379 # .res => .ctm 379 # .res => .ctm
380 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT 380 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm $REDIRECTION_OUTPUT
381 # .res => .trs 381 # .res => .trs
382 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 382 echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg
383 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT 383 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg $REDIRECTION_OUTPUT
384 rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null 384 rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null
385 # .res => .txt 385 # .res => .txt
386 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT 386 $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt $REDIRECTION_OUTPUT
387 387
388 # unlock directory 388 # unlock directory
389 mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" 389 mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock"
390 390
main_tools/OneScriptToRuleThemAll.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################### 3 #####################################
4 # File: OneScriptToRuleThemAll.sh # 4 # File: OneScriptToRuleThemAll.sh #
5 # Brief : Script to launch OTMEDIA # 5 # Brief : Script to launch OTMEDIA #
6 # Version : 1.0 # 6 # Version : 1.0 #
7 # Date : 23/07/2013 # 7 # Date : 23/07/2013 #
8 # Author : Jean-François Rey # 8 # Author : Jean-François Rey #
9 ##################################### 9 #####################################
10 10
11 echo -e "OneScriptToRuleThemAll :" 11 echo -e "OneScriptToRuleThemAll :"
12 LORD=" Three::rings 12 LORD=" Three::rings
13 for:::the::Elven-King 13 for:::the::Elven-King
14 under:the:sky,:Seven:for:the 14 under:the:sky,:Seven:for:the
15 Dwarf-Lords::in::their::halls:of 15 Dwarf-Lords::in::their::halls:of
16 stone,:Nine for:Mortal 16 stone,:Nine for:Mortal
17 :::Men::: ________ doomed::to 17 :::Men::: ________ doomed::to
18 die.:One _,-'...:... \`-. for:::the 18 die.:One _,-'...:... \`-. for:::the
19 ::Dark:: ,- .:::::::::::. \`. Lord::on 19 ::Dark:: ,- .:::::::::::. \`. Lord::on
20 his:dark ,' .:::::zzz:::::. \`. :throne: 20 his:dark ,' .:::::zzz:::::. \`. :throne:
21 In:::the/ ::::OTMEDIA:::: \ Land::of 21 In:::the/ ::::OTMEDIA:::: \ Land::of
22 :Mordor:\ ::::SCRIPTS:::: / :where:: 22 :Mordor:\ ::::SCRIPTS:::: / :where::
23 ::the::: '. '::::YEEEP::::' ,' Shadows: 23 ::the::: '. '::::YEEEP::::' ,' Shadows:
24 lie.::One \`. \`\`:::::::::'' ,' Ring::to 24 lie.::One \`. \`\`:::::::::'' ,' Ring::to
25 ::rule:: \`-._\`\`\`:'''_,-' ::them:: 25 ::rule:: \`-._\`\`\`:'''_,-' ::them::
26 all,::One \`-----' ring::to 26 all,::One \`-----' ring::to
27 ::find::: them,:One 27 ::find::: them,:One
28 Ring:::::to bring::them 28 Ring:::::to bring::them
29 all::and::in:the:darkness:bind 29 all::and::in:the:darkness:bind
30 them:In:the:Land:of:Mordor 30 them:In:the:Land:of:Mordor
31 where:::the::Shadows 31 where:::the::Shadows
32 :::lie.:::" 32 :::lie.:::"
33 33
34 34
35 # Check OTMEDIA_HOME env var 35 # Check OTMEDIA_HOME env var
36 if [ -z ${OTMEDIA_HOME} ] 36 if [ -z ${OTMEDIA_HOME} ]
37 then 37 then
38 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 38 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
39 export OTMEDIA_HOME=$OTMEDIA_HOME 39 export OTMEDIA_HOME=$OTMEDIA_HOME
40 fi 40 fi
41 41
42 # where is OneScriptToRuleThemAll.sh 42 # where is OneScriptToRuleThemAll.sh
43 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 43 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
44 44
45 RING="" 45 RING=""
46 46
47 #---------------# 47 #---------------#
48 # Parse Options # 48 # Parse Options #
49 #---------------# 49 #---------------#
50 while getopts ":hDv:cf:r" opt 50 while getopts ":hDv:cf:r" opt
51 do 51 do
52 case $opt in 52 case $opt in
53 h) 53 h)
54 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" 54 echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
55 echo -e "\t Options:" 55 echo -e "\t Options:"
56 echo -e "\t\t-h :\tprint this message" 56 echo -e "\t\t-h :\tprint this message"
57 echo -e "\t\t-D :\tDEBUG mode on" 57 echo -e "\t\t-D :\tDEBUG mode on"
58 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 58 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
59 echo -e "\t\t-c :\tCheck process, stop if error detected" 59 echo -e "\t\t-c :\tCheck process, stop if error detected"
60 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" 60 echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
61 echo -e "\t\t-r :\tforce rerun the wav file" 61 echo -e "\t\t-r :\tforce rerun the wav file"
62 exit 1 62 exit 1
63 ;; 63 ;;
64 D) 64 D)
65 RING=$RING" -D" 65 RING=$RING" -D"
66 ;; 66 ;;
67 v) 67 v)
68 RING=$RING" -v $OPTARG" 68 RING=$RING" -v $OPTARG"
69 ;; 69 ;;
70 c) 70 c)
71 RING=$RING" -c" 71 RING=$RING" -c"
72 ;; 72 ;;
73 f) 73 f)
74 RING=$RING" -f $OPTARG" 74 RING=$RING" -f $OPTARG"
75 ;; 75 ;;
76 r) 76 r)
77 RING=$RING" -r" 77 RING=$RING" -r"
78 ;; 78 ;;
79 :) 79 :)
80 echo "Option -$OPTARG requires an argument." >&2 80 echo "Option -$OPTARG requires an argument." >&2
81 exit 1 81 exit 1
82 ;; 82 ;;
83 \?) 83 \?)
84 echo "BAD USAGE : unknow opton -$OPTARG" 84 echo "BAD USAGE : unknow opton -$OPTARG"
85 exit 1 85 exit 1
86 ;; 86 ;;
87 esac 87 esac
88 done 88 done
89 89
90 # Check USAGE by arguments number 90 # Check USAGE by arguments number
91 if [ $(($#-($OPTIND-1))) -ne 2 ] 91 if [ $(($#-($OPTIND-1))) -ne 2 ]
92 then 92 then
93 echo "BAD USAGE : $0 [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" 93 echo "BAD USAGE : $0 [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
94 echo "$0 -h for more info" 94 echo "$0 -h for more info"
95 exit 1 95 exit 1
96 fi 96 fi
97 97
98 shift $((OPTIND-1)) 98 shift $((OPTIND-1))
99 # check audio file - First argument 99 # check audio file - First argument
100 if [ -e $1 ] && [ -s $1 ] 100 if [ -e $1 ] && [ -s $1 ]
101 then 101 then
102 echo -e "$LORD\n" 102 echo -e "$LORD\n"
103 REP_OUT=$2/${1%.*} 103 REP_OUT=$2/${1%.*}
104 ${MAIN_SCRIPT_PATH}/FirstPass.sh ${RING} $1 $2 104 ${MAIN_SCRIPT_PATH}/FirstPass.sh ${RING} $1 $2
105 ${MAIN_SCRIPT_PATH}/SecondPass.sh ${RING} ${REP_OUT} 105 ${MAIN_SCRIPT_PATH}/SecondPass.sh ${RING} ${REP_OUT}
106 ${MAIN_SCRIPT_PATH}/ConfPass.sh ${RING} ${REP_OUT} "res_p2" 106 ${MAIN_SCRIPT_PATH}/ConfPass.sh ${RING} ${REP_OUT} "res_p2"
107 ${MAIN_SCRIPT_PATH}/ExploitConfPass.sh ${RING} ${REP_OUT} 107 ${MAIN_SCRIPT_PATH}/ExploitConfidencePass.sh ${RING} ${REP_OUT}
108 ${MAIN_SCRIPT_PATH}/ThirdPass.sh ${RING} ${REP_OUT} 108 ${MAIN_SCRIPT_PATH}/ThirdPass.sh ${RING} ${REP_OUT}
109 ${MAIN_SCRIPT_PATH}/ConfPass.sh ${RING} ${REP_OUT} "res_p3" 109 ${MAIN_SCRIPT_PATH}/ConfPass.sh ${RING} ${REP_OUT} "res_p3"
110 ${MAIN_SCRIPT_PATH}/RecomposePass.sh ${RING} ${REP_OUT} 110 ${MAIN_SCRIPT_PATH}/RecomposePass.sh ${RING} ${REP_OUT}
111 ${MAIN_SCRIPT_PATH}/ScoringRes.sh ${RING} ${REP_OUT} 111 ${MAIN_SCRIPT_PATH}/ScoringRes.sh ${RING} ${REP_OUT}
112 else 112 else
113 echo "can't find $1 OR file is empty" 113 echo "can't find $1 OR file is empty"
114 exit 1 114 exit 1
115 fi 115 fi
116 116
117 117
118 118
119 119
main_tools/SecondPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : SecondPass.sh # 4 # File : SecondPass.sh #
5 # Brief : Speaker adaptation + ASR second pass # 5 # Brief : Speaker adaptation + ASR second pass #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # (base on Emmanuel Ferreira # 7 # (base on Emmanuel Ferreira #
8 # and Hugo Mauchrétien works) # 8 # and Hugo Mauchrétien works) #
9 # Version : 1.1 # 9 # Version : 1.1 #
10 # Date : 18/06/13 # 10 # Date : 18/06/13 #
11 ##################################################### 11 #####################################################
12 12
13 # Check OTMEDIA_HOME env var 13 # Check OTMEDIA_HOME env var
14 if [ -z ${OTMEDIA_HOME} ] 14 if [ -z ${OTMEDIA_HOME} ]
15 then 15 then
16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) 16 OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
17 export OTMEDIA_HOME=$OTMEDIA_HOME 17 export OTMEDIA_HOME=$OTMEDIA_HOME
18 fi 18 fi
19 19
20 # where is SecondPass.sh 20 # where is SecondPass.sh
21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) 21 MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))
22 22
23 # Scripts Path 23 # Scripts Path
24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts 24 SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts
25 25
26 # Include scripts 26 # Include scripts
27 . $SCRIPT_PATH"/Tools.sh" 27 . $SCRIPT_PATH"/Tools.sh"
28 . $SCRIPT_PATH"/CheckSecondPass.sh" 28 . $SCRIPT_PATH"/CheckSecondPass.sh"
29 29
30 # where is SecondPass.cfg 30 # where is SecondPass.cfg
31 SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg" 31 SECONDPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/SecondPass.cfg"
32 if [ -e $SECONDPASS_CONFIG_FILE ] 32 if [ -e $SECONDPASS_CONFIG_FILE ]
33 then 33 then
34 . $SECONDPASS_CONFIG_FILE 34 . $SECONDPASS_CONFIG_FILE
35 else 35 else
36 echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2 36 echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2
37 exit 1 37 exit 1
38 fi 38 fi
39 39
40 #---------------# 40 #---------------#
41 # Parse Options # 41 # Parse Options #
42 #---------------# 42 #---------------#
43 while getopts ":hDv:crf:" opt 43 while getopts ":hDv:crf:" opt
44 do 44 do
45 case $opt in 45 case $opt in
46 h) 46 h)
47 echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n" 47 echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n"
48 echo -e "\t Options:" 48 echo -e "\t Options:"
49 echo -e "\t\t-h :\tprint this message" 49 echo -e "\t\t-h :\tprint this message"
50 echo -e "\t\t-D :\tDEBUG mode on" 50 echo -e "\t\t-D :\tDEBUG mode on"
51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" 51 echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
52 echo -e "\t\t-c :\t Check process, stop if error detected" 52 echo -e "\t\t-c :\t Check process, stop if error detected"
53 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)" 53 echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
54 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done" 54 echo -e "\t\t-r n :\tforce rerun the show, without deleting works already done"
55 exit 1 55 exit 1
56 ;; 56 ;;
57 D) 57 D)
58 DEBUG=1 58 DEBUG=1
59 ;; 59 ;;
60 v) 60 v)
61 VERBOSE=$OPTARG 61 VERBOSE=$OPTARG
62 ;; 62 ;;
63 c) 63 c)
64 CHECK=1 64 CHECK=1
65 ;; 65 ;;
66 f) 66 f)
67 FORKS="--forks $OPTARG" 67 FORKS="--forks $OPTARG"
68 ;; 68 ;;
69 r) 69 r)
70 RERUN=1 70 RERUN=1
71 ;; 71 ;;
72 :) 72 :)
73 echo "Option -$OPTARG requires an argument." >&2 73 echo "Option -$OPTARG requires an argument." >&2
74 exit 1 74 exit 1
75 ;; 75 ;;
76 \?) 76 \?)
77 echo "BAD USAGE : unknow opton -$OPTARG" 77 echo "BAD USAGE : unknow opton -$OPTARG"
78 exit 1 78 exit 1
79 ;; 79 ;;
80 esac 80 esac
81 done 81 done
82 82
83 # mode debug enable 83 # mode debug enable
84 if [ $DEBUG -eq 1 ] 84 if [ $DEBUG -eq 1 ]
85 then 85 then
86 set -x 86 set -x
87 echo -e "## Mode DEBUG ON ##" 87 echo -e "## Mode DEBUG ON ##"
88 REDIRECTION_OUTPUT=""
89 else
90 REDIRECTION_OUTPUT=" 2> /dev/null"
88 fi 91 fi
89 92
90 # mode verbose enable 93 # mode verbose enable
91 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ; fi 94 if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi
92 95
93 # Check USAGE by arguments number 96 # Check USAGE by arguments number
94 if [ $(($#-($OPTIND-1))) -ne 1 ] 97 if [ $(($#-($OPTIND-1))) -ne 1 ]
95 then 98 then
96 echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>" 99 echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>"
97 echo "$0 -h for more info" 100 echo "$0 -h for more info"
98 exit 1 101 exit 1
99 fi 102 fi
100 103
101 shift $((OPTIND-1)) 104 shift $((OPTIND-1))
102 # check FirstPass directory - First argument 105 # check FirstPass directory - First argument
103 if [ -e $1 ] && [ -d $1 ] 106 if [ -e $1 ] && [ -d $1 ]
104 then 107 then
105 FIRSTPASS_DIR=$(readlink -e $1) 108 FIRSTPASS_DIR=$(readlink -e $1)
106 else 109 else
107 print_error "can't find $1 directory" 110 print_error "can't find $1 directory"
108 exit 1 111 exit 1
109 fi 112 fi
110 113
111 #-------------# 114 #-------------#
112 # GLOBAL VARS # 115 # GLOBAL VARS #
113 #-------------# 116 #-------------#
114 FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg" 117 FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg"
115 if [ -e $FIRSTPASS_CONFIG_FILE ] 118 if [ -e $FIRSTPASS_CONFIG_FILE ]
116 then 119 then
117 WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=") 120 WAV_FILE=$(cat $FIRSTPASS_CONFIG_FILE | grep "WAV_FILE=" | cut -f2 -d"=")
118 BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=") 121 BASENAME=$(cat $FIRSTPASS_CONFIG_FILE | grep "^BASENAME=" | cut -f2 -d"=")
119 OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=") 122 OUTPUT_DIR=$(cat $FIRSTPASS_CONFIG_FILE | grep "OUTPUT_DIR=" | cut -f2 -d"=")
120 OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR 123 OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR
121 PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=") 124 PLP_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_FILE=" | cut -f2 -d"=")
122 PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=") 125 PLP_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "PLP_DIR=" | cut -f2 -d"=")
123 SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=") 126 SEG_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "SEG_FILE=" | cut -f2 -d"=")
124 LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=") 127 LBL_FILE_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "LBL_FILE=" | cut -f2 -d"=")
125 RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=") 128 RES_DIR_P1=$(cat $FIRSTPASS_CONFIG_FILE | grep "RES_DIR=" | cut -f2 -d"=")
126 else 129 else
127 print_error "can't find $FIRSTPASS_CONFIG_FILE file" 130 print_error "can't find $FIRSTPASS_CONFIG_FILE file"
128 exit 1 131 exit 1
129 fi 132 fi
130 LST=$OUTPUT_DIR_BASENAME"/lists" 133 LST=$OUTPUT_DIR_BASENAME"/lists"
131 HMM=$OUTPUT_DIR_BASENAME"/hmm/" 134 HMM=$OUTPUT_DIR_BASENAME"/hmm/"
132 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2" 135 RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2"
133 LOGFILE="$OUTPUT_DIR/info_p2.log" 136 LOGFILE="$OUTPUT_DIR/info_p2.log"
134 ERRORFILE="$OUTPUT_DIR/error_p2.log" 137 ERRORFILE="$OUTPUT_DIR/error_p2.log"
135 138
136 #------------------# 139 #------------------#
137 # Create WORKSPACE # 140 # Create WORKSPACE #
138 #------------------# 141 #------------------#
139 142
140 # Lock directory 143 # Lock directory
141 if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi 144 if [ -e $OUTPUT_DIR_BASENAME/SECONDPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1;fi
142 rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1 145 rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1
143 touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1 146 touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1
144 147
145 rm -r $LST > /dev/null 2>&1 148 rm -r $LST > /dev/null 2>&1
146 mkdir -p $LST 149 mkdir -p $LST
147 print_info "Make directory $LST" 1 150 print_info "Make directory $LST" 1
148 if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi 151 if [ $RERUN -eq 0 ]; then rm -r $HMM > /dev/null 2>&1; fi
149 mkdir -p $HMM 152 mkdir -p $HMM
150 print_info "Make directory $HMM" 1 153 print_info "Make directory $HMM" 1
151 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi 154 if [ $RERUN -eq 0 ]; then rm -r $RES_DIR > /dev/null 2>&1; fi
152 mkdir -p $RES_DIR 155 mkdir -p $RES_DIR > /dev/null 2>&1
153 print_info "Make directory $RES_DIR" 1 156 print_info "Make directory $RES_DIR" 1
154 157
155 #-------------------# 158 #-------------------#
156 # Check Pass # 159 # Check Pass #
157 #-------------------# 160 #-------------------#
158 print_info "Check Pass 2 directory" 1 161 print_info "Check Pass 2 directory" 1
159 for treil in $(ls $RES_DIR/ | grep treil) 162 for treil in $(ls $RES_DIR/ | grep treil)
160 do 163 do
161 if [ ! -s $RES_DIR/$treil ] 164 if [ ! -s $RES_DIR/$treil ]
162 then 165 then
163 bn = $(basename $treil ".treil") 166 bn = $(basename $treil ".treil")
164 rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null 167 rm $RES_DIR/$treil $RES_DIR/$bn.seg $RES_DIR/$bn.res $RES_DIR/$bn.pho 2> /dev/null
165 print_info "$RES_DIR/$bn.* files deleted.." 2 168 print_info "$RES_DIR/$bn.* files deleted.." 2
166 fi 169 fi
167 done 170 done
168 171
169 # Check if more then 89% of treil are done 172 # Check if more then 89% of treil are done
170 nbres_p1=$(ls $RES_DIR_P1/*.res | wc -l) 173 nbres_p1=$(ls $RES_DIR_P1/*.res | wc -l)
171 nbtreil_p2=$(ls $RES_DIR/*.treil | wc -l) 174 nbtreil_p2=$(ls $RES_DIR/*.treil | wc -l)
172 if [ $nbres_p1 -gt 0 ] 175 if [ $nbres_p1 -gt 0 ]
173 then 176 then
174 pourcentage=$((($nbtreil_p2*100)/$nbres_p1)) 177 pourcentage=$((($nbtreil_p2*100)/$nbres_p1))
175 if [ $pourcentage -gt 89 ] 178 if [ $pourcentage -gt 89 ]
176 then 179 then
177 echo "Lattice already done, skipping $BASENAME" 180 echo "Lattice already done, skipping $BASENAME"
178 exit 0 181 exit 0
179 fi 182 fi
180 fi 183 fi
181 184
#--------------------#
# Save configuration #
#--------------------#
# Start from a copy of the second pass configuration file, then append one
# NAME=value line for each variable resolved during this run.
cp $SECONDPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/SecondPass.cfg
for cfg_key in WAV_FILE BASENAME FIRSTPASS_DIR PLP_DIR_P1 OUTPUT_DIR OUTPUT_DIR_BASENAME LST HMM RES_DIR
do
    # ${!cfg_key} expands to the value of the variable named by cfg_key
    echo "$cfg_key=${!cfg_key}"
done >> $OUTPUT_DIR_BASENAME/SecondPass.cfg
print_info "save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1
196 199
197 200
#--------------------------------------------------#
# Speaker Adaptation (AM) + Second pass (DECODING) #
#--------------------------------------------------#
print_info "Launch Second Pass" 2

# run_tool( cmd [args...] )
# cmd : external program followed by its arguments
# Brief : run $cmd and discard its stderr when $REDIRECTION_OUTPUT is not empty
#         (assumes REDIRECTION_OUTPUT is "" in DEBUG mode and a stderr
#         redirection text otherwise - TODO confirm against its definition).
#         A redirection stored in a variable is not applied by the shell after
#         expansion: "2>" and "/dev/null" would be handed to the tool as plain
#         arguments, so the redirection is done here instead.
# Return : exit status of $cmd
function run_tool()
{
    if [ -n "$REDIRECTION_OUTPUT" ]
    then
        "$@" 2> /dev/null
    else
        "$@"
    fi
}

# for all speaker (4th space separated field of the first pass label file)
for speaker in $(cut -f4 -d" " "$LBL_FILE_P1" | sort -u)
do
    ## get seg file from P1 containing the speaker
    find $RES_DIR_P1 -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > $LST/$speaker.lst
    print_info "file for $speaker in $LST/$speaker.lst" 3
    if [ ! -s $LST/$speaker.lst ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi

    # for all AM
    for (( i=0; $i<${#MTAG[@]} ; i++ ))
    do
        if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then
            type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst")
            ## if is the good AM for the speaker
            if [ -n "$type" ]
            then
                ## HMM adaptation
                ## always on a fresh run; on a rerun only when the model is missing or empty
                ## ("! -s" replaces the former "-z <path>" test, which was always false)
                if [ $RERUN -eq 0 ] || [ ! -s "$HMM/$speaker.hmm" ]
                then
                    print_info "$SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3
                    run_tool $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/
                fi

                ## log a missing or empty adapted model (processing goes on)
                if [ $CHECK -eq 1 ] && [ ! -s "$HMM/$speaker.hmm" ]
                then
                    echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.hmm" >> $ERRORFILE
                    #exit 1
                fi

                ## cp map files
                cp $SPEERAL_AM/${MODS[$i]}.map $HMM/$speaker.map

                ## class clustering
                ## needs a non-empty adapted model; skipped on a rerun when a non-empty .cls exists
                if [ -s "$HMM/$speaker.hmm" ] && ( [ $RERUN -eq 0 ] || [ ! -s "$HMM/$speaker.cls" ] )
                then
                    print_info "$SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3
                    run_tool $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls
                fi
                if [ $CHECK -eq 1 ] && [ ! -s "$HMM/$speaker.cls" ]
                then
                    echo "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $HMM/$speaker.cls" >> $ERRORFILE
                    #exit 1
                fi

                ## Speeral decoding
                ## $FORKS stays unquoted on purpose: it is either empty or "--forks n"
                if [ -s "$HMM/$speaker.hmm" ] && [ -s "$HMM/$speaker.cls" ]
                then
                    print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3
                    run_tool $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock
                else
                    print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
                    run_tool $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock
                fi

                if [ $CHECK -eq 1 ]
                then
                    check_second_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR"
                    if [ $? -eq 1 ]
                    then
                        ## log the difference between expected and produced segments
                        echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE
                        ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
                        diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE
                        echo -e "] " >> $ERRORFILE
                        rm ${OUTPUT_DIR_BASENAME}/.tmp
                        #exit 1
                    fi
                fi
                ## first matching AM wins
                break
            fi
        fi
    done
    #rm "$HMM/$speaker.*" > /dev/null 2>&1
    #rm "$LST/$speaker.lst" > /dev/null 2>&1
done

print_info "<= End $BASENAME P2 | $(date +'%d/%m/%y %H:%M:%S')" 1

## Check missing seg and log it
ls $RES_DIR/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
echo -e "$BASENAME P2 END\n[" >> $LOGFILE
diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE
echo -e "] $BASENAME" >> $LOGFILE
rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1

#---------------#
# Convert res   #
#---------------#

# .res => .ctm
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.ctm
# .res => .trs (needs a temporary trs config file, removed afterwards)
echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1
# .res => .txt
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.2pass.txt


# unlock directory
mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock"
304 307
305 308
main_tools/ThirdPass.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 ##################################################### 3 #####################################################
4 # File : ThirdPass.sh # 4 # File : ThirdPass.sh #
5 # Brief : ASR third pass using trigg files # 5 # Brief : ASR third pass using trigg files #
6 # Author : Jean-François Rey # 6 # Author : Jean-François Rey #
7 # Version : 1.0 # 7 # Version : 1.0 #
8 # Date : 18/07/13 # 8 # Date : 18/07/13 #
9 ##################################################### 9 #####################################################
10 10
# Check OTMEDIA_HOME env var: when empty, use the parent of the directory holding this script
if [ -z ${OTMEDIA_HOME} ]; then
    export OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0)))
fi

# where is ThirdPass.sh
MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0))

# Scripts Path
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include scripts
. $SCRIPT_PATH/Tools.sh
. $SCRIPT_PATH/CheckThirdPass.sh

# where is ThirdPass.cfg: it is sourced when present, otherwise the script stops
THIRDPASS_CONFIG_FILE=$OTMEDIA_HOME/cfg/ThirdPass.cfg
if [ -e $THIRDPASS_CONFIG_FILE ]; then
    . $THIRDPASS_CONFIG_FILE
else
    echo "ERROR : Can't find configuration file $THIRDPASS_CONFIG_FILE" >&2
    exit 1
fi
37 37
#---------------#
# Parse Options #
#---------------#
# Flags : -h help, -D debug, -v <level> verbosity, -c check, -f <n> forks, -r rerun.
# The leading ':' of the getopts spec selects silent error reporting: a missing
# argument reaches the ':' case and an unknown flag reaches the '?' case, both
# with the offending letter in $OPTARG.
while getopts ":hDv:crf:" opt
do
    case $opt in
        h)
            echo -e "$0 [OPTIONS] <PASS_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\t Check process, stop if error detected"
            echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
            # -r takes no argument (see the getopts spec above)
            echo -e "\t\t-r :\tforce rerun the show, without deleting works already done"
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            # unknown flags are reported but do not stop the script
            echo "BAD USAGE : unknown option -$OPTARG" >&2
            #exit 1
            ;;
    esac
done
80 80
# mode debug enable: trace every command and leave REDIRECTION_OUTPUT empty;
# in any other case REDIRECTION_OUTPUT holds the text " 2> /dev/null"
REDIRECTION_OUTPUT=" 2> /dev/null"
if [ $DEBUG -eq 1 ]; then
    set -x
    echo -e "## Mode DEBUG ON ##"
    REDIRECTION_OUTPUT=""
fi

# mode verbose enable: announce the selected level
if [ $VERBOSE -gt 0 ]
then
    echo -e "## Verbose level : $VERBOSE ##"
fi
90 93
# Check USAGE by arguments number: exactly one operand must remain after the options
operand_count=$(($#-($OPTIND-1)))
if [ $operand_count -ne 1 ]; then
    echo "BAD USAGE : ThirdPass.sh [OPTIONS] <PASS_DIR>"
    echo "$0 -h for more info"
    exit 1
fi

# drop the parsed options so that $1 is the pass directory
shift $((OPTIND-1))
# check Pass directory - First argument
if ! { [ -e $1 ] && [ -d $1 ]; }; then
    print_error "can't find $1 directory"
    exit 1
fi
PASS_DIR=$(readlink -e $1)
108 111
#-------------#
# GLOBAL VARS #
#-------------#

# cfg_value( pattern, file )
# pattern : grep pattern selecting the "NAME=" line(s)
# file : configuration file to read
# Brief : print the second '='-separated field of the lines of $file matching $pattern
function cfg_value()
{
    grep "$1" "$2" | cut -f2 -d"="
}

# Trigger and lexicon locations: read from ExploitConfPass.cfg when it exists,
# otherwise default paths under the pass directory are used
EXPLOITCONFPASS_CONFIG_FILE="$PASS_DIR/ExploitConfPass.cfg"
if [ -e $EXPLOITCONFPASS_CONFIG_FILE ]; then
    TRIGGER_SPEERAL=$(cfg_value "TRIGGER_SPEERAL=" "$EXPLOITCONFPASS_CONFIG_FILE")
    LEX_SPEERAL=$(cfg_value "LEX_SPEERAL=" "$EXPLOITCONFPASS_CONFIG_FILE")
    LEX_BINODE_SPEERAL=$(cfg_value "LEX_BINODE_SPEERAL=" "$EXPLOITCONFPASS_CONFIG_FILE")
    LST=""
    PLP_DIR_P1=""
    HMM=""
else
    print_error "can't find $EXPLOITCONFPASS_CONFIG_FILE file"
    #exit 1
    TRIGGER_SPEERAL=$PASS_DIR/trigg/speeral/
    LEX_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext
    LEX_BINODE_SPEERAL=$PASS_DIR/LEX/speeral/LEXIQUE_V6_ext.bin
fi

# Lists, adapted models and PLP locations: read from the SecondPass.cfg saved in
# the pass directory when it exists, otherwise default paths are used
SECONDPASS_CONFIG_FILE="$PASS_DIR/SecondPass.cfg"
if [ -e $SECONDPASS_CONFIG_FILE ]; then
    LST=$(cfg_value "^LST=" "$SECONDPASS_CONFIG_FILE")
    HMM=$(cfg_value "^HMM=" "$SECONDPASS_CONFIG_FILE")
    PLP_DIR_P1=$(cfg_value "^PLP_DIR_P1=" "$SECONDPASS_CONFIG_FILE")
else
    print_error "can't find $SECONDPASS_CONFIG_FILE file"
    #exit 1
    LST=$PASS_DIR/lists
    HMM=$PASS_DIR/hmm
    PLP_DIR_P1=$PASS_DIR/PLP
fi

# Names and locations derived from the pass directory;
# both log files are placed in its parent directory
BASENAME=$(basename $PASS_DIR)
OUTPUT_DIR_BASENAME=$PASS_DIR
RES_DIR="$PASS_DIR/res_p3"
LOGFILE="$(dirname $PASS_DIR)/info_p3.log"
ERRORFILE="$(dirname $PASS_DIR)/error_p3.log"
147 150
#------------------#
# Create WORKSPACE #
#------------------#

# Lock directory: stop when a lock file is already present, unless rerun (-r) was asked
if [ -e "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" ] && [ $RERUN -eq 0 ]; then exit 1;fi
rm "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" > /dev/null 2>&1

# a fresh run starts from an empty result directory, a rerun keeps what is there
if [ $RERUN -eq 0 ]; then rm -r "$RES_DIR" > /dev/null 2>&1; fi
mkdir -p "$RES_DIR"
print_info "Make directory $RES_DIR" 1

#--------------------#
# Save configuration #
#--------------------#
# copy of the third pass configuration file plus the result directory of this run
cp $THIRDPASS_CONFIG_FILE "$OUTPUT_DIR_BASENAME/ThirdPass.cfg"
echo "RES_DIR=$RES_DIR" >> "$OUTPUT_DIR_BASENAME/ThirdPass.cfg"
# the message names the file actually written (it used to say SecondPass.cfg)
print_info "save config in $OUTPUT_DIR_BASENAME/ThirdPass.cfg" 1
167 170
#--------------------------------------------------#
# Third Pass using trigger file (DECODING)         #
#--------------------------------------------------#
print_info "Launch Third Pass" 1

# run_tool( cmd [args...] )
# cmd : external program followed by its arguments
# Brief : run $cmd and discard its stderr when $REDIRECTION_OUTPUT is not empty,
#         i.e. when DEBUG mode is off.
#         A redirection stored in a variable is not applied by the shell after
#         expansion: "2>" and "/dev/null" would be handed to the tool as plain
#         arguments, so the redirection is done here instead.
# Return : exit status of $cmd
function run_tool()
{
    if [ -n "$REDIRECTION_OUTPUT" ]
    then
        "$@" 2> /dev/null
    else
        "$@"
    fi
}

## Generate speeral config file adding trigger rep
## NOTE(review): the lexicon paths are built from $OUTPUT_DIR_BASENAME and not from
## $LEX_SPEERAL / $LEX_BINODE_SPEERAL - confirm this is intended
sed -e "s|<nom>[^<]*</nom>|<nom>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext</nom>|g" \
    -e "s|<binode>[^<]*</binode>|<binode>$OUTPUT_DIR_BASENAME/LEX/speeral/LEXIQUE_V6_ext.bin</binode>|g" \
    -e "s|<trigger><dir>[^<]*</dir></trigger>|<trigger><dir>$TRIGGER_SPEERAL</dir></trigger>|g" \
    "$SPEERAL_CFG_PATH/$SPEERAL_CFG_FILE" > "$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml"
SPEERAL_THIRD_CFG=$OUTPUT_DIR_BASENAME/SpeeralThirdPass.xml

# for all speaker (one .lst file per speaker in $LST)
for lspeaker in $LST/*.lst
do
    # an unmatched glob stays literal: skip it
    [ -e "$lspeaker" ] || continue
    speaker=$(basename "$lspeaker" ".lst")
    # for all AM
    for (( i=0; $i<${#MTAG[@]} ; i++ ))
    do
        if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ]; then
            type=$(grep -e "${speaker}$" "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst")
            ## if is the good AM for the speaker
            if [ -n "$type" ]
            then
                ## Speeral decoding
                ## $FORKS stays unquoted on purpose: it is either empty or "--forks n"
                if [ -s "$HMM/$speaker.hmm" ] && [ -s "$HMM/$speaker.cls" ]
                then
                    print_info "$SPEERAL_BIN $LST/$speaker.lst $RES_DIR $SPEERAL_THIRD_CFG -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock $REDIRECTION_OUTPUT" 3
                    run_tool $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock
                else
                    print_warn "$HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
                    run_tool $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_THIRD_CFG} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock
                fi

                if [ $CHECK -eq 1 ]
                then
                    check_third_pass_output_speeral "$LST/$speaker.lst" "$RES_DIR"
                    if [ $? -eq 1 ]
                    then
                        ## log the difference between expected and produced segments
                        echo -e "ERROR : Speeral $LST/$speaker.lst\n[" >> $ERRORFILE
                        ls $RES_DIR/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
                        diff $LST/$speaker.lst ${OUTPUT_DIR_BASENAME}/.tmp >> $ERRORFILE
                        echo -e "] " >> $ERRORFILE
                        rm ${OUTPUT_DIR_BASENAME}/.tmp
                        #exit 1
                    fi
                fi
                ## first matching AM wins
                break
            fi
        fi
    done
done

## Check missing seg and log it
ls $RES_DIR/*.res | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.res//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp
echo -e "$BASENAME P3 END\n[" >> $LOGFILE
diff ${OUTPUT_DIR_BASENAME}/plp.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> $LOGFILE
echo -e "] $BASENAME" >> $LOGFILE
rm ${OUTPUT_DIR_BASENAME}/.tmp > /dev/null 2>&1

#---------------#
# Convert res   #
#---------------#

# .res => .ctm
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.ctm
# .res => .trs (needs a temporary trs config file, removed afterwards)
echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.trs --trs_config ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg
rm ${OUTPUT_DIR_BASENAME}/${BASENAME}.trs_cfg > /dev/null 2>&1
# .res => .txt
run_tool $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out ${OUTPUT_DIR_BASENAME}/${BASENAME}.3pass.txt


print_info "<= End $BASENAME P3 | $(date +'%d/%m/%y %H:%M:%S')" 1

# unlock directory
mv "$OUTPUT_DIR_BASENAME/THIRDPASS.lock" "$OUTPUT_DIR_BASENAME/THIRDPASS.unlock"
245 248
246 249
tools/scripts/ExtractAudioFromTV.sh
File was created 1 #!/bin/bash
2
3 # File : ExtractAudioFromTV.sh
4 # Brief : extract audio file and subtitle from TV corpus
5 # version 1.0
6 #
7
8 ### ExtractAudioFromCorpus.sh INA-TV2 MYTV
9
# Usage check: exactly two arguments are expected (no option is parsed by this script)
if [ $# -ne 2 ]
then
    echo "BAD USAGE : $0 <INPUT_DIRECTORY> <OUTPUT_DIRECTORY>"
    exit 1
fi

# Input corpus directory, resolved to an absolute path
if [ -d "$1" ]
then
    IN=$(readlink -e "$1")
else
    echo "ERROR : Can't read input $1"
    exit 1
fi

# Output directory (must already exist), resolved to an absolute path
if [ -d "$2" ]
then
    OUT=$(readlink -e "$2")
else
    echo "ERROR : Can't read OUTPUT $2"
    exit 1
fi


# Walk <IN>/<channel dir>/<sub dir>/*.MP4 with absolute paths, so that the
# result does not depend on the directory the script is started from.
for channel_dir in "$IN"/*/
do
    [ -d "$channel_dir" ] || continue
    f=$(basename "$channel_dir")
    # the output channel name is the input directory name without "NAS_"
    channel=${f/NAS_/}
    # -p : do not fail when the channel directory is already there (rerun)
    mkdir -p "$OUT/$channel"
    for sub_dir in "$channel_dir"*/
    do
        [ -d "$sub_dir" ] || continue
        d=$(basename "$sub_dir")
        for file in "$sub_dir"*.MP4
        do
            # an unmatched glob stays literal: skip it
            [ -e "$file" ] || continue
            stem=$(basename "$file" .MP4)
            # audio only (-vn), wav container, 1 channel, 16000 Hz
            avconv -i "$file" -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 "$OUT/$channel/${d}_${stem}.wav"
            # copy the subtitle file stored next to the video, when there is one
            if [ -e "$sub_dir$stem.SRT" ]
            then
                cp "$sub_dir$stem.SRT" "$OUT/$channel/${d}_${stem}.SRT"
            fi
        done
    done
done
50
51