Commit eaad729ec536752c7df3d1ca5f909dbb590cda8f

Authored by Jean-François Rey
1 parent 6a0f2780d0
Exists in master

update

Showing 1 changed file with 2 additions and 0 deletions Inline Diff

main_tools/FirstPass.sh
#!/bin/bash

#####################################################
# File    : FirstPass.sh                            #
# Brief   : ASR first pass and speaker diarization  #
# Author  : Jean-François Rey                       #
#           (based on the work of Emmanuel Ferreira #
#           and Hugo Mauchrétien)                   #
# Version : 1.1                                     #
# Date    : 18/06/13                                #
#####################################################
#
# Usage: FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>
#
# Steps, in order:
#   1. source Tools.sh, CheckFirstPass.sh and cfg/FirstPass.cfg
#   2. parse options, validate the two positional arguments
#   3. create <OUTPUT_DIRECTORY>/<basename>/ and lock it
#   4. save the effective configuration next to the results
#   5. convert the audio with avconv when it is not 16 kHz / mono / s16
#   6. WAV -> PLP, speaker diarization (SEG), LBL extraction, PLP cutting
#   7. decode every per-model PLP list with $SPEERAL_BIN
#   8. convert the .res output to CTM / TRS / TXT and unlock
#
# Names such as BIN_PATH, ARCH, SPEERAL_TOOLS, SPEERAL_BIN, SPEERAL_CFG,
# SPEERAL_AM, MTAG, MODS, RULES, AUTHOR, INFO, WARNING and the print_* /
# check_first_pass_* helpers are not defined here; presumably they come from
# the sourced files -- verify against Tools.sh / CheckFirstPass.sh /
# FirstPass.cfg.
#
# DEBUG, VERBOSE, CHECK and RERUN are read with a default of 0 so that the
# numeric tests below stay valid when neither the config nor an option sets
# them.

echo "### FirstPass.sh ###"

# Check OTMEDIA_HOME env var; fall back to the grand-parent of this script.
if [ -z "${OTMEDIA_HOME}" ]; then
  OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
  export OTMEDIA_HOME
fi

# Directory holding FirstPass.sh
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Helper scripts directory
SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts

# Include helper scripts
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckFirstPass.sh"

# Load FirstPass.cfg or give up
FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg"
if [ -e "$FIRSTPASS_CONFIG_FILE" ]; then
  . "$FIRSTPASS_CONFIG_FILE"
else
  echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2
  echo "exit" >&2
  exit 1
fi

#---------------#
# Parse Options #
#---------------#
while getopts ":hDv:cf:r" opt; do
  case "$opt" in
    h)
      echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n"
      echo -e "\t Options:"
      echo -e "\t\t-h :\tprint this message"
      echo -e "\t\t-D :\tDEBUG mode on"
      echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
      echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected"
      echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)"
      echo -e "\t\t-r :\tforce rerun the wav file"
      exit 1
      ;;
    D)
      DEBUG=1
      ;;
    v)
      VERBOSE=$OPTARG
      ;;
    c)
      CHECK=1
      ;;
    f)
      # Kept as a plain string; it is word-split on purpose where it is used.
      FORKS="--forks $OPTARG"
      ;;
    r)
      RERUN=1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    \?)
      echo "BAD USAGE : unknow opton -$OPTARG"
      exit 1
      ;;
  esac
done

# Debug mode: trace every command
if [ "${DEBUG:-0}" -eq 1 ]; then
  set -x
  echo -e "## Mode DEBUG ON ##"
fi

# Verbose mode
if [ "${VERBOSE:-0}" -gt 0 ]; then
  echo -e "## Verbose level : $VERBOSE ##"
fi

# Exactly two positional arguments must remain after the options
if [ $(($# - (OPTIND - 1))) -ne 2 ]; then
  echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>"
  echo "$0 -h for more info"
  exit 1
fi

shift $((OPTIND - 1))

# Check audio file - first argument (-s: exists and is not empty)
if [ -s "$1" ]; then
  # absolute path to wav file
  WAV_FILE=$(readlink -e "$1")
  # wav filename
  FILENAME=$(basename "$WAV_FILE")
  # wav filename without extension
  BASENAME=${FILENAME%.*}

  print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1
  print_info "[${BASENAME}] $WAV_FILE OK" 2
else
  print_error " Can't find $1 OR file is empty"
  exit 1
fi

# Check output directory - second argument
if [ ! -e "$2" ]; then
  mkdir -p "$2"
  print_info "[${BASENAME}] Make directory $2" 2
fi

#-------------#
# GLOBAL VARS #
#-------------#
OUTPUT_DIR=$(readlink -e "$2")                  # Output directory absolute path
# Without this guard every path below would be rooted at "/" and the
# rm -r calls further down would act on the wrong tree.
if [ -z "$OUTPUT_DIR" ]; then
  print_error "[${BASENAME}] Can't resolve output directory $2"
  exit 1
fi
OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/"    # New OUTPUT_DIR with BASENAME
PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp"   # Global PLP file
PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/"             # Segmented PLP files directory
PLP_LIST="$OUTPUT_DIR_BASENAME/plp.lst"         # List of plp files
SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg"   # Global SEG file
LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl"   # Global LBL file
RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1"
LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log"
ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log"
SAVED_CONFIG="$OUTPUT_DIR_BASENAME/FirstPass.cfg"

#------------------#
# Create WORKSPACE #
#------------------#
if [ ! -e "$OUTPUT_DIR_BASENAME" ]; then
  mkdir -p "$OUTPUT_DIR_BASENAME"
  print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2
fi

# Lock directory: an existing lock stops the run unless a rerun was requested
if [ -e "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" ] && [ "${RERUN:-0}" -eq 0 ]; then
  exit 1
fi
rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1

rm -r "$PLP_DIR" > /dev/null 2>&1
mkdir -p "$PLP_DIR"
print_info "[${BASENAME}] Make directory $PLP_DIR" 2
if [ "${RERUN:-0}" -eq 0 ]; then
  rm -r "$RES_DIR" > /dev/null 2>&1
else
  # On rerun keep previous results, only drop stale per-segment locks
  rm "$RES_DIR"/*.lock > /dev/null 2>&1
fi
mkdir -p "$RES_DIR" > /dev/null 2>&1
print_info "[${BASENAME}] Make directory $RES_DIR" 2
rm "$LOGFILE" "$ERRORFILE" > /dev/null 2>&1

#--------------------#
# Save configuration #
#--------------------#
cp "$FIRSTPASS_CONFIG_FILE" "$SAVED_CONFIG"
{
  echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH"
  echo "WAV_FILE=$WAV_FILE"
  echo "BASENAME=$BASENAME"
  echo "OUTPUT_DIR=$OUTPUT_DIR"
  echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME"
  echo "PLP_FILE=$PLP_FILE"
  echo "PLP_DIR=$PLP_DIR"
  echo "PLP_LIST=$PLP_LIST"
  echo "SEG_FILE=$SEG_FILE"
  echo "LBL_FILE=$LBL_FILE"
  echo "RES_DIR=$RES_DIR"
} >> "$SAVED_CONFIG"
print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1

#-------------------------#
# Check Audio File Format #
#-------------------------#
# Probe the file once and look for each required property in the report.
error=0
probe=$(avconv -i "$WAV_FILE" 2>&1)
for needle in "16000 Hz" "1 channels" "s16"; do
  grep -q -- "$needle" <<< "$probe" || error=1
done

if [ "$error" -eq 1 ]; then
  print_message "$WARNING" 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert"
  print_message "$INFO" 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav"
  avconv -i "$WAV_FILE" -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 "$OUTPUT_DIR_BASENAME/$BASENAME.wav"
  WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav
  FILENAME=$BASENAME.wav
  print_message "$INFO" 1 "[${BASENAME}] new wav file : $WAV_FILE"
fi

#---------------#
# Get SRT file  #
#---------------#
# NOTE(review): after a conversion WAV_FILE lives in the output directory, so
# the SRT is looked up there rather than next to the original audio -- confirm
# this is intended.
WAV_DIR=$(dirname "$WAV_FILE")
if [ -s "$WAV_DIR/$BASENAME.SRT" ]; then
  cp "$WAV_DIR/$BASENAME.SRT" "$OUTPUT_DIR_BASENAME/$BASENAME.SRT"
  print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3
fi

#------------#
# WAV -> PLP #
#------------#
print_info "[${BASENAME}] convert WAV -> PLP" 1
echo "$FILENAME" > "$OUTPUT_DIR_BASENAME/list.tmp"
# The logged command now names the binary that is really executed.
print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $WAV_DIR --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3

"$BIN_PATH/lia_plp_mt$ARCH" --lst "$OUTPUT_DIR_BASENAME/list.tmp" --input_dir "$WAV_DIR" --output_dir "$OUTPUT_DIR_BASENAME" --input_type WAV --output_type HTK --nb_coef 12 --cms

if [ "${CHECK:-0}" -eq 1 ]; then
  check_first_pass_plp "$PLP_FILE"
  if [ $? -eq 1 ]; then
    print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE"
    print_error "[${BASENAME}] -> exit, Check $ERRORFILE file"
    exit 1
  fi
fi

rm "$OUTPUT_DIR_BASENAME/list.tmp" 2> /dev/null

#------------------------------#
# S/NS + SPEAKERS SEGMENTATION #
#------------------------------#
print_info "[${BASENAME}] Launch speakers diarization" 1
# Compute the SEG file
print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3
#java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME
java -Xmx4096m -jar "$BIN_PATH/LIUM_SpkDiarization-4.2.jar" --fInputMask="${WAV_FILE}" --sOutputMask="${SEG_FILE}" "$BASENAME" #–doCEClustering

# -s is false for a missing file as well as for an empty one
if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$SEG_FILE" ]; then
  print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE"
  print_error "[${BASENAME}] Check $ERRORFILE file"
  exit 1
fi

# Create LBL file: keep fields 3,4,5,8 of every non-";;" SEG line, sorted
# numerically on the first kept field.
print_info "Extract LBL file from SEG file" 1

grep -v ";;" "$SEG_FILE" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > "$LBL_FILE"

if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$LBL_FILE" ]; then
  print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE"
  print_error "[${BASENAME}] Check $ERRORFILE file"
  exit 1
fi

#----------------------------------------------------#
# Cut global PLP file depending to LBL segmentations #
#----------------------------------------------------#
print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1
# The logged command now names the binary that is really executed.
print_info "[${BASENAME}] $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3

"$SPEERAL_TOOLS/gcep" "$PLP_FILE" "$LBL_FILE" 500 "$PLP_DIR" -FSEG

if [ "${CHECK:-0}" -eq 1 ]; then
  check_first_pass_plps_lbl "$PLP_DIR" "$LBL_FILE"
  if [ $? -eq 1 ]; then
    print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP_DIR wrong number of .plp files"
    print_error "[${BASENAME}] Check $ERRORFILE file"
    exit 1
  fi
fi

# Rename the plp files: every "_" becomes "#", then the first "#" goes back
# to "_". Done in a subshell so a failed cd can never make rename run in the
# caller's directory.
(
  cd "$PLP_DIR" &&
    rename -f 's/_/#/g' *plp &&
    rename -f 's/#/_/' *plp
)

#---------------------------------------------#
# PLP files list depending to acoustic models #
#---------------------------------------------#
print_info "[${BASENAME}] Create PLP list depending of the model" 1
# List every segment name (file name without .plp)
find "$PLP_DIR" -type f -exec basename "{}" .plp \; | sort > "$PLP_LIST"

rm "$OUTPUT_DIR_BASENAME"/plp_*.lst > /dev/null 2>&1
for (( i = 0; i < ${#MTAG[@]}; i++ )); do
  # One list per model, only when at least one segment carries its tag
  if grep -q -e "${MTAG[$i]}" "$PLP_LIST"; then
    print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3
    grep -e "${MTAG[$i]}" "$PLP_LIST" | sort > "$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
  fi
done

#-----------------------#
# First Pass (DECODING) #
#-----------------------#
#
# For every acoustic model run the decoder on its list.
# With -c, the output is checked after each run.
#
# NOTE(review): redo starts at 1 and is decremented on a failed check, so the
# loop ends right after todo.lst is computed and the decoder is not launched a
# second time -- confirm whether a real retry was intended.
#
print_info "[${BASENAME}] Launch decoding" 1
for (( i = 0; i < ${#MTAG[@]}; i++ )); do
  model_list="$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
  [ -e "$model_list" ] || continue

  redo=1 # remaining tries when not all segments are decoded
  todo=$model_list
  while [ "$redo" -gt 0 ]; do
    rm "$RES_DIR"/*.lock > /dev/null 2>&1
    print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
    # Run speeral. SPEERAL_BIN, SPEERAL_CFG and FORKS stay unquoted because
    # they may carry several words.
    # shellcheck disable=SC2086
    $SPEERAL_BIN "${todo}" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR" -m "$SPEERAL_AM/${MODS[$i]}.hmm" -c "$SPEERAL_AM/${MODS[$i]}.cls" $FORKS --lock

    if [ "${CHECK:-0}" -ne 1 ]; then
      # No checking requested: a single run is enough
      redo=-1
      continue
    fi

    check_first_pass_output_speeral "$model_list" "$RES_DIR"
    if [ $? -eq 1 ]; then
      redo=$((redo - 1))
      print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
      print_log_file "$LOGFILE" "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst"
      # New plp list: segments of this model with a .seg result, compared
      # with the segments that had to be decoded.
      ls "$RES_DIR"/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
      diff "$model_list" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" > "${OUTPUT_DIR_BASENAME}/todo.lst"
      rm "${OUTPUT_DIR_BASENAME}/.tmp"
      # Log the segments still to do
      print_log_file "$LOGFILE" "Segs not done ["
      cat "${OUTPUT_DIR_BASENAME}/todo.lst" >> "$LOGFILE"
      todo=${OUTPUT_DIR_BASENAME}/todo.lst
      print_log_file "$LOGFILE" "] [$(date +'%d/%m/%y %H:%M:%S')]"
      print_warn "[${BASENAME}] Try $redo" 3
    else
      # Check passed: leave the loop
      redo=-1
    fi
  done
  rm "${OUTPUT_DIR_BASENAME}/todo.lst" > /dev/null 2>&1
  #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst
  rm "$RES_DIR"/*.lock > /dev/null 2>&1
done

## Check missing seg and log it
if [ "${CHECK:-0}" -eq 1 ]; then
  ls "$RES_DIR"/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
  total_segs=$(cat "${PLP_LIST}" 2> /dev/null | wc -l)
  # Avoid a division by zero below when the list is empty or missing
  if [ "$total_segs" -eq 0 ]; then total_segs=1; fi
  done_segs=$(wc -l < "${OUTPUT_DIR_BASENAME}/.tmp")
  notdone=$((total_segs - done_segs))
  pourcentage=$(((notdone * 100) / total_segs))

  if [ "$notdone" -ne 0 ]; then
    print_error "[${BASENAME}] Check ${ERRORFILE}"
    print_log_file "${ERRORFILE}" "ERROR : Segs not done ["
    diff "${PLP_LIST}" "${OUTPUT_DIR_BASENAME}/.tmp" | grep -e "^< " | sed -e "s/< //" >> "${ERRORFILE}"
    print_log_file "${ERRORFILE}" "] $pourcentage% $BASENAME"
  else
    print_log_file "$LOGFILE" "P1 OK ${BASENAME} | $(date +'%d/%m/%y %H:%M:%S')"
  fi
  rm "${OUTPUT_DIR_BASENAME}/.tmp"
fi

#---------------#
# Convert res   #
#---------------#
print_info "[${BASENAME}] Convert .res into .ctm" 1
# .res => .ctm
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format CTM --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm"
print_info "[${BASENAME}] Convert .res into .trs" 1
# .res => .trs (needs a small temporary description file)
echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg"
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TRS --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs" --trs_config "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg"
rm "$OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg" 2> /dev/null
print_info "[${BASENAME}] Convert .res into .txt" 1
# .res => .txt
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TXT --ignore "$RULES/asupp" --out "$OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt"

print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1
# Unlock directory
mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock"