Commit 6a0f2780d092c628b7c463ace02dfeaa7aa75f12
1 parent
7f1c831f7a
Exists in
master
:D bugfix
Showing 1 changed file with 2 additions and 0 deletions Inline Diff
main_tools/FirstPass.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | 2 | ||
3 | ##################################################### | 3 | ##################################################### |
4 | # File : FirstPass.sh # | 4 | # File : FirstPass.sh # |
5 | # Brief : ASR first pass and speaker diarization # | 5 | # Brief : ASR first pass and speaker diarization # |
6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
7 | # (based on Emmanuel Ferreira # | 7 | # (based on Emmanuel Ferreira # |
8 | # and Hugo Mauchrétien works) # | 8 | # and Hugo Mauchrétien works) # |
9 | # Version : 1.1 # | 9 | # Version : 1.1 # |
10 | # Date : 18/06/13 # | 10 | # Date : 18/06/13 # |
11 | ##################################################### | 11 | ##################################################### |
12 | 12 | ||
13 | echo "### FirstPass.sh ###" | 13 | echo "### FirstPass.sh ###" |
14 | 14 | ||
15 | # Check OTMEDIA_HOME env var | 15 | # Check OTMEDIA_HOME env var |
16 | if [ -z ${OTMEDIA_HOME} ] | 16 | if [ -z ${OTMEDIA_HOME} ] |
17 | then | 17 | then |
18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
19 | export OTMEDIA_HOME=$OTMEDIA_HOME | 19 | export OTMEDIA_HOME=$OTMEDIA_HOME |
20 | fi | 20 | fi |
21 | 21 | ||
22 | # where is FirstPass.sh | 22 | # where is FirstPass.sh |
23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 23 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
24 | 24 | ||
25 | # scripts path | 25 | # scripts path |
26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 26 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
27 | 27 | ||
28 | # Include scripts | 28 | # Include scripts |
29 | . $SCRIPT_PATH"/Tools.sh" | 29 | . $SCRIPT_PATH"/Tools.sh" |
30 | . $SCRIPT_PATH"/CheckFirstPass.sh" | 30 | . $SCRIPT_PATH"/CheckFirstPass.sh" |
31 | 31 | ||
32 | # where is FirstPass.cfg | 32 | # where is FirstPass.cfg |
33 | FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" | 33 | FIRSTPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/FirstPass.cfg" |
34 | if [ -e $FIRSTPASS_CONFIG_FILE ] | 34 | if [ -e $FIRSTPASS_CONFIG_FILE ] |
35 | then | 35 | then |
36 | . $FIRSTPASS_CONFIG_FILE | 36 | . $FIRSTPASS_CONFIG_FILE |
37 | else | 37 | else |
38 | echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2 | 38 | echo "ERROR : Can't find configuration file $FIRSTPASS_CONFIG_FILE" >&2 |
39 | echo "exit" >&2 | 39 | echo "exit" >&2 |
40 | exit 1 | 40 | exit 1 |
41 | fi | 41 | fi |
42 | 42 | ||
43 | #---------------# | 43 | #---------------# |
44 | # Parse Options # | 44 | # Parse Options # |
45 | #---------------# | 45 | #---------------# |
46 | while getopts ":hDv:cf:r" opt | 46 | while getopts ":hDv:cf:r" opt |
47 | do | 47 | do |
48 | case $opt in | 48 | case $opt in |
49 | h) | 49 | h) |
50 | echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" | 50 | echo -e "$0 [OPTIONS] <WAV_FILE> <OUTPUT_DIRECTORY>\n" |
51 | echo -e "\t Options:" | 51 | echo -e "\t Options:" |
52 | echo -e "\t\t-h :\tprint this message" | 52 | echo -e "\t\t-h :\tprint this message" |
53 | echo -e "\t\t-D :\tDEBUG mode on" | 53 | echo -e "\t\t-D :\tDEBUG mode on" |
54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
55 | echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected" | 55 | echo -e "\t\t-c :\tCheck process, and log it into files, can stop if error detected" |
56 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | 56 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" |
57 | echo -e "\t\t-r :\tforce rerun the wav file" | 57 | echo -e "\t\t-r :\tforce rerun the wav file" |
58 | exit 1 | 58 | exit 1 |
59 | ;; | 59 | ;; |
60 | D) | 60 | D) |
61 | DEBUG=1 | 61 | DEBUG=1 |
62 | ;; | 62 | ;; |
63 | v) | 63 | v) |
64 | VERBOSE=$OPTARG | 64 | VERBOSE=$OPTARG |
65 | ;; | 65 | ;; |
66 | c) | 66 | c) |
67 | CHECK=1 | 67 | CHECK=1 |
68 | ;; | 68 | ;; |
69 | f) | 69 | f) |
70 | FORKS="--forks $OPTARG" | 70 | FORKS="--forks $OPTARG" |
71 | ;; | 71 | ;; |
72 | r) | 72 | r) |
73 | RERUN=1 | 73 | RERUN=1 |
74 | ;; | 74 | ;; |
75 | :) | 75 | :) |
76 | echo "Option -$OPTARG requires an argument." >&2 | 76 | echo "Option -$OPTARG requires an argument." >&2 |
77 | exit 1 | 77 | exit 1 |
78 | ;; | 78 | ;; |
79 | \?) | 79 | \?) |
80 | echo "BAD USAGE : unknow opton -$OPTARG" | 80 | echo "BAD USAGE : unknow opton -$OPTARG" |
81 | exit 1 | 81 | exit 1 |
82 | ;; | 82 | ;; |
83 | esac | 83 | esac |
84 | done | 84 | done |
85 | 85 | ||
86 | # mode debug enable | 86 | # mode debug enable |
87 | if [ $DEBUG -eq 1 ] | 87 | if [ $DEBUG -eq 1 ] |
88 | then | 88 | then |
89 | set -x | 89 | set -x |
90 | echo -e "## Mode DEBUG ON ##" | 90 | echo -e "## Mode DEBUG ON ##" |
91 | fi | 91 | fi |
92 | 92 | ||
93 | # mode verbose enable | 93 | # mode verbose enable |
94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 94 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
95 | 95 | ||
96 | # Check USAGE by arguments number | 96 | # Check USAGE by arguments number |
97 | if [ $(($#-($OPTIND-1))) -ne 2 ] | 97 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
98 | then | 98 | then |
99 | echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" | 99 | echo "BAD USAGE : FirstPass.sh [OPTIONS] <WAV_FILE> <OUTPUT_DIR>" |
100 | echo "$0 -h for more info" | 100 | echo "$0 -h for more info" |
101 | exit 1 | 101 | exit 1 |
102 | fi | 102 | fi |
103 | 103 | ||
104 | shift $((OPTIND-1)) | 104 | shift $((OPTIND-1)) |
105 | # check audio file - First argument | 105 | # check audio file - First argument |
106 | if [ -e $1 ] && [ -s $1 ] | 106 | if [ -e $1 ] && [ -s $1 ] |
107 | then | 107 | then |
108 | # absolute path to wav file | 108 | # absolute path to wav file |
109 | WAV_FILE=$(readlink -e $1) | 109 | WAV_FILE=$(readlink -e $1) |
110 | # wav filename | 110 | # wav filename |
111 | FILENAME=$(basename $WAV_FILE) | 111 | FILENAME=$(basename $WAV_FILE) |
112 | # wav filename without extension | 112 | # wav filename without extension |
113 | BASENAME=${FILENAME%.*} | 113 | BASENAME=${FILENAME%.*} |
114 | 114 | ||
115 | print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1 | 115 | print_info "[${BASENAME}] => P1 start | $(date +'%d/%m/%y %H:%M:%S')" 1 |
116 | print_info "[${BASENAME}] $WAV_FILE OK" 2 | 116 | print_info "[${BASENAME}] $WAV_FILE OK" 2 |
117 | else | 117 | else |
118 | print_error " Can't find $1 OR file is empty" | 118 | print_error " Can't find $1 OR file is empty" |
119 | exit 1 | 119 | exit 1 |
120 | fi | 120 | fi |
121 | 121 | ||
122 | # check output directory - Second argument | 122 | # check output directory - Second argument |
123 | if [ ! -e $2 ] | 123 | if [ ! -e $2 ] |
124 | then | 124 | then |
125 | mkdir -p $2 | 125 | mkdir -p $2 |
126 | print_info "[${BASENAME}] Make directory $2" 2 | 126 | print_info "[${BASENAME}] Make directory $2" 2 |
127 | fi | 127 | fi |
128 | 128 | ||
129 | 129 | ||
130 | #-------------# | 130 | #-------------# |
131 | # GLOBAL VARS # | 131 | # GLOBAL VARS # |
132 | #-------------# | 132 | #-------------# |
133 | OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path | 133 | OUTPUT_DIR=$(readlink -e $2) # Output directory absolute path |
134 | OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME | 134 | OUTPUT_DIR_BASENAME="$OUTPUT_DIR/$BASENAME/" # New OUTPUT_DIR with BASENAME |
135 | PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file | 135 | PLP_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.plp" # Global PLP file |
136 | PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory | 136 | PLP_DIR="$OUTPUT_DIR_BASENAME/PLP/" # Segmented PLP files directory |
137 | PLP_LIST="$OUTPUT_DIR_BASENAME/plp.lst" # list of plp files | 137 | PLP_LIST="$OUTPUT_DIR_BASENAME/plp.lst" # list of plp files |
138 | SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file | 138 | SEG_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.seg" # Global Seg file |
139 | LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file | 139 | LBL_FILE="$OUTPUT_DIR_BASENAME/$BASENAME.lbl" # Global LBL file |
140 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" | 140 | RES_DIR=$OUTPUT_DIR_BASENAME"/res_p1" |
141 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log" | 141 | LOGFILE="$OUTPUT_DIR_BASENAME/info_p1.log" |
142 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log" | 142 | ERRORFILE="$OUTPUT_DIR_BASENAME/error_p1.log" |
143 | 143 | ||
144 | #------------------# | 144 | #------------------# |
145 | # Create WORKSPACE # | 145 | # Create WORKSPACE # |
146 | #------------------# | 146 | #------------------# |
147 | if [ ! -e $OUTPUT_DIR_BASENAME ] | 147 | if [ ! -e $OUTPUT_DIR_BASENAME ] |
148 | then | 148 | then |
149 | mkdir -p $OUTPUT_DIR_BASENAME | 149 | mkdir -p $OUTPUT_DIR_BASENAME |
150 | print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2 | 150 | print_info "[${BASENAME}] Make directory $OUTPUT_DIR_BASENAME" 2 |
151 | fi | 151 | fi |
152 | 152 | ||
153 | # Lock directory | 153 | # Lock directory |
154 | if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi | 154 | if [ -e $OUTPUT_DIR_BASENAME/FIRSTPASS.lock ] && [ $RERUN -eq 0 ]; then exit 1; fi |
155 | rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 | 155 | rm "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" > /dev/null 2>&1 |
156 | touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 | 156 | touch "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" > /dev/null 2>&1 |
157 | 157 | ||
158 | rm -r $PLP_DIR > /dev/null 2>&1; | 158 | rm -r $PLP_DIR > /dev/null 2>&1; |
159 | mkdir -p $PLP_DIR | 159 | mkdir -p $PLP_DIR |
160 | print_info "[${BASENAME}] Make directory $PLP_DIR" 2 | 160 | print_info "[${BASENAME}] Make directory $PLP_DIR" 2 |
161 | if [ $RERUN -eq 0 ]; | 161 | if [ $RERUN -eq 0 ]; |
162 | then | 162 | then |
163 | rm -r $RES_DIR > /dev/null 2>&1; | 163 | rm -r $RES_DIR > /dev/null 2>&1; |
164 | else | 164 | else |
165 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 165 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
166 | fi | 166 | fi |
167 | mkdir -p $RES_DIR > /dev/null 2>&1 | 167 | mkdir -p $RES_DIR > /dev/null 2>&1 |
168 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 | 168 | print_info "[${BASENAME}] Make directory $RES_DIR" 2 |
169 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 | 169 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 |
170 | 170 | ||
171 | #--------------------# | 171 | #--------------------# |
172 | # Save configuration # | 172 | # Save configuration # |
173 | #--------------------# | 173 | #--------------------# |
174 | cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg | 174 | cp $FIRSTPASS_CONFIG_FILE $OUTPUT_DIR_BASENAME/FirstPass.cfg |
175 | echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 175 | echo "FIRSTPASS_SCRIPT_PATH=$MAIN_SCRIPT_PATH" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
176 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 176 | echo "WAV_FILE=$WAV_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
177 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 177 | echo "BASENAME=$BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
178 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 178 | echo "OUTPUT_DIR=$OUTPUT_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
179 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 179 | echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
180 | echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 180 | echo "PLP_FILE=$PLP_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
181 | echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 181 | echo "PLP_DIR=$PLP_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
182 | echo "PLP_LIST=$PLP_LIST" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 182 | echo "PLP_LIST=$PLP_LIST" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
183 | echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 183 | echo "SEG_FILE=$SEG_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
184 | echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 184 | echo "LBL_FILE=$LBL_FILE" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
185 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg | 185 | echo "RES_DIR=$RES_DIR" >> $OUTPUT_DIR_BASENAME/FirstPass.cfg |
186 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 | 186 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/FirstPass.cfg" 1 |
187 | 187 | ||
188 | #-------------------------# | 188 | #-------------------------# |
189 | # Check Audio File Format # | 189 | # Check Audio File Format # |
190 | #-------------------------# | 190 | #-------------------------# |
191 | error=0 | 191 | error=0 |
192 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") | 192 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "16000 Hz") |
193 | if [ -z "$temp" ]; then error=1; fi | 193 | if [ -z "$temp" ]; then error=1; fi |
194 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") | 194 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "1 channels") |
195 | if [ -z "$temp" ]; then error=1; fi | 195 | if [ -z "$temp" ]; then error=1; fi |
196 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") | 196 | temp=$(avconv -i $WAV_FILE 2>&1 | grep "s16") |
197 | if [ -z "$temp" ]; then error=1; fi | 197 | if [ -z "$temp" ]; then error=1; fi |
198 | 198 | ||
199 | if [ $error -eq 1 ] | 199 | if [ $error -eq 1 ] |
200 | then | 200 | then |
201 | print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" | 201 | print_message $WARNING 2 "[${BASENAME}] $WAV_FILE is not a wav file at 16000 Hz, 1 channel, 16bits\nhave to convert" |
202 | print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" | 202 | print_message $INFO 3 "[${BASENAME}] avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav" |
203 | avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav | 203 | avconv -i $WAV_FILE -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 $OUTPUT_DIR_BASENAME/$BASENAME.wav |
204 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav | 204 | WAV_FILE=$OUTPUT_DIR_BASENAME/$BASENAME.wav |
205 | FILENAME=$BASENAME.wav | 205 | FILENAME=$BASENAME.wav |
206 | print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE" | 206 | print_message $INFO 1 "[${BASENAME}] new wav file : $WAV_FILE" |
207 | fi | 207 | fi |
208 | 208 | ||
209 | #---------------# | 209 | #---------------# |
210 | # Get SRT file # | 210 | # Get SRT file # |
211 | #---------------# | 211 | #---------------# |
212 | if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] | 212 | if [ -s $(dirname $WAV_FILE)/$BASENAME.SRT ] |
213 | then | 213 | then |
214 | cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT | 214 | cp $(dirname $WAV_FILE)/$BASENAME.SRT $OUTPUT_DIR_BASENAME/$BASENAME.SRT |
215 | print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3 | 215 | print_info "[${BASENAME}] copy $BASENAME.SRT file into $OUTPUT_DIR_BASENAME" 3 |
216 | fi | 216 | fi |
217 | 217 | ||
218 | #------------# | 218 | #------------# |
219 | # WAV -> PLP # | 219 | # WAV -> PLP # |
220 | #------------# | 220 | #------------# |
221 | print_info "[${BASENAME}] convert WAV -> PLP" 1 | 221 | print_info "[${BASENAME}] convert WAV -> PLP" 1 |
222 | echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp | 222 | echo $FILENAME > $OUTPUT_DIR_BASENAME/list.tmp |
223 | print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3 | 223 | print_info "[${BASENAME}] $BIN_PATH/lia_plp_mt.32 --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms" 3 |
224 | 224 | ||
225 | $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms | 225 | $BIN_PATH/lia_plp_mt$ARCH --lst $OUTPUT_DIR_BASENAME/list.tmp --input_dir $(dirname $WAV_FILE) --output_dir $OUTPUT_DIR_BASENAME --input_type WAV --output_type HTK --nb_coef 12 --cms |
226 | 226 | ||
227 | if [ $CHECK -eq 1 ] | 227 | if [ $CHECK -eq 1 ] |
228 | then | 228 | then |
229 | check_first_pass_plp "$PLP_FILE" | 229 | check_first_pass_plp "$PLP_FILE" |
230 | if [ $? -eq 1 ] | 230 | if [ $? -eq 1 ] |
231 | then | 231 | then |
232 | print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE" | 232 | print_log_file "$ERROFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating PLP file : $PLP_FILE" |
233 | print_error "[${BASENAME}] -> exit, Check $ERRORFILE file" | 233 | print_error "[${BASENAME}] -> exit, Check $ERRORFILE file" |
234 | exit 1 | 234 | exit 1 |
235 | fi | 235 | fi |
236 | fi | 236 | fi |
237 | 237 | ||
238 | rm $OUTPUT_DIR_BASENAME/list.tmp 2> /dev/null | 238 | rm $OUTPUT_DIR_BASENAME/list.tmp 2> /dev/null |
239 | 239 | ||
240 | #------------------------------# | 240 | #------------------------------# |
241 | # S/NS + SPEAKERS SEGMENTATION # | 241 | # S/NS + SPEAKERS SEGMENTATION # |
242 | #------------------------------# | 242 | #------------------------------# |
243 | print_info "[${BASENAME}] Launch speakers diarization" 1 | 243 | print_info "[${BASENAME}] Launch speakers diarization" 1 |
244 | # Compute seg file | 244 | # Compute seg file |
245 | print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3 | 245 | print_info "[${BASENAME}] java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME" 3 |
246 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME | 246 | #java -Xmx8000m -Xms2048 -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME |
247 | java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering | 247 | java -Xmx4096m -jar $BIN_PATH/LIUM_SpkDiarization-4.2.jar --fInputMask=${WAV_FILE} --sOutputMask=${SEG_FILE} $BASENAME #–doCEClustering |
248 | 248 | ||
249 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) | 249 | if [ $CHECK -eq 1 ] && ( [ ! -e $SEG_FILE ] || [ -z $SEG_FILE ] ) |
250 | then | 250 | then |
251 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE" | 251 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating SEG file : $SEG_FILE" |
252 | print_error "[${BASENAME}] Check $ERRORFILE file" | 252 | print_error "[${BASENAME}] Check $ERRORFILE file" |
253 | exit 1 | 253 | exit 1 |
254 | fi | 254 | fi |
255 | 255 | ||
256 | 256 | ||
257 | # Create LBL file | 257 | # Create LBL file |
258 | print_info "Extract LBL file from SEG file" 1 | 258 | print_info "Extract LBL file from SEG file" 1 |
259 | 259 | ||
260 | cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE | 260 | cat $SEG_FILE | grep -v ";;" | cut -f3,4,5,8 -d" " | tr " " "#" | sort -k1 -n | tr "#" " " > $LBL_FILE |
261 | 261 | ||
262 | if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) | 262 | if [ $CHECK -eq 1 ] && ( [ ! -e $LBL_FILE ] || [ -z $LBL_FILE ] ) |
263 | then | 263 | then |
264 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE" | 264 | print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] Creating LBL file : $LBL_FILE" |
265 | print_error "[${BASENAME}] Check $ERRORFILE file" | 265 | print_error "[${BASENAME}] Check $ERRORFILE file" |
266 | exit 1 | 266 | exit 1 |
267 | fi | 267 | fi |
268 | 268 | ||
269 | 269 | ||
270 | #----------------------------------------------------# | 270 | #----------------------------------------------------# |
271 | # Cut global PLP file depending on LBL segmentations # | 271 | # Cut global PLP file depending on LBL segmentations # |
272 | #----------------------------------------------------# | 272 | #----------------------------------------------------# |
273 | print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1 | 273 | print_info "[${BASENAME}] Cut PLP file depending to LBL segmentations" 1 |
274 | print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3 | 274 | print_info "[${BASENAME}] $BIN_PATH/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG" 3 |
275 | 275 | ||
276 | $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG | 276 | $SPEERAL_TOOLS/gcep $PLP_FILE $LBL_FILE 500 $PLP_DIR -FSEG |
277 | 277 | ||
278 | if [ $CHECK -eq 1 ] | 278 | if [ $CHECK -eq 1 ] |
279 | then | 279 | then |
280 | check_first_pass_plps_lbl $PLP_DIR $LBL_FILE | 280 | check_first_pass_plps_lbl $PLP_DIR $LBL_FILE |
281 | if [ $? -eq 1 ] | 281 | if [ $? -eq 1 ] |
282 | then | 282 | then |
283 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files" | 283 | print_log_file $ERRORFILE "ERROR [$(date +'%d/%m/%y %H:%M:%S')] $PLP wrong number of .plp files" |
284 | print_error "[${BASENAME}] Check $ERRORFILE file" | 284 | print_error "[${BASENAME}] Check $ERRORFILE file" |
285 | exit 1 | 285 | exit 1 |
286 | fi | 286 | fi |
287 | fi | 287 | fi |
288 | 288 | ||
289 | # change plp files names | 289 | # change plp files names |
290 | cd $PLP_DIR; | 290 | cd $PLP_DIR; |
291 | rename -f s/_/#/g *plp | 291 | rename -f s/_/#/g *plp |
292 | rename -f s/#/_/ *plp | 292 | rename -f s/#/_/ *plp |
293 | cd $OLDPWD | 293 | cd $OLDPWD |
294 | 294 | ||
295 | #---------------------------------------------# | 295 | #---------------------------------------------# |
296 | # PLP files list depending on acoustic models # | 296 | # PLP files list depending on acoustic models # |
297 | #---------------------------------------------# | 297 | #---------------------------------------------# |
298 | print_info "[${BASENAME}] Create PLP list depending of the model" 1 | 298 | print_info "[${BASENAME}] Create PLP list depending of the model" 1 |
299 | # Create a list of plp files | 299 | # Create a list of plp files |
300 | find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $PLP_LIST | 300 | find $PLP_DIR -type f -exec basename "{}" .plp \; | sort > $PLP_LIST |
301 | 301 | ||
302 | rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 | 302 | rm $OUTPUT_DIR_BASENAME/plp_*.lst > /dev/null 2>&1 |
303 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 303 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
304 | do | 304 | do |
305 | a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` | 305 | a=`grep -e "${MTAG[$i]}" $OUTPUT_DIR_BASENAME/plp.lst` |
306 | if [ -n "$a" ]; then | 306 | if [ -n "$a" ]; then |
307 | print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 | 307 | print_info "[${BASENAME}] Creating $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst" 3 |
308 | grep -e "${MTAG[$i]}" $PLP_LIST | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 308 | grep -e "${MTAG[$i]}" $PLP_LIST | sort > $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
309 | fi | 309 | fi |
310 | done | 310 | done |
311 | 311 | ||
312 | #-----------------------# | 312 | #-----------------------# |
313 | # First Pass (DECODING) # | 313 | # First Pass (DECODING) # |
314 | #-----------------------# | 314 | #-----------------------# |
315 | # | 315 | # |
316 | # For all AM do decoding | 316 | # For all AM do decoding |
317 | # if the check reports an error -> iterate over the undone decoding (max 1 time) | 317 | # if the check reports an error -> iterate over the undone decoding (max 1 time) |
318 | # | 318 | # |
319 | print_info "[${BASENAME}] Launch decoding" 1 | 319 | print_info "[${BASENAME}] Launch decoding" 1 |
320 | for (( i=0; $i<${#MTAG[@]} ; i++ )) | 320 | for (( i=0; $i<${#MTAG[@]} ; i++ )) |
321 | do | 321 | do |
322 | redo=1; # nb of try if not all segs is decoded | 322 | redo=1; # nb of try if not all segs is decoded |
323 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] | 323 | if [ -e $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst ] |
324 | then | 324 | then |
325 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 325 | todo=$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
326 | while [ $redo -gt 0 ]; do | 326 | while [ $redo -gt 0 ]; do |
327 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 327 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
328 | print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3 | 328 | print_info "[${BASENAME}] $SPEERAL_BIN $todo $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3 |
329 | # Run speeral | 329 | # Run speeral |
330 | $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock | 330 | $SPEERAL_BIN ${todo} $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock |
331 | 331 | ||
332 | # Check if error | 332 | # Check if error |
333 | if [ $CHECK -eq 1 ] | 333 | if [ $CHECK -eq 1 ] |
334 | then | 334 | then |
335 | check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" | 335 | check_first_pass_output_speeral "${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" "$RES_DIR" |
336 | # if error | 336 | # if error |
337 | if [ $? -eq 1 ] | 337 | if [ $? -eq 1 ] |
338 | then | 338 | then |
339 | # rerun | 339 | # rerun |
340 | redo=$(($redo - 1)); | 340 | redo=$(($redo - 1)); |
341 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 | 341 | print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2 |
342 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" | 342 | print_log_file $LOGFILE "WARN : Speeral number of output ERROR ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst" |
343 | # new plp list | 343 | # new plp list |
344 | # list .seg done and compare to list of seg to do | 344 | # list .seg done and compare to list of seg to do |
345 | ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 345 | ls $RES_DIR/*.seg | grep -e "${MTAG[$i]}" | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
346 | diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst | 346 | diff ${OUTPUT_DIR_BASENAME}/plp_${MODS[$i]}.lst ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" > ${OUTPUT_DIR_BASENAME}/todo.lst |
347 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 347 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
348 | # log seg to do | 348 | # log seg to do |
349 | print_log_file $LOGFILE "Segs not done [" | 349 | print_log_file $LOGFILE "Segs not done [" |
350 | cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE | 350 | cat ${OUTPUT_DIR_BASENAME}/todo.lst >> $LOGFILE |
351 | todo=${OUTPUT_DIR_BASENAME}/todo.lst | 351 | todo=${OUTPUT_DIR_BASENAME}/todo.lst |
352 | print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]" | 352 | print_log_file $LOGFILE "] [$(date +'%d/%m/%y %H:%M:%S')]" |
353 | print_warn "[${BASENAME}] Try $redo" 3 | 353 | print_warn "[${BASENAME}] Try $redo" 3 |
354 | fi | 354 | fi |
355 | else | ||
356 | redo=-1 | ||
355 | fi | 357 | fi |
356 | done | 358 | done |
357 | rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 | 359 | rm ${OUTPUT_DIR_BASENAME}/todo.lst > /dev/null 2>&1 |
358 | #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst | 360 | #rm $OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst |
359 | rm $RES_DIR/*.lock > /dev/null 2>&1 | 361 | rm $RES_DIR/*.lock > /dev/null 2>&1 |
360 | fi | 362 | fi |
361 | done | 363 | done |
362 | 364 | ||
363 | ## Check missing seg and log it | 365 | ## Check missing seg and log it |
364 | if [ "$CHECK" -eq 1 ] | 366 | if [ "$CHECK" -eq 1 ] |
365 | then | 367 | then |
366 | ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp | 368 | ls $RES_DIR/*.seg | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.seg//' | sort > ${OUTPUT_DIR_BASENAME}/.tmp |
367 | todo=$(cat ${PLP_LIST} 2> /dev/null | wc -l) | 369 | todo=$(cat ${PLP_LIST} 2> /dev/null | wc -l) |
368 | if [ "$todo" -eq 0 ]; then todo=1;fi | 370 | if [ "$todo" -eq 0 ]; then todo=1;fi |
369 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) | 371 | notdone=$(($todo - $(cat ${OUTPUT_DIR_BASENAME}/.tmp | wc -l))) |
370 | pourcentage=$((($notdone*100)/$todo)) | 372 | pourcentage=$((($notdone*100)/$todo)) |
371 | 373 | ||
372 | if [ "$notdone" -ne 0 ] | 374 | if [ "$notdone" -ne 0 ] |
373 | then | 375 | then |
374 | print_error "[${BASENAME}] Check ${ERRORFILE}" | 376 | print_error "[${BASENAME}] Check ${ERRORFILE}" |
375 | print_log_file "${ERRORFILE}" "ERROR : Segs not done [" | 377 | print_log_file "${ERRORFILE}" "ERROR : Segs not done [" |
376 | diff ${PLP_LIST} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> ${ERRORFILE} | 378 | diff ${PLP_LIST} ${OUTPUT_DIR_BASENAME}/.tmp | grep -e "^< " | sed -e "s/< //" >> ${ERRORFILE} |
377 | print_log_file "${ERRORFILE}" "] $pourcentage% $BASENAME" | 379 | print_log_file "${ERRORFILE}" "] $pourcentage% $BASENAME" |
378 | else | 380 | else |
379 | print_log_file "$LOGFILE" "P1 OK ${BASENAME} | $(date +'%d/%m/%y %H:%M:%S')" | 381 | print_log_file "$LOGFILE" "P1 OK ${BASENAME} | $(date +'%d/%m/%y %H:%M:%S')" |
380 | fi | 382 | fi |
381 | rm ${OUTPUT_DIR_BASENAME}/.tmp | 383 | rm ${OUTPUT_DIR_BASENAME}/.tmp |
382 | fi | 384 | fi |
383 | 385 | ||
384 | #---------------# | 386 | #---------------# |
385 | # Convert res # | 387 | # Convert res # |
386 | #---------------# | 388 | #---------------# |
387 | print_info "[${BASENAME}] Convert .res into .ctm" 1 | 389 | print_info "[${BASENAME}] Convert .res into .ctm" 1 |
388 | # .res => .ctm | 390 | # .res => .ctm |
389 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm | 391 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format CTM --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.ctm |
390 | print_info "[${BASENAME}] Convert .res into .trs" 1 | 392 | print_info "[${BASENAME}] Convert .res into .trs" 1 |
391 | # .res => .trs | 393 | # .res => .trs |
392 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | 394 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR_BASENAME/$BASENAME.seg" > $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
393 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg | 395 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TRS --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.trs --trs_config $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg |
394 | rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null | 396 | rm $OUTPUT_DIR_BASENAME/$BASENAME.trs_cfg 2> /dev/null |
395 | print_info "[${BASENAME}] Convert .res into .txt" 1 | 397 | print_info "[${BASENAME}] Convert .res into .txt" 1 |
396 | # .res => .txt | 398 | # .res => .txt |
397 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt | 399 | $SCRIPT_PATH/res2out.pl --dir $RES_DIR --format TXT --ignore $RULES/asupp --out $OUTPUT_DIR_BASENAME/$BASENAME.1pass.txt |
398 | 400 | ||
399 | print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1 | 401 | print_info "[${BASENAME}] <= P1 End | $(date +'%d/%m/%y %H:%M:%S')" 1 |
400 | # unlock directory | 402 | # unlock directory |
401 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" | 403 | mv "$OUTPUT_DIR_BASENAME/FIRSTPASS.lock" "$OUTPUT_DIR_BASENAME/FIRSTPASS.unlock" |
402 | 404 |