Commit f3043a8b949ebc79be54053c83fe110e9fedc59c
1 parent
48b8af9ef5
Exists in
master
bugfix
Showing 2 changed files with 4 additions and 4 deletions (view: Inline Diff)
main_tools/ConfPass.sh
| 1 | #!/bin/bash | 1 | #!/bin/bash |
| 2 | 2 | ||
| 3 | ##################################################### | 3 | ##################################################### |
| 4 | # File : ConfPass.sh # | 4 | # File : ConfPass.sh # |
| 5 | # Brief : Process the ASR Confidence pass # | 5 | # Brief : Process the ASR Confidence pass # |
| 6 | # Author : Jean-François Rey # | 6 | # Author : Jean-François Rey # |
| 7 | # (base on Emmanuel Ferreira # | 7 | # (base on Emmanuel Ferreira # |
| 8 | # and hugo Mauchrétien works) # | 8 | # and hugo Mauchrétien works) # |
| 9 | # Version : 1.0 # | 9 | # Version : 1.0 # |
| 10 | # Date : 17/06/13 # | 10 | # Date : 17/06/13 # |
| 11 | ##################################################### | 11 | ##################################################### |
| 12 | 12 | ||
| 13 | echo "### ConfPass.sh ###" | 13 | echo "### ConfPass.sh ###" |
| 14 | 14 | ||
| 15 | #Check OTMEDIA_HOME env var | 15 | #Check OTMEDIA_HOME env var |
| 16 | if [ -z ${OTMEDIA_HOME} ] | 16 | if [ -z ${OTMEDIA_HOME} ] |
| 17 | then | 17 | then |
| 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 18 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
| 19 | export OTMEDIA_HOME=$OTMEDIA_HOME | 19 | export OTMEDIA_HOME=$OTMEDIA_HOME |
| 20 | fi | 20 | fi |
| 21 | 21 | ||
| 22 | 22 | ||
| 23 | # where is ConfPath.sh | 23 | # where is ConfPath.sh |
| 24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 24 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
| 25 | 25 | ||
| 26 | # Scripts Path | 26 | # Scripts Path |
| 27 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 27 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
| 28 | 28 | ||
| 29 | # Include scripts | 29 | # Include scripts |
| 30 | . $SCRIPT_PATH"/Tools.sh" | 30 | . $SCRIPT_PATH"/Tools.sh" |
| 31 | . $SCRIPT_PATH"/CheckConfPass.sh" | 31 | . $SCRIPT_PATH"/CheckConfPass.sh" |
| 32 | 32 | ||
| 33 | # where is FirstPass.cfg | 33 | # where is FirstPass.cfg |
| 34 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" | 34 | CONFPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/ConfPass.cfg" |
| 35 | if [ -e $CONFPASS_CONFIG_FILE ] | 35 | if [ -e $CONFPASS_CONFIG_FILE ] |
| 36 | then | 36 | then |
| 37 | . $CONFPASS_CONFIG_FILE | 37 | . $CONFPASS_CONFIG_FILE |
| 38 | else | 38 | else |
| 39 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2 | 39 | echo "ERROR : Can't find configuration file $CONFPASS_CONFIG_FILE" >&2 |
| 40 | exit 1 | 40 | exit 1 |
| 41 | fi | 41 | fi |
| 42 | 42 | ||
| 43 | #---------------# | 43 | #---------------# |
| 44 | # Parse Options # | 44 | # Parse Options # |
| 45 | #---------------# | 45 | #---------------# |
| 46 | while getopts ":hDv:cr" opt | 46 | while getopts ":hDv:cr" opt |
| 47 | do | 47 | do |
| 48 | case $opt in | 48 | case $opt in |
| 49 | h) | 49 | h) |
| 50 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" | 50 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY> <TREIL_DIRECTORY_NAME>\n" |
| 51 | echo -e "\t Options:" | 51 | echo -e "\t Options:" |
| 52 | echo -e "\t\t-h :\tprint this message" | 52 | echo -e "\t\t-h :\tprint this message" |
| 53 | echo -e "\t\t-D :\tDEBUG mode on" | 53 | echo -e "\t\t-D :\tDEBUG mode on" |
| 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 54 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
| 55 | echo -e "\t\t-c :\t Check process, stop if error detected" | 55 | echo -e "\t\t-c :\t Check process, stop if error detected" |
| 56 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" | 56 | echo -e "\t\t-r :\tForce to rerun confpas without deleting existing files" |
| 57 | exit 1 | 57 | exit 1 |
| 58 | ;; | 58 | ;; |
| 59 | D) | 59 | D) |
| 60 | DEBUG=1 | 60 | DEBUG=1 |
| 61 | ;; | 61 | ;; |
| 62 | v) | 62 | v) |
| 63 | VERBOSE=$OPTARG | 63 | VERBOSE=$OPTARG |
| 64 | ;; | 64 | ;; |
| 65 | c) | 65 | c) |
| 66 | CHECK=1 | 66 | CHECK=1 |
| 67 | ;; | 67 | ;; |
| 68 | r) | 68 | r) |
| 69 | RERUN=1 | 69 | RERUN=1 |
| 70 | ;; | 70 | ;; |
| 71 | :) | 71 | :) |
| 72 | echo "Option -$OPTARG requires an argument." >&2 | 72 | echo "Option -$OPTARG requires an argument." >&2 |
| 73 | exit 1 | 73 | exit 1 |
| 74 | ;; | 74 | ;; |
| 75 | \?) | 75 | \?) |
| 76 | echo "BAD USAGE : unknow option -$OPTARG" | 76 | echo "BAD USAGE : unknow option -$OPTARG" |
| 77 | #exit 1 | 77 | #exit 1 |
| 78 | ;; | 78 | ;; |
| 79 | esac | 79 | esac |
| 80 | done | 80 | done |
| 81 | 81 | ||
| 82 | # mode debug enable | 82 | # mode debug enable |
| 83 | if [ $DEBUG -eq 1 ] | 83 | if [ $DEBUG -eq 1 ] |
| 84 | then | 84 | then |
| 85 | set -x | 85 | set -x |
| 86 | echo -e "## Mode DEBUG ON ##" | 86 | echo -e "## Mode DEBUG ON ##" |
| 87 | fi | 87 | fi |
| 88 | 88 | ||
| 89 | # mode verbose enable | 89 | # mode verbose enable |
| 90 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 90 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
| 91 | 91 | ||
| 92 | # Check USAGE by arguments number | 92 | # Check USAGE by arguments number |
| 93 | if [ $(($#-($OPTIND-1))) -ne 2 ] | 93 | if [ $(($#-($OPTIND-1))) -ne 2 ] |
| 94 | then | 94 | then |
| 95 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" | 95 | echo "BAD USAGE : ConfPass.sh [OPTIONS] <INPUT_DIR> <TREIL_DIRECTORY_NAME>" |
| 96 | echo "$0 -h for more info" | 96 | echo "$0 -h for more info" |
| 97 | exit 1 | 97 | exit 1 |
| 98 | fi | 98 | fi |
| 99 | 99 | ||
| 100 | shift $((OPTIND-1)) | 100 | shift $((OPTIND-1)) |
| 101 | # check input directory - first argument | 101 | # check input directory - first argument |
| 102 | if [ ! -e $1 ] | 102 | if [ ! -e $1 ] |
| 103 | then | 103 | then |
| 104 | print_error "can't open $1" | 104 | print_error "can't open $1" |
| 105 | exit 1 | 105 | exit 1 |
| 106 | fi | 106 | fi |
| 107 | # check treil input directory - second argument | 107 | # check treil input directory - second argument |
| 108 | if [ ! -e $1/$2 ] | 108 | if [ ! -e $1/$2 ] |
| 109 | then | 109 | then |
| 110 | print_error "can't open $1/$2" | 110 | print_error "can't open $1/$2" |
| 111 | exit 1 | 111 | exit 1 |
| 112 | fi | 112 | fi |
| 113 | 113 | ||
| 114 | #-------------# | 114 | #-------------# |
| 115 | # GLOBAL VARS # | 115 | # GLOBAL VARS # |
| 116 | #-------------# | 116 | #-------------# |
| 117 | INPUT_DIR=$(readlink -e $1) | 117 | INPUT_DIR=$(readlink -e $1) |
| 118 | OUTPUT_DIR=$INPUT_DIR | 118 | OUTPUT_DIR=$INPUT_DIR |
| 119 | BASENAME=$(basename $OUTPUT_DIR) | 119 | BASENAME=$(basename $OUTPUT_DIR) |
| 120 | RES_NAME=$2 | 120 | RES_NAME=$2 |
| 121 | RES_P="${INPUT_DIR}/${RES_NAME}" | 121 | RES_P="${INPUT_DIR}/${RES_NAME}" |
| 122 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf | 122 | USF_FILE=${INPUT_DIR}/${BASENAME}.${RES_NAME}.usf |
| 123 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" | 123 | CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME" |
| 124 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" | 124 | RES_CONF_DIR="$OUTPUT_DIR/conf/$RES_NAME/scored_ctm" |
| 125 | LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log" | 125 | LOGFILE="${OUTPUT_DIR_BASENAME}/info_conf.log" |
| 126 | ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log" | 126 | ERRORFILE="${OUTPUT_DIR_BASENAME}/error_conf.log" |
| 127 | 127 | ||
| 128 | print_info "[${BASENAME}] => Conf Pass start | $(date +'%d/%m/%y %H:%M:%S')" 1 | 128 | print_info "[${BASENAME}] => Conf Pass start | $(date +'%d/%m/%y %H:%M:%S')" 1 |
| 129 | 129 | ||
| 130 | #------------------# | 130 | #------------------# |
| 131 | # Create Workspace # | 131 | # Create Workspace # |
| 132 | #------------------# | 132 | #------------------# |
| 133 | # Lock directory | 133 | # Lock directory |
| 134 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] | 134 | if [ -e "$OUTPUT_DIR/CONFPASS.lock" ] && [ $RERUN -eq 0 ] |
| 135 | then | 135 | then |
| 136 | print_warn "[${BASENAME}] Confpass is locked -> exit" 2 | 136 | print_warn "[${BASENAME}] Confpass is locked -> exit" 2 |
| 137 | exit 1 | 137 | exit 1 |
| 138 | fi | 138 | fi |
| 139 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 | 139 | rm "$OUTPUT_DIR/CONFPASS.unlock" > /dev/null 2>&1 |
| 140 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 | 140 | touch "$OUTPUT_DIR/CONFPASS.lock" > /dev/null 2>&1 |
| 141 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi | 141 | if [ $RERUN -eq 0 ]; then rm -r $CONF_DIR > /dev/null 2>&1; fi |
| 142 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi | 142 | if [ $RERUN -eq 1 ]; then rm $USF_FILE > /dev/null 2>&1; fi |
| 143 | mkdir -p $CONF_DIR > /dev/null 2>&1 | 143 | mkdir -p $CONF_DIR > /dev/null 2>&1 |
| 144 | mkdir -p $RES_CONF_DIR > /dev/null 2>&1 | 144 | mkdir -p $RES_CONF_DIR > /dev/null 2>&1 |
| 145 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 | 145 | rm $LOGFILE $ERRORFILE > /dev/null 2>&1 |
| 146 | 146 | ||
| 147 | #---------------# | 147 | #---------------# |
| 148 | # Check Pass # | 148 | # Check Pass # |
| 149 | #---------------# | 149 | #---------------# |
| 150 | print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1 | 150 | print_info "[${BASENAME}] Check Conf Pass directory ${RES_NAME}" 1 |
| 151 | # if usf contains more than 49% of 0.600 confidence -> usf error | 151 | # if usf contains more than 49% of 0.600 confidence -> usf error |
| 152 | if [ -s $USF_FILE ] | 152 | if [ -s $USF_FILE ] |
| 153 | then | 153 | then |
| 154 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") | 154 | conftozerosix=$(grep -c -E 'confidence="0.600"' "${USF_FILE}") |
| 155 | confall=$(grep -c -E 'confidence=' "${USF_FILE}") | 155 | confall=$(grep -c -E 'confidence=' "${USF_FILE}") |
| 156 | if [ $confall -gt 0 ] | 156 | if [ $confall -gt 0 ] |
| 157 | then | 157 | then |
| 158 | pourcentageofzerosix=$((($conftozerosix*100)/$confall)) | 158 | pourcentageofzerosix=$((($conftozerosix*100)/$confall)) |
| 159 | if [ $pourcentageofzerosix -gt 49 ] | 159 | if [ $pourcentageofzerosix -gt 49 ] |
| 160 | then | 160 | then |
| 161 | print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2 | 161 | print_warn "[${BASENAME}] ${BASENAME}.${RES_NAME}.usf got $pourcentageofzerosix% of 0.600 confidence" 2 |
| 162 | print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1 | 162 | print_info "[${BASENAME}] bad usf ${RES_NAME}, will do it again" 1 |
| 163 | mv "${USF_FILE}" "${USF_FILE}.back" | 163 | mv "${USF_FILE}" "${USF_FILE}.back" |
| 164 | rm -r $CONF_DIR > /dev/null 2>&1 | 164 | rm -r $CONF_DIR > /dev/null 2>&1 |
| 165 | else | 165 | else |
| 166 | print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1 | 166 | print_warn "[${BASENAME}] ${USF_FILE} already done, skipping it" 1 |
| 167 | exit 0 | 167 | exit 0 |
| 168 | fi | 168 | fi |
| 169 | fi | 169 | fi |
| 170 | else | 170 | else |
| 171 | print_info "[${BASENAME}] No USF file already done, continue..." 1 | 171 | print_info "[${BASENAME}] No USF file already done, continue..." 1 |
| 172 | fi | 172 | fi |
| 173 | 173 | ||
| 174 | # Check if treil are here | 174 | # Check if treil are here |
| 175 | nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l) | 175 | nbres_p1=$(cat ${INPUT_DIR}/plp.lst | wc -l) |
| 176 | nbtreil_p=$(ls ${RES_P}/*.treil 2> /dev/null | wc -l) | 176 | nbtreil_p=$(ls ${RES_P}/*.treil 2> /dev/null | wc -l) |
| 177 | if [ $nbtreil_p -eq 0 ] | 177 | if [ $nbtreil_p -eq 0 ] |
| 178 | then | 178 | then |
| 179 | print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass" | 179 | print_error "[${BASENAME}] No ${RES_NAME} Pass, No .treil -> exit ConfPass" |
| 180 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi | 180 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ${RES_NAME} Pass, No .treil -> exit ConfPass" ;fi |
| 181 | exit 1 | 181 | exit 1 |
| 182 | else | 182 | else |
| 183 | #Check if more then 89% of treil are done | 183 | #Check if more then 89% of treil are done |
| 184 | if [ $nbres_p1 -gt 0 ] | 184 | if [ $nbres_p1 -gt 0 ] |
| 185 | then | 185 | then |
| 186 | pourcentage=$((($nbtreil_p*100)/$nbres_p1)) | 186 | pourcentage=$((($nbtreil_p*100)/$nbres_p1)) |
| 187 | if [ $pourcentage -gt 89 ] | 187 | if [ $pourcentage -gt 89 ] |
| 188 | then | 188 | then |
| 189 | print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1 | 189 | print_info "[${BASENAME}] ${RES_NAME}/*.treil are here" 1 |
| 190 | else | 190 | else |
| 191 | print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2 | 191 | print_warn "[${BASENAME}] not enough ${RES_NAME} treil" 2 |
| 192 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil ";fi | 192 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "Not enough ${RES_NAME} treil ";fi |
| 193 | fi | 193 | fi |
| 194 | fi | 194 | fi |
| 195 | fi | 195 | fi |
| 196 | 196 | ||
| 197 | #------# | 197 | #------# |
| 198 | # Save # | 198 | # Save # |
| 199 | #------# | 199 | #------# |
| 200 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg | 200 | cp $CONFPASS_CONFIG_FILE $OUTPUT_DIR/ConfPass.cfg |
| 201 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 201 | echo "RES_CONF_DIR=$RES_CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
| 202 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg | 202 | echo "CONF_DIR=$CONF_DIR" >> $OUTPUT_DIR/ConfPass.cfg |
| 203 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1 | 203 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ConfPass.cfg" 1 |
| 204 | 204 | ||
| 205 | #--------------------# | 205 | #--------------------# |
| 206 | # CONFIDENCE MEASURE # | 206 | # CONFIDENCE MEASURE # |
| 207 | #--------------------# | 207 | #--------------------# |
| 208 | 208 | ||
| 209 | # Check pourcentage of scored_ctm already done, if < 85% do confidence measure | 209 | # Check pourcentage of scored_ctm already done, if < 85% do confidence measure |
| 210 | nbres_p=$(ls ${RES_P}/*.treil | wc -l) | 210 | nbres_p=$(ls ${RES_P}/*.treil 2> /dev/null | wc -l) |
| 211 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 211 | nbconf=$(ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l) |
| 212 | if [ $nbres_p -gt 0 ] | 212 | if [ $nbres_p -gt 0 ] |
| 213 | then | 213 | then |
| 214 | pourcentageres=$((($nbconf*100)/$nbres_p)) | 214 | pourcentageres=$((($nbconf*100)/$nbres_p)) |
| 215 | if [ $pourcentageres -lt 85 ] | 215 | if [ $pourcentageres -lt 85 ] |
| 216 | then | 216 | then |
| 217 | print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1 | 217 | print_info "[${BASENAME}] Calcul Confidence $INPUT_DIR $RES_NAME" 1 |
| 218 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME | 218 | $MAIN_SCRIPT_PATH/ConfidenceMeasure.sh $INPUT_DIR $RES_NAME |
| 219 | 219 | ||
| 220 | else | 220 | else |
| 221 | print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 | 221 | print_info "[${BASENAME}] Skipping Confidence Calcul $INPUT_DIR/$RES_NAME" 1 |
| 222 | fi | 222 | fi |
| 223 | fi | 223 | fi |
| 224 | 224 | ||
| 225 | ### Check scored_ctm number res files ! | 225 | ### Check scored_ctm number res files ! |
| 226 | if [ $CHECK -eq 1 ] | 226 | if [ $CHECK -eq 1 ] |
| 227 | then | 227 | then |
| 228 | nbconf=$(ls ${RES_CONF_DIR}/*.res | wc -l) | 228 | nbconf=$(ls ${RES_CONF_DIR}/*.res 2> /dev/null | wc -l) |
| 229 | if [ $nbres_p -ne $nbconf ] | 229 | if [ $nbres_p -ne $nbconf ] |
| 230 | then | 230 | then |
| 231 | print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2 | 231 | print_warn "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" 2 |
| 232 | print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" | 232 | print_log_file $LOGFILE "WARN : ConfPass $INPUT_DIR/$RES_NAME number of res files differ" |
| 233 | fi | 233 | fi |
| 234 | fi | 234 | fi |
| 235 | 235 | ||
| 236 | #---------------------------# | 236 | #---------------------------# |
| 237 | # FROM RES WITH CONF => USF # | 237 | # FROM RES WITH CONF => USF # |
| 238 | #---------------------------# | 238 | #---------------------------# |
| 239 | print_info "[${BASENAME}] Create USF file for $RES_P" 1 | 239 | print_info "[${BASENAME}] Create USF file for $RES_P" 1 |
| 240 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done | 240 | for f in `ls ${RES_CONF_DIR}`; do $SCRIPT_PATH/formatRES.pl $RES_CONF_DIR/$f; done |
| 241 | # create USF configuration file | 241 | # create USF configuration file |
| 242 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg | 242 | echo -e "name $AUTHOR\nfileName $BASENAME\nfileExt wav\nsegFile $OUTPUT_DIR/$BASENAME.seg" > $OUTPUT_DIR/$BASENAME.usf_cfg |
| 243 | # create USF file | 243 | # create USF file |
| 244 | print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg" 3 | 244 | print_info "$SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg" 3 |
| 245 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg | 245 | $SCRIPT_PATH/res2out.pl --dir $RES_CONF_DIR --format USF --ignore $RULES/asupp --out $USF_FILE.tmp --usf_config $OUTPUT_DIR/$BASENAME.usf_cfg |
| 246 | rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 | 246 | rm $OUTPUT_DIR/$BASENAME.usf_cfg > /dev/null 2>&1 |
| 247 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE | 247 | cat $USF_FILE.tmp | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f > $USF_FILE |
| 248 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf | 248 | cp $USF_FILE ${OUTPUT_DIR}/${BASENAME}.usf |
| 249 | rm $USF_FILE.tmp > /dev/null 2>&1 | 249 | rm $USF_FILE.tmp > /dev/null 2>&1 |
| 250 | 250 | ||
| 251 | #----------------# | 251 | #----------------# |
| 252 | # Check USF file # | 252 | # Check USF file # |
| 253 | #----------------# | 253 | #----------------# |
| 254 | if [ $CHECK -eq 1 ] | 254 | if [ $CHECK -eq 1 ] |
| 255 | then | 255 | then |
| 256 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" | 256 | check_conf_pass_usf "$OUTPUT_DIR/$BASENAME.usf" |
| 257 | if [ $? -eq 1 ] | 257 | if [ $? -eq 1 ] |
| 258 | then | 258 | then |
| 259 | print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" | 259 | print_error "[${BASENAME}] Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" |
| 260 | print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" | 260 | print_log_file $ERRORFILE "ERROR : Wrong confidence measures in USF file : $OUTPUT_DIR/$BASENAME.usf" |
| 261 | exit 1 | 261 | exit 1 |
| 262 | fi | 262 | fi |
| 263 | fi | 263 | fi |
| 264 | 264 | ||
| 265 | #-------# | 265 | #-------# |
| 266 | # CLOSE # | 266 | # CLOSE # |
| 267 | #-------# | 267 | #-------# |
| 268 | # Seem OK | 268 | # Seem OK |
| 269 | print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1 | 269 | print_info "[${BASENAME}] <= ConfPass End | $(date +'%d/%m/%y %H:%M:%S')" 1 |
| 270 | 270 | ||
| 271 | # unlock directory | 271 | # unlock directory |
| 272 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" | 272 | mv "$OUTPUT_DIR/CONFPASS.lock" "$OUTPUT_DIR/CONFPASS.unlock" |
| 273 | 273 | ||
| 274 | 274 |
main_tools/ExploitConfidencePass.sh
| 1 | #!/bin/bash | 1 | #!/bin/bash |
| 2 | 2 | ||
| 3 | ##################################################### | 3 | ##################################################### |
| 4 | # File : ExploitConfidencePass.sh # | 4 | # File : ExploitConfidencePass.sh # |
| 5 | # Brief : Exploit the ASR confidence pass to : # | 5 | # Brief : Exploit the ASR confidence pass to : # |
| 6 | # -> boost the confident zone # | 6 | # -> boost the confident zone # |
| 7 | # -> find alternative in non confident zone | 7 | # -> find alternative in non confident zone |
| 8 | # -> dynamicly extend the lexicon # | 8 | # -> dynamicly extend the lexicon # |
| 9 | # Author : Jean-François Rey # | 9 | # Author : Jean-François Rey # |
| 10 | # (base on Emmanuel Ferreira # | 10 | # (base on Emmanuel Ferreira # |
| 11 | # and Hugo Mauchrétien works) # | 11 | # and Hugo Mauchrétien works) # |
| 12 | # Version : 1.0 # | 12 | # Version : 1.0 # |
| 13 | # Date : 25/06/13 # | 13 | # Date : 25/06/13 # |
| 14 | ##################################################### | 14 | ##################################################### |
| 15 | 15 | ||
| 16 | echo "### ExploitConfidencePass.sh ###" | 16 | echo "### ExploitConfidencePass.sh ###" |
| 17 | 17 | ||
| 18 | # Check OTMEDIA_HOME env var | 18 | # Check OTMEDIA_HOME env var |
| 19 | if [ -z ${OTMEDIA_HOME} ] | 19 | if [ -z ${OTMEDIA_HOME} ] |
| 20 | then | 20 | then |
| 21 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) | 21 | OTMEDIA_HOME=$(dirname $(dirname $(readlink -e $0))) |
| 22 | export OTMEDIA_HOME=$OTMEDIA_HOME | 22 | export OTMEDIA_HOME=$OTMEDIA_HOME |
| 23 | fi | 23 | fi |
| 24 | 24 | ||
| 25 | # where is ExploitConfidencePass.sh | 25 | # where is ExploitConfidencePass.sh |
| 26 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) | 26 | MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)) |
| 27 | 27 | ||
| 28 | if [ -z ${SCRIPT_PATH} ] | 28 | if [ -z ${SCRIPT_PATH} ] |
| 29 | then | 29 | then |
| 30 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts | 30 | SCRIPT_PATH=$OTMEDIA_HOME/tools/scripts |
| 31 | fi | 31 | fi |
| 32 | 32 | ||
| 33 | # Include scripts | 33 | # Include scripts |
| 34 | . $SCRIPT_PATH"/Tools.sh" | 34 | . $SCRIPT_PATH"/Tools.sh" |
| 35 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" | 35 | . $SCRIPT_PATH"/CheckExploitConfPass.sh" |
| 36 | 36 | ||
| 37 | # where is ExploitConfidencePass.cfg | 37 | # where is ExploitConfidencePass.cfg |
| 38 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" | 38 | EXPLOITCONFIDENCEPASS_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ExploitConfidencePass.cfg" |
| 39 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] | 39 | if [ -e $EXPLOITCONFIDENCEPASS_CONFIG_FILE ] |
| 40 | then | 40 | then |
| 41 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE | 41 | . $EXPLOITCONFIDENCEPASS_CONFIG_FILE |
| 42 | else | 42 | else |
| 43 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 | 43 | echo "ERROR : Can't find configuration file $EXPLOITCONFIDENCEPASS_CONFIG_FILE" >&2 |
| 44 | exit 1 | 44 | exit 1 |
| 45 | fi | 45 | fi |
| 46 | 46 | ||
| 47 | #---------------# | 47 | #---------------# |
| 48 | # Parse Options # | 48 | # Parse Options # |
| 49 | #---------------# | 49 | #---------------# |
| 50 | while getopts ":hDv:cf:r" opt | 50 | while getopts ":hDv:cf:r" opt |
| 51 | do | 51 | do |
| 52 | case $opt in | 52 | case $opt in |
| 53 | h) | 53 | h) |
| 54 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" | 54 | echo -e "$0 [OPTIONS] <INPUT_DIRECTORY>\n" |
| 55 | echo -e "\t Options:" | 55 | echo -e "\t Options:" |
| 56 | echo -e "\t\t-h :\tprint this message" | 56 | echo -e "\t\t-h :\tprint this message" |
| 57 | echo -e "\t\t-D :\tDEBUG mode on" | 57 | echo -e "\t\t-D :\tDEBUG mode on" |
| 58 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" | 58 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
| 59 | echo -e "\t\t-c :\tCheck process, stop if error detected" | 59 | echo -e "\t\t-c :\tCheck process, stop if error detected" |
| 60 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | 60 | echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" |
| 61 | echo -e "\t\t-r n :\tforce rerun without deleting files" | 61 | echo -e "\t\t-r n :\tforce rerun without deleting files" |
| 62 | exit 1 | 62 | exit 1 |
| 63 | ;; | 63 | ;; |
| 64 | D) | 64 | D) |
| 65 | DEBUG=1 | 65 | DEBUG=1 |
| 66 | ;; | 66 | ;; |
| 67 | v) | 67 | v) |
| 68 | VERBOSE=$OPTARG | 68 | VERBOSE=$OPTARG |
| 69 | ;; | 69 | ;; |
| 70 | c) | 70 | c) |
| 71 | CHECK=1 | 71 | CHECK=1 |
| 72 | ;; | 72 | ;; |
| 73 | f) | 73 | f) |
| 74 | FORKS="--forks $OPTARG" | 74 | FORKS="--forks $OPTARG" |
| 75 | ;; | 75 | ;; |
| 76 | r) | 76 | r) |
| 77 | RERUN=1 | 77 | RERUN=1 |
| 78 | ;; | 78 | ;; |
| 79 | :) | 79 | :) |
| 80 | echo "Option -$OPTARG requires an argument." >&2 | 80 | echo "Option -$OPTARG requires an argument." >&2 |
| 81 | exit 1 | 81 | exit 1 |
| 82 | ;; | 82 | ;; |
| 83 | \?) | 83 | \?) |
| 84 | echo "BAD USAGE : unknow opton -$OPTARG" | 84 | echo "BAD USAGE : unknow opton -$OPTARG" |
| 85 | #exit 1 | 85 | #exit 1 |
| 86 | ;; | 86 | ;; |
| 87 | esac | 87 | esac |
| 88 | done | 88 | done |
| 89 | 89 | ||
| 90 | # mode debug enable | 90 | # mode debug enable |
| 91 | if [ $DEBUG -eq 1 ] | 91 | if [ $DEBUG -eq 1 ] |
| 92 | then | 92 | then |
| 93 | set -x | 93 | set -x |
| 94 | echo -e "## Mode DEBUG ON ##" | 94 | echo -e "## Mode DEBUG ON ##" |
| 95 | fi | 95 | fi |
| 96 | 96 | ||
| 97 | # mode verbose enable | 97 | # mode verbose enable |
| 98 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi | 98 | if [ $VERBOSE -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##" ;fi |
| 99 | 99 | ||
| 100 | # Check USAGE by arguments number | 100 | # Check USAGE by arguments number |
| 101 | if [ $(($#-($OPTIND-1))) -ne 1 ] | 101 | if [ $(($#-($OPTIND-1))) -ne 1 ] |
| 102 | then | 102 | then |
| 103 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" | 103 | echo "BAD USAGE : ExploitConfidencePass.sh [OPTIONS] <INPUT_DIRECTORY>" |
| 104 | echo "$0 -h for more info" | 104 | echo "$0 -h for more info" |
| 105 | exit 1 | 105 | exit 1 |
| 106 | fi | 106 | fi |
| 107 | 107 | ||
| 108 | shift $((OPTIND-1)) | 108 | shift $((OPTIND-1)) |
| 109 | # check input directory - first argument | 109 | # check input directory - first argument |
| 110 | if [ ! -e $1 ] | 110 | if [ ! -e $1 ] |
| 111 | then | 111 | then |
| 112 | print_error "can't open $1" | 112 | print_error "can't open $1" |
| 113 | exit 1 | 113 | exit 1 |
| 114 | fi | 114 | fi |
| 115 | 115 | ||
| 116 | print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1 | 116 | print_info "[${BASENAME}] => ExploitConfPass start | $(date +'%d/%m/%y %H:%M:%S')" 1 |
| 117 | 117 | ||
| 118 | #-------------# | 118 | #-------------# |
| 119 | # GLOBAL VARS # | 119 | # GLOBAL VARS # |
| 120 | #-------------# | 120 | #-------------# |
| 121 | INPUT_DIR=$(readlink -e $1) | 121 | INPUT_DIR=$(readlink -e $1) |
| 122 | OUTPUT_DIR=$INPUT_DIR | 122 | OUTPUT_DIR=$INPUT_DIR |
| 123 | BASENAME=$(basename $OUTPUT_DIR) | 123 | BASENAME=$(basename $OUTPUT_DIR) |
| 124 | SHOW_DIR="$OUTPUT_DIR/shows/" | 124 | SHOW_DIR="$OUTPUT_DIR/shows/" |
| 125 | SOLR_RES="$OUTPUT_DIR/solr/" | 125 | SOLR_RES="$OUTPUT_DIR/solr/" |
| 126 | EXT_LEX="$OUTPUT_DIR/LEX/" | 126 | EXT_LEX="$OUTPUT_DIR/LEX/" |
| 127 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" | 127 | TRIGGER_CONFZONE="$OUTPUT_DIR/trigg/" |
| 128 | LOGFILE="$OUTPUT_DIR/info_exploitconf.log" | 128 | LOGFILE="$OUTPUT_DIR/info_exploitconf.log" |
| 129 | ERRORFILE="$OUTPUT_DIR/error_exploitconf.log" | 129 | ERRORFILE="$OUTPUT_DIR/error_exploitconf.log" |
| 130 | 130 | ||
| 131 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" | 131 | CONFPASS_CONFIG_FILE="$(readlink -e $1)/ConfPass.cfg" |
| 132 | if [ -e $CONFPASS_CONFIG_FILE ] | 132 | if [ -e $CONFPASS_CONFIG_FILE ] |
| 133 | then | 133 | then |
| 134 | { | 134 | { |
| 135 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") | 135 | RES_CONF_DIR=$(cat $CONFPASS_CONFIG_FILE | grep "^RES_CONF_DIR=" | cut -f2 -d"=") |
| 136 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") | 136 | RES_CONF=$(cat $CONFPASS_CONFIG_FILE | grep "^CONF_DIR=" | cut -f2 -d"=") |
| 137 | print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2 | 137 | print_info "[${BASENAME}] Use confidence measure from : $RES_CONF" 2 |
| 138 | } | 138 | } |
| 139 | else | 139 | else |
| 140 | { | 140 | { |
| 141 | print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE" | 141 | print_error "[${BASENAME}] Can't find $CONFPASS_CONFIG_FILE" |
| 142 | print_error "[${BASENAME}] -> use res_p2" | 142 | print_error "[${BASENAME}] -> use res_p2" |
| 143 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" | 143 | RES_CONF_DIR="$INPUT_DIR/conf/res_p2/scored_ctm" |
| 144 | RES_CONF="$INPUT_DIR/conf/res_p2" | 144 | RES_CONF="$INPUT_DIR/conf/res_p2" |
| 145 | } | 145 | } |
| 146 | fi | 146 | fi |
| 147 | 147 | ||
| 148 | mkdir -p $SHOW_DIR > /dev/null 2>&1 | 148 | mkdir -p $SHOW_DIR > /dev/null 2>&1 |
| 149 | mkdir -p $SOLR_RES > /dev/null 2>&1 | 149 | mkdir -p $SOLR_RES > /dev/null 2>&1 |
| 150 | mkdir -p $EXT_LEX > /dev/null 2>&1 | 150 | mkdir -p $EXT_LEX > /dev/null 2>&1 |
| 151 | mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 | 151 | mkdir -p $TRIGGER_CONFZONE > /dev/null 2>&1 |
| 152 | 152 | ||
| 153 | #------------------# | 153 | #------------------# |
| 154 | # Create Workspace # | 154 | # Create Workspace # |
| 155 | #------------------# | 155 | #------------------# |
| 156 | # Lock directory | 156 | # Lock directory |
| 157 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ] | 157 | if [ -e "$OUTPUT_DIR_BASENAME/EXPLOITCONFPASS.lock" ] && [ $RERUN -eq 0 ] |
| 158 | then | 158 | then |
| 159 | print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2 | 159 | print_warn "[${BASENAME}] ExploitConfidencePass is locked -> exit" 2 |
| 160 | exit 1 | 160 | exit 1 |
| 161 | fi | 161 | fi |
| 162 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 | 162 | rm "$OUTPUT_DIR/EXPLOITCONFPASS.unlock" > /dev/null 2>&1 |
| 163 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 | 163 | touch "$OUTPUT_DIR/EXPLOITCONFPASS.lock" > /dev/null 2>&1 |
| 164 | 164 | ||
| 165 | #------# | 165 | #------# |
| 166 | # Save # | 166 | # Save # |
| 167 | #------# | 167 | #------# |
| 168 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg | 168 | cp $EXPLOITCONFIDENCEPASS_CONFIG_FILE $OUTPUT_DIR/ExploitConfPass.cfg |
| 169 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg | 169 | echo "TRIGGER_DIR=$TRIGGER_CONFZONE" >> $OUTPUT_DIR/ExploitConfPass.cfg |
| 170 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg | 170 | echo "TRIGGER_SPEERAL=$TRIGGER_CONFZONE/speeral/" >> $OUTPUT_DIR/ExploitConfPass.cfg |
| 171 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg | 171 | echo "LEX_SPEERAL=$EXT_LEX/speeral/${lexname}_ext" >> $OUTPUT_DIR/ExploitConfPass.cfg |
| 172 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg | 172 | echo "LEX_BINODE_SPEERAL=$EXT_LEX/speeral/${lexname}_ext.bin" >> $OUTPUT_DIR/ExploitConfPass.cfg |
| 173 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1 | 173 | print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/ExploitConfPass.cfg" 1 |
| 174 | 174 | ||
| 175 | #---------------# | 175 | #---------------# |
| 176 | # Check Pass # | 176 | # Check Pass # |
| 177 | #---------------# | 177 | #---------------# |
| 178 | if [ $( ${RES_CONf_DIR}/*.res 2> /dev/null | wc -l) -eq 0 ] | 178 | if [ $( ${RES_CONf_DIR}/*.res 2> /dev/null | wc -l) -eq 0 ] |
| 179 | then | 179 | then |
| 180 | print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass" | 180 | print_error "[${BASENAME}] No Conf Pass res -> exit ExploitConfPass" |
| 181 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ConfPass res in $[RES_CONf_DIR}" ;fi | 181 | if [ $CHECK -eq 1 ]; then print_log_file $ERRORFILE "No ConfPass res in ${RES_CONf_DIR}" ;fi |
| 182 | exit 1 | 182 | exit 1 |
| 183 | fi | 183 | fi |
| 184 | 184 | ||
#-----------------------#
# Segmentation by show  #
#-----------------------#
# Pipeline: scored res -> txt -> POS tags + lemmas. The tagged/lemmatized
# output is merged below with the scored ctm, then segmented show by show
# into one ctm per show.

print_info "[${BASENAME}] Segmentation by show" 1

# Concatenate every scored res into one scored ctm, project the .seg
# segmentation onto it, then normalise ('_' -> ' ') and sort by time.
print_info "[${BASENAME}] Create txt from scored res" 3
cat ${RES_CONF_DIR}/*.res > $INPUT_DIR/$BASENAME.sctm
$SIGMUND_BIN/myConvert.pl $INPUT_DIR/$BASENAME.sctm $INPUT_DIR/$BASENAME.tmp < $INPUT_DIR/$BASENAME.seg
$SCRIPT_PATH/BdlexUC.pl $RULES/basic -f < $INPUT_DIR/$BASENAME.tmp | sed -e "s/_/ /g" | sort -nt 'n' -k '2' > $INPUT_DIR/$BASENAME.txt

# POS-tag and lemmatize the text (the tagger works in ISO-8859-1).
print_info "[${BASENAME}] Tag pos and lem in txt file" 3
iconv -t ISO_8859-1 $INPUT_DIR/$BASENAME.txt > $INPUT_DIR/$BASENAME.tmp
$SIGMUND_BIN/txt2lem.sh $INPUT_DIR/$BASENAME.tmp $INPUT_DIR/$BASENAME.taglem
| 206 | 206 | ||
# Merge the scored ctm with the tagged/lemmatized output into a .ctl file.
print_info "[${BASENAME}] Merge scored ctm with tag pos and lem file" 3
$SCRIPT_PATH/BdlexUC.pl ${RULES}/basic -f < $INPUT_DIR/$BASENAME.sctm | iconv -t ISO_8859-1 | $SCRIPT_PATH/scoredCtmAndTaggedLem2All.pl $INPUT_DIR/$BASENAME.taglem > $INPUT_DIR/$BASENAME.ctl

# Build the XML document expected by the topic segmenter.
print_info "[${BASENAME}] Create xml file and run Topic Seg" 3
$SIGMUND_BIN/tagLem2xml.pl $INPUT_DIR/$BASENAME.taglem $INPUT_DIR/$BASENAME.doc.xml
rm $INPUT_DIR/$BASENAME.tmp #$INPUT_DIR/$BASENAME.taglem

# Lia_topic_seg: group sentences into shows.
# NOTE(review): the Java Test class apparently reads "./0.xml" from the
# current directory (hence the temporary copy) — confirm against its source.
cp $INPUT_DIR/$BASENAME.doc.xml 0.xml
java -cp $LIATOPICSEG/bin Test > $INPUT_DIR/show.seg
$SIGMUND_BIN/toSegEmiss.pl $INPUT_DIR/$BASENAME.show.seg < $INPUT_DIR/show.seg
rm 0.xml $INPUT_DIR/show.seg
| 221 | 221 | ||
# When CHECK is on, report an empty topic segmentation (non-fatal).
if [ $CHECK -eq 1 ] && [ ! -s $INPUT_DIR/$BASENAME.show.seg ]
then
    print_error "[${BASENAME}] No Topic segmentation ! "
    print_error "[${BASENAME}] Check $ERRORFILE "
    print_log_file "$ERRORFILE" "No Topic segmentation in ${BASENAME}.show.seg"
fi

# Split the merged ctl into one ctm per show plus a seg list (.lst) per show.
print_info "[${BASENAME}] Segment ctm into show files and a seg list by show" 1
$SCRIPT_PATH/ctm2show.pl $INPUT_DIR/$BASENAME.ctl $INPUT_DIR/$BASENAME.show.seg $SHOW_DIR
| 235 | 235 | ||
#-----------------------------------------------------------#
# SOLR QUERIES                                              #
#  -> keep confident words (plus useful POS tags)           #
#  -> query SOLR (document & multimedia) over a date window #
#     of 2 days before and after the show                   #
#-----------------------------------------------------------#
print_info "[${BASENAME}] Create SOLR queries and ask SOLR" 1
for show in $(ls $SHOW_DIR/*.ctm)
do
    bn=$(basename $show .ctm)
    # Remove words with low confidence and keep useful tagger words.
    # FIX: "{3,5}" is literal braces in a basic regex; the intended
    # interval ("3 to 5 repetitions") must be written \{3,5\}.
    cat $show | $SCRIPT_PATH/KeepConfZone.pl | grep -e "MOTINC\|NMS\|NMP\|NFS\|NFP\|X[A-Z]\{3,5\}" | cut -f3 -d' ' > "$SHOW_DIR/$bn.confzone"
    # Date window: 2 days before/after the show (date encoded in BASENAME).
    datePattern=`$SCRIPT_PATH/daybefore2after.sh $(echo $BASENAME | cut -c1-6)`
    # Create the SOLR queries (SOLR expects UTF-8).
    cat $SHOW_DIR/$bn".confzone" | $SCRIPT_PATH/GenerateSOLRQueries.pl | iconv -f ISO_8859-1 -t UTF-8 > "$SHOW_DIR/$bn.queries"
    # Ask the SOLR DB only when the query file is non-empty.
    if [ $(wc -w "$SHOW_DIR/$bn.queries" | cut -f1 -d' ') -gt 0 ]; then
        query=$(cat $SHOW_DIR/$bn.queries)"&fq=docDate:[$datePattern]"
        echo $query > $SHOW_DIR/$bn.queries
        # FIX: 'prnt_info' was a typo for 'print_info'.
        print_info "python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp" 3
        python $SCRIPT_PATH/ProcessSOLRQueries.py $SHOW_DIR/$bn.queries $SOLR_RES/$bn.keywords.tmp $SOLR_RES/$bn.txt.tmp
        cat $SOLR_RES/$bn.keywords.tmp | sort -u > $SOLR_RES/$bn.keywords
        cat $SOLR_RES/$bn.txt.tmp | sort -u > $SOLR_RES/$bn.txt
        rm $SOLR_RES/*.tmp > /dev/null 2>&1
    fi

    # Missing answers usually mean the SOLR server is unreachable.
    if [ $CHECK -eq 1 ]
    then
        if [ ! -e $SOLR_RES/$bn.keywords ] || [ ! -e $SOLR_RES/$bn.txt ]
        then
            print_warn "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !" 2
            print_log_file "$LOGFILE" "$bn.keywords and $bn.txt are empty !\nMaybe SOLR server is down !"
        fi
    fi

done
| 275 | 275 | ||
#-----------------------------------------------------------------------------
# Build trigger files
#  1) keywords are automatically boosted in the non-confident zones of the
#     current res; confident zones are boosted, previous words in sensible
#     zones are penalized
#  2) OOVs are extracted + phonetized
#  3) OOVs are searched acoustically in the current segment
#  4) one .trigg file is generated per segment
#-----------------------------------------------------------------------------
print_info "[${BASENAME}] Build trigger files" 1
for i in `ls $SOLR_RES/*.keywords`
do
    basename=`basename $i .keywords`

    # Tokenize the keywords and produce a coverage report (default filters).
    print_info "[${BASENAME}] keywords filtering and produce coverage report" 3
    cat $i | $SCRIPT_PATH/CleanFilter.sh | ${SCRIPT_PATH}/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t |\
    $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok
    # lighter alternative filter chain:
    #cat $i | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex | $SCRIPT_PATH/CoverageReportMaker.pl --out $SOLR_RES/${basename}_tmp_report $LEXICON.bdlex_tok

    # Extract "real" OOVs and phonetize them (small ad-hoc filters to limit
    # noise: drop comments, all-caps, digits and very short tokens).
    print_info "[${BASENAME}] Extract OOV and phonetize them" 3
    ${SCRIPT_PATH}/FindNormRules.pl $SOLR_RES/${basename}_tmp_report/report.oov $LEXICON.bdlex_tok | cut -f3 | grep -v "#" | grep -v "^[A-Z]\+$" | grep -v "^[0-9]" | grep --perl-regex -v "^([a-z']){1,3}$" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -f | iconv -t ISO_8859-1 -f UTF-8 | ${LIA_LTBOX}/lia_phon/script/lia_lex2phon_variante | grep -v "core dumped" | cut -d"[" -f1 | sort -u | ${SCRIPT_PATH}/PhonFormatter.pl | iconv -f ISO_8859-1 -t UTF-8 | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $SOLR_RES/${basename}.phon_oov

    # Search INVOC & OOV words in the current lattice.
    # FIX: dropped the duplicated '-v' that was passed twice to the first
    # perl-regex grep ('grep -v --perl-regex -v ...') — redundant and confusing.
    print_info "[${BASENAME}] Search INVOC and OOV in the current lattice" 3
    cat $SOLR_RES/${basename}_tmp_report/report.invoc | grep -v "\b0" | cut -f1 | grep --perl-regex -v "^[a-zA-Z']{1,3}$" | grep -v --perl-regex "^[a-zA-Z0-9]{1,3}$" | grep -v "<s>" | grep -v "</s>" | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $TRIGGER_CONFZONE/$basename.tosearch
    cat $SOLR_RES/${basename}.phon_oov | cut -f1 >> $TRIGGER_CONFZONE/$basename.tosearch

    # For each lattice (treil) of the show:
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        # NOTE(review): $OUTPUT_REDIRECTION is appended after the '>' redirect;
        # presumably it holds something like "2>>$LOGFILE" — verify.
        $OTMEDIA_HOME/tools/QUOTE_FINDER/bin/acousticFinder ${LEXICON}.speer_phon $RES_CONF/wlat/$baseseg.wlat $TRIGGER_CONFZONE/${basename}.tosearch $SOLR_RES/$basename.phon_oov > $TRIGGER_CONFZONE/$baseseg.acousticlyfound $OUTPUT_REDIRECTION
        # Produce the boost (.trigg) file for the next decoding pass.
        print_info "[${BASENAME}] Produce trigg file : $baseseg " 3
        cat $RES_CONF_DIR/$baseseg.res | $SCRIPT_PATH/ScoreCtm2trigg.pl $TRIGGER_CONFZONE/$baseseg.acousticlyfound > $TRIGGER_CONFZONE/$baseseg.trigg
    done

done
| 328 | 328 | ||
#-----------------------------------------------------------------------------
# Build the extended SPEERAL lexicon
#  1) merge OOVs + LEXICON
#  2) related texts are collected in order to find the in-vocab word
#     maximizing the ppl (LM probability)
#  3) the current lexicon is extended with all the valid OOVs
#-----------------------------------------------------------------------------
print_info "[${BASENAME}] Build extended Speeral Lexicon" 1
mkdir -p $EXT_LEX/final $EXT_LEX/tmp $EXT_LEX/tmp/txt

# Collect, per show, the acoustically-found OOVs and their phonetisation.
print_info "[${BASENAME}] Get all OOV and retrieve all phonetisation" 3
for i in `ls $SOLR_RES/*.phon_oov`
do
    basename=`basename $i .phon_oov`

    rm $EXT_LEX/$basename.acousticlyfound 2> /dev/null
    # Gather the acoustically-found words of every segment of the show...
    for baseseg in $(cat "$SHOW_DIR/$basename.lst")
    do
        cat $TRIGGER_CONFZONE/$baseseg.acousticlyfound | cut -f1 | cut -f2 -d"=" >> $EXT_LEX/$basename.acousticlyfound
    done
    # ...then deduplicate the list in place.
    cat $EXT_LEX/$basename.acousticlyfound | sort -u > $EXT_LEX/.tmp
    mv $EXT_LEX/.tmp $EXT_LEX/$basename.acousticlyfound

    # OOVs really added = OOVs that were also found acoustically.
    cat $SOLR_RES/$basename.phon_oov | cut -f1 | sort -u > $EXT_LEX/$basename.oov
    $SCRIPT_PATH/intersec.pl $EXT_LEX/$basename.oov $EXT_LEX/$basename.acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound

    # Retrieve every phonetisation of those OOVs.
    cat $SOLR_RES/${basename}.phon_oov | $SCRIPT_PATH/LexPhonFilter.pl $EXT_LEX/$basename.oov_acousticlyfound > $EXT_LEX/$basename.oov_acousticlyfound_phon
done
| 366 | 366 | ||
# Merge all OOVs and their phonetisations into the final lexicon files.
print_info "[${BASENAME}] Merge OOV and their phonetisation" 3
lexname=$(basename $LEXICON)
cat $EXT_LEX/*.oov_acousticlyfound_phon | sort -u > $EXT_LEX/final/all.oov_acousticlyfound_phon
# NOTE(review): "{3}" here (vs "{1,3}" used elsewhere) only drops words of
# exactly 3 characters — confirm this is intentional.
cat $EXT_LEX/*.oov_acousticlyfound | sort -u | grep --perl-regex -v "^([a-z']){3}$" > $EXT_LEX/final/all.oov_acousticlyfound
$SCRIPT_PATH/MergeLexicon.pl $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/final/${lexname}_ext.phon

# Collect and clean the text retrieved from SOLR (default filter chain).
print_info "[${BASENAME}] Collect and clean SOLR txt answers" 2
cat $SOLR_RES/*.txt | $SCRIPT_PATH/CleanFilter.sh | $SCRIPT_PATH/ApplyCorrectionRules.pl ${LEXICON}.regex | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t > $EXT_LEX/final/all.bdlex_txt
# lighter alternative filter chain:
#cat $SOLR_RES/*.txt | $SCRIPT_PATH/BdlexUC.pl $RULES/basic -t | sed -f $RULES/preprocess.regex | sed -f $RULES/lastprocess.regex > $EXT_LEX/final/all.bdlex_txt

# Construct the map file.
# Expected format: <WORD1_STRING> <CANDIDATE1_STRING> <PHON_1>
print_info "[${BASENAME}] Construct map file" 3
rm -f $EXT_LEX/final/${lexname}_ext.map 2>/dev/null
rm -f $EXT_LEX/final/${lexname}.unvalid_oov 2>/dev/null
| 396 | 396 | ||
# For every acoustically-found OOV: collect the texts containing it, pick an
# in-vocabulary candidate maximizing the LM probability over those texts, and
# emit "<oov> <candidate> <phon>" lines into the extended-lexicon map.
# OOVs without text or without candidate go to ${lexname}.unvalid_oov.
while read oov
do
    oov=`echo $oov | sed "s/\n//g"`
    # (tag lookup kept for reference)
    #oov_tag=`grep --perl-regex "^$oov\t" $DYNAMIC_TAGSTATS/all.tags | cut -f2`

    # Collect the sentences containing this OOV (at most 40 words each).
    print_info "[${BASENAME}] Collect text containing the oov" 3
    cat $EXT_LEX/final/all.bdlex_txt | grep --perl-regex " $oov " | $SCRIPT_PATH/NbMaxWordsFilter.pl 40 |uniq > $EXT_LEX/tmp/txt/$oov.bdlex_txt
    if [ -f $EXT_LEX/tmp/txt/$oov.bdlex_txt ]; then
        nbWords=`wc -l $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
        if [ $nbWords -eq 0 ]; then
            print_warn "[${BASENAME}] UNVALID OOV: $oov => $nbWords occurrences" 2
            echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
        else
            # Candidate = in-vocab word maximizing the ppl over the collected text.
            print_info `$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "` 3
            candidate=`$SPEERAL_PATH/bin/getCandidate $SPEER_LM_PATH $SPEER_LM_BASENAME $oov $CANDIDATE_LEXICON $EXT_LEX/tmp/txt/$oov.bdlex_txt | cut -f1 -d" "`
            if [ ! "$candidate" == "" ]; then
                grep --perl-regex "^$oov\t" $EXT_LEX/final/all.oov_acousticlyfound_phon > $EXT_LEX/tmp/$oov.phon
                while read phonLine
                do
                    # <word> <phon>  =>  <word> <candidate> <phon>
                    echo "$phonLine" | sed "s|\t|\t$candidate\t|" >> $EXT_LEX/final/${lexname}_ext.map
                done < $EXT_LEX/tmp/$oov.phon
            else
                print_warn "[${BASENAME}] UNVALID OOV: $oov => no availaible Candidate word in LM" 2
                echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
            fi
        fi
    else
        print_warn "[${BASENAME}] UNVALID OOV: $oov" 2
        echo "$oov" >> $EXT_LEX/final/${lexname}.unvalid_oov
    fi
done < $EXT_LEX/final/all.oov_acousticlyfound
| 438 | 438 | ||
#
### Speeral
#

lexname=`basename $LEXICON`

# Build the final trigger files: drop every line mentioning an OOV that was
# rejected above (unvalid_oov).
print_info "[${BASENAME}] Clean trigg files" 3
mkdir -p $TRIGGER_CONFZONE/speeral/ 2> /dev/null
mkdir -p $EXT_LEX/speeral/ 2> /dev/null
for i in `ls $TRIGGER_CONFZONE/*.trigg`
do
    basename=`basename $i .trigg`
    cat $i | $SCRIPT_PATH/RemoveLineContaining.pl $EXT_LEX/$lexname.unvalid_oov > $TRIGGER_CONFZONE/speeral/$basename.trigg
done

# Compile the extended Speeral lexicon from the map file.
print_info "[${BASENAME}] Compile Speeral extended lexicon" 3
print_info "$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext" 3
$SPEERAL_PATH/bin/buildmappedbinode $LEXICON.bdlex_phon $EXT_LEX/final/${lexname}_ext.map $AM_SKL $EXT_LEX/speeral/${lexname}_ext

# Abort if the compiled lexicon fails the sanity check (status 1).
if [ $CHECK -eq 1 ]
then
    check_exploitconfpass_lex_check "${EXT_LEX}/speeral/${lexname}_ext"
    rv=$?
    if [ $rv -eq 1 ]
    then
        print_error "[${BASENAME}] Building Speeral Lexicon $INPUT_DIR -> exit"
        print_error "[${BASENAME}] Check $ERRORFILE"
        print_log_file $ERRORFILE "ERROR : Building Speeral Lexicon $INPUT_DIR"
        print_log_file $ERRORFILE "ERROR : ${EXT_LEX}/speeral/${lexname}_ext Empty after buildmappedbinode ?"
        exit 1;
    fi
fi
| 474 | 474 | ||
| 475 | 475 | ||
#-------#
# CLOSE #
#-------#
# Pass finished without a fatal error.
print_info "[${BASENAME}] <= ExploitConfidencePass End | $(date +'%d/%m/%y %H:%M:%S')" 1

# Unlock the output directory (rename the lock marker to an unlock marker).
mv "$OUTPUT_DIR/EXPLOITCONFPASS.lock" "$OUTPUT_DIR/EXPLOITCONFPASS.unlock"
| 484 | 484 | ||
| 485 | 485 | ||
| 486 | 486 |