Blame view
egs/gp/s1/steps/make_mfcc.sh
3.33 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
#!/bin/bash -u

# Copyright 2012  Arnab Ghoshal
# Copyright 2010-2011  Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Computes MFCC features for the recordings listed in <data-dir>/wav.scp.
#
# The wav.scp list is split into one piece per job (written to <log-dir>),
# 'compute-mfcc-feats' is run on every piece through 'submit_jobs.sh' using
# the configuration in conf/mfcc.conf, and the per-job .scp files written to
# <abs-path-to-mfccdir> are concatenated into <data-dir>/feats.scp.
#
# To be run from .. (one directory up from here)

# The shebang's -u is lost when the script is started as "bash make_mfcc.sh",
# so enable it explicitly as well.
set -u

# Prints all arguments to stderr (backslash escapes are interpreted) and
# terminates the current shell with status 1.
function error_exit () {
  echo -e "$@" >&2; exit 1;
}

# Validates an integer argument and echoes it in canonical decimal form.
# Arguments: $1 - the value, either bare ("8") or as "--switch=8".
# Outputs:   the integer on stdout; an error message on stderr if invalid.
# Returns:   non-zero if $1 is not an integer.  This function is meant to be
#            used inside a command substitution, where error_exit terminates
#            only the subshell, so callers must check the exit status.
function readint () {
  local retval=${1-}
  local int_re='^-?(0|[1-9][0-9]*)$'
  retval=${retval/#*=/}  # In case --switch=ARG format was used
  # Strip all leading 0's (but keep a lone "0"), so that the value is never
  # interpreted as octal in later arithmetic contexts.
  while [[ "$retval" == 0?* ]]; do
    retval=${retval#0}
  done
  [[ "$retval" =~ $int_re ]] \
    || error_exit "Argument \"$retval\" not an integer."
  echo "$retval"
}

nj=4       # Default number of jobs
qcmd=""    # Command for submitting a job to a grid engine (as --qcmd=...)
sjopts=""  # Options for the submit_jobs.sh script

PROG=$(basename "$0")
usage="Usage: $PROG [options] <data-dir> <log-dir> <abs-path-to-mfccdir>

Options:
  --help\t\tPrint this message and exit
  --num-jobs INT\tNumber of parallel jobs to run (default=$nj).
  --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.
  --sjopts STRING\tOptions for the 'submit_jobs.sh' script
"

while [ $# -gt 0 ]; do
  # Strip any leading spaces from the argument before matching it:
  # ${1%%[! ]*} is the run of leading spaces, which is then removed as a prefix.
  opt=${1#"${1%%[! ]*}"}
  case "$opt" in
    --help)
      echo -e "$usage"; exit 0 ;;
    --num-jobs)
      [ $# -ge 2 ] || error_exit "Option --num-jobs requires an argument."
      shift
      # readint runs in a subshell: propagate its failure explicitly.
      nj=$(readint "$1") || exit 1
      [ "$nj" -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."
      shift ;;
    --qcmd)
      [ $# -ge 2 ] || error_exit "Option --qcmd requires an argument."
      shift; qcmd="--qcmd=${1}"; shift ;;
    --sjopts)
      [ $# -ge 2 ] || error_exit "Option --sjopts requires an argument."
      shift; sjopts="$1"; shift ;;
    -*)
      echo "Unknown argument: $1, exiting" >&2
      echo -e "$usage" >&2
      exit 1 ;;
    *)
      break ;;   # end of options: interpreted as the data-dir
  esac
done

if [ $# != 3 ]; then
  error_exit "$usage"
fi

[ -f path.sh ] && . ./path.sh

data=$1
logdir=$2
mfccdir=$3

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfccdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/wav.scp
config=conf/mfcc.conf

# Both the list of recordings and the MFCC configuration must exist.
for f in "$scp" "$config"; do
  if [ ! -f "$f" ]; then
    echo "make_mfcc.sh: no such file $f" >&2
    exit 1;
  fi
done

# One split of wav.scp per job: $logdir/wav1.scp ... $logdir/wav$nj.scp.
# (nj >= 1 is guaranteed above, so the array is never empty.)
split_scps=()
for ((n=1; n<=nj; n++)); do
  split_scps+=("$logdir/wav$n.scp")
done

split_scp.pl "$scp" "${split_scps[@]}" || exit 1;

# TASK_ID appears literally in the arguments below; it is presumably replaced
# with the job index by submit_jobs.sh -- TODO confirm against that script.
# "$qcmd" is passed even when empty, exactly as before.
# $sjopts is deliberately unquoted so that it can carry several options.
submit_jobs.sh "$qcmd" --njobs="$nj" --log="$logdir/make_mfcc.TASK_ID.log" $sjopts \
  compute-mfcc-feats --verbose=2 --config="$config" "scp:$logdir/wavTASK_ID.scp" \
  "ark,scp:$mfccdir/mfcc_$name.TASK_ID.ark,$mfccdir/mfcc_$name.TASK_ID.scp" \
  || error_exit "Error producing mfcc features for $name:$(tail "$logdir"/make_mfcc.*.log)"

# concatenate the .scp files together.
rm -f "$data/feats.scp"
for ((n=1; n<=nj; n++)); do
  cat "$mfccdir/mfcc_$name.$n.scp" >> "$data/feats.scp" \
    || error_exit "Error appending $mfccdir/mfcc_$name.$n.scp to $data/feats.scp"
done

# rm $logdir/wav*.scp

echo "Succeeded creating MFCC features for $name"