Blame view
egs/gp/s1/utils/htk2kaldi_feats.sh
3.14 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
#!/bin/bash -u

# Copyright 2012  Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Converts HTK features to Kaldi format.
#
# Usage: htk2kaldi_feats.sh [options] <htk-file-list> <log-dir> <out-dir> <out-list>
#
# The HTK file list is split into one piece per job, every piece is converted
# with copy-feats (run through submit_jobs.sh), and the per-job .scp files
# written to <out-dir> are concatenated into <out-list>.

# The shebang's -u is lost when the script is started as "bash script.sh",
# so request unset-variable checking explicitly as well.
set -u

# Prints all arguments to stderr (backslash escapes are interpreted) and
# terminates the current shell with status 1.
function error_exit () {
  echo -e "$@" >&2; exit 1;
}

# Validates an integer argument and prints it in canonical form on stdout.
# Arguments: $1 - the value, either "INT" or "--switch=INT".
# Outputs:   the integer without redundant leading zeros (e.g. 007 -> 7).
# Exits with status 1 (via error_exit) when the value is not an integer.
# NB: when called inside $(...), that exit only ends the subshell, so callers
# must check the status of the command substitution.
function readint () {
  local retval=${1-}
  local sign=""
  retval=${retval##*=}   # In case --switch=ARG format was used
  if [[ "$retval" == -* ]]; then
    sign="-"
    retval=${retval#-}
  fi
  [[ "$retval" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$sign$retval\" not an integer."
  # Strip every redundant leading 0 but keep a lone "0" intact.
  while [[ "$retval" == 0?* ]]; do
    retval=${retval#0}
  done
  if [[ "$retval" == 0 ]]; then
    sign=""   # Do not print "-0".
  fi
  echo "$sign$retval"
}

nj=1       # Default number of jobs
qcmd=""    # Command for submitting a job to a grid engine (as a --qcmd= switch)
sjopts=""  # Options for the submit_jobs.sh script

PROG=$(basename "$0")
usage="Usage: $PROG [options] <htk-file-list> <log-dir> <out-dir> <out-list>
Options:
  --help\t\tPrint this message and exit
  --num-jobs INT\tNumber of parallel jobs to run (default=$nj).
  --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.
  --sjopts STRING\tOptions for the 'submit_jobs.sh' script
";

while [ $# -gt 0 ]; do
  # Strip any leading spaces from the argument before matching it.
  opt=$1
  opt=${opt#"${opt%%[! ]*}"}
  case "$opt" in
    --help)
      echo -e "$usage"; exit 0 ;;
    --num-jobs)
      [ $# -ge 2 ] || error_exit "--num-jobs requires an argument."
      shift
      # readint runs in a subshell: propagate its failure explicitly.
      nj=$(readint "$1") || exit 1
      [ "$nj" -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."
      shift ;;
    --qcmd)
      [ $# -ge 2 ] || error_exit "--qcmd requires an argument."
      shift; qcmd="--qcmd=${1}"; shift ;;
    --sjopts)
      [ $# -ge 2 ] || error_exit "--sjopts requires an argument."
      shift; sjopts="$1"; shift ;;
    -*)
      echo "Unknown argument: $1, exiting"; echo -e "$usage"; exit 1 ;;
    *)
      break ;;   # end of options: interpreted as the htk-file-list
  esac
done

if [ $# != 4 ]; then
  error_exit "$usage";
fi

[ -f path.sh ] && . ./path.sh

htklist=$1
logdir=$2
outdir=$3
outlist=$4

# use "name" as part of name of the archive.
name=$(basename "$htklist")

mkdir -p "$outdir" || error_exit "Cannot create '$outdir'.";
mkdir -p "$logdir" || error_exit "Cannot create '$logdir'.";

# One split list per job: $logdir/htk1.scp ... $logdir/htk$nj.scp.
# An array keeps every path a single word even if $logdir contains spaces.
split_scps=()
for ((n=1; n<=nj; n++)); do
  split_scps+=("$logdir/htk$n.scp")
done
split_scp.pl "$htklist" "${split_scps[@]}" || exit 1;

# TASK_ID is passed literally; it is presumably substituted with the job
# index by submit_jobs.sh (not visible from this file).
# NOTE(review): "$qcmd" is passed even when it is empty, exactly as before;
# confirm that submit_jobs.sh tolerates an empty first argument.
# $sjopts is intentionally unquoted: it holds a list of separate options.
# shellcheck disable=SC2086
if ! submit_jobs.sh "$qcmd" --njobs="$nj" \
    --log="$logdir/htk2kaldi.TASK_ID.log" $sjopts \
    copy-feats --verbose=2 --htk-in "scp:$logdir/htkTASK_ID.scp" \
    "ark,scp:$outdir/kaldi_$name.TASK_ID.ark,$outdir/kaldi_$name.TASK_ID.scp"
then
  # Show the tail of every job log with its line structure preserved.
  echo "Error converting HTK features:" >&2
  tail "$logdir"/htk2kaldi.*.log >&2
  exit 1
fi

# concatenate the .scp files together.
rm -f "$outlist"
for ((n=1; n<=nj; n++)); do
  cat "$outdir/kaldi_$name.$n.scp" >> "$outlist" \
    || error_exit "Cannot append '$outdir/kaldi_$name.$n.scp' to '$outlist'."
done
rm "$logdir"/htk*.scp

echo "Succeeded in copying HTK featurs to Kaldi format."