Blame view
egs/apiai_decode/s5/recognize-wav.sh
2.24 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
#!/bin/bash
# Copyright 2016 Api.ai (Author: Ilya Platonov)
# Apache 2.0

# This script demonstrates kaldi decoding using a pretrained model. It decodes
# the wav files given as command-line arguments (they are forwarded verbatim
# to local/create-corpus.sh).
#
# IMPORTANT: wav files must be in 16kHz, 16 bit little-endian format.
#
# This script tries to follow what other scripts are doing in terms of
# directory structures and data handling.
#
# Use ./download-model.sh script to download asr model
# See https://github.com/api-ai/api-ai-english-asr-model for details about a model and how to use it.
#
# Usage: ./recognize-wav.sh <wav-file>...
# Outputs: features under exp/mfcc (logs in exp/make_mfcc), lattices in exp/lat.1.
# Exit status: 1 if a model file or kaldi binary is missing, or if corpus
#   creation or feature extraction fails; otherwise the decoder's status.

. ./path.sh

MODEL_DIR="exp/api.ai-model"
DATA_DIR="data/test-corpus"

echo "///////"
echo "// IMPORTANT: wav files must be in 16kHz, 16 bit little-endian format."
echo "//////"

# Fail early if any model file used below is missing. mfcc.conf is included
# because it is passed to steps/make_mfcc.sh further down; without this check
# its absence would be reported as a wav-format problem.
for file in final.mdl HCLG.fst words.txt frame_subsampling_factor mfcc.conf; do
  if [ ! -f "$MODEL_DIR/$file" ]; then
    echo >&2 "$MODEL_DIR/$file not found, use ./download-model.sh"
    exit 1
  fi
done

# The decoding pipeline below invokes these kaldi binaries by name, so they
# must be resolvable through PATH (presumably set up by path.sh).
for app in nnet3-latgen-faster apply-cmvn lattice-scale; do
  command -v "$app" >/dev/null 2>&1 || {
    echo >&2 "$app not found, is kaldi compiled?"
    exit 1
  }
done

# "$@" must stay quoted so that wav paths containing spaces or glob
# characters reach create-corpus.sh as single, unmodified arguments.
local/create-corpus.sh "$DATA_DIR" "$@" || exit 1

echo "///////"
echo "// Computing mfcc and cmvn (cmvn is not really used)"
echo "//////"

steps/make_mfcc.sh --nj 1 --mfcc-config "$MODEL_DIR/mfcc.conf" \
  --cmd "run.pl" "$DATA_DIR" exp/make_mfcc exp/mfcc || {
  echo "Unable to calculate mfcc, ensure 16kHz, 16 bit little-endian wav format or see log"
  exit 1
}
steps/compute_cmvn_stats.sh "$DATA_DIR" exp/make_mfcc/ exp/mfcc || exit 1

echo "///////"
echo "// Doing decoding (see log for results)"
echo "//////"

frame_subsampling_factor=$(cat "$MODEL_DIR/frame_subsampling_factor") || exit 1

# apply-cmvn is run with --norm-means=false --norm-vars=false, i.e. the cmvn
# stats are read but no normalisation is requested (see the "not really used"
# banner above). The decoder's lattices are piped through lattice-scale and
# written to exp/lat.1.
nnet3-latgen-faster \
  --frame-subsampling-factor="$frame_subsampling_factor" \
  --frames-per-chunk=50 \
  --extra-left-context=0 \
  --extra-right-context=0 \
  --extra-left-context-initial=-1 \
  --extra-right-context-final=-1 \
  --minimize=false --max-active=7000 --min-active=200 \
  --beam=15.0 --lattice-beam=8.0 \
  --acoustic-scale=1.0 --allow-partial=true \
  --word-symbol-table="$MODEL_DIR/words.txt" \
  "$MODEL_DIR/final.mdl" "$MODEL_DIR/HCLG.fst" \
  "ark,s,cs:apply-cmvn --norm-means=false --norm-vars=false --utt2spk=ark:$DATA_DIR/utt2spk scp:$DATA_DIR/cmvn.scp scp:$DATA_DIR/feats.scp ark:- |" \
  "ark:|lattice-scale --acoustic-scale=10.0 ark:- ark:- >exp/lat.1"