#!/bin/bash
# run-5-test-bnf.sh
#
# Prepares bottleneck (BNF) and fMLLR features for one test set, appends
# them, decodes with the exp_BNF systems (tri6, sgmm7, sgmm7_mmi_b0.1) and
# hands the resulting decode directories to local/run_kws_stt_task.sh.
#
# NOTE: the interpreter line has to be the very first line of the file; a
# stray "name size" listing line used to sit in front of it and would have
# been executed as a command.

# Stop at the first failing command, including failures inside pipelines
# (the decoder invocations further down are piped through tee).
set -e
set -o pipefail

# Recipe-wide settings and the language-specific configuration. Both paths
# are relative, so the script has to be started from the recipe directory.
. conf/common_vars.sh || exit 1;
. ./lang.conf || exit 1;


# Defaults for the command-line options. They are assigned before
# utils/parse_options.sh is sourced; presumably that helper overrides each
# one from a matching --name option (confirm against the helper).
tmpdir=$(pwd)     # root under which the feature directories are created
skip_stt=false    # forwarded as --skip-stt to local/run_kws_stt_task.sh
skip_kws=false    # forwarded as --skip-kws to local/run_kws_stt_task.sh
fast_path=true    # true: skip scoring of the intermediate tri6/sgmm7 decodes
data_only=false   # true: stop as soon as the features have been prepared
type=dev10h       # data set to process (dev10h|dev2h|eval|shadow)

. utils/parse_options.sh

# Returns 0 when $1 is one of the data set names this script knows about,
# 1 otherwise.
is_known_type() {
  case "$1" in
    dev10h|dev2h|eval|shadow) return 0 ;;
    *) return 1 ;;
  esac
}

# Everything is configured through --options; any positional argument that
# is still present at this point is treated as a usage error.
if [ $# -ne 0 ]; then
  echo "Usage: $(basename "$0") --type (dev10h|dev2h|eval|shadow)"
  exit 1
fi

# An unknown type only triggers a warning; the script deliberately carries
# on so that other data sets can still be processed.
if ! is_known_type "$type"; then
  echo "Warning: invalid variable type=${type}, valid values are dev10h|dev2h|eval|shadow"
  echo "Hope you know what you are doing!"
fi


# Names derived from the selected data set:
#   dirid   - suffix shared by data/<dirid> and exp/tri5/decode_<dirid>
#   datadir - data directory that receives the appended features
dirid=${type}.uem
datadir=data/app_${dirid}

# The source data directory and its tri5 decode directory must both exist.
for required in data/${dirid} exp/tri5/decode_${dirid}; do
  if [ ! -d ${required} ]; then
    echo "No such directory ${required}"
    exit 1
  fi
done

# Reuse the job count recorded by the tri5 decode (typically 64).
my_nj=$(cat exp/tri5/decode_${dirid}/num_jobs) || exit 1



# Bottleneck features for the test set, written to data/bnf_${dirid}.
# The stage is skipped once its .done marker exists.
if [ ! -f data/bnf_${dirid}/.done ]; then
  # The directory lives under $tmpdir and is exposed as data/bnf_${dirid}
  # through a symlink. When $tmpdir is the current directory (the default),
  # mkdir has already created data/bnf_${dirid} itself; running ln -s on an
  # existing directory would drop a self-referencing link *inside* it, so
  # the link is only created when the path is still missing.
  # mkdir is a plain statement (not part of an && chain) so that a failure
  # aborts the script under set -e instead of being silently skipped.
  if [ ! -d data/bnf_${dirid} ]; then
    mkdir -p "$tmpdir/data/bnf_${dirid}"
    [ -e data/bnf_${dirid} ] || \
      ln -s "$tmpdir/data/bnf_${dirid}" data/bnf_${dirid}
  fi

  steps_BNF/make_bnf_feat.sh --nj $my_nj --cmd "$decode_cmd" \
    --transform_dir exp/tri5/decode_${dirid}/ \
    data/${dirid} data/bnf_${dirid} exp_BNF/bnf_dnn \
    exp/tri5_ali exp_BNF/make_bnf_${dirid}

  touch data/bnf_${dirid}/.done
fi

# fMLLR-transformed features for the test set, written to data/sat_${dirid}
# using the transforms from exp/tri5/decode_${dirid}.
# The stage is skipped once its .done marker exists.
if [ ! -f data/sat_${dirid}/.done ]; then
  # The directory lives under $tmpdir and is exposed as data/sat_${dirid}
  # through a symlink. When $tmpdir is the current directory (the default),
  # mkdir has already created data/sat_${dirid} itself; running ln -s on an
  # existing directory would drop a self-referencing link *inside* it, so
  # the link is only created when the path is still missing.
  # mkdir is a plain statement (not part of an && chain) so that a failure
  # aborts the script under set -e instead of being silently skipped.
  if [ ! -d data/sat_${dirid} ]; then
    mkdir -p "$tmpdir/data/sat_${dirid}"
    [ -e data/sat_${dirid} ] || \
      ln -s "$tmpdir/data/sat_${dirid}" data/sat_${dirid}
  fi

  steps/make_fmllr_feats.sh --cmd "$decode_cmd -tc 10" --nj $my_nj \
    --transform-dir exp/tri5/decode_${dirid}  \
    data/sat_${dirid} data/${dirid} exp/tri5 \
    exp_BNF/make_fmllr_feats_${dirid}/log plp_processed

  touch data/sat_${dirid}/.done
fi

# Combine the BNF and fMLLR features into ${datadir}.
# The stage is skipped once its .done marker exists.
if [ ! -f ${datadir}/.done ]; then
  # The directory lives under $tmpdir and is exposed as ${datadir} through a
  # symlink. When $tmpdir is the current directory (the default), mkdir has
  # already created ${datadir} itself; running ln -s on an existing
  # directory would drop a self-referencing link *inside* it, so the link is
  # only created when the path is still missing.
  # mkdir is a plain statement (not part of an && chain) so that a failure
  # aborts the script under set -e instead of being silently skipped.
  if [ ! -d ${datadir} ]; then
    mkdir -p "$tmpdir/${datadir}"
    [ -e ${datadir} ] || ln -s "$tmpdir/${datadir}" ${datadir}
  fi

  steps/append_feats.sh --cmd "$decode_cmd" --nj 4 \
    data/bnf_${dirid} data/sat_${dirid} ${datadir} \
    exp_BNF/append_feats/log plp_processed/

  # --fake: presumably writes placeholder CMVN stats rather than computing
  # real ones (confirm against steps/compute_cmvn_stats.sh).
  steps/compute_cmvn_stats.sh --fake \
    ${datadir} exp/make_plp/app_${dirid} plp_processed

  # Share the keyword-search setup of the source data directory. If the
  # stage is rerun and the link is already there, a second ln -s would place
  # a new link inside the linked kws directory, so it is created only once.
  if [ ! -e ${datadir}/kws ] && [ ! -L ${datadir}/kws ]; then
    ln -s "$(pwd)/data/${dirid}/kws" ${datadir}/kws
  fi
  touch ${datadir}/.done
fi

# With --data-only true the script ends here, after feature preparation.
# $data_only holds the word true or false and is run as a command.
! ${data_only} || {
  echo "Exiting, as data-only was requested..."
  exit 0
}


####################################################################
##
## fMLLR decoding with the exp_BNF/tri6 system
##
####################################################################
decode=exp_BNF/tri6/decode_${dirid}
if [[ ! -f ${decode}/.done ]]; then
  printf '%s\n' ---------------------------------------------------------------------
  echo "Spawning decoding with SAT models  on" $(date)
  printf '%s\n' ---------------------------------------------------------------------

  # Build the decoding graph and keep a copy of the output as a log.
  utils/mkgraph.sh data/lang exp_BNF/tri6 exp_BNF/tri6/graph \
    | tee exp_BNF/tri6/mkgraph.log

  mkdir -p ${decode}
  # Only the transforms produced by this pass matter, not its lattices,
  # so narrow beams are used to keep the decoding time down.
  steps/decode_fmllr_extra.sh \
    --skip-scoring true \
    --beam 10 --lattice-beam 4 \
    --nj $my_nj --cmd "$decode_cmd" \
    "${decode_extra_opts[@]}" \
    exp_BNF/tri6/graph ${datadir} ${decode} \
    | tee ${decode}/decode.log

  # Marker that lets a rerun skip this stage.
  touch ${decode}/.done
fi

# Outside the fast path, run the KWS/STT task on both tri6 decode
# directories: ${decode} and ${decode}.si (presumably the
# speaker-independent first pass -- confirm against the decoder script).
if ! ${fast_path}; then
  for lat_dir in ${decode} ${decode}.si; do
    local/run_kws_stt_task.sh \
      --cer $cer --max-states $max_states \
      --cmd "$decode_cmd" \
      --skip-kws $skip_kws --skip-stt $skip_stt \
      --wip $wip \
      "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
      ${datadir} data/lang ${lat_dir}
  done
fi

####################################################################
## SGMM2 decoding with the exp_BNF/sgmm7 system
####################################################################
decode=exp_BNF/sgmm7/decode_fmllr_${dirid}
if [[ ! -f ${decode}/.done ]]; then
  printf '%s\n' ---------------------------------------------------------------------
  echo "Spawning ${decode} on" $(date)
  printf '%s\n' ---------------------------------------------------------------------

  # Build the decoding graph and keep a copy of the output as a log.
  utils/mkgraph.sh data/lang exp_BNF/sgmm7 exp_BNF/sgmm7/graph \
    | tee exp_BNF/sgmm7/mkgraph.log

  mkdir -p ${decode}
  # The transforms are taken from the tri6 decode of the same data set.
  steps/decode_sgmm2.sh \
    --skip-scoring true --use-fmllr true \
    --nj $my_nj --cmd "$decode_cmd" \
    --transform-dir exp_BNF/tri6/decode_${dirid} \
    "${decode_extra_opts[@]}" \
    exp_BNF/sgmm7/graph ${datadir} ${decode} \
    | tee ${decode}/decode.log

  # Marker that lets a rerun skip this stage.
  touch ${decode}/.done
fi

# Outside the fast path, run the KWS/STT task on the SGMM2 decode produced
# by the stage right above (${decode} is exp_BNF/sgmm7/decode_fmllr_${dirid}
# at this point). This used to point at exp/sgmm5/decode_fmllr_${dirid}, a
# directory that this script never creates.
if ! $fast_path ; then
  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --wip 0.5 \
    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
    ${datadir} data/lang ${decode}
fi

####################################################################
##
## SGMM+MMI rescoring of the exp_BNF/sgmm7 decode
##
####################################################################

for mmi_iter in 1 2 3 4; do
  decode=exp_BNF/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it${mmi_iter}

  # Iterations that already carry a .done marker are not repeated.
  [[ -f ${decode}/.done ]] && continue

  mkdir -p ${decode}
  # Rescore the SGMM2 decode of this data set with the given MMI iteration.
  steps/decode_sgmm2_rescore.sh \
    --skip-scoring true \
    --cmd "$decode_cmd" \
    --iter ${mmi_iter} \
    --transform-dir exp_BNF/tri6/decode_${dirid} \
    data/lang ${datadir} exp_BNF/sgmm7/decode_fmllr_${dirid} ${decode} \
    | tee ${decode}/decode.log

  touch ${decode}/.done
done

# All lattices have been generated. What is left for every SGMM+MMI
# iteration is
#   a) MBR decoding
#   b) keyword search
# Both are presumably handled by local/run_kws_stt_task.sh (confirm there).
# This loop always runs; it is not guarded by fast_path.
mmi_root=exp_BNF/sgmm7_mmi_b0.1
for n in {1..4}; do
  decode=${mmi_root}/decode_fmllr_${dirid}_it${n}
  local/run_kws_stt_task.sh \
    --cer $cer --max-states $max_states \
    --cmd "$decode_cmd" \
    --skip-kws $skip_kws --skip-stt $skip_stt \
    --wip 0.5 \
    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
    ${datadir} data/lang ${decode}
done


echo "Everything looking good...."
exit 0