run_autoencoder.sh 2.72 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89


#!/bin/bash

# Example of a "tdnn" system in raw nnet configuration, i.e. without a
# transition model.
# It uses corrupted (reverberation + noise) speech as input and clean speech
# as output.

# Option defaults. They are assigned before utils/parse_options.sh is sourced
# below (presumably so they can be overridden from the command line -- confirm
# against utils/parse_options.sh).
stage=0           # stage N of this script runs when stage <= N (stages 9, 10)
affix=            # if non-empty, "_$affix" is appended to the experiment dir
train_stage=-10   # passed to steps/nnet3/train_raw_dnn.py as --stage
common_egs_dir=   # passed to the trainer as --egs.dir
egs_opts=         # passed to the trainer as --egs.opts
num_data_reps=10  # the targets loop runs for x = 1..num_data_reps ("rev<x>_")

remove_egs=true   # passed to the trainer as --cleanup.remove-egs

# cmd.sh is sourced once here; nothing above this point uses what it defines.
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh


# Refuse to run when the `cuda-compiled` check fails: print the explanation
# below on stdout and stop with status 1.
if ! cuda-compiled; then
  cat <<EOF
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  # Exit unconditionally. Chaining this as `cat ... && exit 1` would skip the
  # exit whenever cat itself fails (e.g. stdout is closed), and the script
  # would then carry on without CUDA.
  exit 1
fi

# Experiment directory; "_$affix" is appended only when affix is non-empty.
dir=exp/nnet3/tdnn_raw
dir=$dir${affix:+_$affix}

clean_data_dir=data/train
data_dir=data/train_rvb
targets_scp=$dir/targets.scp

mkdir -p "$dir"

#######################################
# Write to stdout one copy of a feats.scp-style file per repetition x, with
# every line prefixed by "rev<x>_", the whole output sorted on the first field.
# Arguments: $1 - path of the source scp file
#            $2 - number of repetitions (x runs from 1 to $2)
# Outputs:   the prefixed, sorted lines on stdout
#######################################
make_rev_targets() {
  local src_scp=$1 reps=$2 x
  for ((x = 1; x <= reps; x++)); do
    awk -v x="$x" '{print "rev"x"_"$0}' "$src_scp"
  done | sort -k1,1
}

# Create copies of clean feats with prefix "rev$x_" to match utterance names of
# the noisy feats.
# The sort and the redirection are applied once to the output of ALL
# repetitions; redirecting inside the loop would truncate the file on every
# pass and keep only the last repetition.
make_rev_targets "$clean_data_dir/feats.scp" "$num_data_reps" > "$targets_scp"

# Stage 9: write the network xconfig and expand it into nnet3 config files
# under $dir/configs.
if [ "$stage" -le 9 ]; then
  echo "$0: creating neural net configs";
  # The output dimension comes from the targets, the input dimension from the
  # features in $data_dir; either lookup failing aborts the script.
  num_targets=$(feat-to-dim "scp:$targets_scp" - 2>/dev/null) || exit 1
  feat_dim=$(feat-to-dim "scp:$data_dir/feats.scp" - 2>/dev/null) || exit 1

  mkdir -p "$dir/configs"
  cat <<EOF > "$dir/configs/network.xconfig"
  input dim=$feat_dim name=input

  relu-renorm-layer name=tdnn1 dim=1024 input=Append(-2,-1,0,1,2)
  relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,2)
  relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,3)
  relu-renorm-layer name=tdnn4 dim=1024 input=Append(-7,2)
  relu-renorm-layer name=tdnn5 dim=1024
  output-layer name=output dim=$num_targets max-change=1.5 objective-type=quadratic include-log-softmax=false
EOF
  # Stop here if config generation fails, rather than falling through to the
  # training stage.
  steps/nnet3/xconfig_to_configs.py \
    --xconfig-file "$dir/configs/network.xconfig" \
    --config-dir "$dir/configs/" || exit 1
fi

# Stage 10: train the raw network with steps/nnet3/train_raw_dnn.py, using the
# features in $data_dir as input and $targets_scp as dense targets.
if [ $stage -le 10 ]; then
  # Only on hosts whose FQDN ends in .clsp.jhu.edu, and only when the egs
  # storage directory does not exist yet: run create_split_dir.pl over the
  # listed /export disks (presumably to spread egs storage across them --
  # confirm against utils/create_split_dir.pl).
  if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
    if [ ! -d $dir/egs/storage ]; then
      utils/create_split_dir.pl \
        /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage \
        $dir/egs/storage
    fi
  fi

  # Trainer options, collected in the same order they are passed on the
  # command line.
  train_opts=(
    --stage=$train_stage
    --cmd="$decode_cmd"
    --feat.cmvn-opts "--norm-means=false --norm-vars=false"
    --trainer.num-epochs 2
    --trainer.optimization.num-jobs-initial 3
    --trainer.optimization.num-jobs-final 16
    --trainer.optimization.initial-effective-lrate 0.0017
    --trainer.optimization.final-effective-lrate 0.00017
    --trainer.optimization.minibatch-size 512
    --egs.dir "$common_egs_dir"
    --egs.opts "$egs_opts"
    --cleanup.remove-egs $remove_egs
    --cleanup.preserve-model-interval 50
    --nj=30
    --use-dense-targets=true
    --feat-dir=${data_dir}
    --targets-scp=$targets_scp
    --dir=$dir
  )

  steps/nnet3/train_raw_dnn.py "${train_opts[@]}" || exit 1;
fi