Blame view

egs/bn_music_speech/v1/run.sh 3.63 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  #!/bin/bash
  # Copyright 2015   David Snyder
  # Apache 2.0.
  #
  # This example demonstrates music/speech discrimination. This recipe trains
  # three GMMs on the music, speech and noise portions of the MUSAN corpus.
  # We test the systems on Broadcast News. The Broadcast News test data consists
  # of short segments of either speech or music. The classification decisions
  # are made at a segment level from the average likelihoods of two GMMs.
  # Results (EERs) are inline in comments below.
  #
  # See README.txt for more info on data required.
  
  # Pull in the job-dispatch settings (cmd.sh supplies $train_cmd, used by every
  # stage below) and the tool search path (path.sh).
  . ./cmd.sh
  . ./path.sh
  # Abort the recipe as soon as any foreground command fails.
  set -o errexit
  # MFCC features and VAD decisions are both written under ./mfcc, addressed by
  # an absolute path.
  mfccdir="$(pwd)/mfcc"
  vaddir="$mfccdir"
  
  # Build the Broadcast News test data directory under data/ from the two LDC
  # corpus directories (passed to local/make_bn.sh in this order).
  bn_sources=(/export/corpora5/LDC/LDC97S44 /export/corpora/LDC/LDC97T22)
  local/make_bn.sh "${bn_sources[@]}" data

  # Build the MUSAN data directories under data/ (the later stages read
  # data/musan_speech, data/musan_music and data/musan_noise).
  musan_root=/export/corpora/JHU/musan
  steps/data/make_musan.sh --sampling-rate 16000 "$musan_root" data
  
  # Extract MFCC features for each data set.  Every entry is "<data set>:<nj>",
  # where <nj> is the number of parallel jobs handed to steps/make_mfcc.sh
  # (the noise set uses fewer jobs than the others).
  # $mfccdir is derived from the current directory, so it is quoted to survive
  # whitespace or glob characters in the path.
  for mfcc_spec in musan_speech:30 musan_music:30 musan_noise:5 bn:30; do
    steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj "${mfcc_spec##*:}" \
        --cmd "$train_cmd" \
        "data/${mfcc_spec%%:*}" exp/make_mfcc "$mfccdir"
  done
  
  # Run utils/fix_data_dir.sh on every data directory once feature extraction
  # is done, in the same order the features were requested.
  for data_name in musan_speech musan_music musan_noise bn; do
    utils/fix_data_dir.sh "data/$data_name"
  done
  
  # Compute VAD decisions for every data set (logs go to exp/make_vad, output
  # to $vaddir).  $vaddir is derived from the current directory, so it is quoted
  # to survive whitespace or glob characters in the path.
  sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
      data/musan_speech exp/make_vad "$vaddir"
  sid/compute_vad_decision.sh --nj 5 --cmd "$train_cmd" \
      data/musan_noise exp/make_vad "$vaddir"
  sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
      data/musan_music exp/make_vad "$vaddir"
  sid/compute_vad_decision.sh --nj 20 --cmd "$train_cmd" \
      data/bn exp/make_vad "$vaddir"
  
  # Train the diagonal UBMs for the noise, speech and music subsets of MUSAN.
  # The noise and speech jobs run in the background while the music job runs in
  # the foreground, so all three train concurrently.
  #
  # A bare `wait` always returns 0 and `set -e` does not react to failures of
  # background jobs, so each background PID is waited on individually.  All
  # three jobs are allowed to finish before the recipe aborts, so no trainer is
  # left running behind a dead parent script.
  sid/train_diag_ubm.sh --nj 10 --cmd "$train_cmd" --delta-window 2 \
      data/musan_noise 32 exp/diag_ubm_noise &
  diag_noise_pid=$!
  sid/train_diag_ubm.sh --nj 20 --cmd "$train_cmd" --delta-window 2 \
      data/musan_speech 32 exp/diag_ubm_speech &
  diag_speech_pid=$!
  diag_ubm_failed=0
  sid/train_diag_ubm.sh --nj 20 --cmd "$train_cmd" --delta-window 2 \
      data/musan_music 32 exp/diag_ubm_music || diag_ubm_failed=1
  wait "$diag_noise_pid" || diag_ubm_failed=1
  wait "$diag_speech_pid" || diag_ubm_failed=1
  if [ "$diag_ubm_failed" -ne 0 ]; then
    echo "$0: training of at least one diagonal UBM failed" >&2
    exit 1
  fi
  
  # Train the full UBMs, each initialised from the matching diagonal UBM
  # directory.  The noise and speech jobs run in the background while the music
  # job runs in the foreground, so all three train concurrently.
  #
  # A bare `wait` always returns 0 and `set -e` does not react to failures of
  # background jobs, so each background PID is waited on individually.  All
  # three jobs are allowed to finish before the recipe aborts, so the scoring
  # stages never run against a missing or partial model.
  sid/train_full_ubm.sh --nj 20 --cmd "$train_cmd" \
      --remove-low-count-gaussians false data/musan_noise \
      exp/diag_ubm_noise exp/full_ubm_noise &
  full_noise_pid=$!
  sid/train_full_ubm.sh --nj 20 --cmd "$train_cmd" \
      --remove-low-count-gaussians false data/musan_speech \
      exp/diag_ubm_speech exp/full_ubm_speech &
  full_speech_pid=$!
  full_ubm_failed=0
  sid/train_full_ubm.sh --nj 20 --cmd "$train_cmd" \
      --remove-low-count-gaussians false data/musan_music \
      exp/diag_ubm_music exp/full_ubm_music || full_ubm_failed=1
  wait "$full_noise_pid" || full_ubm_failed=1
  wait "$full_speech_pid" || full_ubm_failed=1
  if [ "$full_ubm_failed" -ne 0 ]; then
    echo "$0: training of at least one full UBM failed" >&2
    exit 1
  fi
  
  # Score the Broadcast News segments twice against the speech model: once
  # paired with the music model and once with the noise model in its place.
  # Results land in exp/bn_music_speech and exp/bn_noise_speech respectively.
  for rival in music noise; do
    sid/music_id.sh --cmd "$train_cmd" --nj 40 \
      "exp/full_ubm_${rival}" exp/full_ubm_speech \
      data/bn "exp/bn_${rival}_speech"
  done
  
  # Report the equal error rate of both systems.  The labels are passed as
  # printf arguments under a fixed '%s' format, so they are never interpreted
  # as format strings, and the line break before the second label is an
  # explicit \n rather than a raw newline inside the quoted string (which would
  # also carry the source indentation into the output).
  # The "Equal error rate" comments are the results recorded with the recipe.
  printf '%s' "EER using GMMs trained on music and speech"
  compute-eer <(local/print_scores.py exp/bn_music_speech/ratio)
  # Equal error rate is 0.344234%, at threshold 0.525752
  printf '\n%s' "EER using GMM trained on noise instead of music"
  compute-eer <(local/print_scores.py exp/bn_noise_speech/ratio)
  # Equal error rate is 0.860585%, at threshold 0.123218
  
  # Replace the energy-based VAD decisions computed earlier with frame-level
  # decisions made by the GMMs trained above.  With the mapping given in
  # conf/merge_vad_map.txt, "0" stands for silence, "1" for speech and "2" for
  # music.
  vad_gmm_args=(
    --nj 40 --cmd "$train_cmd"
    --merge-map-config conf/merge_vad_map.txt --use-energy-vad true
    data/bn exp/full_ubm_noise exp/full_ubm_speech/
    exp/full_ubm_music/ exp/vad_gmm exp/vad_gmm/
  )
  sid/compute_vad_decision_gmm.sh "${vad_gmm_args[@]}"