Blame view

egs/aspire/s5/local/multi_condition/rirs/prep_rvb2014.sh 5.34 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
  #!/bin/bash
  # Copyright 2015  Johns Hopkins University (author: Vijayaditya Peddinti)
  # Apache 2.0
  # This script downloads the impulse responses and noise files from the
  # Reverb2014 challenge
  # and converts them to wav files with the required sampling rate
  #==============================================
  
  # Option defaults. They are assigned before utils/parse_options.sh is sourced;
  # presumably that script maps "--name value" command-line options onto these
  # variables (the usage example below passes "--download true") -- confirm.
  download=true        # fetch and unpack the Reverb2014 tool archives first
  sampling_rate=8k     # value handed to "sox -r" for the converted files
  output_bit=16        # value handed to "sox -b" for the converted files
  DBname=RVB2014       # prefix of every generated file name
  file_splitter=  #script to generate job scripts given the command file

  . ./cmd.sh
  . ./path.sh
  . ./utils/parse_options.sh

  # Exactly three positional arguments must remain once options are consumed.
  if [ $# -ne 3 ]; then
    printf '%s\n' \
      "Usage: " \
      "  $0 [options] <rir-home> <output-dir> <log-dir>" \
      "e.g.:" \
      " $0  --download true db/RIR_databases/ data/impulses_noises exp/make_reverb/log"
    exit 1
  fi

  RIR_home="$1"      # where the Reverb2014 archives are downloaded/unpacked
  output_dir="$2"    # destination of the converted wav files
  log_dir="$3"       # destination of the command file, lists and job logs
  
  # Download and unpack the two Reverb2014 tool archives into $RIR_home.
  # The work happens in a subshell so the caller's working directory is left
  # untouched. An "exit" inside ( ... ) only terminates the subshell, so its
  # status is checked on the closing parenthesis to abort the whole script.
  if [ "$download" = true ]; then
    mkdir -p "$RIR_home" || exit 1
    (
      # Never delete or download anything if we are not in the right directory.
      cd "$RIR_home" || exit 1
      rm -rf reverb_tools*.tgz
      wget http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_mcTrainData.tgz || exit 1
      tar -zxvf reverb_tools_for_Generate_mcTrainData.tgz || exit 1
      wget http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_SimData.tgz || exit 1
      tar -zxvf reverb_tools_for_Generate_SimData.tgz >/dev/null || exit 1
    ) || exit 1
  fi
  
  Reverb2014_home1=$RIR_home/reverb_tools_for_Generate_mcTrainData
  Reverb2014_home2=$RIR_home/reverb_tools_for_Generate_SimData

  # Reverb2014 RIRs and noise
  #--------------------------
  # data is stored as multi-channel wav-files

  # The command file and the list files are written to $log_dir and the sox
  # commands write to $output_dir, so make sure both exist up front.
  mkdir -p "$log_dir" "$output_dir" || exit 1

  command_file=$log_dir/${DBname}_read_rir_noise.sh
  # Start from a fresh command file (it begins with one empty line).
  echo "">$command_file

  # queue_wav_conversions <source-dir> <kind> <description>
  #   Finds every *.wav file below <source-dir>, appends one sox conversion
  #   command per file to $command_file and records the path of each converted
  #   file in $log_dir/${DBname}_type${type_num}.<kind>.list.
  # Globals (read): DBname, type_num, sampling_rate, output_bit, output_dir,
  #                 log_dir, command_file
  # Arguments:
  #   $1 - directory to scan
  #   $2 - tag used in the list file name and later file globs (rir or noise)
  #   $3 - plural noun used in the "Found ..." progress message
  # Outputs: one "Found <n> <description> in <source-dir>." line on stdout
  # Exits the script with status 1 when <source-dir> does not exist; a check
  # placed inside $(find ...) would only leave the command substitution.
  queue_wav_conversions() {
    local src_dir=$1 kind=$2 description=$3
    local list_file=$log_dir/${DBname}_type${type_num}.${kind}.list
    local wav_path output_file_name
    local -a wav_files=()

    if [ ! -d "$src_dir" ]; then
      echo "$0: directory $src_dir does not exist" >&2
      exit 1
    fi

    # Read find's output line by line so the count below is simply the array
    # length and no word-splitting of paths takes place.
    while IFS= read -r wav_path; do
      wav_files+=("$wav_path")
    done < <(find "$src_dir" -name '*.wav' -type f -print)

    # Truncate the list file (it begins with one empty line).
    echo "" > "$list_file"
    echo "Found ${#wav_files[@]} $description in $src_dir."

    for wav_path in "${wav_files[@]}"; do
      # Converted files are named <DBname>_type<N>_<lower-cased original name>.
      output_file_name=${DBname}_type${type_num}_$(basename "$wav_path" | tr '[:upper:]' '[:lower:]')
      echo "sox -t wav $wav_path -t wav -r $sampling_rate -e signed-integer -b $output_bit ${output_dir}/${output_file_name}" >> "$command_file"
      echo "${output_dir}/${output_file_name}" >> "$list_file"
    done
  }

  # Simdata for training
  #--------------------
  type_num=1
  queue_wav_conversions "$Reverb2014_home1/RIR" rir "impulse responses"
  queue_wav_conversions "$Reverb2014_home1/NOISE" noise "noises"

  # Simdata for devset
  type_num=$((type_num + 1))
  queue_wav_conversions "$Reverb2014_home2/RIR" rir "impulse responses"
  queue_wav_conversions "$Reverb2014_home2/NOISE" noise "noises"
  
  
  
  # Decide how the generated command file is executed.
  # With a splitter, its standard output is taken as the number of jobs and the
  # per-job script/log names carry a JOB placeholder (presumably the splitter
  # writes <command_file minus .sh>.<n>.sh files -- confirm against the splitter
  # script). Without one, the whole command file runs as a single job.
  if [ -n "$file_splitter" ]; then
    # $file_splitter is intentionally unquoted so it may carry arguments.
    # The status check must sit outside $( ... ): an "exit" inside the
    # substitution would only end the subshell and leave num_jobs empty.
    num_jobs=$($file_splitter $command_file) || exit 1
    job_file=${command_file%.sh}.JOB.sh
    job_log=${command_file%.sh}.JOB.log
  else
    num_jobs=1
    job_file=$command_file
    job_log=${command_file%.sh}.log
  fi
  # execute the commands using the above created array jobs
  # ($decode_cmd is not set in this script; presumably cmd.sh defines it.)
  time $decode_cmd --max-jobs-run 40 JOB=1:$num_jobs $job_log \
    sh $job_file || exit 1;
  
  # get the Reverb2014 room names to pair the noises and impulse responses
  #
  # write_noise_impulse_sets
  #   For data types 1 and 2, derives the room names from the converted noise
  #   files ${output_dir}/${DBname}_type<N>_noise_<room>_*.wav and, per room,
  #   writes ${output_dir}/info/noise_impulse_${DBname}_<room> containing a
  #   "noise_files =" entry and an "impulse_files =" entry listing the matching
  #   noise and rir wav files (entries separated by two spaces).
  # Globals (read): output_dir, DBname
  # Exits the script with status 1 if the info directory cannot be created.
  write_noise_impulse_sets() {
    local type_num prefix room set_file
    local info_dir=$output_dir/info
    local -a rooms

    for type_num in 1 2; do
      prefix=${DBname}_type${type_num}
      # The room name is the text between "<prefix>_noise_" and the next "_".
      # sed does the extraction so the script needs no Python interpreter (the
      # former inline snippet used the Python 2 print statement) and does not
      # depend on how the embedded code is indented.
      rooms=( $(ls "${output_dir}/${prefix}_noise"*.wav \
        | xargs -n1 basename \
        | sed -n "s/^${prefix}_noise_\([^_]*\).*/\1/p" \
        | sort -u) )

      for room in "${rooms[@]}"; do
        mkdir -p "$info_dir" || exit 1
        set_file=$info_dir/noise_impulse_${DBname}_$room
        # awk joins the file names with ORS ("  "); the END rule emits the line
        # break. It must be written as the "\n" escape: a raw newline inside an
        # awk string literal is a syntax error.
        printf '%s' "noise_files =" > "$set_file"
        ls "${output_dir}/${prefix}_noise"*"${room}"*.wav \
          | awk '{ ORS="  "; print; } END { print "\n" }' >> "$set_file"
        printf '%s' "impulse_files =" >> "$set_file"
        ls "${output_dir}/${prefix}_rir"*"${room}"*.wav \
          | awk '{ ORS="  "; print; } END { print "\n" }' >> "$set_file"
      done
    done
  }

  write_noise_impulse_sets