prep_rvb2014.sh
5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash
# Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
# Apache 2.0
# This script downloads (when download=true) the impulse responses and noise
# files from the Reverb2014 challenge and converts them to wav files with the
# requested sampling rate and bit depth.
#==============================================
# Option defaults. The usage text shows "--download true", so these are
# presumably overridable as --<name> <value> through utils/parse_options.sh
# (confirm against that script).
download="true"      # fetch and unpack the REVERB tool archives first
sampling_rate="8k"   # handed to sox as the -r value
output_bit="16"      # handed to sox as the -b value
DBname="RVB2014"     # prefix of every generated file name
file_splitter=""     # script to generate job scripts given the command file

# Environment set-up first, option parsing last (original order kept).
source ./cmd.sh
source ./path.sh
source ./utils/parse_options.sh
# Exactly three positional arguments are required; otherwise print the usage
# text and stop with status 1.
if [ "$#" -ne 3 ]; then
  printf '%s\n' \
    "Usage: " \
    " $0 [options] <rir-home> <output-dir> <log-dir>" \
    "e.g.:" \
    " $0 --download true db/RIR_databases/ data/impulses_noises exp/make_reverb/log"
  exit 1
fi

RIR_home="$1"     # where the REVERB tool archives are (or get) unpacked
output_dir="$2"   # destination of the converted wav files
log_dir="$3"      # destination of the command file and the list files
# Optionally fetch and unpack the two REVERB challenge tool archives into
# $RIR_home. Every step is checked and the subshell's own status is checked
# too: an "exit" inside ( ... ) only leaves the subshell, so without the
# trailing "|| exit 1" a failed download would let the script carry on.
if [ "$download" = true ]; then
  mkdir -p "$RIR_home" || exit 1
  (
    # Stop if the directory cannot be entered; otherwise the rm below would
    # delete matching archives in the caller's working directory instead.
    cd "$RIR_home" || exit 1
    rm -rf reverb_tools*.tgz
    wget http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_mcTrainData.tgz || exit 1
    tar -zxvf reverb_tools_for_Generate_mcTrainData.tgz || exit 1
    wget http://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_SimData.tgz || exit 1
    tar -zxvf reverb_tools_for_Generate_SimData.tgz >/dev/null || exit 1
  ) || exit 1
fi
# Locations of the two unpacked tool archives under $RIR_home.
Reverb2014_home1=$RIR_home/reverb_tools_for_Generate_mcTrainData
Reverb2014_home2=$RIR_home/reverb_tools_for_Generate_SimData
# Reverb2014 RIRs and noise
#--------------------------
# data is stored as multi-channel wav-files

# Create the directories written to below, so that a first run does not fail
# on the redirect (log dir) or later in sox (output dir).
mkdir -p "$log_dir" "$output_dir" || exit 1

# File that collects one sox command per line. It is (re)started with a
# single empty line, as before.
command_file=$log_dir/${DBname}_read_rir_noise.sh
echo "" > "$command_file" || exit 1
# Queue one sox conversion per .wav file found under a source directory and
# record the names of the files those conversions will produce.
# Globals read: DBname, type_num, sampling_rate, output_bit, output_dir,
#               log_dir, command_file
# Arguments:    $1 - directory searched recursively for *.wav files
#               $2 - list kind, "rir" or "noise" (selects the .list file)
#               $3 - plural noun used in the "Found ..." progress message
# Outputs:      appends sox command lines to $command_file, rewrites
#               $log_dir/${DBname}_type${type_num}.$2.list, prints a count
# Returns:      non-zero if the directory cannot be searched or a file
#               cannot be written
rvb2014_queue_wavs() {
  local src_dir=$1 kind=$2 label=$3
  local list_file=$log_dir/${DBname}_type${type_num}.${kind}.list
  local found wav out_name
  local -a wavs=()

  # find runs in a plain assignment so that its exit status is visible here;
  # an "|| exit" written inside $( ) only leaves the substitution subshell.
  found=$(find "$src_dir" -name '*.wav' -type f -print) || {
    echo "$0: cannot list wav files under $src_dir" >&2
    return 1
  }
  while IFS= read -r wav; do
    [ -n "$wav" ] && wavs+=("$wav")
  done <<< "$found"

  # The list file starts with one empty line, exactly as before.
  echo "" > "$list_file" || return 1
  echo "Found ${#wavs[@]} $label in ${src_dir}."

  for wav in "${wavs[@]}"; do
    # Output name: <DBname>_type<N>_<lower-cased source base name>
    out_name=${DBname}_type${type_num}_$(basename "$wav" | tr '[:upper:]' '[:lower:]')
    echo "sox -t wav $wav -t wav -r $sampling_rate -e signed-integer -b $output_bit ${output_dir}/${out_name}" >> "$command_file" || return 1
    echo "${output_dir}/${out_name}" >> "$list_file" || return 1
  done
}

# Simdata for training
#--------------------
type_num=1
rvb2014_queue_wavs "$Reverb2014_home1/RIR" rir "impulse responses" || exit 1
rvb2014_queue_wavs "$Reverb2014_home1/NOISE" noise "noises" || exit 1

# Simdata for devset
type_num=$((type_num + 1))
rvb2014_queue_wavs "$Reverb2014_home2/RIR" rir "impulse responses" || exit 1
rvb2014_queue_wavs "$Reverb2014_home2/NOISE" noise "noises" || exit 1
# Choose how the queued commands are executed. With a splitter script, its
# standard output is taken as the number of jobs and the per-job script/log
# names carry a "JOB" placeholder (presumably substituted by $decode_cmd --
# confirm against cmd.sh). Without one, the command file runs as a single job.
if [ -n "$file_splitter" ]; then
  # The status check sits outside $( ): an "exit" inside the substitution
  # would only leave the subshell and num_jobs would silently end up empty.
  # $file_splitter stays unquoted so that it may carry arguments.
  num_jobs=$($file_splitter "$command_file") || exit 1
  job_file=${command_file%.sh}.JOB.sh
  job_log=${command_file%.sh}.JOB.log
else
  num_jobs=1
  job_file=$command_file
  job_log=${command_file%.sh}.log
fi

# execute the commands using the above created array jobs
# ($decode_cmd stays unquoted: it has to split into command plus options)
time $decode_cmd --max-jobs-run 40 JOB=1:$num_jobs "$job_log" \
  sh "$job_file" || exit 1
# get the Reverb2014 room names to pair the noises and impulse responses

# Print the room token of a converted noise file name.
# Arguments: $1 - path or base name of the file
#            $2 - name prefix that precedes the room token
# Outputs:   the text between the prefix and the next "_", e.g. "smallroom1"
#            for "RVB2014_type1_noise_smallroom1_1.wav"
# Returns:   1 (printing nothing) when the name does not contain the prefix
# Pure shell on purpose: the former inline Python used the Python 2 print
# statement and therefore broke wherever "python" is Python 3.
rvb2014_noise_room() {
  local wav_name=${1##*/} prefix=$2 rest
  case $wav_name in
    *"$prefix"*) ;;
    *) return 1 ;;
  esac
  rest=${wav_name#*"$prefix"}
  printf '%s\n' "${rest%%_*}"
}

# For each data type, write one info file per room listing the converted
# noise files and the converted impulse responses whose names contain that
# room token.
# NOTE(review): the info file name does not include the type number, so a room
# token present in both types is written twice and the type-2 content wins --
# confirm that this is intended.
for type_num in 1 2; do
  noise_patterns=( $(
    for noise_wav in "${output_dir}/${DBname}_type${type_num}_noise"*.wav; do
      [ -e "$noise_wav" ] || continue   # the glob matched nothing
      rvb2014_noise_room "$noise_wav" "${DBname}_type${type_num}_noise_"
    done | sort -u) )
  for noise_pattern in "${noise_patterns[@]}"; do
    mkdir -p "$output_dir/info" || exit 1
    set_file=$output_dir/info/noise_impulse_${DBname}_$noise_pattern
    # The awk programs are unchanged so the info files keep their exact
    # layout: names separated by single spaces, then a newline.
    printf '%s' "noise_files =" > "${set_file}"
    ls "${output_dir}/${DBname}_type${type_num}_noise"*"${noise_pattern}"*.wav | awk '{ ORS=" "; print;} END{print "\n"}' >> "${set_file}"
    printf '%s' "impulse_files =" >> "${set_file}"
    ls "${output_dir}/${DBname}_type${type_num}_rir"*"${noise_pattern}"*.wav | awk '{ ORS=" "; print; } END{print "\n"}' >> "${set_file}"
  done
done