% Generate_mcTrainData_cut.m (7.02 KB)
function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
%
% Input variables:
%    WSJ_dir_name: string name of WAV file directory converted from original wsjcam0 SPHERE files
%                  (*Directory structure for wsjcam0 corpus to be kept as it is after obtaining it from LDC. 
%                    Otherwise this script does not work.)
%    save_dir:     string name of the directory under which the generated data
%                  is written (into <save_dir>/data/mc_train/)
%
% This function generates multi-condition training data
% based on the following items:
%  1. wsjcam0 corpus (WAV files)
%  2. room impulse responses (ones under ./RIR/)
%  3. noise (ones under ./NOISE/).
% Generated data has the same directory structure as original wsjcam0 corpus. 
% Each generated utterance is cut to the length of its clean source utterance
% and written as 8 single-channel WAV files (<name>_ch1.wav ... <name>_ch8.wav).
%

if nargin<2
   error('Usage: Generate_mcTrainData_cut(WSJCAM0_data_path, save_dir)  *Note that the input variable WSJCAM0_data_path should indicate the directory name of your clean WSJCAM0 corpus. '); 
end
if exist([WSJ_dir_name,'/data/'],'dir')==0
   error(['Could not find wsjcam0 corpus : Please confirm if ',WSJ_dir_name,' is a correct path to your clean WSJCAM0 corpus']); 
end

% save_dir is always defined here (nargin>=2), but it may still be empty,
% which would make save_dir(end) below fail.
if isempty(save_dir)
    error('You have to set the save_dir variable in the code before running this script!')
end

display(['Name of directory for original wsjcam0: ',WSJ_dir_name])
display(['Name of directory to save generated multi-condition training data: ',save_dir])

% Parameters related to acoustic conditions
SNRdB=20;

% List(s) of WSJ speech data
flists={'etc/audio_si_tr.lst'};

%
% List of RIRs (4 consecutive entries per room, in the same room order as
% the noise list below)
%
RIR_sim={ ...
    './RIR/RIR_SmallRoom1_near_AnglA.wav', ...
    './RIR/RIR_SmallRoom1_near_AnglB.wav', ...
    './RIR/RIR_SmallRoom1_far_AnglA.wav', ...
    './RIR/RIR_SmallRoom1_far_AnglB.wav', ...
    './RIR/RIR_MediumRoom1_near_AnglA.wav', ...
    './RIR/RIR_MediumRoom1_near_AnglB.wav', ...
    './RIR/RIR_MediumRoom1_far_AnglA.wav', ...
    './RIR/RIR_MediumRoom1_far_AnglB.wav', ...
    './RIR/RIR_LargeRoom1_near_AnglA.wav', ...
    './RIR/RIR_LargeRoom1_near_AnglB.wav', ...
    './RIR/RIR_LargeRoom1_far_AnglA.wav', ...
    './RIR/RIR_LargeRoom1_far_AnglB.wav', ...
    './RIR/RIR_SmallRoom2_near_AnglA.wav', ...
    './RIR/RIR_SmallRoom2_near_AnglB.wav', ...
    './RIR/RIR_SmallRoom2_far_AnglA.wav', ...
    './RIR/RIR_SmallRoom2_far_AnglB.wav', ...
    './RIR/RIR_MediumRoom2_near_AnglA.wav', ...
    './RIR/RIR_MediumRoom2_near_AnglB.wav', ...
    './RIR/RIR_MediumRoom2_far_AnglA.wav', ...
    './RIR/RIR_MediumRoom2_far_AnglB.wav', ...
    './RIR/RIR_LargeRoom2_near_AnglA.wav', ...
    './RIR/RIR_LargeRoom2_near_AnglB.wav', ...
    './RIR/RIR_LargeRoom2_far_AnglA.wav', ...
    './RIR/RIR_LargeRoom2_far_AnglB.wav'};
num_RIRvar=numel(RIR_sim);

%
% List of noise (file name prefixes; '_<k>.wav' is appended, k=1..num_NOISEfiles)
% 
noise_sim={ ...
    './NOISE/Noise_SmallRoom1', ...
    './NOISE/Noise_MediumRoom1', ...
    './NOISE/Noise_LargeRoom1', ...
    './NOISE/Noise_SmallRoom2', ...
    './NOISE/Noise_MediumRoom2', ...
    './NOISE/Noise_LargeRoom2'};
num_RIRperRoom=4;    % consecutive RIR entries that share one noise prefix
num_NOISEfiles=10;   % noise recordings cycled through per room
num_ch=8;            % channels written per utterance

%
% Start generating noisy reverberant data with creating new directories
%

fcount=1;
rcount=1;
ncount=1;

if save_dir(end)=='/'
    save_dir_tr=[save_dir,'data/mc_train/'];
else
    save_dir_tr=[save_dir,'/data/mc_train/'];
end
if ~exist(save_dir_tr,'dir')
    mkdir(save_dir_tr);
end

prev_subdir='dummy';

for nlist=1:numel(flists)
    % Open file list
    fid=fopen(flists{nlist},'r');
    if fid<0
        error(['Could not open file list : ',flists{nlist}]);
    end
    % Make sure the list file is closed even if an error occurs below
    list_closer=onCleanup(@() fclose(fid));

    while 1
        
        % Set data file name
        fname=fgetl(fid);
        if ~ischar(fname)
            break;
        end
        fname=strtrim(fname);   % drop stray whitespace / carriage returns
        if isempty(fname)
            continue;           % skip blank lines in the list
        end
        
        % Sub-directory part of the entry (everything up to the last '/')
        last_slash=find(fname=='/',1,'last');
        if isempty(last_slash)
            subdir='';
        else
            subdir=fname(1:last_slash);
        end
        
        % Make directory if there isn't any
        if ~strcmp(prev_subdir,subdir) && ~exist([save_dir_tr subdir],'dir')
            mkdir([save_dir_tr subdir])
        end
        prev_subdir=subdir;
       
        % load speech signal (transposed: samples run along the 2nd dimension)
        x=audioread([WSJ_dir_name, '/data/', fname, '.wav'])';
        
        % load RIR and noise for "THIS" utterance
        RIR=audioread(RIR_sim{rcount});
        NOISE=audioread([noise_sim{ceil(rcount/num_RIRperRoom)},'_',num2str(ncount),'.wav']);

        % Generate 8ch noisy reverberant data        
        y=gen_obs(x,RIR,NOISE,SNRdB);

        % cut to length of original signal
        y = y(1:size(x,2),:);
        
        % routine to cyclically switch RIRs and noise, utterance by utterance 
        rcount=rcount+1;
        if rcount>num_RIRvar
            rcount=1;
            ncount=ncount+1;
        end
        if ncount>num_NOISEfiles
            ncount=1;
        end

        % save the data

        y=y/4; % common normalization to all the data to prevent clipping
               % denominator was decided experimentally

        for ch=1:num_ch
            outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav'];
            audiowrite(outfilename, y(:,ch), 16000);
        end
           
        display(['sentence ',num2str(fcount),' (out of 7861) finished! (Multi-condition training data)'])
        fcount=fcount+1;

    end

    % Destroying the cleanup object closes the list file
    clear list_closer
end


%%%%
function [y]=gen_obs(x,RIR,NOISE,SNRdB)
% function to generate noisy reverberant data
%
%   x      : clean speech signal; it is transposed on entry before being
%            convolved with the RIR columns
%   RIR    : room impulse responses, one column per channel (columns 1..8 are used)
%   NOISE  : noise signals, one column per channel; needs at least as many
%            rows as the reverberant signal (length(x)+size(RIR,1)-1)
%   SNRdB  : target ratio in dB between the power of the direct+early part
%            of the speech (channel 1) and the power of the noise (channel 1)
%
%   y      : reverberant speech plus scaled noise, with the samples before
%            the main peak of RIR(:,1) removed

num_ch=8;

x=x';

% calculate direct+early reflection signal for calculating SNR
[~,delay]=max(RIR(:,1));
before_impulse=floor(16000*0.001);
after_impulse=floor(16000*0.05);
% Clamp the window to the RIR extent so that a peak close to either end of
% the impulse response does not produce an out-of-range index.
win_first=max(1,delay-before_impulse);
win_last=min(size(RIR,1),delay+after_impulse);
RIR_direct=RIR(win_first:win_last,1);
direct_signal=fconv(x,RIR_direct);

% obtain reverberant speech (first channel fixes the output length so the
% matrix can be allocated once instead of growing inside the loop)
rev_ch1=fconv(x,RIR(:,1));
rev_y=zeros(numel(rev_ch1),num_ch);
rev_y(:,1)=rev_ch1;
for ch=2:num_ch
    rev_y(:,ch)=fconv(x,RIR(:,ch));
end

% normalize noise data according to the prefixed SNR value
if size(NOISE,1)<size(rev_y,1)
    error('gen_obs:noiseTooShort', ...
          'Noise signal (%d samples) is shorter than the reverberant signal (%d samples).', ...
          size(NOISE,1),size(rev_y,1));
end
NOISE=NOISE(1:size(rev_y,1),:);
NOISE_ref=NOISE(:,1);

% Gain applied to every noise channel: sqrt(10^(-SNRdB/10) * Px / Pn)
iPn = diag(1./mean(NOISE_ref.^2,1));
Px = diag(mean(direct_signal.^2,1));
Msnr = sqrt(10^(-SNRdB/10)*iPn*Px);
scaled_NOISE = NOISE*Msnr;
y = rev_y + scaled_NOISE;
% drop the samples preceding the main RIR peak
y = y(delay:end,:);


%%%%
function [y]=fconv(x, h)
%FCONV Fast Convolution
%   [y] = FCONV(x, h) convolves x and h in the frequency domain and
%         returns the first length(x)+length(h)-1 samples of the result.
%         No amplitude normalization is applied.
%
%      x = input vector
%      h = input vector
%
%      x and h are multiplied element-wise after the FFT, so they are
%      expected to have the same orientation.
% 
%      See also CONV
%
%   NOTES:
%
%   1) I have a short article explaining what a convolution is.  It
%      is available at http://stevem.us/fconv.html.
%
%
%Version 1.0
%Coded by: Stephen G. McGovern, 2003-2004.
%
%Copyright (c) 2003, Stephen McGovern
%All rights reserved.
%
%THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
%AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
%IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
%ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
%LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
%CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
%SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
%INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
%CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
%ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
%POSSIBILITY OF SUCH DAMAGE.

% Length of the full linear convolution
outLen = length(x) + length(h) - 1;

% Zero-padded transform size: smallest power of two that is >= outLen
fftLen = 2^nextpow2(outLen);

% Multiply the spectra of both inputs, then go back to the time domain
specProduct = fft(x, fftLen) .* fft(h, fftLen);
fullResult = real(ifft(specProduct, fftLen));

% Discard the zero-padding tail
y = fullResult(1:outLen);