Blame view

egs/apiai_decode/s5/local/create-corpus.sh 1021 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  #!/bin/bash
  #
  # Builds the description files of a data directory (wav.scp, spk2utt,
  # utt2spk, text) from a list of .wav files.
  #
  # Usage: create-corpus.sh <datadir> test1.wav [test2.wav] ...

  # Checking arguments: the data directory plus at least one wav file are
  # required.
  if [ $# -le 1 ]; then
    # A wrong invocation is an error: report it on stderr and return a
    # non-zero status so that a calling script can notice that no corpus
    # was created.
    echo "Use $0 <datadir> test1.wav [test2.wav] ..." >&2
    echo "  $0 data/test-corpus test1.wav test2.wav" >&2
    exit 1
  fi
  
  # The first argument names the corpus directory; every remaining
  # argument is an input wav file.
  CORPUS=$1
  shift

  # Stop at the first input that lacks a .wav suffix or is not an existing
  # regular file.
  for file in "$@"; do
    case "$file" in
      *.wav) ;;
      *)
        echo "Expecting .wav files, got $file"
        exit 1
        ;;
    esac

    [ -f "$file" ] || { echo "$file not found"; exit 1; }
  done
  
  
  # Make sure the corpus directory exists before any file is written
  # into it.
  echo "Initilizing $CORPUS"
  if [ ! -d "$CORPUS" ]; then
    echo "Creating $CORPUS directory"
    # The failure branch must run in the current shell: an `exit` placed
    # inside ( ... ) only terminates the subshell, and the script would
    # carry on without a data directory.
    if ! mkdir -p "$CORPUS"; then
      echo "Unable to create data dir" >&2
      exit 1
    fi
  fi
  
  # Paths of the files that describe the corpus, all inside $CORPUS.
  wav_scp="$CORPUS/wav.scp"
  spk2utt="$CORPUS/spk2utt"
  utt2spk="$CORPUS/utt2spk"
  text="$CORPUS/text"

  # Truncate (or create) each file so that entries from an earlier run do
  # not survive. The targets are quoted: bash rejects an unquoted
  # redirection target that expands to several words ("ambiguous
  # redirect"), which happens as soon as the directory name has a space.
  : >"$wav_scp"
  : >"$spk2utt"
  : >"$utt2spk"
  : >"$text"

  # Remove feats.scp and cmvn.scp if present (presumably stale outputs of
  # an earlier run -- confirm). -f keeps rm silent when they do not exist.
  rm -f -- "$CORPUS/feats.scp" "$CORPUS/cmvn.scp"
  
  # Append one line per wav file to each corpus file. The same id is
  # written in both columns of spk2utt and utt2spk, and the text entry is
  # a fixed placeholder.
  for file in "$@"; do
    # The id is the path as given with every space turned into an
    # underscore. Parameter expansion is used instead of an unquoted
    # `echo $file | sed`, which would glob-expand and word-split the name
    # and fork a pipeline per file.
    id=${file// /_}
    printf '%s %s\n' "$id" "$file" >>"$wav_scp"
    printf '%s %s\n' "$id" "$id" >>"$spk2utt"
    printf '%s %s\n' "$id" "$id" >>"$utt2spk"
    printf '%s NO_TRANSRIPTION\n' "$id" >>"$text"
  done