Blame view

egs/gale_mandarin/s5/local/gale_data_prep_audio.sh 1.43 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  #!/bin/bash
  
  # Copyright 2014 QCRI (author: Ahmed Ali)
  # Copyright 2016 Johns Hopkins University (author: Jan "Yenda" Trmal)
  # Apache 2.0
  
  
  # Log the exact invocation.
  echo "$0" "$@"

  # Usage: $0 <src-audio-dir-1> [<src-audio-dir-2> ...] <output-data-dir>
  # The last argument is the output directory; every argument before it is
  # a source directory. Without this check, a call with no arguments would
  # make "${@: -1}" expand to $0 (the script itself).
  if [[ $# -lt 2 ]]; then
    echo "Usage: $0 <src-audio-dir-1> [<src-audio-dir-2> ...] <output-data-dir>" >&2
    exit 1
  fi

  # Absolute form of the last argument.
  galeData=$(utils/make_absolute.sh "${@: -1}")
  if [[ -z "$galeData" ]]; then
    # An empty result would turn $wavedir into "/wav".
    echo "$0: could not resolve the output directory (last argument)" >&2
    exit 1
  fi
  wavedir=$galeData/wav
  mkdir -p "$wavedir" || exit 1

  # Space-joined list of every argument except the last one.
  length=$(($#-1))
  args="${*:1:$length}"
  
  # check that sox is installed
  # `command -v` is a shell builtin, so the check does not depend on the
  # external `which` program being present.
  if ! command -v sox &>/dev/null; then
   echo "$0: sox is not installed" >&2
   exit 1
  fi
  
  # From here on, abort on the first failing command or pipeline stage.
  set -e -o pipefail

  # For every source directory, recreate $wavedir/<basename of the source
  # directory> and fill it with symlinks to all *.wav and *.flac files found
  # anywhere below that source directory.
  # $args is deliberately left unquoted: it is a whitespace-separated list.
  # NOTE(review): the link target is $file exactly as find prints it, so a
  # relative source directory yields relative link targets, which are
  # resolved relative to the link's own directory -- confirm that callers
  # pass absolute paths.
  for var in $args; do
    CD=$(basename "$var")
    if [[ -d "$wavedir/$CD" ]]; then
      rm -rf "$wavedir/$CD"
    fi
    mkdir -p "$wavedir/$CD"

    # The -name patterns must be quoted; otherwise the shell expands them
    # against the current directory whenever matching files exist there.
    find "$var" -type f \( -name '*.wav' -o -name '*.flac' \) |
      while IFS= read -r file; do
        f=$(basename "$file")
        # When several files share a basename, the first one found wins.
        if [[ ! -L "$wavedir/$CD/$f" ]]; then
          ln -sf "$file" "$wavedir/$CD/$f"
        fi
      done
  done
  
  # Write $galeData/wav.scp with one line per linked audio file:
  #   <basename without extension> sox <absolute path> -r 16000 -t wav - |
  # The same sox command line is emitted for .wav and .flac files, so the
  # flac will be converted on the fly.
  # The -name pattern is quoted so the shell never expands it, and the paths
  # are read line by line instead of being word-split from a command
  # substitution.
  for ext in wav flac; do
    find "$wavedir" -name "*.$ext" | while IFS= read -r w; do
      base=$(basename "$w" ".$ext")
      fullpath=$(utils/make_absolute.sh "$w")
      printf '%s\n' "$base sox $fullpath -r 16000 -t wav - |"
    done
  done | sort -u > "$galeData/wav.scp"
  
  # Remove the PID-suffixed scratch paths under the output directory.
  # NOTE(review): nothing in the visible part of this script creates
  # id$$ or wav$$ -- presumably left over from a related recipe; confirm.
  # The paths are quoted and preceded by `--` so that this destructive
  # command never acts on a word-split or glob-expanded path.
  rm -fr -- "$galeData/id$$" "$galeData/wav$$"
  echo "$0: data prep audio succeded"

  exit 0