Blame view

egs/gp/s5/local/gp_convert_audio.sh 3.34 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  #!/bin/bash -u
  
  # Copyright 2012  Arnab Ghoshal
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  # Abort on the first unhandled command failure.
  set -o errexit
  # Treat expansion of an unset variable as an error. The -u on the shebang
  # line only takes effect when the script is executed directly; setting it
  # here keeps the same protection when the script is run as `bash <script>`.
  set -o nounset
  
  # Resolve the directory named in an argument of the form --option=DIR.
  #
  # Arguments: $1 - option string; everything after the first '=' is taken as
  #                 the directory path (an argument without '=' is used as the
  #                 path itself).
  # Outputs:   the absolute path of the directory on stdout, or the path as
  #            given if the directory cannot be entered; an error message on
  #            stderr if the path is not a directory.
  # Returns:   exits with status 1 if the path is not an existing directory.
  function read_dirname () {
    local dir_name="${1#*=}"
    if [ ! -d "$dir_name" ]; then
      echo "Argument '$dir_name' not a directory" >&2
      exit 1
    fi
    # Declared separately so the substitution's exit status is not masked.
    local retval
    # If cd fails (e.g. no search permission) fall back to the path as given.
    # The fallback must be an assignment: running "$dir_name" as a command
    # would leave the result empty.
    retval=$(cd -- "$dir_name" 2>/dev/null && pwd) || retval=$dir_name
    echo "$retval"
  }
  
  # Script name shown in the usage text: $0 with any leading directory part
  # removed. Parameter expansion is used so that a path containing whitespace
  # is not word-split.
  PROG=${0##*/}
  # Help text. The \t sequences are stored literally and are only turned into
  # tabs when the string is printed with `echo -e`.
  usage="Usage: $PROG <arguments> [options]
  
  Converts GlobalPhone audio files from shorten to WAV with error checking.
  
  (Must have shorten and sox on PATH).
  
  
  Required arguments:
  
    --input-list=FILE\tList of shorten-compressed files to process.
  
    --output-dir=DIR\tDirectory to write the WAV files to.
  
  Options:
  
    --output-list=FILE\tWrite list of converted files.
  
    --help\t\t\tPrint this help and exit.
  
  ";
  
  # Parse the command line. Every argument is either --help or has the form
  # --name=value; anything else is rejected together with the usage text.
  # "$usage" is quoted everywhere so the help keeps its line breaks.
  if [ $# -lt 2 ]; then
    echo -e "$usage"; exit 1;
  fi
  
  while [ $# -gt 0 ]; do
    case "$1" in
    --help)
      echo -e "$usage"; exit 0 ;;
    --input-list=*)
      # ${1#*=} strips everything up to the first '='. Unlike expr it does not
      # return a failing status for an empty or "0" value, which would end
      # the script silently under errexit.
      INLIST=${1#*=}
      [ -f "$INLIST" ] || { echo "Argument '$INLIST' not a file" >&2; exit 1; }
      shift ;;
    --output-dir=*)
      # read_dirname reports its own error; stop here if it failed.
      ODIR=$(read_dirname "$1") || exit 1
      shift ;;
    --output-list=*)
      OLIST=${1#*=}; shift ;;
    *)
      echo "Unknown argument: $1, exiting"; echo -e "$usage"; exit 1 ;;
    esac
  done
  
  # Both required arguments must have been given; otherwise later steps would
  # expand an unset INLIST or ODIR.
  if [ -z "${INLIST:-}" ] || [ -z "${ODIR:-}" ]; then
    echo "Missing required argument: --input-list and --output-dir are both needed" >&2
    echo -e "$usage"
    exit 1
  fi
  OLIST=${OLIST:-/dev/null}  # Default for output list
  
  # Check that the external programs are available before doing any work, and
  # name the missing one instead of letting errexit stop the script silently.
  # soxi is checked as well because the sample-count test on each converted
  # file calls it.
  for tool in shorten sox soxi; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      echo "Required program '$tool' not found on PATH" >&2
      exit 1
    fi
  done
  
  # Scratch directory holding the intermediate raw audio and the error logs.
  # The EXIT trap removes it however the script terminates.
  tmpdir=$(mktemp -d /tmp/kaldi.XXXXXX) || { echo "mktemp failed" >&2; exit 1; }
  trap 'rm -rf "$tmpdir"' EXIT
  
  # Quoted so that an output directory containing whitespace is created as one
  # path rather than several.
  mkdir -p "$tmpdir/raw" "$ODIR"
  shnerr=$tmpdir/shnerr   # Log of inputs that shorten failed on.
  soxerr=$tmpdir/soxerr   # Log of sox failures and of outputs that are too short.
  nshnerr=0               # Number of entries written to $shnerr.
  nsoxerr=0               # Number of entries written to $soxerr.
  
  # Convert every file named in the input list: shorten -> raw samples -> WAV.
  # A failed conversion is logged and counted instead of stopping the run; only
  # a malformed list entry is fatal.
  # `read -r` keeps backslashes literal, and the `|| [[ -n ... ]]` part also
  # processes a final line that has no trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # Each entry must contain a '/' and end in the literal suffix .adc.shn.
    [[ "$line" =~ ^.*/.*\.adc\.shn$ ]] || { echo "Bad line: '$line'" >&2; exit 1; }
    set +e  # Don't want script to die if conversion fails.
    b=$(basename "$line" .adc.shn)
    raw=$tmpdir/raw/${b}.raw
    wav=$ODIR/${b}.wav
    if ! shorten -x "$line" "$raw"; then
      echo "$line" >> "$shnerr"
      nshnerr=$((nshnerr + 1))
    else
      # The raw data is passed to sox as 16-bit signed integers at 16000 Hz.
      sox -t raw -r 16000 -e signed-integer -b 16 "$raw" -t wav "$wav"
      status=$?  # Saved immediately: the test below overwrites $?.
      if [ "$status" -ne 0 ]; then
        echo "$raw: exit status = $status" >> "$soxerr"
        nsoxerr=$((nsoxerr + 1))
      else
        # Just in case there are empty files! Setting the cutoff at 1000 samples,
        # which, assuming 16KHz sampling, is 0.0625 seconds.
        nsamples=$(soxi -s "$wav")
        if [[ "$nsamples" -gt 1000 ]]; then
          echo "$wav" >> "$OLIST"
        else
          echo "$raw: #samples = $nsamples" >> "$soxerr"
          nsoxerr=$((nsoxerr + 1))
        fi
      fi
    fi
    # The intermediate file is not read again; removing it now keeps the
    # scratch directory from growing with every converted file.
    rm -f "$raw"
    set -e
  done < "$INLIST"
  
  # Summarise failures on stderr: for each tool, a header line carrying the
  # failure count, then the contents of that tool's log file if it exists.
  if [[ "$nshnerr" -gt 0 ]]; then
    echo "shorten: error converting following $nshnerr file(s):" >&2
  fi
  if [ -f "$shnerr" ]; then
    cat "$shnerr" >&2
  fi
  if [[ "$nsoxerr" -gt 0 ]]; then
    echo "sox: error converting following $nsoxerr file(s):" >&2
  fi
  if [ -f "$soxerr" ]; then
    cat "$soxerr" >&2
  fi
  
  # The exit status is 0 no matter how many files failed to convert.
  exit 0;