Blame view
egs/gp/s5/local/gp_convert_audio.sh
3.34 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
#!/bin/bash

# Copyright 2012  Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Converts the shorten-compressed audio files named in a list to WAV files in
# an output directory. See the usage text below for the command-line options.

# nounset replaces the '-u' that used to live on the shebang line, so it is
# still in effect when the script is started as 'bash gp_convert_audio.sh'.
set -o errexit
set -o nounset

#######################################
# Extracts DIR from an argument of the form '--option=DIR', verifies that it
# is an existing directory and prints its absolute path.
# Arguments: $1 - a '--name=value' style argument (must contain an '=')
# Outputs:   the absolute path on stdout (the value as given, if the
#            directory cannot be entered); an error message on stderr when
#            the value is not a directory
# Returns:   exits with status 1 when the value is not a directory. The
#            function is meant to be called inside $(...), so the exit only
#            ends the subshell and the caller has to check the status.
#######################################
read_dirname() {
  # Everything after the first '='.
  local dir_name=${1#*=}
  if [[ ! -d "$dir_name" ]]; then
    echo "Argument '$dir_name' not a directory" >&2
    exit 1
  fi
  # Declared separately so that 'local' does not mask the status of $(...).
  local abs_path
  # Fall back to the path as given (rather than trying to execute it) when
  # the directory exists but cannot be entered.
  abs_path=$(cd -- "$dir_name" 2>/dev/null && pwd) || abs_path=$dir_name
  echo "$abs_path"
}

PROG=$(basename -- "$0")

# The '\t' sequences are expanded by 'echo -e' when the text is printed. The
# variable must be quoted at that point, otherwise the line breaks are lost.
usage="Usage: $PROG <arguments> [options]
Converts GlobalPhone audio files from shorten to WAV with error checking.
(Must have shorten and sox on PATH).

Required arguments:
  --input-list=FILE\tList of shorten-compressed files to process.
  --output-dir=DIR\tDirectory to write the WAV files to.

Options:
  --output-list=FILE\tWrite list of converted files.
  --help\t\t\tPrint this help and exit.
"

if [[ $# -lt 2 ]]; then
  echo -e "$usage" >&2
  exit 1
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    --help)
      echo -e "$usage"
      exit 0
      ;;
    --input-list=*)
      INLIST=${1#*=}
      if [[ ! -f "$INLIST" ]]; then
        echo "Argument '$INLIST' not a file" >&2
        exit 1
      fi
      shift
      ;;
    --output-dir=*)
      # read_dirname has already printed the reason when it fails.
      ODIR=$(read_dirname "$1") || exit 1
      shift
      ;;
    --output-list=*)
      OLIST=${1#*=}
      shift
      ;;
    *)
      echo "Unknown argument: $1, exiting" >&2
      echo -e "$usage" >&2
      exit 1
      ;;
  esac
done

# Report missing required arguments here instead of failing later with an
# "unbound variable" error.
if [[ -z "${INLIST:-}" || -z "${ODIR:-}" ]]; then
  echo "$PROG: both --input-list and --output-dir must be given" >&2
  echo -e "$usage" >&2
  exit 1
fi

OLIST=${OLIST:-/dev/null}  # Default for output list

# Checking for shorten and sox. The script terminates if either of them is
# not found.
# soxi is checked as well because the sample count below depends on it.
for tool in shorten sox soxi; do
  if ! command -v "$tool" > /dev/null; then
    echo "Required program '$tool' not found on PATH" >&2
    exit 1
  fi
done

# Scratch space for the decompressed raw audio and the error logs; removed on
# every exit path by the trap.
tmpdir=$(mktemp -d /tmp/kaldi.XXXX)
trap 'rm -rf "$tmpdir"' EXIT
mkdir -p "$tmpdir/raw" "$ODIR"

shnerr=$tmpdir/shnerr   # Input files that shorten failed to decompress.
soxerr=$tmpdir/soxerr   # Raw files that sox failed on or that were too short.
nshnerr=0
nsoxerr=0

# Each line of the input list must be a path (containing a '/') to a file
# ending in '.adc.shn'. 'read -r' keeps backslashes intact, and the extra
# test processes a last line that lacks a trailing newline.
while read -r line || [[ -n "$line" ]]; do
  if [[ ! "$line" =~ ^.*/.*\.adc\.shn$ ]]; then
    echo "Bad line: '$line'" >&2
    exit 1
  fi

  b=$(basename -- "$line" .adc.shn)
  raw=$tmpdir/raw/${b}.raw
  wav=$ODIR/${b}.wav

  # A failed conversion must not abort the script: statuses are tested
  # explicitly, so errexit never fires on them. stdin is redirected so that
  # the tools cannot consume the rest of the input list.
  if ! shorten -x "$line" "$raw" < /dev/null; then
    echo "$line" >> "$shnerr"
    nshnerr=$((nshnerr + 1))
    continue
  fi

  # Capture the status right away; a later '$?' would report the status of
  # the test that inspected it instead of the status of sox.
  status=0
  sox -t raw -r 16000 -e signed-integer -b 16 "$raw" -t wav "$wav" \
    < /dev/null || status=$?
  if [[ "$status" -ne 0 ]]; then
    echo "$raw: exit status = $status" >> "$soxerr"
    nsoxerr=$((nsoxerr + 1))
    continue
  fi

  # Just in case there are empty files! Setting the cutoff at 1000 samples,
  # which, assuming 16KHz sampling, is 0.0625 seconds.
  # A soxi failure leaves the count empty and is logged as an error too.
  nsamples=$(soxi -s "$wav") || nsamples=""
  if [[ "$nsamples" =~ ^[0-9]+$ ]] && [[ "$nsamples" -gt 1000 ]]; then
    echo "$wav" >> "$OLIST"
  else
    echo "$raw: #samples = $nsamples" >> "$soxerr"
    nsoxerr=$((nsoxerr + 1))
  fi
done < "$INLIST"

# Summarise the failures on stderr. Conversion errors are reported but do not
# change the exit status.
if [[ "$nshnerr" -gt 0 ]]; then
  echo "shorten: error converting following $nshnerr file(s):" >&2
  cat "$shnerr" >&2
fi

if [[ "$nsoxerr" -gt 0 ]]; then
  echo "sox: error converting following $nsoxerr file(s):" >&2
  cat "$soxerr" >&2
fi

exit 0