gp_convert_audio.sh
3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/bin/bash -u
# Copyright 2012 Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command and on any use of an unset variable.
# nounset is enabled here as well as on the shebang line because options given
# on the shebang line are lost when the script is started as "bash <script>";
# without it a missing required option would expand to an empty path.
set -o errexit -o nounset
# Resolves the directory named in a "NAME=DIR" option string to an absolute
# path.
# Arguments: $1 - option string; everything after the first '=' is taken as the
#                 directory (empty if there is no '=').
# Outputs:   the absolute path of DIR on stdout, or DIR unchanged if it cannot
#            be entered; an error message on stderr if DIR is not a directory.
# Exits:     with status 1 if DIR is not an existing directory.
function read_dirname () {
  local dir_name retval
  case "$1" in
    *=*) dir_name=${1#*=} ;;
    *)   dir_name= ;;
  esac
  [ -d "$dir_name" ] || { echo "Argument '$dir_name' not a directory" >&2; \
    exit 1; }
  # CDPATH is cleared so that cd cannot print a directory name of its own into
  # the captured output. If the directory cannot be entered, fall back to the
  # name exactly as given.
  retval=$(CDPATH= cd -- "$dir_name" 2>/dev/null && pwd) || retval=$dir_name
  printf '%s\n' "$retval"
}
# Name of this script as shown in the usage text.
PROG=${0##*/}
# Help text. It carries both literal newlines and "\n" escapes: it is meant to
# be expanded unquoted (folding the literal newlines into spaces) and printed
# with "echo -e" (turning the escapes into line breaks and tabs).
usage="Usage: $PROG <arguments> [options]\n
Converts GlobalPhone audio files from shorten to WAV with error checking.\n
(Must have shorten and sox on PATH).\n\n
Required arguments:\n
--input-list=FILE\tList of shorten-compressed files to process.\n
--output-dir=DIR\tDirectory to write the WAV files to.\n
Options:\n
--output-list=FILE\tWrite list of converted files.\n
--help\t\t\tPrint this help and exit.\n
";

# Prints the help text on stdout.
# $usage is expanded unquoted on purpose (see above); globbing is switched off
# around the expansion so that "[options]" cannot match a one-letter file name
# in the current directory.
print_usage() {
  set -f
  echo -e $usage
  set +f
}

# With fewer than two arguments the two required options cannot both be
# present. A lone --help is let through so that it exits with status 0 below.
if [[ $# -lt 2 && "${1:-}" != "--help" ]]; then
  print_usage >&2
  exit 1
fi

# Each option is given as --name=value; the value is everything after the
# first '='.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --help)
      print_usage
      exit 0
      ;;
    --input-list=*)
      INLIST=${1#*=}
      [[ -f "$INLIST" ]] || { echo "Argument '$INLIST' not a file" >&2; exit 1; }
      shift
      ;;
    --output-dir=*)
      # read_dirname reports a non-directory on stderr and fails; stop here in
      # that case even if errexit is not in effect.
      ODIR=$(read_dirname "$1") || exit 1
      shift
      ;;
    --output-list=*)
      OLIST=${1#*=}
      shift
      ;;
    *)
      echo "Unknown argument: $1, exiting" >&2
      print_usage >&2
      exit 1
      ;;
  esac
done

# Report a missing required option by name instead of failing later on an
# unset (or empty) variable.
if [[ -z "${INLIST:-}" ]]; then
  echo "Missing required argument: --input-list=FILE" >&2
  print_usage >&2
  exit 1
fi
if [[ -z "${ODIR:-}" ]]; then
  echo "Missing required argument: --output-dir=DIR" >&2
  print_usage >&2
  exit 1
fi
OLIST=${OLIST:-/dev/null}  # Default for output list

# Check up front that every external program used below is on PATH, and name
# the missing one. soxi is included because it is used to count the samples of
# each converted file.
for tool in shorten sox soxi; do
  command -v "$tool" > /dev/null \
    || { echo "Required program '$tool' not found on PATH" >&2; exit 1; }
done

# Scratch directory for the intermediate raw files and the failure logs; it is
# removed on every exit path.
tmpdir=$(mktemp -d /tmp/kaldi.XXXXXX) \
  || { echo "Could not create a temporary directory" >&2; exit 1; }
trap 'rm -rf "$tmpdir"' EXIT
mkdir -p "$tmpdir/raw" "$ODIR"

# Failure logs (created only when a failure is recorded) and failure counters
# for the two conversion stages.
shnerr=$tmpdir/shnerr
soxerr=$tmpdir/soxerr
nshnerr=0
nsoxerr=0
# Convert every file named in the input list: shorten -> raw -> WAV.
# A failed conversion is logged and counted but does not stop the run, so each
# tool's status is tested explicitly rather than left to errexit.
# "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
while read -r line || [[ -n "$line" ]]; do
  [[ "$line" =~ ^.*/.*\.adc\.shn$ ]] || { echo "Bad line: '$line'" >&2; exit 1; }
  # Base name without directory and without the .adc.shn suffix.
  b=${line##*/}
  b=${b%.adc.shn}
  raw=$tmpdir/raw/${b}.raw
  wav=$ODIR/${b}.wav
  if ! shorten -x "$line" "$raw"; then
    echo "$line" >> "$shnerr"
    nshnerr=$((nshnerr + 1))
  else
    # Capture the status right away: any later command, including the test in
    # the "if" below, overwrites $?.
    sox_status=0
    sox -t raw -r 16000 -e signed-integer -b 16 "$raw" -t wav "$wav" \
      || sox_status=$?
    if [[ "$sox_status" -ne 0 ]]; then
      echo "$raw: exit status = $sox_status" >> "$soxerr"
      nsoxerr=$((nsoxerr + 1))
    else
      # Just in case there are empty files! Setting the cutoff at 1000 samples,
      # which, assuming 16KHz sampling, is 0.0625 seconds.
      nsamples=$(soxi -s "$wav") || nsamples=
      if [[ "$nsamples" =~ ^[0-9]+$ ]] && [[ "$nsamples" -gt 1000 ]]; then
        echo "$wav" >> "$OLIST"
      else
        echo "$raw: #samples = $nsamples" >> "$soxerr"
        nsoxerr=$((nsoxerr + 1))
      fi
    fi
  fi
  # The raw file is only an intermediate; remove it now so the scratch
  # directory does not grow with every file processed.
  rm -f -- "$raw"
done < "$INLIST"
# Prints one failure summary on stderr: a header line when the counter is
# positive, followed by the contents of the failure log if that file exists.
#   $1 - tool name used as the message prefix
#   $2 - number of failures recorded for that tool
#   $3 - path of the failure log for that tool
function report_failures () {
  if [[ "$2" -gt 0 ]]; then
    echo "$1: error converting following $2 file(s):" >&2
  fi
  if [ -f "$3" ]; then
    cat "$3" >&2
  fi
}

report_failures shorten "$nshnerr" "$shnerr"
report_failures sox "$nsoxerr" "$soxerr"

# Conversion failures are reported above but do not change the exit status.
exit 0