ExtractAudioFromTV.sh 1.03 KB
#!/bin/bash

# File : ExtractAudioFromTV.sh
# Brief : extract audio file and subtitle from TV corpus
# version 1.0
#

### Example : ExtractAudioFromTV.sh INA-TV2 MYTV

# Require exactly two positional arguments: the input and output directories.
# No option parsing (getopts) runs before this point, so the number of
# arguments is simply $#. The usage message reports an error, so it is
# written to stderr.
if [ "$#" -ne 2 ]
then
    echo "BAD USAGE : $0 <INPUT_DIRECTORY> <OUTPUT_DIRECTORY>" >&2
    exit 1
fi

# Resolve the input directory (first argument) to a canonical absolute path
# stored in IN.
# The argument is quoted so that paths containing spaces or glob characters
# are handled, and so that an empty argument is rejected instead of making
# the test trivially true. A directory is required (not merely an existing
# path), and a failure of readlink is treated as an error as well.
if ! { [ -d "$1" ] && IN=$(readlink -e -- "$1"); }
then
    echo "ERROR : Can't read input $1" >&2
    exit 1
fi

# Resolve the output directory (second argument) to a canonical absolute path
# stored in OUT. The directory must already exist; it is not created here.
# The argument is quoted so that paths containing spaces or glob characters
# are handled, and so that an empty argument is rejected instead of making
# the test trivially true. A failure of readlink is treated as an error too.
if ! { [ -d "$2" ] && OUT=$(readlink -e -- "$2"); }
then
    echo "ERROR : Can't read  OUTPUT $2" >&2
    exit 1
fi


# Convert every video of the corpus to audio and copy its subtitles.
#
# Expected layout : ${IN}/<f>/<d>/<name>.MP4, with an optional <name>.SRT next
# to it. For each .MP4 file this writes ${OUT}/<channel>/<d>_<name>.wav
# (avconv: no video, 1 audio channel, 16000 Hz) and, when the .SRT exists,
# copies it to ${OUT}/<channel>/<d>_<name>.SRT. <channel> is <f> with the
# first "NAS_" removed.
#
# Every path is built from ${IN} rather than relying on "cd" and the current
# working directory, so the script works from wherever it is started.
# Entries are enumerated with globs instead of parsing "ls" output, so names
# containing whitespace are handled, and only names ending in ".MP4" are
# converted.

# Abort rather than scan "/" if the directories were never resolved.
: "${IN:?input directory (IN) is not set}"
: "${OUT:?output directory (OUT) is not set}"

for f_path in "${IN}"/*
do
    # Also skips the literal pattern left behind when the glob matches nothing.
    [ -d "${f_path}" ] || continue
    f=${f_path##*/}
    channel=${f/NAS_/}

    # -p : do not fail when the channel directory is left from a previous run.
    if ! mkdir -p -- "${OUT}/${channel}"
    then
        echo "ERROR : Can't create ${OUT}/${channel}" >&2
        continue
    fi

    for d_path in "${f_path}"/*
    do
        [ -d "${d_path}" ] || continue
        d=${d_path##*/}

        for file_path in "${d_path}"/*.MP4
        do
            [ -f "${file_path}" ] || continue
            file=${file_path##*/}
            name=${file%.MP4}

            avconv -i "${file_path}" -threads 4 -vn -f wav -ac 1 -ar 16000 -ab 256000 "${OUT}/${channel}/${d}_${name}.wav" \
                || echo "WARNING : avconv failed on ${file_path}" >&2

            if [ -e "${d_path}/${name}.SRT" ]
            then
                cp -- "${d_path}/${name}.SRT" "${OUT}/${channel}/${d}_${name}.SRT"
            fi
        done
    done
done