Blame view
Scripts/utils/nnet/copy_feats.sh
1.89 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
#!/bin/bash
# Copyright 2012 Karel Vesely

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This script re-saves features to a specified directory.
# This is done to have the randomized data stored consecutively,
# which improves the speed and reduces loads on disks.
#
# Usage: copy_feats.sh <input.scp> <tmpdir> <output.scp>
#   <input.scp>   feature list to copy; read several times, so it must be a
#                 readable file rather than a stream
#   <tmpdir>      directory that receives feats.N.ark / feats.N.scp
#                 (created if it does not exist yet)
#   <output.scp>  concatenation of all feats.N.scp, pointing into <tmpdir>
#
# Exit status: 0 when every input line was re-saved, 1 otherwise.
#
# To make sure the temporary dir gets deleted upon exit of the calling script
# you can use something like:
#
# trap "echo \"Removing features tmpdir $tmpdir @ $(hostname)\"; rm -r $tmpdir" EXIT

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1

if [ $# != 3 ]; then
  echo "Usage: $0 <input.scp> <tmpdir> <output.scp>"
  echo " e.g.: $0 train_remote.scp /tmp/324nkjl train_local.scp"
  exit 1
fi

scp_in=$1
tmpdir=$2
scp_out=$3

# Fail early with a clear message instead of letting a later tool
# trip over a missing input list.
if [[ ! -r "$scp_in" ]]; then
  echo "$0: cannot read input scp '$scp_in'" >&2
  exit 1
fi

# The archives are written below; make sure their directory is there.
if ! mkdir -p -- "$tmpdir"; then
  echo "$0: cannot create tmpdir '$tmpdir'" >&2
  exit 1
fi

echo "Re-saving the features to tmpdir $tmpdir @ $(hostname)"

# Count the input lines once; the value is reused for the final sanity check.
num_in=$(wc -l < "$scp_in") || exit 1

# Divide the arks per 10k files.
nj=$((1 + num_in / 10000))

for ((n = 0; n < nj; n++)); do
  # NOTE(review): the "scp:... |" argument is a command line handed to
  # copy-feats, presumably run through a shell there, so $scp_in is
  # inserted as-is inside it -- confirm before using paths with whitespace.
  copy-feats "scp:utils/split_scp.pl -j $nj $n $scp_in - |" \
    "ark,scp:$tmpdir/feats.$n.ark,$tmpdir/feats.$n.scp" || exit 1
done

# Assemble the scp file, in split order.
for ((n = 0; n < nj; n++)); do
  cat -- "$tmpdir/feats.$n.scp" || exit 1
done > "$scp_out"

# Test that we have all the data.
l1=$num_in
l2=$(wc -l < "$scp_out") || exit 1
if ((l1 != l2)); then
  echo "ERROR in data re-saving $l1 != $l2"
  exit 1
fi

# Notify that it was copied ok.
wc -l "$scp_in" "$scp_out"
echo Copied ok!

exit 0