Blame view

egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh 1.33 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  #! /bin/bash
  
  # Copyright 2016-2018  Vimal Manohar
  # Apache 2.0
  
  # This scripts converts a data directory into a "whole" data directory
  # by removing the segments and using the recordings themselves as 
  # utterances
  
  set -o pipefail
  
  . ./path.sh
  
  . utils/parse_options.sh
  
  if [ $# -ne 2 ]; then
    echo "Usage: convert_data_dir_to_whole.sh <in-data> <out-data>"
    echo " e.g.: convert_data_dir_to_whole.sh data/dev data/dev_whole"
    exit 1
  fi
  
  data=$1
  dir=$2
  
  if [ ! -f $data/segments ]; then
    echo "$0: Data directory already does not contain segments. So just copying it."
    utils/copy_data_dir.sh $data $dir
    exit 0
  fi
  
  mkdir -p $dir
  cp $data/wav.scp $dir
  if [ -f $data/reco2file_and_channel ]; then 
    cp $data/reco2file_and_channel $dir; 
  fi
  
  mkdir -p $dir/.backup
  if [ -f $dir/feats.scp ]; then
    mv $dir/feats.scp $dir/.backup
  fi
  if [ -f $dir/cmvn.scp ]; then
    mv $dir/cmvn.scp $dir/.backup
  fi
  if [ -f $dir/utt2spk ]; then
    mv $dir/utt2spk $dir/.backup
  fi
  
  [ -f $data/stm ] && cp $data/stm $dir
  [ -f $data/glm ] && cp $data/glm $dir
  
  utils/data/internal/combine_segments_to_recording.py \
    --write-reco2utt=$dir/reco2sorted_utts $data/segments $dir/utt2spk || exit 1
  
  if [ -f $data/text ]; then
    utils/apply_map.pl -f 2- $data/text < $dir/reco2sorted_utts > $dir/text || exit 1
  fi
  
  rm $dir/reco2sorted_utts
  
  utils/fix_data_dir.sh $dir || exit 1
  
  exit 0