Blame view

scripts/rnnlm/ensure_counts_present.sh 666 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
  #!/usr/bin/env bash
  
  # This script makes sure that a <text-dir>, as validated by validate_text_dir.py,
  # has unigram counts present (*.counts).
  
  
  if [ $# != 1 ]; then
    echo "Usage: $0 <text-dir>"
    echo "Makes sure unigram counts (*.counts) are present in <text-dir>,"
    echo "and if not, sets them up."
    exit 1;
  fi
  
  
  dir=$1
  
  all_ok=true
  for f in `ls $dir/*.txt`; do
    counts_file=$(echo $f | sed s/.txt$/.counts/)
    if [ ! -f $counts_file -o $counts_file -ot $f ]; then
      echo "$0: generating counts file for $f" 1>&2
      cat $f | awk '{for(i = 1; i <= NF; i++) {print $i;} print "</s>"}' | \
        sort | uniq -c | awk '{print $2,$1}' > $counts_file
    fi
  done