Blame view

egs/chime1/s5/local/chime1_prepare_dict.sh 932 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
  #!/bin/bash
  
  # Copyright 2015  University of Sheffield (Author: Ning Ma)
  # Apache 2.0.
  #
  # Kaldi scripts for preparing dictionary for the GRID corpus (or CHiME 1)
  
  echo "Preparing dictionary"

  # config.sh is expected to define REC_ROOT (and WAV_ROOT, which is not used
  # in this script). Check for it explicitly: a failed `.` would otherwise be
  # ignored and the script would carry on with REC_ROOT unset.
  if [ ! -r ./config.sh ]; then
    echo "$0: cannot read ./config.sh in the current directory" >&2
    exit 1
  fi
  . ./config.sh # Needed for REC_ROOT and WAV_ROOT

  # Strict mode is enabled only after sourcing, so it does not change how
  # config.sh itself is evaluated. From here on any failing step aborts the
  # script instead of falling through to the success message.
  set -euo pipefail

  # Abort rather than writing under /data/local/dict when REC_ROOT is missing.
  : "${REC_ROOT:?REC_ROOT must be set by config.sh}"

  # Prepare relevant folders
  dict="$REC_ROOT/data/local/dict"
  mkdir -p -- "$dict"

  # Copy lexicon
  lexicon="input/lexicon.txt" # phone models
  if [ ! -f "$lexicon" ]; then
    echo "$0: lexicon not found: $lexicon" >&2
    exit 1
  fi
  cp -- "$lexicon" "$dict/lexicon.txt"

  # Generate phone list: every field after the first on each lexicon line is
  # taken as a phone; duplicates are removed, then the silence phone is added.
  sil="SIL"
  phone_list="$dict/phone.list"
  awk '{for (n=2;n<=NF;n++)print $n;}' "$lexicon" | sort -u > "$phone_list"
  printf '%s\n' "$sil" >> "$phone_list"

  # Create phone lists
  # grep exits with status 1 when it selects no lines; that case still leaves
  # an empty nonsilence_phones.txt (as before), while a real grep error
  # (status > 1) aborts the script.
  grep -v -w -- "$sil" "$phone_list" > "$dict/nonsilence_phones.txt" || [ $? -eq 1 ]
  printf '%s\n' "$sil" > "$dict/silence_phones.txt"
  printf '%s\n' "$sil" > "$dict/optional_silence.txt"

  # List of "extra questions" -- empty; we don't have things like tone,
  # word-positions or stress markings.
  touch -- "$dict/extra_questions.txt"

  echo "-->Dictionary preparation succeeded"
  exit 0