Blame view

src/fstext/determinize-star.h 5.04 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  // fstext/determinize-star.h
  
  // Copyright 2009-2011  Microsoft Corporation
  //                2014  Guoguo Chen
  //                2015  Hainan Xu
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  #ifndef KALDI_FSTEXT_DETERMINIZE_STAR_H_
  #define KALDI_FSTEXT_DETERMINIZE_STAR_H_
  #include <fst/fstlib.h>
  #include <fst/fst-decl.h>
  #include <algorithm>
  #include <map>
  #include <set>
  #include <vector>
  #include <stdexcept> // this algorithm uses exceptions
  
  namespace fst {
  
  /// \addtogroup fst_extensions
  ///  @{
  
  
  // For example of usage, see test-determinize-star.cc
  
  /*
     DeterminizeStar implements determinization with epsilon removal, which we
     distinguish with a star.
  
     We define a determinized* FST as one in which no state has more than one
     transition with the same input-label.  Epsilon input labels are not allowed
     except starting from states that have exactly one arc exiting them (and are
     not final).  [In the normal definition of determinized, epsilon-input labels
     are not allowed at all, whereas in Mohri's definition, epsilons are treated
     as ordinary symbols].  The determinized* definition is intended to simulate
     the effect of allowing strings of output symbols at each state.
  
     The algorithm implemented here takes an Fst<Arc>, and a pointer to a
     MutableFst<Arc> where it puts its output.  The weight type is assumed to be a
     float-weight.  It does epsilon removal and determinization.
     This algorithm may fail if the input has epsilon cycles under
     certain circumstances (i.e. the semiring is non-idempotent, e.g. the log
     semiring, or there are negative cost epsilon cycles).
  
     This implementation is much less fancy than the one in fst/determinize.h, and
     does not have an "on-demand" version.
  
     The algorithm is a fairly normal determinization algorithm.  We keep in
     memory the subsets of states, together with their leftover strings and their
     weights.  The only difference is we detect input epsilon transitions and
     treat them "specially".
  */
  
  
  // This algorithm will be slightly faster if you sort the input fst on input label.
  
  /**
      This function implements the normal version of DeterminizeStar, in which the
      output strings are represented using sequences of arcs, where all but the
      first one has an epsilon on the input side.  The debug_ptr argument is an
      optional pointer to a bool that, if it becomes true while the algorithm is
      executing, the algorithm will print a traceback and terminate (used in
      fstdeterminizestar.cc debug non-terminating determinization).
      If max_states is positive, it will stop determinization and throw an
      exception as soon as the max-states is reached. This can be useful in test.
      If allow_partial is true, the algorithm will output partial results when the
      specified max_states is reached (when larger than zero), instead of throwing
      out an error.
  
      Caution, the return status is un-intuitive: this function will return false if
      determinization completed normally, and true if it was stopped early by
      reaching the 'max-states' limit, and a partial FST was generated.
  */
  template<class F>
  bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst,
                       float delta = kDelta,
                       bool *debug_ptr = NULL,
                       int max_states = -1,
                       bool allow_partial = false);
  
  
  
  /*  This is a version of DeterminizeStar with a slightly more "natural" output format,
      where the output sequences are encoded using the GallicArc (i.e. the output symbols
      are strings.
      If max_states is positive, it will stop determinization and throw an
      exception as soon as the max-states is reached.  This can be useful in test.
      If allow_partial is true, the algorithm will output partial results when the
      specified max_states is reached (when larger than zero), instead of throwing
      out an error.
  
      Caution, the return status is un-intuitive: this function will return false if
      determinization completed normally, and true if it was stopped early by
      reaching the 'max-states' limit, and a partial FST was generated.
  */
  template<class F>
  bool DeterminizeStar(F &ifst, MutableFst<GallicArc<typename F::Arc> > *ofst,
                       float delta = kDelta, bool *debug_ptr = NULL,
                       int max_states = -1,
                       bool allow_partial = false);
  
  
  /// @} end "addtogroup fst_extensions"
  
  } // end namespace fst
  
  #include "fstext/determinize-star-inl.h"
  
  #endif