determinize-star.h
5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// fstext/determinize-star.h
// Copyright 2009-2011 Microsoft Corporation
// 2014 Guoguo Chen
// 2015 Hainan Xu
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FSTEXT_DETERMINIZE_STAR_H_
#define KALDI_FSTEXT_DETERMINIZE_STAR_H_
#include <fst/fstlib.h>
#include <fst/fst-decl.h>
#include <algorithm>
#include <map>
#include <set>
#include <vector>
#include <stdexcept> // this algorithm uses exceptions
namespace fst {
/// \addtogroup fst_extensions
/// @{
// For example of usage, see test-determinize-star.cc
/*
DeterminizeStar implements determinization with epsilon removal, which we
distinguish with a star.
We define a determinized* FST as one in which no state has more than one
transition with the same input-label. Epsilon input labels are not allowed
except starting from states that have exactly one arc exiting them (and are
not final). [In the normal definition of determinized, epsilon-input labels
are not allowed at all, whereas in Mohri's definition, epsilons are treated
as ordinary symbols]. The determinized* definition is intended to simulate
the effect of allowing strings of output symbols at each state.
The algorithm implemented here takes an Fst<Arc>, and a pointer to a
MutableFst<Arc> where it puts its output. The weight type is assumed to be a
float-weight. It does epsilon removal and determinization.
This algorithm may fail if the input has epsilon cycles under
certain circumstances (i.e. the semiring is non-idempotent, e.g. the log
semiring, or there are negative cost epsilon cycles).
This implementation is much less fancy than the one in fst/determinize.h, and
does not have an "on-demand" version.
The algorithm is a fairly normal determinization algorithm. We keep in
memory the subsets of states, together with their leftover strings and their
weights. The only difference is we detect input epsilon transitions and
treat them "specially".
*/
// This algorithm will be slightly faster if you sort the input fst on input label.
/**
This function implements the normal version of DeterminizeStar, in which the
output strings are represented using sequences of arcs, where all but the
first one has an epsilon on the input side. The debug_ptr argument is an
optional pointer to a bool that, if it becomes true while the algorithm is
executing, the algorithm will print a traceback and terminate (used in
fstdeterminizestar.cc debug non-terminating determinization).
If max_states is positive, it will stop determinization and throw an
exception as soon as the max-states is reached. This can be useful in test.
If allow_partial is true, the algorithm will output partial results when the
specified max_states is reached (when larger than zero), instead of throwing
out an error.
Caution, the return status is un-intuitive: this function will return false if
determinization completed normally, and true if it was stopped early by
reaching the 'max-states' limit, and a partial FST was generated.
*/
template<class F>
bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst,
float delta = kDelta,
bool *debug_ptr = NULL,
int max_states = -1,
bool allow_partial = false);
/* This is a version of DeterminizeStar with a slightly more "natural" output format,
where the output sequences are encoded using the GallicArc (i.e. the output symbols
are strings.
If max_states is positive, it will stop determinization and throw an
exception as soon as the max-states is reached. This can be useful in test.
If allow_partial is true, the algorithm will output partial results when the
specified max_states is reached (when larger than zero), instead of throwing
out an error.
Caution, the return status is un-intuitive: this function will return false if
determinization completed normally, and true if it was stopped early by
reaching the 'max-states' limit, and a partial FST was generated.
*/
template<class F>
bool DeterminizeStar(F &ifst, MutableFst<GallicArc<typename F::Arc> > *ofst,
float delta = kDelta, bool *debug_ptr = NULL,
int max_states = -1,
bool allow_partial = false);
/// @} end "addtogroup fst_extensions"
} // end namespace fst
#include "fstext/determinize-star-inl.h"
#endif