kws-functions.h
6.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// kws/kws-functions.h
// Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_KWS_KWS_FUNCTIONS_H_
#define KALDI_KWS_KWS_FUNCTIONS_H_
#include <algorithm>
#include <vector>
#include "lat/kaldi-lattice.h"
#include "kws/kaldi-kws.h"
namespace kaldi {
// We store the time information of the arc into class "Interval". "Interval"
// has a public function "int32 Overlap(const Interval &interval) const" which
// takes in another interval and returns the length of the overlap of that
// interval and the current interval (0 if the two do not overlap).
class Interval {
 public:
  // Creates the empty interval [0, 0]. The members are initialized explicitly
  // so that Start(), End() and Overlap() never read indeterminate values on a
  // default-constructed object.
  Interval() : start_(0), end_(0) {}

  // Creates the interval [start, end]. No check is made that start <= end.
  Interval(int32 start, int32 end) : start_(start), end_(end) {}

  // Copy construction, copy assignment and destruction are intentionally left
  // to the compiler: the class only holds two integers, so the implicitly
  // generated members do exactly the right thing.

  // Returns max(0, min(End(), interval.End()) - max(Start(), interval.Start())),
  // i.e. the length of the part shared by the two intervals. The result is
  // clamped at 0, so disjoint (or merely touching) intervals yield 0 rather
  // than a negative number.
  int32 Overlap(const Interval &interval) const {
    return std::max<int32>(0, std::min(end_, interval.end_) -
                                  std::max(start_, interval.start_));
  }

  // Accessors for the two end points.
  int32 Start() const { return start_; }
  int32 End() const { return end_; }

 private:
  int32 start_;
  int32 end_;
};
// Comparison function for the Interval class defined above: returns true if
// interval i1 is in front of interval i2, and false otherwise.
// NOTE(review): the exact ordering criterion (which end point is compared, and
// how ties or overlapping intervals are treated) is not visible in this
// header -- confirm against the definition before relying on it.
bool CompareInterval(const Interval &i1,
const Interval &i2);
// This function clusters the arcs of *clat that have the same word id and
// overlapping time-spans.
// Examples of clusters:
// 0 1 a a (0.1s ~ 0.5s) and 2 3 a a (0.2s ~ 0.4s) are within the same cluster;
// 0 1 a a (0.1s ~ 0.5s) and 5 6 b b (0.2s ~ 0.4s) are in different clusters;
// 0 1 a a (0.1s ~ 0.5s) and 7 8 a a (0.9s ~ 1.4s) are also in different clusters.
// It puts disambiguating symbols in the olabels, leaving the words on the
// ilabels; the lattice is therefore modified in place.
// NOTE(review): state_times presumably holds one time value per lattice state,
// and the bool result presumably signals success -- neither is visible in this
// header, so confirm against the definition.
bool ClusterLattice(CompactLattice *clat,
const std::vector<int32> &state_times);
// This function contains two steps: weight pushing and factor generation. The
// original ShortestDistance() is not very efficient, so we do the weight
// pushing and shortest path manually by computing the alphas and betas. The
// factor generation step expands the lattice to the LXTXT' semiring, with
// an additional start state and end state (and corresponding arcs) added.
// The input lattice clat is not modified; the result is written to
// *factor_transducer.
// NOTE(review): how utterance_id is encoded in the output and what the bool
// return value means are not visible in this header -- confirm against the
// definition.
bool CreateFactorTransducer(const CompactLattice &clat,
const std::vector<int32> &state_times,
int32 utterance_id,
KwsProductFst *factor_transducer);
// This function removes the arcs with long silence from *factor_transducer.
// By "long" we mean arcs whose number of frames exceeds the given
// max_silence_frames. We do this filtering because the gap between adjacent
// words in a keyword must be <= 0.5 second.
// Note that we should not remove the arcs created in the factor generation
// step, so the "search area" is limited to the original arcs before factor
// generation.
// NOTE(review): state_times is presumably the same per-state time vector that
// was used to build the factor transducer -- confirm against the caller.
void RemoveLongSilences(int32 max_silence_frames,
const std::vector<int32> &state_times,
KwsProductFst *factor_transducer);
// Does the factor merging part: encodes input and output, and applies weighted
// epsilon removal, determinization and minimization. Modifies
// *factor_transducer.
// NOTE(review): the merged result is presumably written to *index_transducer
// (it is the only other, non-const, argument) -- confirm against the
// definition.
void DoFactorMerging(KwsProductFst *factor_transducer,
KwsLexicographicFst *index_transducer);
// Does the factor disambiguation step: removes the cluster ids from the
// non-final arcs and inserts disambiguation symbols for the final arcs.
// Operates on *index_transducer in place.
void DoFactorDisambiguation(KwsLexicographicFst *index_transducer);
// Does the optimization of *index_transducer: encoded determinization followed
// by minimization.
// NOTE(review): max_states and allow_partial presumably bound the
// determinization (a state limit, and whether a partial result is acceptable
// when that limit is hit) -- their exact meaning is not visible in this
// header, so confirm against the definition.
void OptimizeFactorTransducer(KwsLexicographicFst *index_transducer,
int32 max_states,
bool allow_partial);
// The following two functions will, if GetVerboseLevel() >= 2, check that the
// cost of the second-best path in the transducers is not negative, and print
// out some associated debugging info if GetVerboseLevel() >= 3. The best path
// in the transducers will typically be for the empty word sequence, and it may
// have negative cost (i.e. probability more than one), but the second-best one
// should not have negative cost. A warning will be printed if
// GetVerboseLevel() >= 2 and a substantially negative cost is found.
// Both overloads take their transducer by const reference, so the argument is
// only inspected, never modified.
void MaybeDoSanityCheck(const KwsProductFst &factor_transducer);
void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer);
// Arc mapper that converts arcs of a KwsProductFst into arcs of a
// KwsLexicographicFst. It is only used by the internals, but it has to be
// declared in this header because, for the sake of compilation time, the
// implementation is split up into two .cc files that both need it.
class KwsProductFstToKwsLexicographicFstMapper {
 public:
  typedef KwsProductArc FromArc;
  typedef KwsProductWeight FromWeight;
  typedef KwsLexicographicArc ToArc;
  typedef KwsLexicographicWeight ToWeight;

  KwsProductFstToKwsLexicographicFstMapper() {}

  // Maps one arc. Labels and destination state are copied unchanged; only the
  // weight is converted.
  ToArc operator()(const FromArc &arc) const {
    // The zero weight of the source semiring maps onto the zero weight of the
    // target semiring without looking at its components.
    if (arc.weight == FromWeight::Zero()) {
      return ToArc(arc.ilabel, arc.olabel, ToWeight::Zero(), arc.nextstate);
    }

    // Otherwise rebuild the weight component by component: Value1() becomes
    // the first component, and the two halves of Value2() are packed into a
    // StdLStdWeight that becomes the second component.
    const StdLStdWeight second_component(arc.weight.Value2().Value1().Value(),
                                         arc.weight.Value2().Value2().Value());
    const ToWeight converted =
        ToWeight(arc.weight.Value1().Value(), second_component);
    return ToArc(arc.ilabel, arc.olabel, converted, arc.nextstate);
  }

  // No super-final state is requested from the mapping framework.
  fst::MapFinalAction FinalAction() const {
    return fst::MAP_NO_SUPERFINAL;
  }

  // Input and output symbol tables are copied over as they are.
  fst::MapSymbolsAction InputSymbolsAction() const {
    return fst::MAP_COPY_SYMBOLS;
  }
  fst::MapSymbolsAction OutputSymbolsAction() const {
    return fst::MAP_COPY_SYMBOLS;
  }

  // The property bits of the input are passed through unchanged.
  uint64 Properties(uint64 props) const {
    return props;
  }
};
} // namespace kaldi
#endif // KALDI_KWS_KWS_FUNCTIONS_H_