Blame view

tools/openfst-1.6.7/include/fst/symbol-table-ops.h 2.97 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  // See www.openfst.org for extensive documentation on this weighted
  // finite-state transducer library.
  
  #ifndef FST_SYMBOL_TABLE_OPS_H_
  #define FST_SYMBOL_TABLE_OPS_H_
  
  #include <string>
  #include <unordered_set>
  #include <vector>
  
  
  #include <fst/fst.h>
  #include <fst/symbol-table.h>
  
  
  namespace fst {
  
  // Builds a reduced copy of syms that keeps only the entries whose labels are
  // actually used on the arcs of fst, plus label 0 (epsilon).  Labels are
  // copied unchanged, so the fst can keep its existing arc labels and needs no
  // relabeling.
  //
  // fst:   the transducer whose arcs are scanned.
  // syms:  the full symbol table to prune.
  // input: if true, arc input labels are collected; otherwise output labels.
  //
  // Returns a table allocated with new and named syms.Name() + "_pruned".
  template <class Arc>
  SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
                                bool input) {
    using Label = typename Arc::Label;

    // Collect every label that appears on the selected side of any arc.
    std::unordered_set<Label> used_labels;
    used_labels.insert(0);  // Epsilon is retained unconditionally.
    for (StateIterator<Fst<Arc>> states(fst); !states.Done(); states.Next()) {
      for (ArcIterator<Fst<Arc>> arcs(fst, states.Value()); !arcs.Done();
           arcs.Next()) {
        const auto &arc = arcs.Value();
        used_labels.insert(input ? arc.ilabel : arc.olabel);
      }
    }

    // Copy over only those (symbol, label) pairs whose label was collected.
    auto *result = new SymbolTable(syms.Name() + "_pruned");
    for (SymbolTableIterator entry(syms); !entry.Done(); entry.Next()) {
      const auto key = entry.Value();
      if (used_labels.find(key) == used_labels.end()) continue;
      result->AddSymbol(entry.Symbol(), key);
    }
    return result;
  }
  
  // Relabels a symbol table to make it a contiguous mapping.
  //
  // syms: the symbol table to compact; taken by const reference.
  //
  // Returns a pointer to the SymbolTable holding the relabeled mapping.
  // NOTE(review): the definition is not in this header; presumably the result
  // is newly allocated and owned by the caller -- confirm.
  SymbolTable *CompactSymbolTable(const SymbolTable &syms);
  
  // Merges two SymbolTables.  All symbols from left are merged into right with
  // the same ids.  Symbols in right whose ids conflict with those in left are
  // assigned the value given by the left SymbolTable.  The returned symbol
  // table never modifies symbol assignments from the left side, but may do so
  // for the right.  If right_relabel_output is non-null, it is set to true if
  // the symbols from the right table needed to be reassigned.
  //
  // A potential use case is to Compose two Fsts that have different symbol
  // tables.  You can reconcile them in the following way:
  //   Fst<Arc> a, b;
  //   bool relabel;
  //   std::unique_ptr<SymbolTable> bnew(MergeSymbolTable(a.OutputSymbols(),
  //                                     b.InputSymbols(), &relabel));
  //   if (relabel) {
  //     Relabel(b, bnew.get(), nullptr);
  //   }
  //   b.SetInputSymbols(bnew);
  // NOTE(review): the example above is a sketch; check the argument types
  // against the current Fst and Relabel signatures before copying it.
  SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
                                bool *right_relabel_output = nullptr);
  
  // Reads the symbol table from any Fst::Read()able file, without loading the
  // corresponding Fst.  Returns nullptr if the Fst does not contain a symbol
  // table or the symbol table cannot be read.
  //
  // filename: name of the file to read from.
  // input:    NOTE(review): presumably selects the input (true) or output
  //           (false) symbol table, as in PruneSymbolTable -- confirm.
  SymbolTable *FstReadSymbols(const string &filename, bool input);
  
  // Adds a contiguous range of symbols to a symbol table using a simple prefix
  // for the string, returning false if the inserted symbol string clashes with
  // any currently present.
  //
  // prefix:      string prefix used to form the name of each added symbol.
  // start_label: first label of the contiguous range.
  // nlabels:     number of labels in the range.
  // syms:        the symbol table that receives the new symbols.
  // NOTE(review): the exact naming scheme (presumably prefix followed by the
  // label number) lives in the definition, not in this header -- confirm.
  bool AddAuxiliarySymbols(const string &prefix, int64 start_label,
                           int64 nlabels, SymbolTable *syms);
  
  }  // namespace fst
  #endif  // FST_SYMBOL_TABLE_OPS_H_