Blame view
tools/openfst-1.6.7/src/extensions/far/farcompilestrings.cc
3.3 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Compiles a set of stings as FSTs and stores them in a finite-state archive. #include <string> #include <vector> #include <fst/flags.h> #include <fst/extensions/far/farscript.h> #include <fst/extensions/far/getters.h> #include <fstream> DEFINE_string(key_prefix, "", "Prefix to append to keys"); DEFINE_string(key_suffix, "", "Suffix to append to keys"); DEFINE_int32(generate_keys, 0, "Generate N digit numeric keys (def: use file basenames)"); DEFINE_string(far_type, "default", "FAR file format type: one of: \"default\", \"fst\", " "\"stlist\", \"sttable\""); DEFINE_bool(allow_negative_labels, false, "Allow negative labels (not recommended; may cause conflicts)"); DEFINE_string(arc_type, "standard", "Output arc type"); DEFINE_string(entry_type, "line", "Entry type: one of : " "\"file\" (one FST per file), \"line\" (one FST per line)"); DEFINE_string(fst_type, "vector", "Output FST type"); DEFINE_string(token_type, "symbol", "Token type: one of : " "\"symbol\", \"byte\", \"utf8\""); DEFINE_string(symbols, "", "Label symbol table"); DEFINE_string(unknown_symbol, "", ""); DEFINE_bool(file_list_input, false, "Each input file contains a list of files to be processed"); DEFINE_bool(keep_symbols, false, "Store symbol table in the FAR file"); DEFINE_bool(initial_symbols, true, "When keep_symbols is true, stores symbol table only for the first" " FST in archive."); int main(int argc, char **argv) { namespace s = fst::script; string usage = "Compiles a set of strings as FSTs and stores them in"; usage += " a finite-state archive. Usage:"; usage += argv[0]; usage += " [in1.txt [[in2.txt ...] out.far]] "; std::set_new_handler(FailedNewHandler); SET_FLAGS(usage.c_str(), &argc, &argv, true); s::ExpandArgs(argc, argv, &argc, &argv); std::vector<string> in_fnames; if (FLAGS_file_list_input) { for (int i = 1; i < argc - 1; ++i) { std::ifstream istrm(argv[i]); string str; while (getline(istrm, str)) in_fnames.push_back(str); } } else { for (int i = 1; i < argc - 1; ++i) in_fnames.push_back(argv[i]); } if (in_fnames.empty()) { in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : ""); } string out_fname = argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : ""; fst::FarEntryType entry_type; if (!s::GetFarEntryType(FLAGS_entry_type, &entry_type)) { LOG(ERROR) << "Unknown or unsupported FAR entry type: " << FLAGS_entry_type; return 1; } fst::FarTokenType token_type; if (!s::GetFarTokenType(FLAGS_token_type, &token_type)) { LOG(ERROR) << "Unkonwn or unsupported FAR token type: " << FLAGS_token_type; return 1; } const auto far_type = s::GetFarType(FLAGS_far_type); s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type, far_type, FLAGS_generate_keys, entry_type, token_type, FLAGS_symbols, FLAGS_unknown_symbol, FLAGS_keep_symbols, FLAGS_initial_symbols, FLAGS_allow_negative_labels, FLAGS_key_prefix, FLAGS_key_suffix); return 0; } |