// linestyle_inputparser.h
/*
* ASCLITE
* Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun
*
* This software was developed at the National Institute of Standards and Technology by
* employees of the Federal Government in the course of their official duties. Pursuant
* to title 17 Section 105 of the United States Code this software is not subject to
* copyright protection and is in the public domain. ASCLITE is an experimental system.
* NIST assumes no responsibility whatsoever for its use by other parties, and makes no
* guarantees, expressed or implied, about its quality, reliability, or any other
* characteristic. We would appreciate acknowledgement if the software is used.
*
* THIS SOFTWARE IS PROVIDED "AS IS." With regard to this software, NIST MAKES NO EXPRESS
* OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY,
* OR FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef LINESTYLE_INPUTPARSER_H
#define LINESTYLE_INPUTPARSER_H
#include "inputparser.h" // inheriting class's header file
#include "token.h"
#include "logger.h"
/**
* Generic base class for all line-oriented parsers.
* It provides methods that make parsing a line easier.
*/
class LineStyleInputParser : public InputParser
{
    public:
        /**
         * Class constructor.
         * Every data member receives a defined starting value so that no
         * member is ever read while still indeterminate.
         */
        LineStyleInputParser()
            : m_bUseConfidence(false),
              m_bUseExtended(false),
              m_Confidence(0.0f),
              m_starttime(0),
              m_endtime(0)
        {}

        // class destructor
        virtual ~LineStyleInputParser() {}

    protected:
        /**
         * Parse a string as a line of tokens
         * and return the corresponding Segment
         */
        Segment* ParseWords(const string& source, const string& channel, const string& spkr, const int& start, const int& end, Speech* speech, const string& tokens);

        /**
         * Extended variant of ParseWords taking three additional arguments
         * (hasconf, confscr, bOptionallyDeletable).
         * NOTE(review): the implementation is not visible from this header;
         * presumably hasconf/confscr carry an optional confidence score for the
         * line - confirm against the .cpp.
         */
        Segment* ParseWordsEx(const string& source, const string& channel, const string& spkr, const int& start, const int& end, Speech* speech, const string& tokens, const bool& hasconf, const float& confscr, bool bOptionallyDeletable);

        /**
         * Takes a SpeechSet and returns a SpeechSet.
         * NOTE(review): judging by the name this expands alternations; whether the
         * result is a new object or the input itself (and who owns it) must be
         * checked in the .cpp.
         */
        SpeechSet* ExpandAlternationSpeechSet(SpeechSet *speechs);

    private:
        /**
         * Helper holding two lists of Token pointers (the "start" tokens and
         * the "end" tokens) plus a "traversable" flag.
         * The destructor is defined out of line, so whether the Token pointers
         * are owned by this object cannot be told from this header.
         */
        class VirtualSegment
        {
            public:
                // A new VirtualSegment is not traversable and holds no tokens.
                VirtualSegment() : traversable(false) {}
                ~VirtualSegment();

                // Both vector getters return a COPY of the internal list, so the
                // result stays valid even if this object is modified afterwards.
                vector<Token *> GetStartTokenVector() const { return a_startTokens; }
                vector<Token *> GetEndTokenVector() const { return a_endTokens; }

                // Indexed access is unchecked: index must be smaller than the
                // matching GetNb...Token() value.
                Token* GetStartToken(const size_t& index) const { return a_startTokens[index]; }
                size_t GetNbStartToken() const { return a_startTokens.size(); }
                Token* GetEndToken(const size_t& index) const { return a_endTokens[index]; }
                size_t GetNbEndToken() const { return a_endTokens.size(); }

                // Append a token pointer to the start / end list.
                void AddStartToken(Token* tok) { a_startTokens.push_back(tok); }
                void AddEndToken(Token* tok) { a_endTokens.push_back(tok); }
                // Defined out of line; presumably appends the end tokens of toks - confirm in the .cpp.
                void AddEndTokens(LineStyleInputParser::VirtualSegment* toks);
                // Empties the end list; the Token objects themselves are not deleted here.
                void ClearEndToken() { a_endTokens.clear(); }

                void SetTraversable(const bool& trav) { traversable = trav; }
                bool IsTraversable() const { return traversable; }

            private:
                vector<Token*> a_startTokens;
                vector<Token*> a_endTokens;
                bool traversable;
        };

        // Private helpers, all defined out of line. Their exact semantics are
        // not visible from this header; see the .cpp before relying on them.
        VirtualSegment* ParseWords(Segment* seg, const string& tokens, bool bOptionallyDeletable);
        vector<string> SeparateBySlash(const string& line);
        vector<string> TokeniseWords(const string& line);
        void Attach(VirtualSegment* tok1, VirtualSegment* tok2);
        VirtualSegment* Transition(VirtualSegment* prec_token, VirtualSegment* toks);
        string FilterSpace(string line);
        string ReplaceChar(const string& line, const string& badstr, const string& goodstr);

        // Parser state; initialised to false / 0 by the constructor.
        bool m_bUseConfidence;
        bool m_bUseExtended;
        float m_Confidence;
        int m_starttime;
        int m_endtime;
};
#endif // LINESTYLE_INPUTPARSER_H