Blame view

tools/sctk-2.4.10/src/asclite/core/linestyle_inputparser.h 3.73 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
  /*
   * ASCLITE
   * Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun
   *
   * This software was developed at the National Institute of Standards and Technology by 
   * employees of the Federal Government in the course of their official duties. Pursuant
   * to title 17 Section 105 of the United States Code this software is not subject to
   * copyright protection and is in the public domain. ASCLITE is an experimental system.
   * NIST assumes no responsibility whatsoever for its use by other parties, and makes no
   * guarantees, expressed or implied, about its quality, reliability, or any other
   * characteristic. We would appreciate acknowledgement if the software is used.
   *
   * THIS SOFTWARE IS PROVIDED "AS IS."  With regard to this software, NIST MAKES NO EXPRESS
   * OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY,
   * OR FITNESS FOR A PARTICULAR PURPOSE.
   */
  
  #ifndef LINESTYLE_INPUTPARSER_H
  #define LINESTYLE_INPUTPARSER_H
  
  #include "inputparser.h" // inheriting class's header file
  #include "token.h"
  #include "logger.h"
  
  /**
   * Generic base class for all line-oriented parsers.
   * It provides helper methods that make parsing a line easier.
   */
  class LineStyleInputParser : public InputParser
  {
      public:
          // class constructor
          // Every data member receives a defined initial value here so that
          // none of them can be read while still indeterminate.
          LineStyleInputParser()
              : m_bUseConfidence(false),
                m_bUseExtended(false),
                m_Confidence(0.0f),
                m_starttime(0),
                m_endtime(0)
          {}
          // class destructor
          virtual ~LineStyleInputParser() {}

      protected:
          /**
           * Parse a string as a line of tokens
           * and return the corresponding Segment.
           */
          Segment* ParseWords(const string& source, const string& channel, const string& spkr, const int& start, const int& end, Speech* speech, const string& tokens);
          /**
           * Variant of ParseWords taking three extra arguments (hasconf,
           * confscr, bOptionallyDeletable).
           * NOTE(review): presumably a confidence flag/score and an
           * "optionally deletable" marker applied to the parsed tokens;
           * confirm against the implementation file.
           */
          Segment* ParseWordsEx(const string& source, const string& channel, const string& spkr, const int& start, const int& end, Speech* speech, const string& tokens, const bool& hasconf, const float& confscr, bool bOptionallyDeletable);
          /**
           * Takes a SpeechSet and returns a SpeechSet.
           * NOTE(review): judging by the name this expands alternations;
           * ownership of the argument and of the result is not visible
           * from this header, confirm against the implementation file.
           */
          SpeechSet* ExpandAlternationSpeechSet(SpeechSet *speechs);

      private:
          /**
           * Helper holding two lists of Token pointers (the "start" tokens
           * and the "end" tokens) together with a "traversable" flag.
           * The destructor is defined out of line, so whether the Token
           * pointers are owned by this object cannot be told from here.
           */
          class VirtualSegment
          {
              public:
                  // A new VirtualSegment starts out as not traversable.
                  VirtualSegment() : traversable(false) {}
                  ~VirtualSegment();
                  // Both vector getters return a copy of the internal vector.
                  vector<Token *> GetStartTokenVector() const { return a_startTokens; }
                  vector<Token *> GetEndTokenVector() const { return a_endTokens; }
                  // Indexed access is unchecked: index must be below the matching GetNb...Token().
                  Token* GetStartToken(const size_t& index) const { return a_startTokens[index]; }
                  size_t GetNbStartToken() const { return a_startTokens.size(); }
                  Token* GetEndToken(const size_t& index) const { return a_endTokens[index]; }
                  size_t GetNbEndToken() const { return a_endTokens.size(); }
                  // Append a token pointer to the start / end list.
                  void AddStartToken(Token* tok) { a_startTokens.push_back(tok); }
                  void AddEndToken(Token* tok) { a_endTokens.push_back(tok); }
                  // Defined out of line; takes another VirtualSegment as the source.
                  void AddEndTokens(LineStyleInputParser::VirtualSegment* toks);
                  // Empties the end list; the pointed-to tokens are not deleted here.
                  void ClearEndToken() { a_endTokens.clear(); }
                  void SetTraversable(const bool& trav) { traversable = trav; }
                  bool IsTraversable() const { return traversable; }

              private:
                  vector<Token*> a_startTokens;
                  vector<Token*> a_endTokens;
                  bool traversable;
          };

          // Internal parsing helpers, all defined in the implementation file.
          VirtualSegment* ParseWords(Segment* seg, const string& tokens, bool bOptionallyDeletable);
          vector<string> SeparateBySlash(const string& line);
          vector<string> TokeniseWords(const string& line);
          void Attach(VirtualSegment* tok1, VirtualSegment* tok2);
          VirtualSegment* Transition(VirtualSegment* prec_token, VirtualSegment* toks);
          string FilterSpace(string line);
          string ReplaceChar(const string& line, const string& badstr, const string& goodstr);

          // Parser state; initialised by the constructor.
          // NOTE(review): presumably set by ParseWords/ParseWordsEx and read
          // by the private helpers; confirm against the implementation file.
          bool m_bUseConfidence;
          bool m_bUseExtended;
          float m_Confidence;
          int m_starttime;
          int m_endtime;
  };
  
  #endif // LINESTYLE_INPUTPARSER_H