Blame view

tools/sctk-2.4.10/src/asclite/core/trn_inputparser.cpp 2.99 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  /*
   * ASCLITE
   * Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun
   *
   * This software was developed at the National Institute of Standards and Technology by 
   * employees of the Federal Government in the course of their official duties. Pursuant
   * to title 17 Section 105 of the United States Code this software is not subject to
   * copyright protection and is in the public domain. ASCLITE is an experimental system.
   * NIST assumes no responsibility whatsoever for its use by other parties, and makes no
   * guarantees, expressed or implied, about its quality, reliability, or any other
   * characteristic. We would appreciate acknowledgement if the software is used.
   *
   * THIS SOFTWARE IS PROVIDED "AS IS."  With regard to this software, NIST MAKES NO EXPRESS
   * OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY,
   * OR FITNESS FOR A PARTICULAR PURPOSE.
   */
  
  /**
   * Class that handles the parsing of TRN-encoded files.
   */
   
  #include "trn_inputparser.h" // class's header file
  
  // Logger shared by the whole TRNInputParser class; loadFile() reports through it.
  Logger* TRNInputParser::logger(Logger::getLogger());
  
  /**
   * Load the named TRN file into a newly allocated SpeechSet.
   *
   * Every data line is expected to look like
   *     <words ...> (<utterance id>)
   * where the utterance id is the text between the LAST '(' and the LAST ')'
   * of the line, and the words are everything before that '('.
   *
   * - Lines whose first character is ';' are treated as comments and skipped.
   * - Blank (whitespace-only) lines are skipped.
   * - Lines without a well-formed trailing "(id)" are reported with LOG_ERR
   *   and skipped.
   *
   * When the property "inputparser.trn.uid" equals "spu_id", the speaker name
   * is the utterance id up to (not including) its last '_' or '-'; if the id
   * contains neither, the whole id is used. For any other property value an
   * error is logged and the speaker name is "undefined".
   *
   * One Speech is created per distinct speaker name; each parsed line becomes
   * one Segment appended to its speaker's Speech. Source line numbers stored
   * on the segments are zero-based and count every line read (comments and
   * skipped lines included); element numbers count parsed segments only.
   *
   * The process exits with E_LOAD when the file cannot be opened and with
   * E_MISSINFO when no segment at all could be loaded.
   *
   * @param name path of the TRN file to read
   * @return the SpeechSet allocated with new for this file
   *         (NOTE(review): the caller presumably owns it - confirm)
   */
  SpeechSet* TRNInputParser::loadFile(const string& name)
  {
  	ifstream file;
  	file.open(name.c_str(), ifstream::in);
      
  	if (! file.is_open())
  	{ 
  		LOG_FATAL(logger, "Error opening file " + name); 
  		exit (E_LOAD); 
  	}
  	
  	map<string, Speech*> spkr_list;
  	SpeechSet* vec = new SpeechSet(name);
  	
  	string line;
  	long int lineNum = -1;
  	long int elementNum = 0;
  	
  	while (getline(file, line, '\n'))
  	{
  		++lineNum;
  		
  		// Comment line. The historical test was find_first_of(";;") == 0, which
  		// is a character-set search and therefore only ever checked that the
  		// first character is ';'. That behaviour is kept, made explicit and O(1).
  		if (!line.empty() && line[0] == ';')
  		{
  			continue;
  		}
  		
  		// Blank lines carry no utterance; parsing them would create an empty
  		// segment with id "()" attached to an empty speaker name.
  		if (line.find_first_not_of(" \t\r") == string::npos)
  		{
  			continue;
  		}
  		
  		const size_t openParen = line.rfind('(');
  		const size_t closeParen = line.rfind(')');
  		
  		// Without this check the index arithmetic below would wrap around on
  		// string::npos and the whole line would end up as both id and text.
  		if (openParen == string::npos || closeParen == string::npos || closeParen < openParen)
  		{
  			LOG_ERR(logger, "trn_importer : no utterance id found, line skipped in '" + name + "' : " + line);
  			continue;
  		}
  		
  		const size_t uidindex = openParen + 1;
  		const string uid = line.substr(uidindex, closeParen - uidindex);
  		string spkr = "undefined";
  		
  		// Read the property once per line; it is needed again in the error message.
  		const string uidType = Properties::GetProperty("inputparser.trn.uid");
  		
  		if (uidType.compare("spu_id") == 0)
  		{
  			// find_last_of() yields npos when there is no '_' or '-', in which
  			// case substr(0, npos) keeps the whole id as the speaker name.
  			spkr = uid.substr(0, uid.find_last_of("_-"));
  		} 
  		else
  		{
  			LOG_ERR(logger, "trn_importer : unknown uterance id type : " + uidType);
  		}
  		
  		// operator[] value-initialises a missing entry to a null pointer, so a
  		// single lookup both finds and (when needed) creates the speaker slot.
  		Speech*& slot = spkr_list[spkr];
  		
  		if (!slot)
  		{
  			slot = new Speech(vec);
  		}
  		
  		Speech* speech = slot;
  		
  		// The words are everything that precedes the '(' opening the utterance id.
  		Segment* seg = ParseWords(string(""), string(""), string(""), -1, -1, speech, line.substr(0, openParen));
  		seg->SetId("(" + uid + ")");
  		seg->SetSourceLineNum(lineNum);
  		seg->SetSourceElementNum(elementNum++);
  		seg->SetSpeakerId(spkr);
  		speech->AddSegment(seg);
  	}
      
  	LOG_INFO(logger, "loading of file '" + name + "' done");
  	file.close();
  	
  	// No speaker means no segment was parsed at all.
  	if (spkr_list.empty())
  	{
  		LOG_FATAL(logger, "TRN file '" + name + "' contains no data!");
  		exit(E_MISSINFO);
  	}
  	
  	// Hand every per-speaker Speech over to the set, in speaker-name order.
  	for (map<string, Speech*>::iterator it = spkr_list.begin(); it != spkr_list.end(); ++it)
  	{
  		vec->AddSpeech(it->second);
  	}
      
  	return vec;
  }