trn_inputparser.cpp
2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
* ASCLITE
* Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun
*
* This software was developed at the National Institute of Standards and Technology by
* employees of the Federal Government in the course of their official duties. Pursuant
* to title 17 Section 105 of the United States Code this software is not subject to
* copyright protection and is in the public domain. ASCLITE is an experimental system.
* NIST assumes no responsibility whatsoever for its use by other parties, and makes no
* guarantees, expressed or implied, about its quality, reliability, or any other
* characteristic. We would appreciate acknowledgement if the software is used.
*
* THIS SOFTWARE IS PROVIDED "AS IS." With regard to this software, NIST MAKES NO EXPRESS
* OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY,
* OR FITNESS FOR A PARTICULAR PURPOSE.
*/
/**
* Class that handles the parsing of TRN-encoded files.
*/
#include "trn_inputparser.h" // class's header file
// Definition of TRNInputParser's static logger member, initialized with the value returned by Logger::getLogger().
Logger* TRNInputParser::logger = Logger::getLogger();
/**
 * Load the named TRN file into a SpeechSet.
 *
 * Every line that is neither a comment nor blank is expected to look like
 *     <words ...> (<utterance id>)
 * where the utterance id is the text between the last '(' and the last ')'
 * of the line. The text before that '(' is handed to ParseWords() to build
 * the Segment.
 *
 * When the property "inputparser.trn.uid" equals "spu_id", the speaker name
 * is the utterance id up to (not including) its last '_' or '-'. For any
 * other value an error is logged and the speaker name is "undefined".
 * One Speech is created per distinct speaker name.
 *
 * Lines starting with ";;" are comments and are skipped. Blank lines are
 * skipped silently. Lines without a well-formed "(...)" utterance id are
 * skipped after logging an error. Skipped lines still advance the source
 * line number but not the source element number.
 *
 * Terminates the process with E_LOAD if the file cannot be opened and with
 * E_MISSINFO if no segment could be read from it.
 *
 * @param name path of the TRN file to read
 * @return a newly allocated SpeechSet (allocated with new; never null)
 */
SpeechSet* TRNInputParser::loadFile(const string& name)
{
    ifstream file;
    file.open(name.c_str(), ifstream::in);

    if (!file.is_open())
    {
        LOG_FATAL(logger, "Error opening file " + name);
        exit(E_LOAD);
    }

    // Read the utterance-id convention once rather than for every line.
    // NOTE(review): assumes the property does not change while the file is being parsed.
    const string uidType = Properties::GetProperty("inputparser.trn.uid");
    const bool uidIsSpuId = (uidType.compare("spu_id") == 0);

    map<string, Speech*> spkr_list;
    SpeechSet* vec = new SpeechSet(name);

    string line;
    long int lineNum = -1;
    long int elementNum = 0;

    while (getline(file, line, '\n'))
    {
        ++lineNum;

        // A comment starts with the two-character prefix ";;".
        // (find_first_of(";;") would match a single leading ';' because it
        // treats its argument as a set of characters.)
        if (line.compare(0, 2, ";;") == 0)
        {
            continue;
        }

        // Nothing to parse on a blank line; '\r' covers files with CRLF endings.
        if (line.find_first_not_of(" \t\r") == string::npos)
        {
            continue;
        }

        // Locate the trailing "(utterance id)". Without this check a missing
        // parenthesis yields string::npos, and the arithmetic below would wrap
        // around and turn the whole line into both the text and the id.
        const size_t openParen = line.find_last_of('(');
        const size_t closeParen = line.find_last_of(')');

        if (openParen == string::npos || closeParen == string::npos || closeParen < openParen)
        {
            LOG_ERR(logger, "trn_importer : no utterance id found in line : " + line);
            continue;
        }

        const size_t uidindex = openParen + 1;
        const string uid = line.substr(uidindex, closeParen - uidindex);

        string spkr = "undefined";

        if (uidIsSpuId)
        {
            // If the id holds neither '_' nor '-', find_last_of returns npos
            // and the whole id is used as the speaker name.
            spkr = uid.substr(0, uid.find_last_of("_-"));
        }
        else
        {
            LOG_ERR(logger, "trn_importer : unknown uterance id type : " + uidType);
        }

        // operator[] value-initializes a missing entry to a null pointer, so a
        // single lookup both finds an existing Speech and reserves the slot.
        Speech*& slot = spkr_list[spkr];

        if (!slot)
        {
            slot = new Speech(vec);
        }

        Speech* speech = slot;

        // Everything before the opening parenthesis is the transcript text.
        Segment* seg = ParseWords(string(""), string(""), string(""), -1, -1, speech, line.substr(0, openParen));
        seg->SetId("(" + uid + ")");
        seg->SetSourceLineNum(lineNum);
        seg->SetSourceElementNum(elementNum++);
        seg->SetSpeakerId(spkr);
        speech->AddSegment(seg);
    }

    LOG_INFO(logger, "loading of file '" + name + "' done");
    file.close();

    if (spkr_list.empty())
    {
        LOG_FATAL(logger, "TRN file '" + name + "' contains no data!");
        exit(E_MISSINFO);
    }

    // Register the speeches in the map's key order (sorted by speaker name).
    for (map<string, Speech*>::iterator it = spkr_list.begin(); it != spkr_list.end(); ++it)
    {
        vec->AddSpeech(it->second);
    }

    return vec;
}