Blame view
tools/sctk-2.4.10/src/asclite/core/trn_inputparser.cpp
2.99 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
/*
 * ASCLITE
 * Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun
 *
 * This software was developed at the National Institute of Standards and Technology by
 * employees of the Federal Government in the course of their official duties. Pursuant
 * to title 17 Section 105 of the United States Code this software is not subject to
 * copyright protection and is in the public domain. ASCLITE is an experimental system.
 * NIST assumes no responsibility whatsoever for its use by other parties, and makes no
 * guarantees, expressed or implied, about its quality, reliability, or any other
 * characteristic. We would appreciate acknowledgement if the software is used.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS."  With regard to this software, NIST MAKES NO EXPRESS
 * OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY,
 * OR FITNESS FOR A PARTICULAR PURPOSE.
 */

/**
 * Class that handles the parsing of TRN encoded files
 */

#include "trn_inputparser.h" // class's header file

Logger* TRNInputParser::logger = Logger::getLogger();

/**
 * Load the named TRN file into a SpeechSet.
 *
 * The file is read one line at a time. Each non-comment line is split into
 * the text before the last '(' and an utterance id taken from between the
 * last '(' and the last ')'. One Segment is built per such line and attached
 * to the Speech of its speaker; one Speech is created per distinct speaker.
 *
 * When the "inputparser.trn.uid" property equals "spu_id", the speaker id is
 * the utterance id up to (not including) its last '_' or '-'. For any other
 * value an error is logged and the speaker id stays "undefined".
 *
 * Lines starting with ";;" are comments and are skipped, as are blank lines.
 * Skipped lines still count towards the source line number, which is
 * zero-based.
 *
 * @param name path of the TRN file to load
 * @return a newly allocated SpeechSet holding one Speech per speaker
 *
 * The process exits with E_LOAD if the file cannot be opened and with
 * E_MISSINFO if no utterance was found in it.
 */
SpeechSet* TRNInputParser::loadFile(const string& name)
{
    ifstream file;
    file.open(name.c_str(), ifstream::in);

    if (! file.is_open())
    {
        LOG_FATAL(logger, "Error opening file " + name);
        exit (E_LOAD);
    }

    // Looked up once instead of once per line; assumed not to change while parsing.
    const string uidType = Properties::GetProperty("inputparser.trn.uid");
    const bool uidIsSpuId = (uidType.compare("spu_id") == 0);

    map<string, Speech*> spkr_list;
    SpeechSet* vec = new SpeechSet(name);

    string line;
    long int lineNum = -1;   // incremented before use, so the first line is number 0
    long int elementNum = 0;

    // A TRN record is a whole line, so split on newlines (not on spaces).
    while (getline(file, line, '\n'))
    {
        ++lineNum;

        // Blank lines carry no utterance id; parsing them would create a
        // bogus segment "()" under an empty-named speaker.
        if (line.find_first_not_of(" \t\r") == string::npos)
        {
            continue;
        }

        // Comment marker is the two-character prefix ";;". find_first_of(";;")
        // would also match a line starting with a single ';'.
        if (line.compare(0, 2, ";;") == 0)
        {
            //comment so skip (for now)
            continue;
        }

        // NOTE(review): if '(' or ')' is missing, find_last_of returns npos and
        // the unsigned arithmetic wraps, so the whole line ends up as text/uid.
        // Original behaviour, kept as is.
        size_t uidindex = line.find_last_of("(") + 1;
        size_t uidsize = line.find_last_of(")") - uidindex;
        string uid = line.substr(uidindex, uidsize);
        string spkr = "undefined";

        if (uidIsSpuId)
        {
            spkr = uid.substr(0, uid.find_last_of("_-"));
        }
        else
        {
            LOG_ERR(logger, "trn_importer : unknown uterance id type : " + uidType);
        }

        // operator[] value-initialises a missing entry to a null pointer, so a
        // single lookup both finds and (if needed) fills the speaker's slot.
        Speech*& speech = spkr_list[spkr];

        if (!speech)
        {
            speech = new Speech(vec);
        }

        Segment* seg = ParseWords(string(""), string(""), string(""), -1, -1, speech, line.substr(0, uidindex-1));
        seg->SetId("(" + uid + ")");
        seg->SetSourceLineNum(lineNum);
        seg->SetSourceElementNum(elementNum++);
        seg->SetSpeakerId(spkr);
        speech->AddSegment(seg);
    }

    LOG_INFO(logger, "loading of file '" + name + "' done");
    file.close();

    for (map<string, Speech*>::iterator i = spkr_list.begin(); i != spkr_list.end(); ++i)
    {
        vec->AddSpeech(i->second);
    }

    // No speaker means no utterance line was read at all.
    if (spkr_list.empty())
    {
        LOG_FATAL(logger, "TRN file '" + name + "' contains no data!");
        exit(E_MISSINFO);
    }

    return vec;
}