#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ProjectModel.h" #include "Season.h" #include "Scene.h" #include "Shot.h" #include "ResultsDialog.h" using namespace std; using namespace arma; ///////////////// // constructor // ///////////////// ProjectModel::ProjectModel(QObject *parent) : QAbstractItemModel(parent), m_name(QString()), m_baseName(QString()) { m_series = new Series; m_movieAnalyzer = new MovieAnalyzer; connect(m_movieAnalyzer, SIGNAL(setResolution(const QSize &)), this, SLOT(setResolution(const QSize &))); connect(m_movieAnalyzer, SIGNAL(setFps(qreal)), this, SLOT(setFps(qreal))); connect(m_movieAnalyzer, SIGNAL(appendVideoFrame(int, qint64)), this, SLOT(appendVideoFrame(int, qint64))); connect(m_movieAnalyzer, SIGNAL(insertShot(qint64, Segment::Source)), this, SLOT(insertShot(qint64, Segment::Source))); connect(m_movieAnalyzer, SIGNAL(labelSimShot(qint64, int, Segment::Source)), this, SLOT(labelSimShot(qint64, int, Segment::Source))); connect(m_movieAnalyzer, SIGNAL(setSpeaker(qint64, qint64, const QString &, VideoFrame::SpeakerSource)), this, SLOT(setSpeaker(qint64, qint64, const QString &, VideoFrame::SpeakerSource))); connect(m_movieAnalyzer, SIGNAL(playSegments(QList>)), this, SLOT(playSeg(QList>))); connect(this, SIGNAL(setDiarData(const arma::mat, const arma::mat, const arma::mat)), m_movieAnalyzer, SLOT(setDiarData(const arma::mat, const arma::mat, const arma::mat))); connect(this, SIGNAL(setDiarData(const arma::mat, const arma::mat, const arma::mat, QMap>>)), m_movieAnalyzer, SLOT(setDiarData(const arma::mat, const arma::mat, const arma::mat, QMap>>))); } //////////////// // destructor // //////////////// ProjectModel::~ProjectModel() { delete m_series; } ///////////////////////// // save / open methods // ///////////////////////// bool ProjectModel::save(const QString &fName) { QFile saveFile(fName + ".json"); // QFile saveFile(fName + ".dat"); if (!saveFile.open(QIODevice::WriteOnly)) { qWarning("Couldn't open save file."); return false; } QJsonObject projObject; projObject["name"] = m_name; QJsonObject seriesObject; m_series->write(seriesObject); projObject["series"] = seriesObject; QJsonDocument saveDoc(projObject); saveFile.write(saveDoc.toJson()); // saveFile.write(saveDoc.toBinaryData()); return true; } bool ProjectModel::load(const QString &fName) { QFile loadFile(fName); if (!loadFile.open(QIODevice::ReadOnly)) { qWarning("Couldn't open save file."); return false; } QByteArray saveData = loadFile.readAll(); QJsonDocument loadDoc(QJsonDocument::fromJson(saveData)); // QJsonDocument loadDoc(QJsonDocument::fromBinaryData(saveData)); QJsonObject projObject = loadDoc.object(); m_name = projObject["name"].toString(); QJsonObject seriesObject = projObject["series"].toObject(); m_series->read(seriesObject); QList subStarts; QList subEnds; retrieveSubPositionsLabels(subStarts, subEnds, m_subRefLbl); for (int i(0); i < subStarts.size(); i++) m_subBound.push_back(QPair(subStarts[i], subEnds[i])); retrieveShotUtterances(); retrieveShotPatterns(); return true; } ////////////////////////////////////////// // reimplementation of abstract methods // // inherited from QAbstractItemModel // ////////////////////////////////////////// QModelIndex ProjectModel::index(int row, int column, const QModelIndex &parent) const { if (!hasIndex(row, column, parent)) return QModelIndex(); Segment *parentSegment; if (!parent.isValid()) parentSegment = m_series; else parentSegment = static_cast(parent.internalPointer()); Segment *childSegment = parentSegment->child(row); if (childSegment ) return createIndex(row, column, childSegment); else return QModelIndex(); } QModelIndex ProjectModel::parent(const QModelIndex &child) const { if (!child.isValid()) return QModelIndex(); Segment *childSegment = static_cast(child.internalPointer()); Segment *parentSegment = childSegment->parent(); if (parentSegment == m_series) return QModelIndex(); return createIndex(parentSegment->row(), 0, parentSegment); } int ProjectModel::rowCount(const QModelIndex &parent) const { Segment *parentSegment; if (!parent.isValid()) parentSegment = m_series; else parentSegment = static_cast(parent.internalPointer()); return parentSegment->childCount(); } int ProjectModel::columnCount(const QModelIndex &parent) const { Q_UNUSED(parent) return 2; } QVariant ProjectModel::data(const QModelIndex &index, int role) const { if (!index.isValid()) return QVariant(); Segment *segment = static_cast(index.internalPointer()); switch (role) { case Qt::DisplayRole: if (index.column() == 0) return segment->display(); else return segment->getFormattedPosition(); break; case Qt::ForegroundRole: if (index.column() == 1) { QBrush grayForeground(Qt::gray); return grayForeground; } break; case Qt::FontRole: QFont font; if (segment->getSource() == Segment::Automatic) font.setItalic(true); else if (segment->getSource() == Segment::Both) font.setBold(true); return font; break; } return QVariant(); } QVariant ProjectModel::headerData(int section, Qt::Orientation orientation, int role) const { if (orientation == Qt::Horizontal && role == Qt::DisplayRole && section == 0) return m_series->display(); return QVariant(); } Qt::ItemFlags ProjectModel::flags(const QModelIndex &index) const { if (!index.isValid()) return 0; return QAbstractItemModel::flags(index); } QModelIndex ProjectModel::indexFromSegment(Segment *segment) const { if (segment->parent() == m_series) return index(0, 0); return index(segment->row(), 0, indexFromSegment(segment->parent())); } int ProjectModel::getDepth() const { return m_series->getHeight(); } QModelIndex ProjectModel::getShotParentIndex() const { Segment *segment = m_series; while (!dynamic_cast(segment)) segment = segment->child(0); segment = segment->parent(); return indexFromSegment(segment); } /////////////// // modifiers // /////////////// void ProjectModel::setModel(const QString &name, const QString &seriesName, int seasNbr, int epNbr, const QString &epName, const QString &epFName) { m_name = name; m_series->setName(seriesName); Season *season = new Season(seasNbr, m_series); m_series->appendChild(season); m_episode = new Episode(epNbr, epFName, season, epName); season->appendChild(m_episode); m_movieAnalyzer->extractVideoFrames(epFName); } bool ProjectModel::appendModel(int seasNbr, int epNbr, const QString &epName, const QString &epFName) { QList seasonList = m_series->getChildren(); int i(0); while (i < seasonList.size() && seasNbr < seasonList[i]->getNumber()) i++; // new season if (i == seasonList.size()) { Season *season = new Season(seasNbr, m_series); m_series->appendChild(season); m_episode = new Episode(epNbr, epFName, season, epName); season->appendChild(m_episode); m_movieAnalyzer->extractVideoFrames(epFName); } // same season else { QList episodeList = seasonList[i]->getChildren(); int j(0); while (j < episodeList.size() && episodeList[i]->getNumber() != epNbr) j++; // new episode if (j == episodeList.size()) { m_episode = new Episode(epNbr, epFName, seasonList[i], epName); seasonList[i]->appendChild(m_episode); m_movieAnalyzer->extractVideoFrames(epFName); } // episode already recorded else return false; } return true; } bool ProjectModel::insertSubtitles(const QString &subFName) { QJsonObject subObject; qint64 position; qint64 start; QString text; qint64 end; QStringList sources; QList absLength; QList relLength; qreal proportion; int totLength; qreal startShift(-0.38); qreal endShift(-0.5); int j; Segment *segment; VideoFrame *vFrame; // regular expression to detect subtitles corresponding to noise std::regex noiseSource("\\(.*\\)"); // regular expression to detect subtitles with multiple speakers std::regex multSources("-.+
-.+"); // regular expression to detect speaker turn into subtitle std::regex spkTurn("(- *)(.*)"); eraseSubtitles(m_series); clearSpeaker(m_series, VideoFrame::Ref); QFile loadFile(subFName); if (!loadFile.open(QIODevice::ReadOnly)) { qWarning("Couldn't open subtitles file."); return false; } QByteArray saveData = loadFile.readAll(); QJsonDocument loadDoc(QJsonDocument::fromJson(saveData)); QJsonArray subArray = loadDoc.array(); for (int i(0); i < subArray.size(); i++) { sources.clear(); absLength.clear(); relLength.clear(); totLength = 0; proportion = 0.0; subObject = subArray[i].toObject(); start = (subObject["start"].toDouble() + startShift) * 1000; if (start < 0) start = 0; start = qRound(start / 10.0) * 10; end = (subObject["end"].toDouble() + endShift) * 1000; end = qRound(end / 10.0) * 10; text = subObject["text"].toString(); // case of multiple source in current subtitle if (regex_match(text.toStdString(), multSources)) { // split subtitle contents sources = text.split("
"); // removing hyphenation indicating speaker turn for (int k(0); k < sources.size(); k++) { sources[k] = sources[k].replace(0, 1, ""); while (sources[k].indexOf(" ") == 0) sources[k] = sources[k].replace(0, 1, ""); } // estimate absolute length of each contents for (int k(0); k < sources.size(); k++) { absLength.push_back((sources[k]).count(" ") + 1); totLength += absLength[k]; } // estimate relative length of each contents for (int k(0); k < sources.size(); k++) relLength.push_back(absLength[k] / static_cast(totLength)); // boundaries of each interval for (int k(1); k < sources.size(); k++) relLength[k] += relLength[k-1]; } segment = m_series; while (!(vFrame = dynamic_cast(segment))) { // video frame at start position j = segment->childIndexFromPosition(start); // select possible children segment = segment->child(j); } position = segment->getPosition(); int k = 0; int prev = 0; while (position <= end) { segment = m_series; while (!(vFrame = dynamic_cast(segment))) { // video frame at start position j = segment->childIndexFromPosition(position); // select possible children segment = segment->child(j); } proportion = (static_cast(position) - start) / (end - start); if (relLength.size() >= 1) { while (relLength[k] < proportion) k++; if (prev == k) { vFrame->setSub(sources[k]); if (!regex_match(sources[k].toStdString(), noiseSource)) vFrame->setSpeaker("S", VideoFrame::Ref); } prev = k; } else { vFrame->setSub(text); if (!regex_match(text.toStdString(), noiseSource)) vFrame->setSpeaker("S", VideoFrame::Ref); } position += 40; } } return true; } bool ProjectModel::localSpkDiar(bool baseline, UtteranceTree::DistType dist, bool norm, UtteranceTree::AgrCrit agr, UtteranceTree::PartMeth partMeth, bool weight, bool sigma) { if (baseline) m_movieAnalyzer->localSpkDiarBaseline(m_shotPatterns, m_subBound, m_shotUtterances, m_strictShotPattBound, m_baseName); else m_movieAnalyzer->localSpkDiar(m_subBound, m_shotPatterns, m_strictShotPattBound, dist, norm, agr, partMeth, weight, sigma, m_baseName); return true; } bool ProjectModel::globalSpkDiar() { m_movieAnalyzer->globalSpkDiar(m_baseName); return true; } bool ProjectModel::speakerDiarization(const QString &diarFName, VideoFrame::SpeakerSource source) { qint64 start, position, end; QString speakerLabel; Segment *segment; VideoFrame *vFrame; int i(0); clearSpeaker(m_series, source); QFile file(diarFName); if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return false; QTextStream in(&file); // parse hypotheses file while (!in.atEnd()) { QString line = in.readLine(); QStringList data = line.split(" "); start = data[0].toDouble() * 1000; end = data[1].toDouble() * 1000; speakerLabel = data[2]; segment = m_series; while (!(vFrame = dynamic_cast(segment))) { // video frame at start position i = segment->childIndexFromPosition(start); // select possible children segment = segment->child(i); } position = segment->getPosition(); while (position <= end) { segment = m_series; while (!(vFrame = dynamic_cast(segment))) { // video frame at start position i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } vFrame->setSpeaker(speakerLabel, source); position += 40; } } return true; } void ProjectModel::extractShots(QString fName, int histoType, int nVBins, int nHBins, int nSBins, int metrics, qreal threshold1, qreal threshold2, int nVBlock, int nHBlock, bool iterate) { qint64 posInit = 0; QList toRemove; resetShotsToManual(m_series, toRemove); for (int j(0); j < toRemove.count(); j++) removeShot(toRemove[j], Segment::Automatic); emit modelChanged(); if (iterate) { // number of HSV tuples considered int nHsvTuples(1); // number of iterations int nIter = nHBlock * nHsvTuples * threshold1 * (threshold1 - 1) / 2; // optimal values oberved so far for the int nBlockMax(0); // number of image blocks int nHMax(0); // hue channel int nSMax(0); // saturation channel int nVMax(0); // value channel int thresh1Max(0); // disimilarity treshold int thresh2Max(0); // similarity threshold // current and maximum F-Score observed so far qreal fScore(0); qreal fScoreMax(0); // iteration counter to update progress bar int cnt(0); // progress bar initialization QProgressDialog progress(tr("Extracting shots..."), tr("Cancel"), 0, nIter); progress.setWindowModality(Qt::WindowModal); // loop over the number of blocks while (nHBlock >= 1) { int currNHBins(nHBins); int currNSBins(nSBins); int currNVBins(nVBins); // loop over HSV possible values for (int i(0); i < nHsvTuples; i++) { int thresh1 = threshold1; // loop over disimilarity threshold between frames while (thresh1 >= 1) { threshold2 = thresh1; // loop over similarity threshold between frames while (threshold2 >= 1) { insertShot(posInit, Segment::Manual); // extracting shots m_movieAnalyzer->extractShots(fName, histoType, currNVBins, currNHBins, currNSBins, metrics, thresh1, threshold2, nHBlock, nHBlock, false); // evaluating extraction and updating optimal values if ((fScore = evaluateShotDetection(false, thresh1, threshold2)) > fScoreMax) { fScoreMax = fScore; nBlockMax = nHBlock; nHMax = currNHBins; nSMax = currNSBins; nVMax = currNVBins; thresh1Max = thresh1; thresh2Max = threshold2; } // displaying current results qDebug() << nHBlock << currNHBins << currNSBins << currNVBins << thresh1 << threshold2 << fScore; // removing inserted shots toRemove.clear(); resetShotsToManual(m_series, toRemove); for (int j(0); j < toRemove.count(); j++) removeShot(toRemove[j], Segment::Automatic); threshold2--; progress.setValue(++cnt); } thresh1--; } currNHBins /= 2; currNSBins /= 2; currNVBins /= 2; } nHBlock--; } progress.setValue(nIter); // setting optimal parameters nVBlock = nBlockMax; nHBlock = nBlockMax; nHBins = nHMax; nSBins = nSMax; nVBins = nVMax; threshold1 = thresh1Max; threshold2 = thresh2Max; } insertShot(posInit, Segment::Manual); m_movieAnalyzer->extractShots(fName, histoType, nVBins, nHBins, nSBins, metrics, threshold1, threshold2, nVBlock, nHBlock); evaluateShotDetection(true, threshold1, threshold2); } void ProjectModel::labelSimilarShots(QString fName, int histoType, int nVBins, int nHBins, int nSBins, int metrics, qreal maxDist, int windowSize, int nVBlock, int nHBlock, bool iterate) { resetAutoCameraLabels(m_series); QList shotPositions; retrieveShotPositions(m_series, shotPositions); if (iterate) { // number of HSV tuples considered int nHsvTuples(1); // number of iterations int nIter = nHBlock * nHsvTuples * maxDist * windowSize / 16; // optimal values oberved so far for the int nBlockMax(0); // number of image blocks int nHMax(0); // hue channel int nSMax(0); // saturation channel int nVMax(0); // value channel int thresh1Max(0); // disimilarity threshold int thresh2Max(0); // window size // current and maximum F-Score observed so far qreal fScore(0); qreal fScoreMax(0); // iteration counter to update progress bar int cnt(0); // progress bar initialization QProgressDialog progress(tr("Retrieving similar shots..."), tr("Cancel"), 0, nIter); progress.setWindowModality(Qt::WindowModal); // loop over the number of blocks while (nHBlock >= 1) { int currNHBins(nHBins); int currNSBins(nSBins); int currNVBins(nVBins); // loop over HSV possible values for (int i(0); i < nHsvTuples; i++) { int threshold1 = static_cast(maxDist); // loop over disimilarity threshold between frames while (threshold1 >= 1) { int threshold2(windowSize); // loop over window size while (threshold2 >= 1) { // retrieving similar shots m_movieAnalyzer->labelSimilarShots(fName, histoType, currNVBins, currNHBins, currNSBins, metrics, threshold1, threshold2, shotPositions, nHBlock, nHBlock, false); // evaluating extraction if ((fScore = evaluateSimShotDetection(false, threshold1, threshold2)) >= fScoreMax) { fScoreMax = fScore; nBlockMax = nHBlock; nHMax = currNHBins; nSMax = currNSBins; nVMax = currNVBins; thresh1Max = threshold1; thresh2Max = threshold2; } // displaying current results qDebug() << nHBlock << currNHBins << currNSBins << currNVBins << threshold1 << threshold2 << fScore; // resetting shot similarities resetAutoCameraLabels(m_series); threshold2--; progress.setValue(++cnt); } threshold1--; } currNHBins /= 2; currNSBins /= 2; currNVBins /= 2; } nHBlock--; } progress.setValue(nIter); // setting optimal parameters nVBlock = nBlockMax; nHBlock = nBlockMax; nHBins = nHMax; nSBins = nSMax; nVBins = nVMax; maxDist = thresh1Max; windowSize = thresh2Max; } m_movieAnalyzer->labelSimilarShots(fName, histoType, nVBins, nHBins, nSBins, metrics, maxDist, windowSize, shotPositions, nVBlock, nHBlock, true); evaluateSimShotDetection(true, maxDist, windowSize); } qreal ProjectModel::evaluateShotDetection(bool displayResults, qreal thresh1, qreal thresh2) const { Segment *season; Segment *episode; Segment *segment; int tp(0); int fp(0); int tn(0); int fn(0); int nShots(0); int nVideoFrames(0); qreal precision; qreal recall; qreal fScore; qreal accuracy; for (int i(0); i < m_series->childCount(); i++) { season = m_series->child(i); for (int j(0); j < season->childCount(); j++) { episode = season->child(j); for (int k(0); k < episode->childCount(); k++) { segment = episode->child(k); // no scene level if (dynamic_cast(segment)) { nVideoFrames += segment->childCount(); nShots++; switch (segment->getSource()) { case Segment::Both: tp++; break; case Segment::Manual: fn++; break; case Segment::Automatic: fp++; break; } } } } } // decrement first frame nVideoFrames--; nShots--; fn--; tn = nVideoFrames - (tp + fp + fn); precision = computePrecision(tp, fp); recall = computeRecall(tp, fn); fScore = computeFScore(recall, precision); accuracy = computeAccuracy(tp, fp, fn, tn); if (displayResults) { ResultsDialog *dialog = new ResultsDialog(thresh1, thresh2, tp, fp, fn, tn, precision, recall, fScore, accuracy); dialog->exec(); } return fScore; } qreal ProjectModel::evaluateSimShotDetection(bool displayResults, qreal thresh1, qreal thresh2) const { int tp(0); int fp(0); int tn(0); int fn(0); qreal precision; qreal recall; qreal fScore; qreal accuracy; QList autCamLabels; QList manCamLabels; retrieveSimCamLabels(m_series, autCamLabels, manCamLabels); for (int i(0); i < autCamLabels.size(); i++) { if (manCamLabels[i] == -1 && autCamLabels[i] != -1) fp++; else if (manCamLabels[i] != -1 && autCamLabels[i] == -1) fn++; else if (manCamLabels[i] == -1 && autCamLabels[i] == -1) tn++; else { QList manIdxSim; QList autIdxSim; // lists of shots similar to current one, as manually and automatically annotated int j; for (j = 0; j < autCamLabels.size(); j++) { if (manCamLabels[i] == manCamLabels[j]) manIdxSim.push_back(j); if (autCamLabels[i] == autCamLabels[j]) autIdxSim.push_back(j); } // size of longest list int maxList = (autCamLabels.size() > manCamLabels.size()) ? autCamLabels.size() : manCamLabels.size(); // test for emptiness of the intersection of the two lists bool found = false; j = 0; while (j < maxList && !found) if (autIdxSim.contains(manIdxSim[j++])) found = true; if (found) tp++; else fp++; } } precision = computePrecision(tp, fp); recall = computeRecall(tp, fn); fScore = computeFScore(recall, precision); accuracy = computeAccuracy(tp, fp, fn, tn); if (displayResults) { ResultsDialog *dialog = new ResultsDialog(thresh1, thresh2, tp, fp, fn, tn, precision, recall, fScore, accuracy); dialog->exec(); } return fScore; } int ProjectModel::retrieveShotPrevPositions(qint64 position, QList &shotPositions) { retrieveShotPositions(m_series, shotPositions); int i(0); while (shotPositions[i] < position) i++; return i; } qreal ProjectModel::computePrecision(int tp, int fp) const { if (tp == 0 && fp == 0) return 0.0; return static_cast(tp) / (tp + fp); } qreal ProjectModel::computeRecall(int tp, int fn) const { if (tp == 0 && fn == 0) return 0.0; return static_cast(tp) / (tp + fn); } qreal ProjectModel::computeFScore(qreal precision, qreal recall) const { if (precision == 0.0 || recall == 0.0) return 0.0; return 2 * precision * recall / (precision + recall); } qreal ProjectModel::computeAccuracy(int tp, int fp, int fn, int tn) const { return (static_cast(tp) + tn) / (tp + fp + fn + tn); } void ProjectModel::resetShotsToManual(Segment *segment, QList &toRemove) { if ((dynamic_cast(segment))) { switch (segment->getSource()) { case Segment::Manual: case Segment::Both: segment->setSource(Segment::Manual); break; case Segment::Automatic: toRemove.push_back(segment); break; } } else for (int i(0); i < segment->childCount(); i++) resetShotsToManual(segment->child(i), toRemove); } void ProjectModel::resetAutoCameraLabels(Segment *segment) { Shot *shot; if ((shot = dynamic_cast(segment))) shot->setCamera(-1, Shot::Automatic); else for (int i(0); i < segment->childCount(); i++) resetAutoCameraLabels(segment->child(i)); } void ProjectModel::eraseSubtitles(Segment *segment) { VideoFrame *vFrame; if ((vFrame = dynamic_cast(segment))) vFrame->setSub(QString()); else for (int i(0); i < segment->childCount(); i++) eraseSubtitles(segment->child(i)); } void ProjectModel::clearSpeaker(Segment *segment, VideoFrame::SpeakerSource source) { VideoFrame *vFrame; if ((vFrame = dynamic_cast(segment))) vFrame->clearSpeaker(source); else for (int i(0); i < segment->childCount(); i++) clearSpeaker(segment->child(i), source); } void ProjectModel::reset() { m_name = ""; } void ProjectModel::retrieveShotUtterances() { QList shotLabels; QList shotPositions; QList longUtt; QList> durShotByUtt; int j(0); // counter qint64 shotStart; qint64 shotEnd; qint64 uttDur; qint64 inter; qint64 minDur(200); retrieveShotLabels(m_series, shotLabels); retrieveShotPositions(m_series, shotPositions); // looping over the shots for (int i(0); i < shotLabels.size() - 1; i++) { if (shotLabels[i] != "") { shotStart = shotPositions[i]; if (i < shotPositions.size() - 1) shotEnd = shotPositions[i+1]; else shotEnd = shotStart; // setting utterance position while (j > 0 && m_subBound[j].first > shotStart) j--; while (j < m_subBound.size() && m_subBound[j].second <= shotStart) j++; // utterance longer than shot: do not process yet if (m_subBound[j].first < shotStart && m_subBound[j].second > shotEnd) { longUtt.push_back(j); continue; } // first utterance is between two shots if (m_subBound[j].first < shotStart && m_subBound[j].second > shotStart) { uttDur = m_subBound[j].second - m_subBound[j].first; // the major part of the utterance is covered by the shot if (m_subBound[j].second - shotStart >= uttDur / 2) { QPair pair(j, uttDur / 1000.0); if (uttDur >= minDur) m_shotUtterances[shotLabels[i]].push_back(pair); } j++; } // utterances entirely contained in the shot while (j < m_subBound.size() && m_subBound[j].second < shotEnd) { uttDur = m_subBound[j].second - m_subBound[j].first; QPair pair(j, uttDur / 1000.0); if (uttDur >= minDur) m_shotUtterances[shotLabels[i]].push_back(pair); j++; } // last utterance is between two shots if (m_subBound[j].first < shotEnd) { uttDur = m_subBound[j].second - m_subBound[j].first; // the major part of the utterance is covered by the shot if (shotEnd - m_subBound[j].first > uttDur / 2) { QPair pair(j, uttDur / 1000.0); if (uttDur >= minDur) m_shotUtterances[shotLabels[i]].push_back(pair); } j++; } } } // processing case of utterances longer than a shot j = 0; for (int i(0); i < longUtt.size(); i++) { QMap shots; // setting shot position while (j > 0 && shotPositions[j] >= m_subBound[longUtt[i]].first) j--; while (j < shotPositions.size() - 1 && shotPositions[j+1] <= m_subBound[longUtt[i]].first) j++; // computing duration of the utterance/shot intersection while (j < shotPositions.size() - 1 && shotPositions[j] < m_subBound[longUtt[i]].second) { // first shot partially included in utterance if (shotPositions[j] < m_subBound[longUtt[i]].first) inter = shotPositions[j+1] - m_subBound[longUtt[i]].first; // shot is a subset of the utterance else if (shotPositions[j+1] <= m_subBound[longUtt[i]].second) inter = shotPositions[j+1] - shotPositions[j]; // last shot partially included in utterance else inter = m_subBound[longUtt[i]].second - shotPositions[j]; if (shotLabels[j] != "") { if (shots.contains(shotLabels[j])) shots[shotLabels[j]] += inter; else shots[shotLabels[j]] = inter; } j++; } // assigning utterance to shot QMap::const_iterator it = shots.begin(); int max(0); QString bestShotLabel; // detetermining best shot label while (it != shots.end()) { qint64 inter = it.value(); if (inter > max) { bestShotLabel = it.key(); max = inter; } it++; } // updating list of utterances assigned to shot QPair pair(longUtt[i], (m_subBound[longUtt[i]].second - m_subBound[longUtt[i]].first) / 1000.0); m_shotUtterances[bestShotLabel].push_back(pair); } } void ProjectModel::retrieveShotPatterns() { QList shotLabels; QList shotPositions; QString patternLabel; QString prevPatternLabel; QString firstLabel; QString secondLabel; QPair pattBounds; // boundaries of pattern in ms QPair strictPattBounds; // normalized boundaries of pattern in ms QList lblWindow; // last four shot labels: used to detect shot pattern QList lblWindow1; // last four shot labels: used to detect shot pattern QList lblWindow2; // last four shot labels: used to detect shot pattern QString label; bool inPattern(false); // indicates if currently visiting a pattern int pattSize(3); // pattern minimum size int j(0); retrieveShotLabels(m_series, shotLabels); retrieveShotPositions(m_series, shotPositions); /**************************/ /* retrieve shot patterns */ /**************************/ // looping over the shots for (int i(0); i < shotLabels.size() - 1; i++) { label = shotLabels[i]; // current shot label // updating shot label window lblWindow.push_back(label); if (lblWindow.size() > pattSize) lblWindow.pop_front(); // new pattern detected if (!inPattern && testShotPattern(lblWindow, pattSize)) { // initializing pattern boundaries pattBounds.first = -1; pattBounds.second = -1; strictPattBounds.first = shotPositions[i-(pattSize-1)]; strictPattBounds.second = shotPositions[i+1]; // setting spoken segment position while (j > 0 && m_subBound[j].first > shotPositions[i-(pattSize-1)]) j--; while (j < m_subBound.size() && m_subBound[j].second < shotPositions[i-(pattSize-1)]) j++; // setting pattern label firstLabel = lblWindow[0]; secondLabel = lblWindow[1]; patternLabel = normalizedPattern(firstLabel, secondLabel); inPattern = true; } // end of pattern if (inPattern && !testShotPattern(lblWindow, pattSize)) { m_shotPattBound[patternLabel].push_back(pattBounds); m_strictShotPattBound[patternLabel].push_back(strictPattBounds); inPattern = false; } // moving into current pattern if (inPattern && testShotPattern(lblWindow, pattSize)) { // adjust normalized pattern boundaries strictPattBounds.second = shotPositions[i+1]; // writing out positions and durations of spoken segments covered by the pattern while (j < m_subBound.size() && m_subBound[j].first < shotPositions[i+1]) { // setting subtitles ref and weights contained in pattern QPair pair(j, (m_subBound[j].second-m_subBound[j].first) / 1000.0); if (m_shotUtterances[firstLabel].contains(pair) || m_shotUtterances[secondLabel].contains(pair)) { m_shotPatterns[patternLabel].push_back(pair); // adjusting pattern boundaries // first time in the pattern if (pattBounds.first == -1 && pattBounds.second == -1) { pattBounds.first = m_subBound[j].first; pattBounds.second = m_subBound[j].second; } // utterance begins before first shot of the pattern if (m_subBound[j].first < pattBounds.first) pattBounds.first = m_subBound[j].first; // utterance ends after last shot of the pattern if (m_subBound[j].second > pattBounds.second) pattBounds.second = m_subBound[j].second; } j++; } // updating flag inPattern = testShotPattern(lblWindow, pattSize); } } // processing last pattern if (inPattern) { m_shotPattBound[patternLabel].push_back(pattBounds); m_strictShotPattBound[patternLabel].push_back(strictPattBounds); inPattern = false; } /*********************************/ /* retrieve marginal expressions */ /* of the shot patterns */ /*********************************/ j = 0; // looping over the shots for (int i(0); i < shotLabels.size() - 2; i++) { label = shotLabels[i]; // current shot label // setting pattern label firstLabel = shotLabels[i]; secondLabel = shotLabels[i+1]; patternLabel = normalizedPattern(firstLabel, secondLabel); // initializing pattern boundaries pattBounds.first = -1; pattBounds.second = -1; strictPattBounds.first = shotPositions[i]; strictPattBounds.second = shotPositions[i+2]; // extending the pattern if (m_shotPatterns.contains(patternLabel)) { // setting spoken segment position while (j > 0 && m_subBound[j].first > shotPositions[i]) j--; while (j < m_subBound.size() && m_subBound[j].second < shotPositions[i]) j++; // writing out positions and durations of spoken segments covered by the pattern while (j < m_subBound.size() && m_subBound[j].first < shotPositions[i+2]) { QPair pair(j, (m_subBound[j].second-m_subBound[j].first) / 1000.0); if (!m_shotPatterns[patternLabel].contains(pair) && (m_shotUtterances[firstLabel].contains(pair) || m_shotUtterances[secondLabel].contains(pair))) { // add current utterance m_shotPatterns[patternLabel].push_back(pair); // adjusting pattern boundaries // first time in the pattern if (pattBounds.first == -1 && pattBounds.second == -1) { pattBounds.first = m_subBound[j].first; pattBounds.second = m_subBound[j].second; } // utterance begins before first shot of the pattern if (m_subBound[j].first < pattBounds.first) pattBounds.first = m_subBound[j].first; // utterance ends after last shot of the pattern if (m_subBound[j].second > pattBounds.second) pattBounds.second = m_subBound[j].second; } j++; } // updating pattern boundaries if (pattBounds.first != -1 || pattBounds.second != -1) { m_shotPattBound[patternLabel].push_back(pattBounds); m_strictShotPattBound[patternLabel].push_back(strictPattBounds); } } } // processing last pattern if (pattBounds.first != -1 || pattBounds.second != -1) { m_shotPattBound[patternLabel].push_back(pattBounds); m_strictShotPattBound[patternLabel].push_back(strictPattBounds); } /******************************/ /* merge interleaved patterns */ /******************************/ /* // for each pattern, retrieve list of patterns with one shot label in common QMap relatedPatterns; // iterators over shot patterns QMap>>::const_iterator it1 = m_shotPatterns.begin(); QMap>>::const_iterator it2; // first loop over pattern labels while (it1 != m_shotPatterns.end()) { firstLabel = it1.key(); it2 = m_shotPatterns.begin(); // second loop over pattern labels while (it2 != m_shotPatterns.end()) { secondLabel = it2.key(); if (firstLabel != secondLabel && interPatterns(firstLabel, secondLabel)) { if (!relatedPatterns.contains(secondLabel) || !relatedPatterns[secondLabel].contains(firstLabel)) relatedPatterns[firstLabel].push_back(secondLabel); } it2++; } it1++; } // iteratively merge shot patterns with one shot label in common // iterator over shot patterns with related patterns QMap::const_iterator it = relatedPatterns.begin(); QString newLabel1; QString newLabel2; QList> newList; // extended names of the labels QString compFirstLabel; QString compSecondLabel; // looping over pattern labels while (it != relatedPatterns.end()) { firstLabel = it.key(); QStringList patterns = it.value(); // looping over related patterns for (int i(0); i < patterns.size(); i++) { // updating possibly enhanced pattern name compFirstLabel = completePatternLabel(firstLabel, m_shotPatterns); // related enhanced pattern name secondLabel = patterns[i]; compSecondLabel = completePatternLabel(patterns[i], m_shotPatterns); // new label obtained by merging the complete first label // and the second label considered by himself newLabel1 = mergePatterns(compFirstLabel, secondLabel, m_shotPatterns[compFirstLabel], m_shotPatterns[secondLabel], newList); // inserting new pattern and possibly removing previous ones m_shotPatterns[newLabel1] = newList; m_shotPattBound[newLabel1] = m_shotPattBound[compFirstLabel]; m_shotPattBound[newLabel1].append(m_shotPattBound[secondLabel]); m_strictShotPattBound[newLabel1] = m_strictShotPattBound[compFirstLabel]; m_strictShotPattBound[newLabel1].append(m_strictShotPattBound[secondLabel]); if (newLabel1 != compFirstLabel) { m_shotPatterns.remove(compFirstLabel); m_shotPattBound.remove(compFirstLabel); m_strictShotPattBound.remove(compFirstLabel); } if (newLabel1 != secondLabel) { m_shotPatterns.remove(secondLabel); m_shotPattBound.remove(secondLabel); m_strictShotPattBound.remove(secondLabel); } // new label obtained by merging the possibly different complete labels if (compFirstLabel != compSecondLabel) { // merging step newLabel2 = mergePatterns(newLabel1, compSecondLabel, m_shotPatterns[newLabel1], m_shotPatterns[compSecondLabel], newList); // inserting new pattern and possibly removing previous ones m_shotPatterns[newLabel2] = newList; m_shotPattBound[newLabel2] = m_shotPattBound[newLabel1]; m_shotPattBound[newLabel2].append(m_shotPattBound[compSecondLabel]); m_strictShotPattBound[newLabel2] = m_strictShotPattBound[newLabel1]; m_strictShotPattBound[newLabel2].append(m_strictShotPattBound[compSecondLabel]); if (newLabel2 != newLabel1) { m_shotPatterns.remove(newLabel1); m_shotPattBound.remove(newLabel1); m_strictShotPattBound.remove(newLabel1); } if (newLabel1 != compSecondLabel) { m_shotPatterns.remove(compSecondLabel); m_shotPattBound.remove(compSecondLabel); m_strictShotPattBound.remove(compSecondLabel); } } } it++; } */ /**********************************/ /* update shot pattern boundaries */ /**********************************/ mergePatternBoundaries(m_shotPattBound); mergePatternBoundaries(m_strictShotPattBound); } void ProjectModel::mergePatternBoundaries(QMap>> &shotPattBound) { QMap>>::const_iterator it = shotPattBound.begin(); QString pattLabel; qint64 start; qint64 end; QPair newPair; bool modif(true); // looping over the patterns while (it != shotPattBound.end()) { modif = true; pattLabel = it.key(); QList> list = it.value(); // try to merge pattern boundaries until there remains no more one while (modif) { modif = false; // list of merged pattern boundaries QList> newList; QVector merged(list.size(), false); // first loop over pattern boundaries for (int i(0); i < list.size(); i++) { if (!merged[i]) { start = list[i].first; end = list[i].second; // second loop over pattern boundaries for (int j(i+1); j < list.size(); j++) { if (!merged[j]) { // second pattern interleaves first one at the beginning if (list[j].first < start && list[j].second >= start) { newPair = QPair(list[j].first, end); newList.push_back(newPair); merged[i] = true; merged[j] = true; modif = true; } // second pattern interleaves first one at the end else if (list[j].first <= end && list[j].second > end) { newPair = QPair(start, list[j].second); newList.push_back(newPair); merged[i] = true; merged[j] = true; modif = true; } // second pattern covers first one else if (list[j].first < start && list[j].second > end) { merged[i] = true; modif = true; } // second pattern is included in first one else if (list[j].first >= start && list[j].second <= end) { merged[j] = true; modif = true; } } } } } // add remaining pattern boundaries to new list for (int i(0); i < list.size(); i++) if (!merged[i]) newList.push_back(list[i]); qSort(newList); list = newList; } shotPattBound[pattLabel] = list; it++; } } bool ProjectModel::interPatterns(QString firstLabel, QString secondLabel) { // remove aditional characters QRegularExpression openPar(QRegularExpression::escape("(")); QRegularExpression closePar(QRegularExpression::escape(")")); firstLabel.replace(openPar, ""); firstLabel.replace(closePar, ""); secondLabel.replace(openPar, ""); secondLabel.replace(closePar, ""); // split parts of each pattern QStringList firstLabels = firstLabel.split("_"); QStringList secondLabels = secondLabel.split("_"); QList labels1; QList labels2; // retrieve list of shots contained in each pattern part labels1.push_back(firstLabels[0].split("|")); labels1.push_back(firstLabels[1].split("|")); labels2.push_back(secondLabels[0].split("|")); labels2.push_back(secondLabels[1].split("|")); // test if patterns share a shot label int i(0); int j(0); int k(0); while (i < 2) { j = 0; while (j < labels1[i].size()) { k = 0; while (k < 2) { if (labels2[k].contains(labels1[i][j])) return true; k++; } j++; } i++; } return false; } QString ProjectModel::normalizedPattern(const QString &firstLabel, const QString &secondLabel) { QString pattern; if (QString::compare(firstLabel, secondLabel) < 0) pattern = firstLabel + "_" + secondLabel; else pattern = secondLabel + "_" + firstLabel; return pattern; } QString ProjectModel::mergePatterns(QString firstLabel, QString secondLabel, QList> list1, QList> list2, QList> &mergedList) { QString newPattLabel; // remove aditional characters QRegularExpression openPar(QRegularExpression::escape("(")); QRegularExpression closePar(QRegularExpression::escape(")")); firstLabel.replace(openPar, ""); firstLabel.replace(closePar, ""); secondLabel.replace(openPar, ""); secondLabel.replace(closePar, ""); // split parts of each pattern QStringList firstLabels = firstLabel.split("_"); QStringList secondLabels = secondLabel.split("_"); QList labels1; QList labels2; // retrieve list of shots contained in each pattern part labels1.push_back(firstLabels[0].split("|")); labels1.push_back(firstLabels[1].split("|")); labels2.push_back(secondLabels[0].split("|")); labels2.push_back(secondLabels[1].split("|")); // retrieving equivalent lists bool found = false; int i(0); int j(0); int k(0); while (i < 2 && !found) { j = 0; while (j < labels1[i].size() && !found) { k = 0; while (k < 2 && !found) { if (labels2[k].contains(labels1[i][j])) found = true; k++; } j++; } i++; } i--; k--; QStringList eqClass1 = appendStringList(labels1[(i+1)%2], labels2[(k+1)%2]); qSort(eqClass1); QStringList eqClass2 = appendStringList(labels1[i], labels2[k]); qSort(eqClass2); // formatting new pattern label QString newLabel1 = eqClass1.join("|"); QString newLabel2 = eqClass2.join("|"); if (eqClass1.size() > 1) newLabel1 = "(" + newLabel1 + ")"; if (eqClass2.size() > 1) newLabel2 = "(" + newLabel2 + ")"; newPattLabel = normalizedPattern(newLabel1, newLabel2); // merging the two lists mergedList = list1; for (int i(0); i < list2.size(); i++) if (!mergedList.contains(list2[i])) mergedList.append(list2[i]); qSort(mergedList); return newPattLabel; } QStringList ProjectModel::appendStringList(const QStringList &list1, const QStringList &list2) { QStringList conc(list1); for (int i(0); i < list2.size(); i++) if (!list1.contains(list2[i])) conc.append(list2[i]); return conc; } QString ProjectModel::completePatternLabel(const QString &label, QMap>> shotPatterns) { QString completeLabel; QString currLabel; QStringList shotLabels(label.split("_")); QStringList currShotLabels; QRegularExpression left(shotLabels[0]); QRegularExpression right(shotLabels[1]); QRegularExpressionMatch matchLeft; QRegularExpressionMatch matchRight; bool found(false); QMap>>::const_iterator it = shotPatterns.begin(); while (it != shotPatterns.end() && !found) { currLabel = it.key(); currShotLabels = currLabel.split("_"); matchLeft = left.match(currShotLabels[0]); matchRight = right.match(currShotLabels[1]); if (matchLeft.hasMatch() && matchRight.hasMatch()) { completeLabel = currLabel; found = true; } matchLeft = left.match(currShotLabels[1]); matchRight = right.match(currShotLabels[0]); if (matchLeft.hasMatch() && matchRight.hasMatch()) { completeLabel = currLabel; found = true; } it++; } return completeLabel; } bool ProjectModel::testShotPattern(const QList &lblWindow, int pattSize) { int i(2); if (!lblWindow.contains("") && lblWindow.size() == pattSize) while (i < pattSize) { if (lblWindow[i] != lblWindow[i-2]) return false; i++; } else return false; return true; } void ProjectModel::retrieveShotLabels(Segment *segment, QList &shotLabels) const { if ((dynamic_cast(segment))) shotLabels.push_back(segment->getLabel()); for (int i(0); i < segment->childCount(); i++) retrieveShotLabels(segment->child(i), shotLabels); } void ProjectModel::retrieveSubPositionsLabels(QList &subStarts, QList &subEnds, QList &subRefLbl) const { QList vFrames; retrieveVFrames(m_series, vFrames); QString currSpeaker(""); QString prevSpeaker(""); QString currSub(""); QString prevSub(""); qint64 currPosition(0); qint64 prevPosition(0); // regular expression to detect subtitles corresponding to noise QRegularExpression noiseSource("\\(.*\\)"); for (int i(0); i < vFrames.size(); i++) { currSpeaker = vFrames[i]->getSpeaker(VideoFrame::Ref); currPosition = vFrames[i]->getPosition(); currSub = vFrames[i]->getSub(); QRegularExpressionMatch match = noiseSource.match(currSub); if (!match.hasMatch()) { // speaker status changed if (currSpeaker != prevSpeaker || currSub != prevSub) { // end of utterance if (currSpeaker == "") { subEnds.push_back(prevPosition); subRefLbl.push_back(prevSpeaker); } // beginning of utterance else if (prevSpeaker == "") subStarts.push_back(currPosition); // ending previous utterance and beginning new one else { subEnds.push_back(prevPosition); subRefLbl.push_back(prevSpeaker); subStarts.push_back(currPosition); } } prevSpeaker = currSpeaker; prevPosition = currPosition; prevSub = currSub; } } // processing last utterance if ending at last video frame // if (!regex_match(currSub.toStdString(), noiseSource)) { if (prevSpeaker != "" || prevSub != "") { subEnds.push_back(prevPosition); subRefLbl.push_back(prevSpeaker); } } /////////////// // accessors // /////////////// QString ProjectModel::getName() const { return m_name; } QString ProjectModel::getBaseName() const { return m_baseName; } QString ProjectModel::getSeriesName() const { return m_series->getName(); } /////////// // slots // /////////// void ProjectModel::setSpkDiar(const QString &epFName) { QString normName; QString baseName; QProcess process; QString program; QStringList arguments; QFileInfo info(epFName); // erasing previous speaker hypotheses clearSpeaker(m_series, VideoFrame::Hyp1); clearSpeaker(m_series, VideoFrame::Hyp2); // normalizing series name normName = m_series->getName(); normName = normName.toLower(); normName.replace(QRegularExpression("\\s+"), "_"); // audio files possibly needed baseName = normName + "_" + info.baseName(); m_baseName = baseName; // generating reference file QString workPath("spkDiarization/"); QString locRefFName(workPath + "data/ref/local/" + m_baseName); QString segRefFName(workPath + "data/ref/seg/" + m_baseName); exportLocSpkRef(locRefFName); // converting reference .lbl files into .rttm ones program = "perl"; arguments << workPath + "scripts/SpkMoulinette.pl" << locRefFName + ".lbl" << locRefFName + ".rttm"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); arguments << workPath + "scripts/SpkMoulinette.pl" << segRefFName + ".lbl" << segRefFName + ".rttm"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); // removing previous local score QFile::remove(workPath + "score/local/" + m_baseName + ".nistres"); } void ProjectModel::extractIVectors(bool ubm, const QString &epFName) { QString workPath("spkDiarization/"); QProcess process; QString program; QString args; QStringList arguments; QFileInfo info(epFName); int out; QString waveFile; QString sphFile; QString lstLine; QString sphSubFile; qreal startSec, endSec; QMap> spkIdx; waveFile = workPath + "data/sph/" + m_baseName + ".wav"; sphFile = workPath + "data/sph/" + m_baseName + ".sph"; qDebug() << "Extracting speech segments..."; // creating audio files corresponding to speech segments QFile lstFile(workPath + "data/data.lst"); QFile totVarFile(workPath + "ndx/totalvariability.ndx"); QFile ivExtFile(workPath + "ndx/ivExtractor.ndx"); if (!lstFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; if (!totVarFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; if (!ivExtFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream lstOut(&lstFile); QTextStream totVarOut(&totVarFile); QTextStream ivExtOut(&ivExtFile); for (int i(0); i < m_subBound.size(); i++) { if (m_subRefLbl[i] != "S") spkIdx[m_subRefLbl[i]].push_back(i); lstLine = m_baseName + "_" + QString::number(m_subBound[i].first / 10) + "_" + QString::number(m_subBound[i].second / 10); startSec = m_subBound[i].first / 1000.0; endSec = m_subBound[i].second / 1000.0; lstOut << lstLine << endl; totVarOut << lstLine << endl; ivExtOut << lstLine << " " << lstLine << endl; sphSubFile = workPath + "data/sph/" + lstLine + ".sph"; if (!QFile::exists(sphSubFile)) { // creating main audio file if needed if (!QFile::exists(sphFile)) { qDebug() << "Extracting .wav file..."; program = "avconv"; args = " -i " + epFName + " -map 0:1 -vn -acodec pcm_s16le -ar 16000 -ac 2 " + waveFile; out = std::system(qPrintable(program + args)); qDebug() << "Done."; qDebug() << "Converting .wav file to .sph..."; program = "sox"; arguments << waveFile << sphFile; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; QFile::remove(waveFile); } program = "sox"; arguments << sphFile << sphSubFile << "trim" << QString::number(startSec) << "=" + QString::number(endSec); process.start(program, arguments); process.waitForFinished(); arguments.clear(); } } qDebug() << "Done."; qDebug() << "Parameterizing speech segments..."; // parameterizing speech segments program = "sh"; arguments << workPath + "01_RUN_feature_extraction.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Normalizing parameters..."; // normalizing parameters program = "sh"; arguments << workPath + "02a_RUN_spro_front-end.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Estimating total variability matrix..."; // estimating total variability matrix program = "sh"; arguments << workPath + "04_RUN_tv_estimation.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Extracting i-vector / speech segment..."; // extracting i-vectors program = "sh"; arguments << workPath + "05_RUN_i-vector_extraction.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Gathering i-vectors into X matrix..."; // generating X matrix program = "sh"; arguments << workPath + "06_RUN_X_mat_generate.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; // initializing X matrix (one i-vec by utterance) mat X; X.load(QString("spkDiarization/iv/X.dat").toStdString(), raw_ascii); // generating covariance matrices mat Sigma = cov(X); m_W = genWMat(spkIdx, X); emit setDiarData(X, Sigma, m_W); qDebug() << "Cleaning directories..."; // cleaning directories program = "sh"; arguments << workPath + "00_RUN_clean_directories.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; } void ProjectModel::extractSpkIVectors(const QString &epFName, bool refSpk) { QString workPath = "spkDiarization/"; QFile lblFile; QProcess process; QString program; QStringList arguments; QString args; int out; QMap>> speakers; qreal start, end; QString speakerLabel; QString sphFile; QString waveFile; QString sphSubFile; waveFile = workPath + "data/sph/" + m_baseName + ".wav"; sphFile = workPath + "data/sph/" + m_baseName + ".sph"; // initializing list of local speakers as hypothesized if (refSpk) lblFile.setFileName(workPath + "data/ref/seg/" + m_baseName + ".lbl"); else lblFile.setFileName(workPath + "lblLocalSegmentation/" + m_baseName + ".lbl"); if (!lblFile.open(QIODevice::ReadOnly | QIODevice::Text)) return; QTextStream in(&lblFile); QMap nSpeakers; // regular expression to retrieve pattern name QRegularExpression re("(.+\\d+\\)?)_.+"); QString pattLabel; // parsing hypotheses file while (!in.atEnd()) { QString line = in.readLine(); QStringList data = line.split(" "); start = data[0].toDouble(); end = data[1].toDouble(); speakerLabel = data[2]; QRegularExpressionMatch match = re.match(speakerLabel); if (match.hasMatch()) pattLabel = match.captured(1); if (!speakers.contains(speakerLabel)) nSpeakers[pattLabel]++; // if (nSpeakers[pattLabel] <= 2 && nSpeakers.size() <= 4) speakers[speakerLabel].push_back(QPair(start, end)); } /* QMap::const_iterator it1 = nSpeakers.begin(); while (it1 != nSpeakers.end()) { qDebug() << it1.key() << it1.value(); it1++; } */ // generating audio files corresponding to speech segments sphFile = workPath + "data/sph/" + m_baseName + ".sph"; program = "sox"; QMap>>::const_iterator it = speakers.begin(); while (it != speakers.end()) { speakerLabel = it.key(); // qDebug() << speakerLabel; QList> boundaries = it.value(); qSort(boundaries); speakers[speakerLabel] = boundaries; for (int i(0); i < boundaries.size(); i++) { sphSubFile = workPath + "data/sph/" + m_baseName + "_" + QString::number(boundaries[i].first * 100) + "_" + QString::number(boundaries[i].second * 100) + ".sph"; if (!QFile::exists(sphSubFile)) { // creating main audio file if needed if (!QFile::exists(sphFile)) { qDebug() << "Extracting .wav file..."; program = "avconv"; args = " -i " + epFName + " -map 0:1 -vn -acodec pcm_s16le -ar 16000 -ac 2 " + waveFile; out = std::system(qPrintable(program + args)); qDebug() << "Done."; qDebug() << "Converting .wav file to .sph..."; program = "sox"; arguments << waveFile << sphFile; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; QFile::remove(waveFile); } arguments << sphFile << sphSubFile << "trim" << QString::number(boundaries[i].first) << "=" + QString::number(boundaries[i].second); process.start(program, arguments); process.waitForFinished(); arguments.clear(); } } it++; } qDebug() << "Generating audio files corresponding to local speakers..."; // generating audio files corresponding to hyhpothesized speakers QFile lstFile(workPath + "data/data.lst"); QFile totVarFile(workPath + "ndx/totalvariability.ndx"); QFile ivExtFile(workPath + "ndx/ivExtractor.ndx"); if (!lstFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; if (!totVarFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; if (!ivExtFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream lstOut(&lstFile); QTextStream totVarOut(&totVarFile); QTextStream ivExtOut(&ivExtFile); it = speakers.begin(); while (it != speakers.end()) { speakerLabel = it.key(); QList> boundaries = it.value(); for (int i(0); i < boundaries.size(); i++) { sphSubFile = workPath + "data/sph/" + m_baseName + "_" + QString::number(boundaries[i].first * 100) + "_" + QString::number(boundaries[i].second * 100) + ".sph"; arguments << sphSubFile; } arguments << workPath + "data/sph/" + m_baseName + "_" + speakerLabel + ".sph"; lstOut << m_baseName + "_" + speakerLabel << endl; totVarOut << m_baseName + "_" + speakerLabel << endl; ivExtOut << m_baseName + "_" + speakerLabel << " " << m_baseName + "_" + speakerLabel << endl; process.start(program, arguments); process.waitForFinished(); arguments.clear(); it++; } qDebug() << "Done."; // deleting sph file QFile::remove(sphFile); qDebug() << "Parameterizing speakers..."; // parameterizing speaker models program = "sh"; arguments << workPath + "01_RUN_feature_extraction.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Normalizing parameters..."; // normalizing parameters program = "sh"; arguments << workPath + "02a_RUN_spro_front-end.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Estimating total variability matrix..."; // estimating total variability matrix program = "sh"; arguments << workPath + "04_RUN_tv_estimation.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Extracting i-vector / speaker..."; // extracting i-vectors program = "sh"; arguments << workPath + "05_RUN_i-vector_extraction.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; qDebug() << "Gathering i-vectors into X matrix..."; // generating X matrix program = "sh"; arguments << workPath + "06_RUN_X_mat_generate.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; // initializing X matrix (one i-vec by utterance) mat X; X.load(QString("spkDiarization/iv/X.dat").toStdString(), raw_ascii); // covariance matrices mat Sigma = cov(X); emit setDiarData(X, Sigma, m_W, speakers); qDebug() << "Cleaning directories..."; // cleaning directories program = "sh"; arguments << workPath + "00_RUN_clean_directories.sh"; process.start(program, arguments); process.waitForFinished(); arguments.clear(); qDebug() << "Done."; } void ProjectModel::setResolution(const QSize &resolution) { m_episode->setResolution(resolution); } void ProjectModel::setFps(qreal fps) { m_episode->setFps(fps); } void ProjectModel::appendVideoFrame(int id, qint64 position) { VideoFrame *vFrame = new VideoFrame(id, position, m_episode); m_episode->appendChild(vFrame); } void ProjectModel::initShotLevel(Segment *segment) { Segment *firstVideoFrame = getFirstVideoFrame(segment); insertShot(firstVideoFrame, Segment::Manual); } void ProjectModel::insertShot(qint64 position, Segment::Source source) { int i(-1); // retrieve video frame from the position specified Segment *segment = m_series; while (!dynamic_cast(segment)) { // closest segment index to current position i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } if (!shotLevelCreated(segment)) { Segment *firstVideoFrame = getFirstVideoFrame(segment); insertShot(firstVideoFrame, source); } insertShot(segment, source); } void ProjectModel::insertShot(Segment *segment, Segment::Source source) { // no shot already inserted if (!shotLevelCreated(segment)) { // retrieve episode level Segment *grandParent = segment->parent(); // creating new shot Segment *newParent = new Shot(segment->getPosition(), Shot::Cut, grandParent, source); // updating list of frames QList children = grandParent->getChildren(); int size = children.size(); for (int i = 0; i < size; i++) children[i]->setParent(newParent); // assigning to created shot the list of frames newParent->setChildren(children); // assigning shot to episode grandParent->clearChildren(); grandParent->appendChild(newParent); emit modelChanged(); } // at least one shot already inserted else { Segment *prevParent = segment->parent(); Segment *grandParent = prevParent->parent(); // no shot already added at this position if (segment->getPosition() != prevParent->getPosition()) { Segment *newParent = new Shot(segment->getPosition(), Shot::Cut, grandParent, source); QList subList1; QList subList2; prevParent->splitChildren(subList1, subList2, segment->row(), newParent); prevParent->clearChildren(); prevParent->setChildren(subList1); newParent->setChildren(subList2); grandParent->insertChild(prevParent->row() + 1, newParent); if (source == Segment::Manual) emit positionChanged(segment->getPosition()); emit modelChanged(); } // shot already added at this position else if (prevParent->getSource() != source) { prevParent->setSource(Segment::Both); if (source == Segment::Manual) emit positionChanged(segment->getPosition()); emit modelChanged(); } } } void ProjectModel::removeShot(Segment *segment, Segment::Source source) { Segment *parent = segment->parent(); // row of current shot int row = segment->row(); // shot to delete must not be the first one if (row != 0) { Segment *prevSegment = segment->parent()->child(row-1); QList prevSegmentChildren = prevSegment->getChildren(); QList segmentChildren = segment->getChildren(); prevSegmentChildren.append(segmentChildren); // setting parent of merged lists elements int size = prevSegmentChildren.size(); for (int i = 0; i < size; i++) prevSegmentChildren[i]->setParent(prevSegment); prevSegment->clearChildren(); prevSegment->setChildren(prevSegmentChildren); parent->removeChild(row); if (source == Segment::Manual) emit positionChanged(prevSegment->getPosition()); emit modelChanged(); } } void ProjectModel::labelSimShot(qint64 position, int nCamera, Segment::Source source) { int i(-1); // retrieve shot from the position specified Segment *segment = m_series; Shot *shot; while (!(shot = dynamic_cast(segment))) { // closest segment index to current position i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } shot->setCamera(nCamera, source); } void ProjectModel::processSegmentation(bool checked, bool annot) { if (checked || annot) { depthFirstToShots(m_series); emit segmentationRetrieved(); depthFirstToSpokenFrames(m_series, VideoFrame::Ref); emit segmentationRetrieved(); depthFirstToSpokenFrames(m_series, VideoFrame::Hyp1); emit segmentationRetrieved(); } emit viewSegmentation(checked, annot); } void ProjectModel::retrieveSpeakers(bool checked) { if (checked) { QList speakers; retrieveSpeakersList(m_series, speakers, VideoFrame::Ref); emit speakersRetrieved(speakers); } } void ProjectModel::setSpeaker(qint64 start, qint64 end, const QString &speaker, VideoFrame::SpeakerSource source) { for (int position(start); position <= end; position += 40) { // retrieve video frame from the current position Segment *segment = m_series; VideoFrame *frame; while (!(frame = dynamic_cast(segment))) { // closest segment index to current position int i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } frame->setSpeaker(speaker, source); } } void ProjectModel::resetSpeaker(qint64 prevStart, qint64 prevEnd, qint64 start, qint64 end, bool resetSub, VideoFrame::SpeakerSource source) { QString speaker; QString sub; for (int position(prevStart); position <= prevEnd; position += 40) { // retrieve video frame from the current position Segment *segment = m_series; VideoFrame *frame; while (!(frame = dynamic_cast(segment))) { // closest segment index to current position int i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } speaker = frame->getSpeaker(source); sub = frame->getSub(); frame->setSpeaker("", source); if (resetSub) frame->setSub(""); } for (int position(start); position <= end; position += 40) { // retrieve video frame from the current position Segment *segment = m_series; VideoFrame *frame; while (!(frame = dynamic_cast(segment))) { // closest segment index to current position int i = segment->childIndexFromPosition(position); // select possible children segment = segment->child(i); } frame->setSpeaker(speaker, source); if (resetSub) frame->setSub(sub); } } void ProjectModel::exportSubtitles(const QString &fName) { QFile subFile(fName + ".csv"); if (!subFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream subOut(&subFile); QList spkVFrames; retrieveSpokenVFrames(m_series, spkVFrames, VideoFrame::Ref); qint64 start(spkVFrames[0]->getPosition()); qint64 position(start); QString sub(spkVFrames[0]->getSub()); for (int i(1); i < spkVFrames.size(); i++) { if (spkVFrames[i]->getSub() != sub) { subOut << start << "\t" << position << "\t" << sub << "\n"; start = spkVFrames[i]->getPosition(); } sub = spkVFrames[i]->getSub(); position = spkVFrames[i]->getPosition(); } subOut << start << "\t" << position << "\t" << sub << "\n"; } void ProjectModel::exportGlobSpkRef(const QString &fName) { QFile lblFile(fName + ".lbl"); QString label; bool labeled(false); qreal start; qreal end; qreal globStart(m_subBound[m_subBound.size()-1].second / 1000.0); qreal globEnd(m_subBound[0].first / 1000.0); if (!lblFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream lblOut(&lblFile); // test if utterances have been labelled int i(0); qDebug() << m_subRefLbl.size(); while (i < m_subRefLbl.size() && !labeled) { labeled = (m_subRefLbl[i] != "S"); i++; } // export .lbl file for (int i(0); i < m_subRefLbl.size(); i++) { label = m_subRefLbl[i]; label.replace(QRegularExpression(" "), "_"); start = m_subBound[i].first / 1000.0; end = m_subBound[i].second / 1000.0; if (labeled) { lblOut << start << " " << end << " " << label << "\n"; // updating global speech boundaries if (start < globStart) globStart = start; if (end > globEnd) globEnd = end; } else { lblOut << start << " " << end << " " << "speech" << "\n"; // updating global speech boundaries if (start < globStart) globStart = start; if (end > globEnd) globEnd = end; } } // export .uem file QFile uemFile(fName + ".uem"); QRegularExpression re(".*/(.+_.+)"); QRegularExpressionMatch match = re.match(fName); QString baseName; if (match.hasMatch()) baseName = match.captured(1); if (!uemFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream uemOut(&uemFile); uemOut << baseName << " " << 1 << " " << globStart << " " << globEnd << "\n"; } void ProjectModel::exportLocSpkRef(const QString &fName) { QMap>>::const_iterator it(m_shotPatterns.begin()); QList> currSubs; int uttIdx; qreal start; qreal end; QString pattLabel; QString spkLabel; bool labeled(false); qreal globStart(m_subBound[m_subBound.size()-1].second / 1000.0); qreal globEnd(m_subBound[0].first / 1000.0); QString locFName = fName; QString segFName = fName; segFName.replace(QRegularExpression("local"), "seg"); QFile locFile(locFName + ".lbl"); QFile segFile(segFName + ".lbl"); if (!locFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; if (!segFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream locOut(&locFile); QTextStream segOut(&segFile); // test if utterances have been labelled int i(0); while (i < m_subRefLbl.size() && !labeled) { labeled = (m_subRefLbl[i] != "S"); i++; } // export .lbl file while (it != m_shotPatterns.end()) { pattLabel = it.key(); currSubs = it.value(); for (int i(0); i < currSubs.size(); i++) { // index of current utterance uttIdx = currSubs[i].first; // adjusting utterance boundaries to shot pattern ones QPair pair = adjustSubBoundaries(m_subBound[uttIdx].first, m_subBound[uttIdx].second, m_strictShotPattBound[pattLabel]); // setting utterance boundaries start = static_cast(pair.first) / 1000.0; end = static_cast(pair.second) / 1000.0; // speaker label spkLabel = m_subRefLbl[uttIdx]; spkLabel.replace(QRegularExpression(" "), "_"); // write out data if (labeled) { segOut << start << " " << end << " " << (pattLabel + "_" + spkLabel ) << "\n"; locOut << start << " " << end << " " << spkLabel << "\n"; // updating global speech boundaries if (start < globStart) globStart = start; if (end > globEnd) globEnd = end; } } it++; } // export .uem file QFile uemFile(fName + ".uem"); QRegularExpression re(".*/(.+_.+)"); QRegularExpressionMatch match = re.match(fName); QString baseName; if (match.hasMatch()) baseName = match.captured(1); if (!uemFile.open(QIODevice::WriteOnly | QIODevice::Text)) return; QTextStream uemOut(&uemFile); uemOut << baseName << " " << 1 << " " << globStart << " " << globEnd << "\n"; } QPair ProjectModel::adjustSubBoundaries(qint64 subStart, qint64 subEnd, QList> shotPattBound) { bool found(false); int i(0); while (i < shotPattBound.size() && !found) { // truncate utterance at the beginning if (subStart < shotPattBound[i].first && subEnd > shotPattBound[i].first) { subStart = shotPattBound[i].first; found = true; } // utterance included in shot else if (subStart >= shotPattBound[i].first && subEnd <= shotPattBound[i].second) found = true; // truncate utterance at the end else if (subStart < shotPattBound[i].second && subEnd > shotPattBound[i].second) { subEnd = shotPattBound[i].second; found = true; } i++; } return QPair(subStart, subEnd); } void ProjectModel::improveSpkDiar() { QMap speakers; QMap> speakerList; QMap>::const_iterator shotIt; int totSegment; nFramesByCamLabel(m_series, speakerList, Segment::Automatic, VideoFrame::Hyp2); shotIt = speakerList.constBegin(); QMap::const_iterator spkIt; while (shotIt != speakerList.constEnd()) { QString label = shotIt.key(); speakers = shotIt.value(); totSegment = 0; qDebug() << label; spkIt = speakers.constBegin(); while (spkIt != speakers.constEnd()) { totSegment += spkIt.value(); spkIt++; } spkIt = speakers.constBegin(); while (spkIt != speakers.constEnd()) { qDebug() << spkIt.key() << spkIt.value() / static_cast(totSegment) * 100; spkIt++; } qDebug() << ""; shotIt++; } } void ProjectModel::retrieveShotSub(qint64 position) { QMap>>::const_iterator it = m_shotPattBound.begin(); QList> bounds; QList> subFeatures; QList> subFeaturesShot1; QList> subFeaturesShot2; QString pattLabel; QStringList shotLabels; QStringList labels1; QStringList labels2; bool found(false); // retrieving current shot pattern while (!found && it != m_shotPattBound.end()) { bounds = it.value(); int i(0); while (!found && i < bounds.size()) { if (position >= bounds[i].first && position <= bounds[i].second) { pattLabel = it.key(); subFeatures = m_shotPatterns[pattLabel]; found = true; } i++; } it++; } // remove aditional characters from pattern label QRegularExpression openPar(QRegularExpression::escape("(")); QRegularExpression closePar(QRegularExpression::escape(")")); pattLabel.replace(openPar, ""); pattLabel.replace(closePar, ""); // split the pattern into its two components shotLabels = pattLabel.split("_"); if (shotLabels.size() == 2) { labels1 = shotLabels[0].split("|"); labels2 = shotLabels[1].split("|"); } // retrieving utterances of each pattern shot for (int i(0); i < subFeatures.size(); i++) { // loop over first series of equivalent shots for (int j(0); j < labels1.size(); j++) if (m_shotUtterances[labels1[j]].contains(subFeatures[i])) subFeaturesShot1.push_back(subFeatures[i]); // loop over second series of equivalent shots for (int j(0); j < labels2.size(); j++) if (m_shotUtterances[labels2[j]].contains(subFeatures[i])) subFeaturesShot2.push_back(subFeatures[i]); } emit getCurrentPattern(subFeatures); emit getPatternFirstShot(subFeaturesShot1); emit getPatternSecondShot(subFeaturesShot2); } void ProjectModel::currentSub(qint64 position) { bool found(false); int iSup(m_subBound.size()-1); int iInf(0); int iMed(-1); while (!found && iSup >= iInf) { iMed = (iSup + iInf) / 2; if (position >= m_subBound[iMed].first && position <= m_subBound[iMed].second) found = true; else if (position < m_subBound[iMed].first) iSup = iMed - 1; else if (position > m_subBound[iMed].second) iInf = iMed + 1; } if (!found) iMed = -1; emit currentSubtitle(iMed); } void ProjectModel::playSubtitle(QList utter) { QList> utterances; for (int i(0); i < utter.size(); i++) utterances.push_back(QPair(m_subBound[utter[i]].first, m_subBound[utter[i]].second)); emit playSegments(utterances); } void ProjectModel::playSeg(QList> segments) { emit playSegments(segments); } /////////////////////////////////////// // test if shot level already exists // /////////////////////////////////////// bool ProjectModel::shotLevelCreated(Segment *segment) { Shot *shot = 0; // searching among parent segments while (segment != 0 && !(shot = dynamic_cast(segment))) segment = segment->parent(); if (segment == 0) return false; return true; } ///////////////////////// // returns first frame // ///////////////////////// Segment * ProjectModel::getFirstVideoFrame(Segment *segment) { VideoFrame *frame = 0; // segment is already a frame if ((frame = dynamic_cast(segment))) return frame->parent()->child(0); // segment is not a frame: retrieving first frame among children else while (!(frame = dynamic_cast(segment))) segment = segment->child(0); return frame; } void ProjectModel::depthFirstToShots(Segment *segment) const { if ((dynamic_cast(segment))) emit getShot(segment); for (int i(0); i < segment->childCount(); i++) depthFirstToShots(segment->child(i)); } void ProjectModel::depthFirstToSpokenFrames(Segment *segment, VideoFrame::SpeakerSource source) const { VideoFrame *vFrame; QString speaker; if ((vFrame = dynamic_cast(segment))) emit getSpokenFrame(vFrame->getPosition(), vFrame->getSub(), vFrame->getSpeaker(source)); for (int i(0); i < segment->childCount(); i++) depthFirstToSpokenFrames(segment->child(i), source); } void ProjectModel::retrieveShotPositions(Segment *segment, QList &shotPositions) const { if ((dynamic_cast(segment))) shotPositions.push_back(segment->getPosition()); for (int i(0); i < segment->childCount(); i++) retrieveShotPositions(segment->child(i), shotPositions); } void ProjectModel::retrieveSimCamLabels(Segment *segment, QList &autCamLabels, QList &manCamLabels) const { if ((dynamic_cast(segment))) { autCamLabels.push_back(segment->getCamera(Segment::Automatic)); manCamLabels.push_back(segment->getCamera(Segment::Manual)); } for (int i(0); i < segment->childCount(); i++) retrieveSimCamLabels(segment->child(i), autCamLabels, manCamLabels); } void ProjectModel::nFramesByCamLabel(Segment *segment, QMap> &speakerList, Segment::Source vSource, VideoFrame::SpeakerSource sSource) const { Shot *shot; QString label; QMap shotSpeakers; QMap currSpeakers; QMap::const_iterator it; if ((shot = dynamic_cast(segment))) { label = "C" + QString::number(shot->getCamera(vSource)); if (label != "C-1" && !(shotSpeakers = shot->getSpeakerList(sSource)).isEmpty()) { currSpeakers = speakerList.value(label); it = shotSpeakers.constBegin(); while (it != shotSpeakers.constEnd()) { currSpeakers.insert(it.key(), currSpeakers.value(it.key()) + it.value()); it++; } speakerList.insert(label, currSpeakers); } } for (int i(0); i < segment->childCount(); i++) nFramesByCamLabel(segment->child(i), speakerList, vSource, sSource); } void ProjectModel::retrieveSpokenVFrames(Segment *segment, QList &spkVFrames, VideoFrame::SpeakerSource source) const { VideoFrame *frame; if ((frame = dynamic_cast(segment)) && !frame->getSpeaker(source).isEmpty() && !frame->getSub().isEmpty()) spkVFrames.push_back(frame); for (int i(0); i < segment->childCount(); i++) retrieveSpokenVFrames(segment->child(i), spkVFrames, source); } void ProjectModel::retrieveVFrames(Segment *segment, QList &vFrames) const { VideoFrame *frame; if ((frame = dynamic_cast(segment))) vFrames.push_back(frame); for (int i(0); i < segment->childCount(); i++) retrieveVFrames(segment->child(i), vFrames); } void ProjectModel::retrieveSpeakersList(Segment *segment, QList &speakers, VideoFrame::SpeakerSource source) const { VideoFrame *frame; if ((frame = dynamic_cast(segment))) speakers.push_back(frame->getSpeaker(source)); for (int i(0); i < segment->childCount(); i++) retrieveSpeakersList(segment->child(i), speakers, source); } arma::mat ProjectModel::genWMat(QMap> spkIdx, const arma::mat &X) { // computing W mat W; if (spkIdx.size() > 0) { int nUtter(m_subBound.size()); umat spkRows = zeros(spkIdx.size(), nUtter); QMap>::const_iterator it = spkIdx.begin(); int i(0); while (it != spkIdx.end()) { QList indices = it.value(); for (int j(0); j < indices.size(); j++) spkRows(i, indices[j]) = 1; it++; i++; } int m(X.n_rows); int n(X.n_cols); int nSpk(spkRows.n_rows); W.zeros(n, n); // looping over the speakers for (int i(0); i < nSpk; i++) { // index of current speaker utterances in X matrix umat idx = find(spkRows.row(i)); // number of current speaker utterances int nUtt(idx.n_rows); // submatrix of vectorized speaker utterances mat S = X.rows(idx); // speaker covariance matrix mat C = zeros(n, n); // mean vector of speaker utterance vectors mat mu = mean(S); // looping over speaker utterances for (int j(0); j < nUtt; j++) { mat dev = S.row(j) - mu; C += dev.t() * dev; } // updating W W += C; } // normalizing W W /= m; } else W = cov(X); return W; }