#!/usr/bin/perl -w
# stm2txt.pl

use strict;
use warnings;


# Convert an STM transcript file into one "<id>-<start>-<end> <text>" line
# per segment, written to STDOUT.
#
# Arguments:
#   $ARGV[0]  path of the STM file to read (required)
#   $ARGV[1]  optional; the literal string "1" strips {...} and <...> tags
#             from the transcript text
#
# Lines containing "ignore_time_segment_in_scoring" and lines that do not
# match the expected field layout are skipped silently.
if (@ARGV < 1)
{
	die "<input : stm file> <option : remove tags (1 if activated>\n";
}

my $stm_path = $ARGV[0];

# Compare as a string: the option comes from the command line, so a numeric
# "==" would warn on non-numeric values and treat e.g. "1abc" as 1.
my $RemoveTags = (@ARGV > 1 && $ARGV[1] eq '1') ? 1 : 0;

# Three-argument open with a lexical handle: the path is never interpreted
# as a mode or pipe, and a missing/unreadable file is reported instead of
# silently producing no output.
open(my $stm_fh, '<', $stm_path)
	or die "Cannot open STM file '$stm_path': $!\n";

# Read line by line rather than loading the whole file into a list first.
while (my $line = <$stm_fh>)
{
	my $text_line = stm_line_to_text($line, $RemoveTags);
	print STDOUT $text_line if defined $text_line;
}

close($stm_fh)
	or die "Error while closing STM file '$stm_path': $!\n";

# Turn one STM line into its output line.
#
# Parameters:
#   $line         one raw line of the STM file
#   $remove_tags  true to delete {...} and <...> tags from the text
#
# Returns the formatted line (terminated by "\n"), or undef when the line
# is an "ignore_time_segment_in_scoring" segment or does not match the
# expected layout.
sub stm_line_to_text
{
	my ($line, $remove_tags) = @_;

	return if $line =~ /ignore_time_segment_in_scoring/;

	# Field layout, space separated: id, two skipped fields, start, end,
	# a <...> label, then the text, optionally followed by trailing
	# " (...)" groups that are dropped.
	my ($utterance_id, $start, $end, $sentence) =
		$line =~ /(.*?) .*? .*? (.*?) (.*?) \<.*?\> (.*?)(?: \(.*?\))*$/
		or return;

	# Times are scaled by 100 before being embedded in the id
	# (presumably seconds to centiseconds -- confirm against consumers).
	$start *= 100;
	$end   *= 100;

	if ($remove_tags)
	{
		$sentence =~ s/\{.*?\}//g;
		$sentence =~ s/\<.*?\>//g;
	}

	# Collapse runs of spaces (tag removal can leave gaps) and drop a
	# single leading space.
	$sentence =~ s/ +/ /g;
	$sentence =~ s/^ //;

	$utterance_id =~ s/_/-/g;

	return sprintf("%s-%s-%s %s\n", $utterance_id, $start, $end, $sentence);
}