Blame view

tools/sctk-2.4.10/bin/hubscr.pl 27.4 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
  #!/usr/bin/perl -w
  
  use strict;
  
  ### Revision History
  # Version 0.1, Release Sep 19, 1997
  #    - initial release
  # Version 0.2, Release Oct 29, 1997
  #    - added support for sc_stats
  #    - modified the csrfilt call for German and Spanish to use the -e option
  #      which tells it to upcase extended ASCII as well as 7-bit ASCII.
  # Version 0.3, 
  #    - Modified the filter procedure to ALWAYS tell the user if it skipped the
  #      filtering stage.
  # Version 0.4, Release April 6, 1998
  #    - added access to the RESULTS Server
  #    - added -M and -w options
  # Version 0.5, Released March 5, 2000
  #    - Modified to require updated tranfilt package
  # Version 0.6, Not released
  #    - Modified to accept Extended CTMs for the RT evaluation series, selected
  #      via the -C option.
  #    - Added a new hub scoring type rt-stt
  #    - Removed access to the RESULTS server
  #    - Changed local variables to my variables
  # Version 0.7
  #    - Added sort command to sort ctm file
  #    - Added prf formatted alignment output (4/28/03 audrey)
  #    - Changed the rt-stt conversion to remove confidence scores with 'NA' values
  #    - Added -d to disable definite article changes for Arabic
  # Version 0.8
  #    - Added specialized sort for CTM data to make it run faster
  # Version 0.9
  #    - added -H to enable hamza normalization
  # Version 0.10
  #    - added -T to enable tanween filtering
  # Version 0.11
  #    - added -o to use overlapscoring
  # Version 0.12
  #    - added -a to use asclite
  # Version 0.13
  #    - added -m to use custom memory limit (JA)
  # Version 0.14 Apr 21, 2006
  #    - added -f option to use rttm file as hyp file (JA)
  # Version 0.15 Jan, 2007
  #    - added -F, -u , -M, hub of type sastt
  #    - Calls to md-eval
  # Version 0.16 March 12, 2007
  #    - Turned on Pruning for ASCLITE Runs.  
  # Version 0.17 March 13, 2007
  #    - Renamed -M SLM to -K SLM...  (no one ever uses it!)
  #    - Added the difficulty tag for asclite
  # Version 0.18 April 30, 2007
  #    - Added the forced compression for asclite (JA)
  #    - Added the block size for asclite (JA)
  # Version 0.19 June 4, 2007
  #    - Added check of speaker auto-overlap in asclite options (JA)
  # Version 0.20 April 30, 2009
  #    - Added the automatic validation of inputs step AND the ability to skip validation
  # 
  
  # Script version; interpolated into the usage text below.
  my $Version = "0.20";

  # Help text that prefixes most command-line error messages.  Every line is
  # terminated with an explicit "\n" escape so that no source indentation can
  # leak into the printed text.  The option letters documented here follow the
  # getopts() specification in ProcessCommandLine(): the language model is
  # selected with -K (not -M, which carries the md-eval arguments), and the
  # documented md-eval default is the value $mdevalOpts is initialised with.
  my $Usage =
      "hubscr.pl [ -p PATH -H -T -d -R -v -L LEX ] [ -K SLM_lm | -w WWL ] [ -o numSpkr ] [ -m GB_Max_Memory[:GB_Max_Difficulty] ] [ -f FORMAT ] [ -a -C -B blocksize ] -g glm -l LANGOPT -h HUBOPT -r ref hyp1 hyp2 ...\n".
      "Version: $Version\n".
      "Desc: Score a Hub-4E/NE or Hub-5E/NE evaluation using the established\n".
      "      guidelines.  There are a set of language dependent options that this\n".
      "      script requires, they are listed below with their dependencies.\n".
      "      If more than one hyp is present, the set of hyps are viewed as an\n".
      "      'ensemble' of result that can be statistically compared with sc_stats.\n".
      "      The output reports are written with a root filename specified by '-n'\n".
      "      and optionally described with the '-e' flag.\n".
      "General Options:\n".
      "      -d         ->  Do not split the definite article from Arabic words\n".
      "      -g glm     ->  'glm' specifies the filename of the Global Mapping Rules\n".
      "      -v         ->  Verbosely tell the user what is being executed\n".
      "      -h [ hub4 | hub5 | rt-stt | sastt ]\n".
      "                 ->  Use scoring rules for the task: \n".
      "                     hub4 or hub5 -> no special rules\n".
      "                     rt-stt       -> removes non-lexical items from systems CTM input\n".
      "                     sastt        -> performs SASTT scoring.  System/reference inputs\n".
      "                                     must both be RTTMs. ASCLITE must be used for alignments.\n".
      "      -K SLM_lm  ->  Use the CMU-Cambridge SLM V2.0 binary language model 'LM'\n".
      "                     to perform Weighted-Word Scoring.  May not be used with -w\n".
      "      -l [ arabic | english | german | mandarin | spanish ]\n".
      "                 ->  Set the input language.\n".
      "      -L LDC_Lex ->  Filename of an LDC Lexicon.  The option is required only to\n".
      "                     score a German test.  Previous version for Arabic req'd this option.\n".
      "      -w WWL     ->  Use the Word-Weight List File to perform Weighted-Word\n".
      "                     scoring.  May not be used with -K\n".
      "      -H         ->  Perform hamza normalization for Arabic data. \n".
      "      -T         ->  Perform tanween filtering (i.e., removal) for Arabic data. \n".
      "      -V         ->  Skip validation of the input transcripts.  Default is to validate input transcripts. \n".
      "Other Options:\n".
      "      -n str     ->  Root filename to write the ensemble reports to.  Default\n".
      "                     is 'Ensemble'\n".
      "      -e 'desc'  ->  Use the description 'desc' as a sub-header in all reports.\n".
      "      -p DIR[:DIR]*\n".
      "                 ->  Use the following directories to search for needed components.\n".
      "                     Otherwise the default path is used.\n".
      "      -o numSpkr\n".
      "                 ->  Overlap using 'numSpkr' number of speakers.\n".
      "      -m GB_Max_Memory[:GB_Max_Difficulty]\n".
      "                 ->  'GB_Max_Memory' Set the maximum memory allocation in GB for the LCM.\n".
      "                 ->  'GB_Max_Difficulty' Set the limit of LCM difficulty (expressed in GB of memory).\n".
      "      -a\n".
      "                 ->  Use asclite for the alignment.\n".
      "      -C\n".
      "                 ->  Force compression for asclite.\n".
      "      -B blocksize\n".
      "                 ->  Block size for asclite. (default: 256 kB)\n".
      "      -f [ ctm | rttm ]\n".
      "                 ->  Specify the hyps fileformat.\n".
      "      -F [ stm | rttm ]\n".
      "                 ->  Specify the refs fileformat.\n".
      "      -G         -> Produce alignment graphs when asclite is used\n".
      "      -u UEM         Specify the UEM file for running mdeval (sastt hub only)\n".
      "      -M \"ARGS\" -> Arguments to pass to mdeval (sastt hub only) Def. '-nafcs -c 0.25 -o'\n".
      "\n";
  
  
  ################################################################
  #############     Set all Global variables         #############
  # File-scoped settings shared by every sub in this script.  The values
  # below are the defaults; ProcessCommandLine() overwrites them from the
  # command-line options named in the comments.
      # -v: when true, the commands being executed are echoed.
      my $Vb = 0;
      # -a: when 1, alignments are made with asclite instead of sclite.
      my $bUseAsclite=0;
      # -l, -h and -r.  "Undeterm" is the "not supplied" sentinel that
      # ProcessCommandLine() tests for before aborting with the usage text.
      my $Lang = "Undeterm";
      my $Hub = "Undeterm";
      my $Ref = "Undeterm";
      # Parallel arrays, one entry per hypothesis argument: the input file,
      # the name handed to the scorer after the file format, and the filtered
      # output file name (which carries a ".filt" suffix).
      my @Hyps = ();
      my @Hyps_iname = ();
      my @Hyps_oname = ();
      ### Names of the external SCTK programs used by this script.  They are
      ### interpolated into shell command lines as-is, so they must be
      ### reachable through PATH (the -p option prepends directories to it).
      my $SCLITE = "sclite";
      my $ASCLITE = "asclite";
      my $ALIGN2HTML = "align2html.pl";
      my $SC_STATS = "sc_stats";
      my $CSRFILT="csrfilt.sh";
      my $DEF_ART="def_art.pl";
      my $HAMZA_NORM="hamzaNorm.pl";
      my $TANWEEN_FILTER="tanweenFilt.pl";
      my $STM2RTTM = "stm2rttm.pl";
      my $ACOMP = "acomp.pl";
      my $MDEVAL = "md-eval.pl";
      my $CTMVALID = "ctmValidator.pl";
      my $STMVALID = "stmValidator.pl";
      my $RTTMVALID = "rttmValidator.pl";
      # Arabic filter stages: -d clears the first, -H and -T set the others.
      my $DEF_ART_ENABLED=1;
      my $HAMZA_NORM_ENABLED=0;
      my $TANWEEN_FILT_ENABLED=0;
      # -o: speaker count for overlap scoring; -1 means the option was not given.
      my $OVRLAPSPK=-1;
      # -g (required) and -L.
      my $GLM = "";
      my $LDCLEX = "";
      # -m GB_Max_Memory[:GB_Max_Difficulty].  The difficulty limit is only
      # passed to asclite when it is >= 0.
      my $MemoryLimit = 1.0;
      my $DifficultyLimit = -1.0;
      ### Defaults for SC_stats
      # -n and -e; each is only passed to sc_stats when non-empty.
      my $EnsembleRoot = "";
      my $EnsembleDesc = "";
      ###
      # -K and -w: weighted-word scoring inputs for sclite.
      my $SLM_LM = "";
      my $WWL = "";
      
      # -f and -F: formats of the hypothesis and reference files.
      my $hypfileformat = "ctm";
      my $reffileformat = "stm";
  
      # -u and -M: UEM file and argument string handed to md-eval (sastt hub).
      my $UEM = "";
      my $mdevalOpts = "-nafcs -c 0.25 -o";
  
      # -G: keep the asclite alignment info and build HTML alignment graphs.
      my $produceAlignmentGraphs = 0;
      
      # -C sets this to the asclite compression flag; -B sets the block size.
      my $ASCLITE_FORCE_COMPRESSION = "";
      my $asclite_blocksize = 256;
  
      # -V clears this, which skips validation of the filtered files.
      my $validateInputs = 1;
  #######         End of Globals         #########
  ################################################
  
  ################################################
  #######          MAIN PROGRAM          #########
  # Overall flow:
  #   1. parse and validate the command line,
  #   2. check that every helper program is present and recent enough,
  #   3. filter the reference and each hypothesis file,
  #   4. score each hypothesis against the reference,
  #   5. compare the systems statistically when more than one hyp was given.
  ProcessCommandLine();

  VerifyResources();

  print "Filtering Files:\n";
  # Every file is filtered even after a failure so that all validation
  # problems are reported before the run is aborted.
  my $filterSuccess = 1;
  $filterSuccess = 0 unless (FilterFile($Ref, $Ref.".filt", $Lang, $reffileformat, "ref"));
  foreach my $h (0 .. $#Hyps)
  {
      $filterSuccess = 0 unless (FilterFile($Hyps[$h], $Hyps_oname[$h], $Lang, $hypfileformat, "hyp"));
  }
  if (! $filterSuccess){
      die "Error: Filter processes failure detect.  Aborting.  The -V option disables validation";
  }

  foreach my $h (0 .. $#Hyps)
  {
      RunScoring($Ref,$Hyps[$h],$Hyps_iname[$h],$Hyps_oname[$h],$Lang);
  }

  # The statistical comparison only makes sense for an ensemble of systems.
  RunStatisticalTests(@Hyps_oname) if ($#Hyps > 0);

  exit 0;

  #######          END OF MAIN           #########
  ################################################
  
  
  ################################################################
  ################ Get the command line arguments ################
  # Parse @ARGV into the file-level globals and validate the combination of
  # options.  Takes no arguments and returns nothing useful; any invalid or
  # missing option terminates the script through die(), usually with the
  # usage text in front of the error.  The arguments left over after option
  # parsing are the hypothesis files, each optionally written as
  # FILE\#INAME\#ONAME (fields separated by a backslash followed by '#').
  sub ProcessCommandLine
  {
      ### This is an invisible option.  If the first argument is sortCTM or
      ### sortSTM, run the corresponding sorter on STDIN and exit.
      ### This is a hack to make this script completely self contained.
      ### @ARGV is tested first so that a bare invocation falls through to
      ### the usage message without an "uninitialized value" warning.
      if (@ARGV && $ARGV[0] eq "sortCTM")
      {
          sortCTM();
          exit;
      }
      if (@ARGV && $ARGV[0] eq "sortSTM")
      {
          sortSTM();
          exit;
      }

      use Getopt::Std;
      getopts('VGaCHTdvRl:h:r:g:L:n:e:K:w:p:o:m:f:F:u:M:B:');

      ### Copy the options into the globals.  The otherwise pointless
      ### re-assignments of $main::opt_v/a/C give each variable a second
      ### mention, which keeps 'perl -w' from warning "used only once".
      if (defined($main::opt_l)) { $Lang = $main::opt_l; $Lang =~ tr/A-Z/a-z/; }
      if (defined($main::opt_h)) { $Hub = $main::opt_h; $Hub =~ tr/A-Z/a-z/; }
      if (defined($main::opt_r)) { $Ref = $main::opt_r; }
      if (defined($main::opt_d)) { $DEF_ART_ENABLED = ! $main::opt_d; }
      if (defined($main::opt_v)) { $Vb = 1; $main::opt_v = 1; }
      if (defined($main::opt_L)) { $LDCLEX = $main::opt_L; }
      if (defined($main::opt_n)) { $EnsembleRoot = $main::opt_n; }
      if (defined($main::opt_e)) { $EnsembleDesc = $main::opt_e; }
      if (defined($main::opt_K)) { $SLM_LM = $main::opt_K; }
      if (defined($main::opt_o)) { $OVRLAPSPK = $main::opt_o; }
      if (defined($main::opt_w)) { $WWL = $main::opt_w; }
      if (defined($main::opt_a)) { $bUseAsclite = 1; $main::opt_a = 1; }
      if (defined($main::opt_C)) { $ASCLITE_FORCE_COMPRESSION = "-force-memory-compression"; $main::opt_C = 1; }
      if (defined($main::opt_B)) { $asclite_blocksize = $main::opt_B; }
      if (defined($main::opt_m)) {
          ### Accepted forms: MEMORY or MEMORY:DIFFICULTY, both unsigned decimals
          if ($main::opt_m =~ /^(\d+|\d*\.\d+|\d+\.):(\d+|\d*\.\d+|\d+\.)$/){
              $MemoryLimit = $1;
              $DifficultyLimit = $2;
          } elsif ($main::opt_m =~ /^(\d+|\d*\.\d+|\d+\.)$/){
              $MemoryLimit = $1;
          } else {
              die "Failed to parse -m option value '$main::opt_m'";
          }
          print "Warning: Difficulty Limit($DifficultyLimit) is less than the MemoryLimit($MemoryLimit).  Did you want to do both?\n"
              if( ($DifficultyLimit < $MemoryLimit) && ($DifficultyLimit >= 0) );
      }
      if (defined($main::opt_f)) { $hypfileformat = $main::opt_f; }
      if (defined($main::opt_F)) { $reffileformat = $main::opt_F; }
      if (defined($main::opt_u)) { $UEM = $main::opt_u; }
      if (defined($main::opt_M)) { $mdevalOpts = $main::opt_M; }
      if (defined($main::opt_G)) { $produceAlignmentGraphs = $main::opt_G; }
      if (defined($main::opt_V)) { $validateInputs = ! $main::opt_V ; }

      if (defined($main::opt_g)) {
          $GLM = $main::opt_g;
          die("$Usage\nError: Unable to stat GLM file '$GLM'") if (! -f $GLM);
      } else {
          die("$Usage\nError: GLM file required via -g option");
      }

      #### Language checks/Verification
      die("$Usage\nError: Language defintion required via -l") if ($Lang eq "Undeterm");
      die("$Usage\nError: Undefined language '$Lang'")
          if ($Lang !~ /^(english|german|spanish|mandarin|arabic)$/);

      if (defined($main::opt_H)){
          die "Error: Hamza normalization only applies to Arabic data\n" if ($Lang ne "arabic");
          $HAMZA_NORM_ENABLED = $main::opt_H;
      }
      if (defined($main::opt_T)){
          die "Error: Tanween filtering only applies to Arabic data\n" if ($Lang ne "arabic");
          $TANWEEN_FILT_ENABLED = $main::opt_T;
      }
      ####

      #### Asclite Check
      die("$Usage\nError: Asclite is working only with english\n") if( ($Lang ne "english") && ($bUseAsclite == 1) );
      die("$Usage\nError: Overlap scoring (-o) is working only with asclite\n") if( ($OVRLAPSPK >= 0) && ($bUseAsclite == 0) );
      die("$Usage\nError: Memory Limit (-m) is working only with asclite\n") if( ($MemoryLimit != 1) && ($bUseAsclite == 0) );
      ####

      #### Hub Check/Verification
      die("$Usage\nError: Hub defintion required via -h") if ($Hub eq "Undeterm");
      die("$Usage\nError: Undefined Hub '$Hub'") if ($Hub !~ /^(hub4|hub5|rt-stt|sastt)$/);

      #### Reference File Check/Verification
      die("$Usage\nError: Reference file defintion required via -r") if ($Ref eq "Undeterm");
      die("$Usage\nError: Unable to access reference file '$Ref'\n") if (! -f $Ref);

      #### extract the hypothesis files
      die("$Usage\nError: Hypothesis files required") if ($#ARGV < 0);
      foreach my $hyp (@ARGV){
          my(@Arr) = split(/\\#/,$hyp);
          ### A missing or empty INAME/ONAME field defaults to the file name
          $Arr[1] = $Arr[0] if (!defined($Arr[1]) || $Arr[1] =~ /^$/);
          $Arr[2] = $Arr[0] if (!defined($Arr[2]) || $Arr[2] =~ /^$/);
          push(@Hyps,$Arr[0]);
          push(@Hyps_iname,$Arr[1]);
          push(@Hyps_oname,$Arr[2].".filt");
      }
      foreach my $hyp (@Hyps){
          die("$Usage\nError: Unable to access hypothesis file '$hyp'\n") if (! -f $hyp);
      }

      print STDERR "Warning: LDC lexicon option '-L $LDCLEX' ignored!!!!\n"
          if (($Lang ne "german" && ($Lang ne "arabic")) && $LDCLEX ne "");

      ### German filtering hands the lexicon to acomp.pl, so the file must be
      ### readable.  The check has to test the file itself: without the -f
      ### test every German run aborted here, lexicon or not.
      die("$Usage\nError: Unable to access LDC Lexicon file '$LDCLEX'\n")
          if (($Lang eq "german") && (! -f $LDCLEX));

      #### Check the LM and WWL files.  The language model is given with -K;
      #### -M carries the md-eval argument string and must not be tested here.
      die("$Usage\nError: Unable to use both -K and -w\n")
          if (defined($main::opt_K) && defined($main::opt_w));
      die("$Usage\nError: SLM language model '$main::opt_K' not found\n")
          if (defined($main::opt_K) && (! -f $main::opt_K));
      die("$Usage\nError: WWL file '$main::opt_w' not found\n")
          if (defined($main::opt_w) && (! -f $main::opt_w));

      if (defined($main::opt_p)){
          die "Error: Path not formatted properly '$main::opt_p'" if ($main::opt_p !~ /^(\S+)(:\S+)*$/);
          ### Search the user's directories before the default path
          $ENV{PATH} = "${main::opt_p}:$ENV{PATH}";
      }

      ### Make sure sastt will work
      if ($Hub eq "sastt"){
          die "$Usage\nError: SASTT hub requires RTTM hyp file input" if ($hypfileformat ne "rttm");
          ### No matching check on the reference format: RunScoring() converts
          ### an STM reference to RTTM before calling md-eval.
          #    die "$Usage\nError: SASTT hub requires RTTM ref file input" if ($reffileformat ne "rttm");
          die("$Usage\nError: SASTT only works with ASCLITE\n") if ($bUseAsclite != 1);
      }
  }
  
  
  #################################################################################
  #### This procedure is a replacement for a UNIX sort command for CTMs.     ######
  #### (presumably because the external sort took too long -- the original   ######
  #### note is truncated).                                                   ######
  ####
  #### Records are array refs produced by split(/(\s+)/): because the pattern
  #### captures, the separators are kept, so the even indexes hold the fields
  #### ([0] file, [2] channel, [4] start time) and the odd indexes hold the
  #### whitespace that followed each field.

  # Sort comparator: file name, then channel (both as strings), then start
  # time (numerically).  Uses sort's package variables $a and $b.
  sub ctmSort {
      return ($a->[0] cmp $b->[0]) if ($a->[0] ne $b->[0]);
      return ($a->[2] cmp $b->[2]) if ($a->[2] ne $b->[2]);
      $a->[4] <=> $b->[4];
  }

  # Read CTM records from STDIN and print them to STDOUT ordered by file,
  # channel and start time.  Comment lines (";;") and blank lines are dropped.
  # Each record is printed exactly as read, minus any leading whitespace.
  sub sortCTM{
      my %data = ();
      while (<STDIN>){
          s/^\s+//;
          next if ($_ =~ /^;;/ || $_ =~ /^$/);
          my @fields = split(/(\s+)/);
          # Bucket on the channel field ([2]).  Index [1] is the separator that
          # followed the file name; bucketing on it split one channel's records
          # into separately sorted groups whenever tabs and spaces were mixed.
          my $chan = defined($fields[2]) ? $fields[2] : "";
          push @{ $data{$fields[0]}{$chan} }, \@fields;
      }

      foreach my $file (sort (keys %data)){
          foreach my $chan (sort (keys %{ $data{$file} })){
              foreach my $rec (sort ctmSort @{ $data{$file}{$chan} }){
                  print join("",@$rec);
              }
          }
      }
  }
  
  #################################################################################
  #### This procedure is a replacement for a UNIX sort command for STMs.     ######
  #### (presumably because the external sort took too long -- the original   ######
  #### note is truncated).                                                   ######
  ####
  #### Records are array refs produced by split(/(\s+)/): because the pattern
  #### captures, the separators are kept, so the even indexes hold the fields
  #### ([0] file, [2] channel, [6] the fourth field, used as the numeric sort
  #### key) and the odd indexes hold the whitespace that followed each field.

  # Sort comparator: file name, then channel (both as strings), then the
  # fourth field (numerically).  Uses sort's package variables $a and $b.
  sub stmSort {
      return ($a->[0] cmp $b->[0]) if ($a->[0] ne $b->[0]);
      return ($a->[2] cmp $b->[2]) if ($a->[2] ne $b->[2]);
      $a->[6] <=> $b->[6];
  }

  # Read STM records from STDIN and print them to STDOUT ordered by file,
  # channel and the numeric fourth field.  Comment lines (";;") and blank lines
  # are passed through immediately, so they all precede the sorted records.
  sub sortSTM{
      my %data = ();
      while (<STDIN>){
          s/^\s+//;
          if ($_ =~ /^;;/ || $_ =~ /^$/){
              print;
              next;
          }
          my @fields = split(/(\s+)/);
          # Bucket on the channel field ([2]).  Index [1] is the separator that
          # followed the file name; bucketing on it split one channel's records
          # into separately sorted groups whenever tabs and spaces were mixed.
          my $chan = defined($fields[2]) ? $fields[2] : "";
          push @{ $data{$fields[0]}{$chan} }, \@fields;
      }

      foreach my $file (sort (keys %data)){
          foreach my $chan (sort (keys %{ $data{$file} })){
              foreach my $rec (sort stmSort @{ $data{$file}{$chan} }){
                  print join("",@$rec);
              }
          }
      }
  }
  
  ################################################################
  ###########  Make sure sclite, tranfilt, and other  ############
  ###########  resources are available.               ############
  # Run a helper program and extract the version number it reports.
  #
  #   $exe   shell command to run; its STDERR is merged into STDOUT
  #   $name  human-readable program name, used only in error messages
  #
  # Returns the digits following "Version: " (an optional leading 'v' and any
  # trailing letters are dropped); "N.N" is preferred over a bare "N".  When
  # several lines match, the last one wins.  Dies if the command cannot be
  # started or if no version line is found in its output.
  sub get_version{
      my($exe, $name) = @_;
      my $ver;

      # Lexical handle + 3-arg pipe open: no shared bareword handle, and the
      # open mode can never be taken from the command string.
      open(my $out, '-|', "$exe 2>&1") ||
          die("Error: unable to exec $name with the command '$exe'");
      while (my $line = <$out>){
          if ($line =~ /Version: v?(\d+\.\d+)[a-z]*/){
              $ver = $1;
          } elsif ($line =~ /Version: v?(\d+)/){
              $ver = $1;
          }
      }
      close($out);
      die "Error: unable to get the version for program $name with the command '$exe'"
          if (!defined($ver));
      return $ver;
  }
  
  # Check that every external helper program can be run and reports a
  # sufficiently recent version.  Takes no arguments; dies with an explanatory
  # message on the first program that is missing or too old.  All programs are
  # checked regardless of which options were selected.
  sub VerifyResources
  {
      my($ver);

      ### Check the version of asclite.  The version is folded into one
      ### integer (major*100 + minor) so that 1.4 becomes 104.  $ver starts
      ### at 0 so that "no version line found" is rejected as too old
      ### without a non-numeric comparison warning.
      $ver = 0;
      open(my $asclite_out, '-|', "$ASCLITE 2>&1") ||
          die("Error: unable to exec asclite with the command '$ASCLITE'");
      while (<$asclite_out>){
          if ($_ =~ /asclite Version: (\d+)\.(\d+)[a-z]*/i){
              $ver = $1*100+$2;
          }
      }
      close($asclite_out);
      die ("ASCLITE executed by the command '$ASCLITE' is too old. \n".
           "       Version 1.4 or better is needed.  This package is available\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 104);

      ### Check the version of sc_stats
      $ver = 0;
      open(my $sc_stats_out, '-|', "$SC_STATS 2>&1") ||
          die("Error: unable to exec sc_stats with the command '$SC_STATS'");
      while (<$sc_stats_out>){
          if ($_ =~ /sc_stats Version: (\d+\.\d+)[a-z]*,/){
              $ver = $1;
          }
      }
      close($sc_stats_out);
      die ("SC_STATS executed by the command '$SC_STATS' is too old. \n".
           "       Version 1.1 or better is needed.  This package is available\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.1);

      #### Check for CSRFILT.  Versions are compared as plain decimals, so the
      #### accepted range is 1.15 <= version < 1.2.
      $ver = get_version($CSRFILT,"csrfilt.sh");
      die ("CSRFILT executed by the command '$CSRFILT' is too old. \n".
           "       Version 1.15 or better is needed.  Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.15 || $ver >= 1.2);

      $ver = get_version($DEF_ART,"def_art.pl");
      die ("def_art.pl executed by the command '$DEF_ART' is too old. \n".
           "       Version 1.0 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.0);

      $ver = get_version($ACOMP,"acomp.pl");
      die ("acomp.pl executed by the command '$ACOMP' is too old. \n".
           "       Version 1.0 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.0);

      $ver = get_version($HAMZA_NORM,"hamzaNorm.pl");
      die ("hamzaNorm.pl executed by the command '$HAMZA_NORM' is too old. \n".
           "       Version 1.0 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.0);

      $ver = get_version($TANWEEN_FILTER,"tanweenFilt.pl");
      die ("tanweenFilt.pl executed by the command '$TANWEEN_FILTER' is too old. \n".
           "       Version 1.0 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1.0);

      $ver = get_version($MDEVAL,"md-eval.pl");
      die ("md-eval.pl executed by the command '$MDEVAL' is too old. \n".
           "       Version 21 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 21);

      ### The remaining tools are queried with -h.
      $ver = get_version("$ALIGN2HTML -h","align2html.pl");
      die ("align2html.pl executed by the command '$ALIGN2HTML' is too old. \n".
           "       Version 0.1 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 0.1);

      $ver = get_version("$STM2RTTM -h","stm2rttm.pl");
      die ("stm2rttm.pl executed by the command '$STM2RTTM' is too old. \n".
           "       Version 0.1 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 0.1);

      $ver = get_version("$CTMVALID -h","ctmValidator.pl");
      die ("ctmValidator.pl executed by the command '$CTMVALID' is too old. \n".
           "       Version 3 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 3);

      $ver = get_version("$STMVALID -h","stmValidator.pl");
      die ("stmValidator.pl executed by the command '$STMVALID' is too old. \n".
           "       Version 1 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 1);

      $ver = get_version("$RTTMVALID -h","rttmValidator.pl");
      die ("rttmValidator.pl executed by the command '$RTTMVALID' is too old. \n".
           "       Version 13 or better is needed.   Get the up-to-date SCTK package\n".
           "       from the URL http://www.nist.gov/speech/software.htm") if ($ver < 13);

  }
  
  # Sort and normalise one transcript file through a shell pipeline, then
  # (unless validation is disabled) validate the result.
  #
  #   $file     input transcript
  #   $outfile  file the filtered transcript is written to
  #   $lang     arabic | english | german | mandarin | spanish
  #   $format   ctm | stm | rttm
  #   $purpose  passed to csrfilt.sh as its -t argument ("ref" or "hyp")
  #
  # Returns 1 on success and 0 when the filtered file fails validation.
  # Dies if the filter pipeline itself exits non-zero (the partial output file
  # is removed first) or if $lang is not one of the languages above.
  sub FilterFile
  {
      my($file, $outfile, $lang, $format, $purpose) = @_;
      my($rtn);
      my($csrfilt_com);
      my($def_art_com);
      my($hamza_norm_com);
      my($tanween_filter_com);
      my($acomp_com);
      my($sort_com);
      my($com);

      print "   Filtering $lang file '$file', $format format\n";

      ### Pipeline stage applied before language filtering; a no-op by default
      my $rtFilt = "cat";

      if ($Hub eq "rt-stt" && $format eq "ctm")
      {
          ### Comment lines and records with fewer than 7 fields pass through
          ### untouched.  Of the remaining records only those whose 7th field
          ### is 'lex' are kept, truncated before that field (and before the
          ### 6th field too when it is 'NA').
          $rtFilt = "perl -nae 'if (\$_ =~ /^;;/ || \$#F < 6) {print} else {s/^\\s+//; if (\$F[6] eq 'lex') { \$st = 6; \$st-- if (\$F[5] =~ /^na\$/i); splice(\@F, \$st, 10); print join(\" \" ,\@F).\"\\n\" }}' ";
      }

      ### The CTM and STM sorters are this very script, re-run in sorter mode
      if ($format eq "ctm")
      {
          $sort_com = "$0 sortCTM < ";
      }
      elsif ($format eq "stm")
      {
          $sort_com = "$0 sortSTM < ";
      }
      elsif ($format eq "rttm")
      {
          $sort_com = "rttmSort.pl < ";
      }

      if ($lang =~ /^(arabic)$/)
      {
          $csrfilt_com = "$CSRFILT -s -i $format -t $purpose -dh $GLM";
          ### Each optional Arabic stage degrades to 'cat' when disabled
          if ($DEF_ART_ENABLED){
              $def_art_com = "$DEF_ART -s $LDCLEX -i $format - -";
          } else {
              $def_art_com = "cat";
          }
          if ($HAMZA_NORM_ENABLED){
              $hamza_norm_com = "$HAMZA_NORM -i $format -- - -";
          } else {
              $hamza_norm_com = "cat";
          }
          if ($TANWEEN_FILT_ENABLED){
              $tanween_filter_com = "$TANWEEN_FILTER -a -i $format -- - -";
          } else {
              $tanween_filter_com = "cat";
          }
          $com = "$sort_com $file | $rtFilt | $def_art_com | $hamza_norm_com | $tanween_filter_com | $csrfilt_com > $outfile";
      } elsif ($lang =~ /^(mandarin)$/){
          $csrfilt_com = "$CSRFILT -i $format -t $purpose -dh $GLM";

          ### Mandarin input is not sorted
          $com = "cat $file | $rtFilt | $csrfilt_com > $outfile";
      } elsif ($lang =~ /^(spanish)$/){
          $csrfilt_com = "$CSRFILT -e -i $format -t $purpose -dh $GLM";

          $com = "$sort_com $file | $rtFilt | $csrfilt_com > $outfile";
      } elsif ($lang =~ /^(german)$/){
          $csrfilt_com = "$CSRFILT -e -i $format -t $purpose -dh $GLM";
          $acomp_com =   "$ACOMP -f -m 2 -l $LDCLEX -i $format - -";

          $com = "$sort_com $file | $rtFilt | $csrfilt_com | $acomp_com > $outfile";
      } elsif ($lang =~ /^(english)$/){
          $csrfilt_com = "$CSRFILT -i $format -t $purpose -dh $GLM";
          $com = "$sort_com $file | $rtFilt | $csrfilt_com > $outfile";
      } else {
          die "Undefined language: '$lang'";
      }

      print "      Exec: $com\n" if ($Vb);

      $rtn = system $com;
      if ($rtn != 0) {
          ### Do not leave a partial output file behind
          unlink($outfile);
          die("Error: Unable to filter file: $file with command:\n   $com\n");
      }

      if ($validateInputs){
          print "      Validating the output file '$outfile'\n" if ($Vb);
          my $vcom = "";
          if ($format eq "ctm") {
              $vcom = "$CTMVALID -l $lang -i $outfile";
          } elsif ($format eq "stm") {
              $vcom = "$STMVALID -l $lang -i $outfile";
          } else {  ###if ($format eq "rttm") {
              $vcom = "$RTTMVALID -S -f -u -i $outfile";
          }
          ### First pass only collects the exit status.
          ### NOTE(review): with this redirection order only STDOUT is
          ### discarded; STDERR still reaches the terminal.  Confirm whether
          ### '> /dev/null 2>&1' was intended.
          $rtn = system "$vcom 2>&1 > /dev/null";
          if ($rtn != 0){
              ### Second pass shows the validator output, indented
              system $vcom . " | sed 's/^/      /'";
              print "Error: Filter operation yielded a non-validated $format output with return code $rtn\n";
              return 0;
          }
      }
      return 1;
  }
  
  # Score one filtered hypothesis file against the filtered reference.
  #
  #   $ref        reference file name; "$ref.filt" is the file actually scored
  #   $hyp        original hypothesis file name (not used by the commands)
  #   $hyp_iname  name handed to the scorer after the hypothesis file format
  #   $hyp_oname  filtered hypothesis file; also the root of the report names
  #   $lang       language, selects extra sclite flags
  #
  # Without -a a single sclite run aligns and reports.  With -a, asclite makes
  # the alignment (preceded by an md-eval speaker mapping for the sastt hub)
  # and sclite then builds the remaining reports from asclite's sgml output.
  # Dies as soon as any external command exits non-zero.
  sub RunScoring
  {
      my($ref, $hyp, $hyp_iname, $hyp_oname, $lang) = @_;
      my($reff) = ($ref.".filt");
      my($rtn);
      my($outname);

      ### "-n dir/name" becomes "-O dir -n name" so reports land in that directory
      ($outname = "-n $hyp_oname") =~ s:^-n (\S+)/([^/]+)$:-O $1 -n $2:;
      print "Scoring $lang Hyp '$hyp_oname' against ref '$reff'\n";

      my $command;
      if ($bUseAsclite == 0)
      {
          $command = "$SCLITE -r $reff stm -h $hyp_oname $hypfileformat $hyp_iname -F -D -o sum rsum sgml lur dtl pra prf -C det sbhist hist $outname";

          if ($lang =~ /^(mandarin)$/)
          {
              $command .= " -c NOASCII DH -e gb";
          }

          if ($lang =~ /^(arabic)$/)
          {
              $command .= " -s";
          }

          ### Weighted-word scoring when an LM or a word-weight list was given
          if ($SLM_LM !~ /^$/ || $WWL !~ /^$/)
          {
              $command .= " -L $SLM_LM" if ($SLM_LM !~ /^$/);
              $command .= " -w $WWL" if ($WWL !~ /^$/);
              $command .= " -o wws";
          }

          print "   Exec: $command\n" if ($Vb);
          $rtn = system($command);
          die("Error: SCLITE execution failed\n      Command: $command") if ($rtn != 0);
      }
      else
      {
          my $spkrOpt = "";
          my $ali2htmOpt = "";
          if ($Hub eq "sastt"){
              ### Pre score for mdeval
              my $mdevalref = "";
              if ("$reffileformat" eq "stm" ){
                  # Convert to rttm
                  $mdevalref="$reff.rttm";
                  my $com = "cat $reff | $STM2RTTM -e rt05s > $mdevalref";
                  print "   Exec: $com\n" if ($Vb);
                  $rtn = system($com);
                  die("Error: STM2RTTM failed\n      Command: $com") if ($rtn != 0);
              } else {
                  $mdevalref=$reff;
              }
              my $mdcom = "$MDEVAL $mdevalOpts ".($UEM ne "" ? "-u $UEM" : "")." -r $mdevalref -s $hyp_oname -M $hyp_oname.mdeval.spkrmap 1> $hyp_oname.mdeval";
              print "   Exec: $mdcom\n" if ($Vb);
              $rtn = system($mdcom);
              die("Error: MDEVAL failed\n      Command: $mdcom") if ($rtn != 0);

              ### The speaker map written by md-eval feeds asclite and align2html
              $spkrOpt = "-spkr-align $hyp_oname.mdeval.spkrmap";
              $ali2htmOpt = "-m $hyp_oname.mdeval.spkrmap";
          }

          my $overlapscoring = "";

          if($OVRLAPSPK != -1)
          {
              $overlapscoring = "-overlap-limit $OVRLAPSPK";
          }

          my $OptionMemoryLimit = "-memory-limit $MemoryLimit";
          $OptionMemoryLimit .= " -difficulty-limit $DifficultyLimit" if($DifficultyLimit >= 0);

          ### asclite's STDERR is captured as the alignment-info CSV
          $command = "$ASCLITE -f 6 $spkrOpt $overlapscoring -adaptive-cost -time-prune 100 -word-time-align 100 $ASCLITE_FORCE_COMPRESSION -memory-compression $asclite_blocksize $OptionMemoryLimit -r $reff $reffileformat -h $hyp_oname $hypfileformat $hyp_iname -F -D -spkrautooverlap ref -o sgml sum rsum 2> $hyp_oname.aligninfo.csv";
          print "   Exec: $command\n" if ($Vb);
          $rtn = system($command);
          die("Error: ASCLITE execution failed\n      Command: $command") if ($rtn != 0);

          if($produceAlignmentGraphs){
              ### Build the alignment HTML
              $command = "mkdir -p $hyp_oname.alignments ; $ALIGN2HTML $ali2htmOpt -a $hyp_oname.aligninfo.csv -o $hyp_oname.alignments";
              print "   Exec: $command\n" if ($Vb);
              $rtn = system($command);
              die("Error: ALIGN2HTML execution failed\n      Command: $command") if ($rtn != 0);
          } else {
              ### The CSV is only needed for the graphs
              unlink("$hyp_oname.aligninfo.csv");
          }

          ### Produce the remaining reports from asclite's sgml output
          $command = "$SCLITE -P -o dtl pra prf -C det sbhist hist $outname < $hyp_oname.sgml";
          print "   Exec: $command\n" if ($Vb);
          $rtn = system($command);
          die("Error: SCLITE execution failed\n      Command: $command") if ($rtn != 0);
      }

  }
  
  # Run sc_stats over the sgml reports of all scored hypotheses.
  #
  #   @_  filtered hypothesis file names; "<name>.sgml" must exist for each
  #
  # The sgml files are concatenated and piped into sc_stats.  The report root
  # name (-n) and description (-e) are only passed when they are non-empty.
  # Dies if an sgml file is missing or if the sc_stats pipeline exits non-zero.
  sub RunStatisticalTests
  {
      my(@Hy) = @_;
      my($rtn);

      print "Running Statistical Comparison Tests\n";

      ## verify the sgml files were made, and collect them for the cat list
      print "    Checking for sclite's sgml files\n" if ($Vb);
      my @sgml_files = ();
      foreach my $hyp (@Hy){
          my $sgml = $hyp.".sgml";
          die "Error: Unable to locate sgml file '$sgml'" if (! -f $sgml);
          push(@sgml_files, $sgml);
      }

      my $command = join(" ", "cat", @sgml_files);
      $command .= " | $SC_STATS -p -r sum rsum es res lur -t std4 -u -g grange2 det";
      $command .= " -n $EnsembleRoot" if ($EnsembleRoot ne "");
      $command .= " -e \"$EnsembleDesc\"" if ($EnsembleDesc ne "");

      print "    Exec: $command\n" if ($Vb);
      $rtn = system($command);
      die("Error: SC_STATS execution failed\n      Command: $command") if ($rtn != 0);
  }