Yannick Estève / ONTRAC-Kaldi

Blame view

tools/sctk-2.4.10/src/sclite/sclite.c 27.7 KB
  
  #define MAIN
  #include "sctk.h"
  
  #define SCLITE_VERSION "2.10"
  
  void do_exit(char *desc, char *prog, int ret);
  void proc_args(int argc, char **argv, char *prog, char **rname, char **rfmt, char **hname, char **hfmt, int *nhyps,  enum id_types *id_type, int *case_sens, int *outputs, char **title, int *feedback, int *linewidth, int *use_diff, char **out_dir, char **out_name,int *char_align, int *pipeout, int *pipein, int *infered_wordseg, char **lexicon, int *frag_correct, int *opt_del, int *inf_flags, int *stm2ctm_reduce, int *time_align, int *conf_outputs, int *left_to_right, char **wwl_file, char **lm_file);
  
  #define OUT_SUM            0x0001
  #define OUT_RSUM           0x0002
  #define OUT_PRALIGN        0x0004
  #define OUT_LUR            0x0008
  #define OUT_SGML           0x0010
  #define OUT_STDOUT         0x0020
  #define OUT_SENT           0x0040
  #define OUT_SPKR           0x0080
  #define OUT_DTL            0x0100
  #define OUT_PRALIGN_FULL   0x0200
  #define OUT_WWS            0x0400
  #define OUT_NL_SGML        0x0800
  
  #define CONF_OUT_NONE      0x0000
  #define CONF_OUT_DET       0x0001
  #define CONF_OUT_BHIST     0x0002
  #define CONF_OUT_HIST      0x0004
  #define CONF_OUT_SBHIST    0x0008
  
  #define REDUCE_NOTHING        0x0000
  #define REDUCE_REF_SEGMENTS   0x0001
  #define REDUCE_HYP_WORDS      0x0002
  
  char *usage = "%s: <OPTIONS>
  " 
  "sclite Version: " SCLITE_VERSION ", SCTK Version: " TK_VERSION "
  "
  "Input Options:
  "
  "    -r reffile [ <rfmt> ]
  "
  "                Define the reference file, and it's format
  "
  "    -h hypfile [ <hfmt> <title> ]
  "
  "                Define the hypothesis file, it's format, and a 'title' used
  "
  "                for reports.  The default title is 'hypfile'.  This option
  "
  "                may be used more than once.
  "
  "    -i <ids>    Set the utterance id type.   (for transcript mode only)
  "
  "    -P          Accept the piped input from another utility.
  "
  "    -e gb|euc|utf-8 [ case-conversion-localization ]
  "
  "                Interpret characters as GB, EUC, utf-8, or the default, 8-bit ASCII.
  "
  "                Optionally, case conversion localization can be set to either 'generic',
  "
  "                'babel_turkish', or 'babel_vietnamese'
  "  
  "Alignment Options:
  "
  "    -s          Do Case-sensitive alignments.
  "
  "    -d          Use GNU diff for alignments.
  "
  "    -c [ NOASCII DH ]
  "
  "                Do the alignment on characters not on words as usual by split-
  "
  "                ting words into chars. The optional argument NOASCII does not
  "
  "                split ASCII words and the optional arg. DH deletes hyphens from
  "
  "                both the ref and hyp before alingment.   Exclusive with -d.
  "
  "    -L LM       CMU-Cambridge SLM Language model file to use in alignment and scoring.
  "
  "    -S algo1 lexicon [ ASCIITOO ]
  "
  "    -S algo2 lexicon [ ASCIITOO ]
  "
  "                Instead of performing word alignments, infer the word segmenta-
  "
  "                tion using algo1 or algo2.  See sclite(1) for algorithm details.
  "
  "    -F          Score fragments as correct.  Options -F and -d are exclusive.
  "
  "    -D          Score words marked optionally deletable as correct if deleted.
  "
  "                Options -D and -d are exclusive.
  "
  "    -T          Use time information, (if available), to calculated word-to-
  "
  "                word distances based on times. Options -F and -d are exlc.
  "
  "    -w wwl      Perform Word-Weight Mediated alignments, using the WWL file 'wwl'.
  "
  "                IF wwl is 'unity' use weight 1.o for all words.
  "
  "    -m [ ref | hyp ]
  "
  "                Only used for scoring a hyp/ctm file, against a ref/stm file.
  "
  "                When the 'ref' option is used, reduce the reference segments
  "
  "                to time range of the hyp file's words.  When the 'hyp' option
  "
  "                is used, reduce the hyp words to the time range of the ref
  "
  "                segments.  The two may be used together.  The argument -m
  "
  "                by itself defaults to '-m ref'.  Exclusive with -d.
  "
  "Output Options:
  "
  "    -O output_dir
  "
  "                Writes all output files into output_dir. Defaults to the
  "
  "                hypfile's directory.
  "
  "    -f level    Defines feedback mode, default is 1
  "
  "    -l width    Defines the line width.
  "
  "    -p          Pipe the alignments to another sclite utility.  Sets -f to 0.
  "
  "Scoring Report Options:
  "
  "    -o [ sum | rsum | pralign | all | sgml | stdout | lur | snt | spk | 
  "
  "         dtl | prf | wws | nl.sgml | none ]
  "
  "                Defines the output reports. Default: 'sum stdout'
  "
  "    -C [ det | bhist | sbhist | hist | none ] 
  "
  "                Defines the output formats for analysis of confidence scores.
  "
  "                Default: 'none'
  "
  "    -n name     Writes all outputs using 'name' as a root filename instead of
  "
  "                'hypfile'.  For multiple hypothesis files, the root filename
  "
  "                is 'name'.'hypfile'
  ";
  
  #define MAX_HYPS 40
  
  int main(int argc, char **argv){
      char *prog = "sclite";
      char *refname, *hypname[MAX_HYPS], *reffmt, *hypfmt[MAX_HYPS], 
           *title[MAX_HYPS];
      char *out_dir, *out_name;
      char *wwl_file;
      enum id_types id_type;
      int errors = 0;
      int num_hyps;
      int linewidth;
      int outputs;
      int conf_outputs;
      int feedback;
      int use_diff;
      SCORES *scor[MAX_HYPS];
      int case_sense, nsc;
      int char_align;
      int pipeout;
      int pipein;
      int num_piped;
      int nh;
      char *hroot;
      int hdirLen = FILENAME_MAX;
      int outrootLen = FILENAME_MAX;
      TEXT *hdir, *outroot;
      int infered_wordseg;
      char *lexicon;
      int frag_correct;
      int opt_del;
      int inf_flags;
      int stm2ctm_reduce;
      int time_align;
      int left_to_right;
      WWL *wwl = (WWL *)0;
      char *lm_file;
  
      alloc_singZ(hdir, hdirLen, TEXT, NULL_TEXT);
      alloc_singZ(outroot, outrootLen, TEXT, NULL_TEXT);
  
  #ifdef LM_ALIGN
      /*    ng_t ng; */
  
  #endif
      
      db = db_level = 0;
      proc_args(argc, argv, prog, &refname, &reffmt, hypname, hypfmt,
  	      &num_hyps, &id_type, &case_sense,&outputs,title,&feedback,
  	      &linewidth,&use_diff,&out_dir,&out_name,&char_align,&pipeout,
  	      &pipein, &infered_wordseg, &lexicon, &frag_correct, &opt_del,
  	      &inf_flags, &stm2ctm_reduce, &time_align, &conf_outputs,
  	      &left_to_right, &wwl_file, &lm_file);
  
      if (feedback > 0) printf("sclite: " SCLITE_VERSION 
  			     " TK Version " TK_VERSION"
  ");
  
      if (pipein){
  	if (feedback >= 1) printf("Loading Piped input. . . ");
  	num_piped = 0;
  	if (!load_SCORES_sgml(stdin,scor,&num_piped,MAX_HYPS)){
  	    fprintf(scfp,"load_SCORES_sgml Failed
  ");
  	    exit(1);
  	}
  	if (feedback >= 1) printf("%d systems loaded.
  ",num_piped);	
      } else 
  	num_piped = 0;
  
      if (wwl_file != (char *)0){
        if (feedback >= 1)
  	printf("Loading WWL file '%s'
  ",wwl_file);
        if (load_WWL(&wwl,(TEXT *)wwl_file) != 0){
  	fprintf(stderr,"Error: Unable to read WWL file '%s'
  ",wwl_file);
  	exit(1);
        }
        wwl->curw = 0;
      }
  
      for (nsc=0; nsc<num_hyps + num_piped; nsc++){
  	if (nsc >= num_piped){
  	    nh = nsc - num_piped;
  	    if (feedback >= 1)
  		printf("Begin alignment of Ref File: '%s' and %s: '%s'
  ",
  		       refname, "Hyp File", hypname[nh]);
  
  	    /* Set up the output root name */
  	    if ((hroot = strrchr(hypname[nh],'/')) != NULL){
  	      if (hroot-hypname[nh] + 1 > hdirLen){
  		expand_singarr_to_size(hdir,(hroot-hypname[nh]),hdirLen,hroot-hypname[nh]+1,TEXT);
  	      }
  	      TEXT_strBcpy(hdir,hypname[nh],hroot-hypname[nh]);
  	      hroot++;
  	    } else {
  		hroot = hypname[nh];
  		TEXT_strcpy(hdir,(TEXT *)".");
  	    }
  	    
  	    { char *name;
  	      if ((out_dir != (char *)0) ||
  		  TEXT_strcmp(((out_dir != (char *)0) ? (TEXT *)out_dir : (TEXT *)hdir),(TEXT *)".") != 0)
                  name = rsprintf("%s/%s",((out_dir != (char *)0) ? out_dir : (char *)hdir),
  				(out_name != (char *)0) ? out_name : hroot);
  	      else
  	        name = rsprintf("%s",(out_name != (char *)0) ? out_name : hroot);
  	      if (outrootLen < TEXT_strlen((TEXT *)name)+1){
  		expand_singarr(outroot,TEXT_strlen((TEXT *)name),outrootLen,1.5,TEXT);
  	      }
                TEXT_strcpy(outroot, name);	      
  	    }
  	    if (strcmp(reffmt,"trn") == 0 && strcmp(hypfmt[nh],"trn") == 0)
  		if (!use_diff)
  		    scor[nh]=align_trans_mode_dp(refname,hypname[nh],title[nh],
  						 1,case_sense,feedback,
  						 char_align,id_type,
  						 infered_wordseg, lexicon,
  						 frag_correct, opt_del, 
  						 inf_flags, wwl, lm_file);
  		else
  		    scor[nh]=align_trans_mode_diff(refname,hypname[nh],
  						   title[nh],1,case_sense,
  						   feedback,id_type);
  	    else if (strcmp(reffmt,"stm")== 0 && strcmp(hypfmt[nh],"ctm") == 0)
  		if (!use_diff)
  		    scor[nh]=align_ctm_to_stm_dp(refname,hypname[nh],title[nh],
  						 1, case_sense,feedback,
  						 char_align,id_type,
  						 infered_wordseg, lexicon,
  						 frag_correct, opt_del,
  						 inf_flags, 
  				  BF_isSET(stm2ctm_reduce,REDUCE_REF_SEGMENTS),
  				  BF_isSET(stm2ctm_reduce,REDUCE_HYP_WORDS),
  						 left_to_right, wwl, lm_file);
  		else {
  		  if (left_to_right)
  		    scor[nh]=align_ctm_to_stm_diff(refname, hypname[nh],
  						   title[nh],1,case_sense,
  						   feedback,id_type);
  		  else {
  		    fprintf(scfp,"Error: can't perform diff alignments on "
  			    "right-to-left data
  ");
  		    exit(1);
  		  }
  		}
  	    else if (strcmp(reffmt,"ctm")== 0 && strcmp(hypfmt[nh],"ctm") == 0)
  	      scor[nh]=align_ctm_to_ctm(hypname[nh], refname, title[nh],
  					feedback, frag_correct, opt_del,
  					case_sense, time_align, left_to_right, wwl,
  					lm_file);
  	    else if (strcmp(reffmt,"stm")== 0 && strcmp(hypfmt[nh],"txt") ==0){
  #if DIFF_ENABLED
  	      if (left_to_right)
  		scor[nh] =align_text_to_stm(refname, hypname[nh],title[nh],
  					    1,case_sense,feedback,id_type);
  	      else {
  		fprintf(scfp,"Error: can't perform diff alignments on "
  			"right-to-left data
  ");
  		exit(1);
  	      }
  #else		
  		do_exit("Alignments via diff have been disabled",prog,1);
  #endif
  	    } else{
  		fprintf(stderr,"Error: Unable to score '%s' against '%s'
  ",
  			hypfmt[nh],reffmt);
  		exit(1);
  	    }
  	    
  	    if (scor[nh] == (SCORES *)0){
  		fprintf(stderr,"%s: Alignment failed.  Exiting.
  ",prog);
  		exit(1);
  	    }
  	} else { /* input came from piped input */
  	    /* Set up the output root name */
  	    if ((hroot = strrchr(scor[nsc]->title,'/')) != NULL){
  		strncpy(hdir,scor[nsc]->title,hroot-scor[nsc]->title);
  		hdir[hroot-scor[nsc]->title] = '\0';
  		hroot++;
  	    } else {
  		hroot = scor[nsc]->title;
  		strcpy(hdir,".");
  	    }
  	    if ((out_dir != (char *)0) ||
  		strcmp(((out_dir != (char *)0) ? out_dir : (char *)hdir),".") != 0)
  	      sprintf(outroot,"%s/%s",((out_dir != (char *)0) ? out_dir : (char *)hdir),
  		      (out_name != (char *)0) ? out_name : hroot);
  	    else
  	      sprintf(outroot,"%s",(out_name != (char *)0) ? out_name : hroot);
  	}	    
  	
  	if (BF_isSET(outputs,OUT_SUM))
  	  print_system_summary(scor[nsc],
  			       BF_isSET(outputs,OUT_STDOUT) ? "-": (char *)outroot,
  			       0, 0, 0, feedback);
  	if (BF_isSET(outputs,OUT_WWS)){
  	  if (scor[nsc]->weight_ali)
  	    print_system_summary(scor[nsc],
  				 BF_isSET(outputs,OUT_STDOUT) ? "-": (char *)outroot,
  				 0, 0, 1, feedback);
  	  else
  	    printf("    Skipping WWS Report, no word weights supplied.
  ");
  	}
  	if (BF_isSET(outputs,OUT_RSUM))
  	    print_system_summary(scor[nsc], 
  				 BF_isSET(outputs,OUT_STDOUT) ? "-":(char *)outroot,
  				 0, 1, 0, feedback);
  	if (BF_isSET(outputs,OUT_SENT))
  	    score_dtl_sent(scor[nsc], BF_isSET(outputs,OUT_STDOUT)?"-":(char *)outroot,
  			   feedback);
  	if (BF_isSET(outputs,OUT_SPKR))
  	    score_dtl_spkr(scor[nsc], BF_isSET(outputs,OUT_STDOUT)?"-":(char *)outroot,
  			   feedback);
  	if (BF_isSET(outputs,OUT_DTL))
  	    score_dtl_overall(scor[nsc],
  			      BF_isSET(outputs,OUT_STDOUT) ? "-":(char *)outroot,
  			      feedback);
  	if (BF_isSET(outputs,OUT_LUR))
  	    print_lur(scor[nsc],
  		      BF_isSET(outputs,OUT_STDOUT) ? "-":(char *)outroot, feedback);
  	if (BF_isSET(outputs,OUT_PRALIGN)){
  	    FILE *fp = stdout;
  	    if (BF_notSET(outputs,OUT_STDOUT))
  		fp = fopen(rsprintf("%s.pra",outroot),"w");
  	    if (fp == (FILE *)0) fp = stdout;
  	    if (feedback >= 1)
  		printf("    Writing string alignments to '%s'
  ",
  		       (fp == stdout) ? "stdout" :
  		       rsprintf("%s.pra",outroot));
  	    dump_SCORES_alignments(scor[nsc],fp,linewidth,0);
  	    if (fp != stdout) fclose(fp);
  	}
  	if (BF_isSET(outputs,OUT_PRALIGN_FULL)){
  	    FILE *fp = stdout;
  	    if (BF_notSET(outputs,OUT_STDOUT))
  		fp = fopen(rsprintf("%s.prf",outroot),"w");
  	    if (fp == (FILE *)0) fp = stdout;
  	    if (feedback >= 1)
  		printf("    Writing string alignments to '%s'
  ",
  		       (fp == stdout) ? "stdout" :
  		       rsprintf("%s.prf",outroot));
  	    dump_SCORES_alignments(scor[nsc],fp,linewidth,1);
  	    if (fp != stdout) fclose(fp);
  	}
  	if (BF_isSET(outputs,OUT_SGML)){
  	    FILE *fp = stdout;
  	    if (BF_notSET(outputs,OUT_STDOUT))
  		fp = fopen(rsprintf("%s.sgml",outroot),"w");
  	    if (fp == (FILE *)0) fp = stdout;
  	    if (fp != stdout && feedback >= 1)
  		printf("    Writing SGML string alignments to '%s'
  ",
  		       rsprintf("%s.sgml",outroot));
  	    dump_SCORES_sgml(scor[nsc],fp,(TEXT *)":",(TEXT *)",");
  	    if (fp != stdout) fclose(fp);
  	}
  	if (BF_isSET(outputs,OUT_NL_SGML)){
  	    FILE *fp = stdout;
  	    if (BF_notSET(outputs,OUT_STDOUT))
  		fp = fopen(rsprintf("%s.nl.sgml",outroot),"w");
  	    if (fp == (FILE *)0) fp = stdout;
  	    if (fp != stdout && feedback >= 1)
  		printf("    Writing Newline Separated SGML string alignments to '%s'
  ",
  		       rsprintf("%s.nl.sgml",outroot));
  	    dump_SCORES_sgml(scor[nsc],fp,(TEXT *)"
  ",(TEXT *)"
  ");
  	    if (fp != stdout) fclose(fp);
  	}
  	if (BF_isSET(conf_outputs,CONF_OUT_DET))
  	  if (make_SCORES_DET_curve(&(scor[nsc]),1,outroot,feedback,"") != 0)
  	    errors++;
  	if (BF_isSET(conf_outputs,CONF_OUT_BHIST))
  	    if (make_binned_confidence(scor[nsc],outroot,feedback) != 0)
  	        errors++;
  	if (BF_isSET(conf_outputs,CONF_OUT_HIST))
    	    if (make_confidence_histogram(scor[nsc],outroot,feedback) != 0)
  	        errors++;
  	if (BF_isSET(conf_outputs,CONF_OUT_SBHIST))
  	    if (make_scaled_binned_confidence(scor[nsc],outroot,20,feedback) != 0)
  	        errors++;
  	
      }
  
      if (pipeout)
  	for (nsc=0; nsc < num_hyps + num_piped; nsc++)
  	    dump_SCORES_sgml(scor[nsc],stdout,(TEXT *)"
  ",(TEXT *)"
  ");
  
      /* clean up the score structures */
      for (nsc=0; nsc < num_hyps + num_piped; nsc++)
          SCORES_free(scor[nsc]);
      if (wwl != (WWL *)0) free_WWL(&wwl);
  
      if (feedback >= 1) 
          if (errors == 0)
  	    printf("
  Successful Completion
  ");
  	else
    	    printf("
  Unsuccessful Completion
  ");
      return(0);
  }
  
  void proc_args(int argc, char **argv, char *prog, char **rname, char **rfmt, char **hname, char **hfmt, int *nhyps, enum id_types *id_type, int *case_sense, int *outputs, char **title, int *feedback, int *linewidth, int *use_diff, char **out_dir, char **out_name, int *char_align, int *pipeout, int *pipein, int *infered_wordseg, char **lexicon, int *frag_correct, int *opt_del, int *inf_flags, int *stm2ctm_reduce, int *time_align, int *conf_outputs, int *left_to_right, char **wwl_file, char **lm_file){
      int opt, fbset=0, outset=0, i, nh;
      char *id;
      struct stat fileinfo;
  
      if (argc <= 1) 
  	do_exit("Arguments reguired",prog,1);
  
      *linewidth=1000;
      *feedback=1;
      *nhyps = 0;
      *rname=(char *)0;  *hname=(char *)0;
      *rfmt=(char *)0;   *hfmt=(char *)0;
      id=(char *)0;
      *outputs=0;
      *case_sense=0;
      *title=(char *)0;
      *out_dir = (char *)0;
      *out_name = (char *)0;
      *use_diff = 0;
      *char_align = 0;
      *pipeout = 0;
      *pipein = 0;
      *infered_wordseg = 0;
      *lexicon = (char *)0;
      *frag_correct = 0;
      *opt_del = 0;
      *inf_flags = 0;
      *stm2ctm_reduce = 0;
      *time_align = 0;
      *conf_outputs = CONF_OUT_NONE;
      *left_to_right = 1;
      *wwl_file = (char *)0;
      *lm_file = (char *)0;
  
      for (opt = 1; opt < argc && (*(argv[opt]) == '-'); opt++){
  	/* printf("Current OP %s
  ",argv[opt]); */
  	if (strcmp(argv[opt],"-r") == 0){
  	    if (argc < opt + 2) do_exit("Not enough Ref arguments",prog,1);
  	    if (*(argv[opt+1]) == '-') do_exit("Req'd Ref File name",prog,1);
  	    *rname = argv[++opt];
  	    if (argc >= opt + 2 && *(argv[opt+1]) != '-') *rfmt  = argv[++opt];
  	} else if (strcmp(argv[opt],"-h") == 0){
  	    if (argc < opt + 2) do_exit("Not enough Hyp arguments",prog,1);
  	    if (*(argv[opt+1]) == '-') do_exit("Req'd Hyp File name",prog,1);
  	    hname[*nhyps] = argv[++opt];
  	    if (argc >= opt + 2 && *(argv[opt+1]) != '-') 
  		hfmt[*nhyps]  = argv[++opt];
  	    else
  		hfmt[*nhyps] = "trn";
  	    if (argc >= opt + 2 && *(argv[opt+1]) != '-') 
  		title[*nhyps] = argv[++opt];
  	    else
  		title[*nhyps] = hname[*nhyps];
  	    (*nhyps)++;
  	} else if (strcmp(argv[opt],"-i") == 0){
  	    if (argc <= opt+1) do_exit("Not enough ID arguments",prog,1);
  	    id = argv[++opt];
  	} else if (strcmp(argv[opt],"-l") == 0){
  	    if (argc <= opt+1)
  		do_exit("Not enough Line Width arguments",prog,1);
  	    *linewidth = atoi(argv[++opt]);
  	} else if (strcmp(argv[opt],"-L") == 0){
  #ifdef WITH_SLM
  	    if (argc <= opt+1)
  		do_exit("Not enough Language Model arguments",prog,1);
  	    *lm_file = argv[++opt];
  #else
  	    do_exit("-L option used, but SLM toolkit not compiled into sctk
  ",prog,1);
  #endif
  	} else if (strcmp(argv[opt],"-f") == 0){
  	    if (argc <= opt+1) do_exit("Not enough Feedback arguments",prog,1);
  	    *feedback = atoi(argv[++opt]);
  	    fbset = 1;
  	} else if (strcmp(argv[opt],"-o") == 0){
  	    opt ++;
  	    if (argc < opt + 1) do_exit("Not enough Report arguments",prog,1);
  	    while (opt < argc && *(argv[opt]) != '-'){
  		if (strcmp(argv[opt],"sum") == 0) 
  		    BF_FLIP(*outputs,OUT_SUM);
  		else if (strcmp(argv[opt],"wws") == 0) 
  		    BF_FLIP(*outputs,OUT_WWS);
  		else if (strcmp(argv[opt],"rsum") == 0) 
  		    BF_FLIP(*outputs,OUT_RSUM);
  		else if ((strcmp(argv[opt],"pralign") == 0) ||
  			 (strcmp(argv[opt],"pra") == 0)) 
  		    BF_FLIP(*outputs,OUT_PRALIGN);
  		else if (strcmp(argv[opt],"prf") == 0) 
  		    BF_FLIP(*outputs,OUT_PRALIGN_FULL);
  		else if (strcmp(argv[opt],"lur") == 0) 
  		    BF_FLIP(*outputs,OUT_LUR);
  		else if (strcmp(argv[opt],"stdout") == 0) 
  		    BF_FLIP(*outputs,OUT_STDOUT);
  		else if (strcmp(argv[opt],"sgml") == 0) 
  		    BF_FLIP(*outputs,OUT_SGML);
  		else if (strcmp(argv[opt],"nl.sgml") == 0) 
  		    BF_FLIP(*outputs,OUT_NL_SGML);
  		else if (strcmp(argv[opt],"snt") == 0) 
  		    BF_FLIP(*outputs,OUT_SENT);
  		else if (strcmp(argv[opt],"spk") == 0) 
  		    BF_FLIP(*outputs,OUT_SPKR);
  		else if (strcmp(argv[opt],"dtl") == 0) 
  		    BF_FLIP(*outputs,OUT_DTL);
  		else if (strcmp(argv[opt],"all") == 0) 
  		    BF_FLIP(*outputs,OUT_PRALIGN + OUT_SUM + OUT_RSUM);
  		else if (strcmp(argv[opt],"none") == 0) 
  		    *outputs = 0;
  		else
  		    fprintf(stderr,"Unknown report '%s'
  ",argv[opt]);
  		opt++;
  	    }
  	    /* backup if we've gone to far */
  	    if (opt < argc && *(argv[opt]) == '-') opt--;
  	    outset=1;
  	} else if (strcmp(argv[opt],"-C") == 0){
  	    opt ++;
  	    if (argc < opt + 1)
  	      do_exit("Not enough Confidence Report arguments",prog,1);
  	    while (opt < argc && *(argv[opt]) != '-'){
  		if (strcmp(argv[opt],"det") == 0) 
  		    BF_FLIP(*conf_outputs,CONF_OUT_DET);
  		else if (strcmp(argv[opt],"bhist") == 0) 
  		    BF_FLIP(*conf_outputs,CONF_OUT_BHIST);
  		else if (strcmp(argv[opt],"sbhist") == 0) 
  		    BF_FLIP(*conf_outputs,CONF_OUT_SBHIST);
  		else if (strcmp(argv[opt],"hist") == 0) 
  		    BF_FLIP(*conf_outputs,CONF_OUT_HIST);
  		else if (strcmp(argv[opt],"none") == 0) 
  		    *conf_outputs = CONF_OUT_NONE;
  		else
  		    fprintf(stderr,"Unknown confidence report '%s'
  ",
  			    argv[opt]);
  		opt++;
  	    }
  	    /* backup if we've gone to far */
  	    if (opt < argc && *(argv[opt]) == '-') opt--;
  	    outset=1;
  	} else if (strcmp(argv[opt],"-O") == 0){
  	    if (argc <= opt+1)do_exit("Output directory not specified",prog,1);
  	    *out_dir = argv[++opt];
  	} else if (strcmp(argv[opt],"-n") == 0){
  	    if (argc <= opt+1)do_exit("Output name not specified",prog,1);
  	    *out_name = argv[++opt];
  	} else if (strcmp(argv[opt],"-d") == 0){
  #if DIFF_ENABLED
  	    *use_diff = 1;
  #else
  	    do_exit("Alignments via diff have been disabled",prog,1);
  #endif
  	} else if (strcmp(argv[opt],"-s") == 0){
  	    *case_sense = 1;
  	} else if (strcmp(argv[opt],"-m") == 0){
  	    *stm2ctm_reduce = REDUCE_NOTHING;
  	    while (opt+1 < argc && *(argv[opt+1]) != '-') {
  		opt++;
  		if (strcmp(argv[opt],"ref") == 0)
  		    BF_SET(*stm2ctm_reduce,REDUCE_REF_SEGMENTS);
  		else if (strcmp(argv[opt],"hyp") == 0)
  		    BF_SET(*stm2ctm_reduce,REDUCE_HYP_WORDS);
  		else
  		    do_exit(rsprintf("Unrecognized -m option '%s'",
  				     argv[opt]),prog,1);
  	    }
  	    if (*stm2ctm_reduce == REDUCE_NOTHING)
  		*stm2ctm_reduce = REDUCE_REF_SEGMENTS;
  	} else if (strcmp(argv[opt],"-F") == 0){
  	    *frag_correct = 1;
  	} else if (strcmp(argv[opt],"-D") == 0){
  	    *opt_del = 1;
  	} else if (strcmp(argv[opt],"-c") == 0){
              *char_align = CALI_ON;
  	    while (opt+1 < argc && *(argv[opt+1]) != '-') {
  		opt++;
  		if (strcmp(argv[opt],"NOASCII") == 0)
  		    BF_SET(*char_align,CALI_NOASCII);
  		else if (strcmp(argv[opt],"DH") == 0)
  		    BF_SET(*char_align,CALI_DELHYPHEN);
  		else
  		    do_exit(rsprintf("Unrecognized character alignment "
  				     "option '%s'",argv[opt]),prog,1);
  	    }
  	} else if (strcmp(argv[opt],"-e") == 0){
  	    if (opt+1 >= argc || (opt+1 < argc && *(argv[opt+1]) == '-'))
  		do_exit("Argument required for character encoding
  ",prog,1);
  	    opt++;
  	    if (!TEXT_set_encoding(argv[opt]))
  		do_exit(rsprintf("Unrecognized character encoding "
  				 "option '%s'",argv[opt]),prog,1);
              // Parse the optional localization
  	    if (opt+1 < argc && *(argv[opt+1]) != '-'){
  	        if (!TEXT_set_lang_prof(argv[opt+1]))
  		    do_exit(rsprintf("Optional case conversion localization failed /%s/
  ",
  		            argv[opt+1]),prog,1);
    	        opt++;			 
    	     }
  	} else if (strcmp(argv[opt],"-p") == 0){
  	    *pipeout = 1;
  	    outset = 1;
  	} else if (strcmp(argv[opt],"-P") == 0){
  	    *pipein = 1;
  	} else if (strcmp(argv[opt],"-T") == 0){
  	    *time_align = 1;
  	} else if (strcmp(argv[opt],"-S") == 0){
  	    opt ++;
  	    if (argc < opt + 2) do_exit("Not enough Inferred Segmentation "
  					"arguments",prog,1);
  	    if (opt < argc && *(argv[opt]) != '-')
  		if (strcmp(argv[opt],"algo1") == 0) 
  		    *infered_wordseg ^= INF_SEG_ALGO1;
  	        else if (strcmp(argv[opt],"algo2") == 0) 
  		    *infered_wordseg ^= INF_SEG_ALGO2;
  		else 
  		    do_exit(rsprintf("Unrecognize inferred segmentation "
  				     "algorithm '%s'",argv[opt]),prog,1);
  	    else
  		do_exit("No Inferred Segmentation algorithm",prog,1);
  	    opt++;
  	    if (opt < argc && *(argv[opt]) != '-'){
  		if (stat(argv[opt],&fileinfo) != 0) 
  		    do_exit(rsprintf("Inferred Segmentation lexicon '%s'"
  				     " does not exist",argv[opt]),prog,1);
  		*lexicon = argv[opt];
  	    } else
  		do_exit("No Inferred Segmentation lexicon",prog,1);
  	    if (opt+1 < argc && *(argv[opt+1]) != '-'){
  		opt++;
  		if (strcmp(argv[opt],"ASCIITOO") == 0)
  		    BF_SET(*inf_flags,INF_ASCII_TOO);
  		else
  		    do_exit(rsprintf("Unrecognized Inferred Segmentation"
  				     " flag '%s'",argv[opt]),prog,1);
  	    }
  	} else if (strcmp(argv[opt],"-w") == 0){
  	    if (argc <= opt+1)do_exit("Word Weight list file not specified",prog,1);
  	    *wwl_file = argv[++opt];
  	} else {
              (void) fprintf(stderr,usage,prog);
              printf("Illegal argument: %s
  ",argv[opt]);
              exit(1);
          }
      }
  
      if (*pipeout){
  	*feedback = 0;
  	if (BF_isSET(*outputs,OUT_STDOUT))
  	    do_exit("Error: output can not be both to stdout and a pipe",
  		    prog,1);
      }
  
      /* set the default outputs */
      if (*outputs == 0 && !outset)
  	BF_SET(*outputs,OUT_SUM + OUT_STDOUT);
  
      /* reset feedback to 0 if STDOUT is used */
      if ((BF_isSET(*outputs,OUT_STDOUT) && outset) &&
  	(*feedback > 0 && !fbset)){
  	/* fprintf(stderr,"Feedback level changed to 0
  ");*/
  	*feedback = 0;
      }
  
      /* Check the input arguments */
      if (! *pipein && ! (*rname != (char *)0 && *nhyps > 0))
  	do_exit("Input not specified, use transcription input or "
  		"piped input",prog,1);
  
      if (*rname == (char *)0 && *nhyps > 0)
  	do_exit("Hypothesis input(s) specified, but no reference file",
  		prog,1);
      if (*rname != (char *)0 && *nhyps == 0)
  	do_exit("Reference input specified, but no Hypothesis input(s).",
  		prog,1);
  
      if (*rname != (char *)0 && *nhyps > 0){
  	if (stat(*rname,&fileinfo) != 0) 
  	    do_exit(rsprintf("Reference file '%s' does not exist",*rname),
  		    prog,1);
  
  	for (nh=0; nh < *nhyps; nh++)
  	    if (stat(hname[nh],&fileinfo) != 0) 
  		do_exit(rsprintf("Hypothesis file '%s' does not exist",
  				 hname[nh]),prog,1);
  
  	/* check the ref and hyp format arguments */
  	if (*rfmt == (char *)0) *rfmt = "trn";
  	if (! (strcmp(*rfmt,"trn") == 0 || strcmp(*rfmt,"ctm") == 0 || 
  	       strcmp(*rfmt,"tmk") == 0 || strcmp(*rfmt,"stm") == 0))
  	    do_exit(rsprintf("Reference file format '%s' not acceptable",*rfmt),
  		    prog,1);
  	
  	if (*hfmt == (char *)0) *hfmt = "trn";
  	for (i=0; i<*nhyps; i++)
  	    if (! (strcmp(hfmt[i],"trn") == 0 || strcmp(hfmt[i],"ctm") == 0 || 
  		   strcmp(hfmt[i],"tmk") == 0  || strcmp(hfmt[i],"txt") == 0)) 
  		do_exit(rsprintf("Hypothesis file format '%s' not acceptable",
  				 hfmt[i]),prog,1);
  	
  	/* check the id arguements */
  	if (id == (char *)0){
  	    if (strcmp(*rfmt,"trn") == 0)
  		do_exit("Required utterance id (option -i) for"
  			" transcript mode", prog,1);
  	    for (i=0; i<*nhyps; i++){
  		if (strcmp(hfmt[i],"trn") == 0)
  		    do_exit("Required utterance id (option -i) for"
  			    " transcript mode",prog,1);
  	    }
  	}
  
  	/* only do time alignment on ctm to ctm */
  	if (*time_align){
  	    if (strcmp(*rfmt,"ctm") != 0)
  		do_exit("Time-mediated alignments require the reference"
  			" file to be in CTM format",prog,1);
  	    for (i=0; i<*nhyps; i++)
  		if (strcmp(hfmt[i],"ctm") != 0)
  		    do_exit("Time-mediated alignments require the hypothesis"
  			    " file to be in CTM format",prog,1);
  	}
      }
      
      if (id != (char *)0){
  	if (strcmp(id,"swb") == 0)         *id_type=SWB;
  	else if (strcmp(id,"rm") == 0)     *id_type=RM;
  	else if (strcmp(id,"wsj") == 0)    *id_type=WSJ;
  	else if (strcmp(id,"spu_id") == 0) *id_type=SPUID;
          else if (strcmp(id,"sp") == 0)     *id_type=SP;
  	else
  	    do_exit(rsprintf("Utterance id type '%s' not acceptable",id),
  		    prog,1);
      }
  
      if (*stm2ctm_reduce){
  	if (strcmp(*rfmt,"stm") != 0)
  	    do_exit("-m flag requires stm format reference file",prog,1);
  	for (i=0; i<*nhyps; i++)
  	    if (strcmp(hfmt[i],"ctm") != 0)
  		do_exit("-m flag requires ctm format hypothesis file",prog,1);
  	if (*use_diff)
  	    do_exit("-m flag can not be used with -d flag",prog,1);
      }
  
      if (*char_align && (*use_diff ||
  		       (strcmp(*rfmt,"stm") == 0 && strcmp(*hfmt,"txt")==0)))
  	do_exit("Unable to do character alignments using diff
  ",prog,1);
      
      if (*wwl_file != (char *)0 && *lm_file != (char *)0)
        do_exit("Flags '-L' and '-w' are mutually exclusive
  ",prog,1);
  
      if (*wwl_file != (char *)0){
        if (*time_align) do_exit("Flags '-T' and '-w' are mutually exclusive
  ",prog,1);
        if (*char_align) do_exit("Flags '-c' and '-w' are mutually exclusive
  ",prog,1);
        if (*infered_wordseg) do_exit("Flags '-S' and '-w' are mutually exclusive
  ",prog,1);
        if (*use_diff) do_exit("Flags '-d' and '-w' are mutually exclusive
  ",prog,1);
      }
      if (*lm_file != (char *)0){
        if (*time_align) do_exit("Flags '-T' and '-L' are mutually exclusive
  ",prog,1);
        if (*char_align) do_exit("Flags '-c' and '-L' are mutually exclusive
  ",prog,1);
        if (*infered_wordseg) do_exit("Flags '-S' and '-L' are mutually exclusive
  ",prog,1);
        if (*use_diff) do_exit("Flags '-d' and '-L' are mutually exclusive
  ",prog,1);
      }
  
      if (BF_isSET(*outputs,OUT_WWS))
        if ((*wwl_file == (char *)0) && (*lm_file == (char *)0))
  	do_exit("-o wws option specified without a word weight file (via -w) or "
  		"LM file (via -L) specified
  ",
  		prog,1)	;
  }
  
  void do_exit(char *desc, char *prog, int ret){
      fprintf(stderr,usage,prog);
      fprintf(stderr,"
  %s: Error, %s
  
  ",prog,desc);
      exit(ret);
  }