/**********************************************************************/
/*                                                                    */
/*             FILE: signtest.c                                       */
/*             WRITTEN BY: Jonathan G. Fiscus                         */
/*             DATE: April 14 1989                                    */
/*                   NATIONAL INSTITUTE OF STANDARDS AND TECHNOLOGY   */
/*                   SPEECH RECOGNITION GROUP                         */
/*                                                                    */
/*             USAGE: This uses the rank structure to perform         */
/*                    the non-parametric Sign Test and generates      */
/*                    a report.                                       */
/*                                                                    */
/*             SOURCE:Statistics: Basic Techniques for Solving        */
/*                    Applied Problems, by Stephen A. Book, 1977      */
/*                                                                    */
/*             See Also: The documentation file "signtest.doc"        */
/*                                                                    */
/**********************************************************************/

#include "sctk.h"

/****************************************************************/
/*   main procedure to perform the sign test on the RANK        */
/*   structure.                                                 */
/*                                                              */
/*   Every pair of treatments (comp1 < comp2) is compared with  */
/*   compute_signtest_for_treatment(); only the upper triangle  */
/*   of the two result matrices is filled in, the remaining     */
/*   cells keep their initial values (NO_DIFF / 0.0).           */
/*                                                              */
/*   vars:                                                      */
/*      verbose        : print the per-pair calculation tables  */
/*      report         : print the final comparison matrix      */
/*      formula_str    : text naming the compared percentage    */
/*      formula_id     : 'E' selects the zero_is_best behaviour */
/*      out_winner     : receives the allocated n_trt x n_trt   */
/*                       winner matrix (caller owns it)         */
/*      outroot        : "-" writes to stdout, otherwise the    */
/*                       report goes to "<outroot>.sign"        */
/*      feedback       : >= 1 announces the output file name    */
/*      out_confidence : receives the allocated n_trt x n_trt   */
/*                       matrix of test statistics (caller owns)*/
/****************************************************************/
void perform_signtest(RANK *rank, int verbose, int report, char *formula_str, char formula_id, int ***out_winner, char *outroot, int feedback, double ***out_confidence)
{
    int comp1, comp2, **winner;
    double **confidence;
    FILE *fp = stdout;

    /* A report file is only needed when something will be printed. */
    if ((report || verbose) && strcmp(outroot,"-") != 0){
        char *f = rsprintf("%s.sign",outroot);

        if ((fp = fopen(f,"w")) == (FILE *)0){
            fprintf(stderr,"Warning: Open of %s for write failed. "
                    "Using stdout instead.\n",f);
            fp = stdout;
        } else if (feedback >= 1) {
            /* Announce the file only when one was really opened; with */
            /* outroot "-" the report goes to stdout and no file exists. */
            printf(" Output written to '%s'\n",f);
        }
    }

    alloc_2dimZ(winner,rank->n_trt,rank->n_trt,int,NO_DIFF);
    alloc_2dimZ(confidence,rank->n_trt,rank->n_trt,double,0.0);
    *out_confidence = confidence;
    *out_winner = winner;

    for (comp1=0; comp1 < rank->n_trt - 1; comp1++) {
        for (comp2=comp1+1; comp2 < rank->n_trt; comp2++) {
            winner[comp1][comp2] =
                compute_signtest_for_treatment(rank,comp1,comp2,"Spkr",
                                               formula_str,verbose,
                                               formula_id=='E',fp,
                                               &(confidence[comp1][comp2]));
        }
    }

    if (report)
        print_trt_comp_matrix_for_RANK_one_winner(winner,rank,
                      "Comparison Matrix for the Sign Test",formula_str,
                      "Speaker",fp);

    if (fp != stdout)
        fclose(fp);
}

/****************************************************************/
/*   Given the two indexes of treatments to compare (in the     */
/*   RANK Struct) compute the Sign Test statistics              */
/*   vars:                                                      */
/*       zero_is_best : This identifies the "ideal" value for   */
/*                      the value computed in the rank struct   */
/*                      percentages.                            */
/*       confidence   : receives the test statistic computed    */
/*                      by sign_test_analysis()                 */
/*                                                              */
/*   The two treatments are ordered so that tptr[0] is the one  */
/*   with the larger percentage sum over all blocks (treat2 is  */
/*   placed first on a tie).  The value returned is the result  */
/*   of sign_test_analysis(), negated when that ordering        */
/*   swapped treat1 and treat2.                                 */
/****************************************************************/
int compute_signtest_for_treatment(RANK *rank, int treat1, int treat2, char *block_id, char *formula_str, int verbose, int zero_is_best, FILE *fp, double *confidence)
{
    /* Separator printed between table columns; rep_width is derived   */
    /* from it so the table width and the printed rows always agree.   */
    static const char col_gap[] = "    ";
    int sum_plus=0, sum_minus=0, sum_equal=0, i;
    int block, max_len_block=0, max_len_treat=6;
    int tptr[2];     /* a sorting pointer array for the treatment numbers */
    char *pct_format, thresh_str[140];
    TEXT *title_line = (TEXT *)0;
    char *diff_line;
    int paper_width = 79, rep_width, diff_col_len;
    int len;
    double equal_thresh = 0.005;
    double sum_trt1=0.0, sum_trt2=0.0;

    /* compute the maximum block title length */
    max_len_block = (int)strlen(block_id);
    for (block=0; block < rank->n_blk; block++) {
        len = (int)strlen(rank->blk_name[block]);
        if (len > max_len_block)
            max_len_block = len;
    }
    len = (int)strlen(rank->trt_name[treat1]);
    if (max_len_treat < len)
        max_len_treat = len;
    len = (int)strlen(rank->trt_name[treat2]);
    if (max_len_treat < len)
        max_len_treat = len;

    /* set the treatment sorting array */
    for (block=0; block < rank->n_blk; block++){
        sum_trt1 += rank->pcts[block][treat1];
        sum_trt2 += rank->pcts[block][treat2];
    }
    if (sum_trt1 > sum_trt2) {
        tptr[0] = treat1;
        tptr[1] = treat2;
    } else {
        tptr[0] = treat2;
        tptr[1] = treat1;
    }

    /* set up format strings and titles */
    sprintf(thresh_str,"(Threshold for equal percentages +- %5.3f)",
            equal_thresh);
    alloc_singarr(pct_format, max_len_treat + 2,char);
    alloc_singarr(diff_line, max_len_treat*2 + 7,char);

    /* pct_format is "%6.2f" padded with blanks to max_len_treat columns */
    pct_format[0] = '\0';
    strcat(pct_format,center("",(max_len_treat-6)/2));
    strcat(pct_format,"%6.2f");
    strcat(pct_format,center("",max_len_treat - ((max_len_treat-6)/2 + 6)));

    sprintf(diff_line,"[%s - %s]",rank->trt_name[tptr[0]],
            rank->trt_name[tptr[1]]);
    diff_col_len = 15;
    if ((int)strlen(diff_line) > diff_col_len)
        diff_col_len = (int)strlen(diff_line);

    /* block column + two treatment columns + three gaps + sign column */
    rep_width = max_len_block + max_len_treat * 2 +
                3 * (int)(sizeof(col_gap) - 1) + diff_col_len;

    /*  Print a detailed table showing sign differences */
    if (verbose) {
        sum_trt1 = sum_trt2 = 0.0;   /* re-accumulated below in tptr order */

        fprintf(fp,"%s\n",center("Sign Test Calculations Table Comparing",
                                 paper_width));
        title_line = TEXT_strdup(rsprintf("%s %s Percentages for Systems %s and %s",
                                          block_id, formula_str,
                                          rank->trt_name[tptr[0]],
                                          rank->trt_name[tptr[1]]));
        fprintf(fp,"%s\n",center(title_line,paper_width));
        fprintf(fp,"%s\n\n",center(thresh_str,paper_width));

        /* first heading row: only the sign column has a caption */
        fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
        fprintf(fp,"%s",center("",max_len_block));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s",center("",max_len_treat));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s",center("",max_len_treat));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s\n",center("Difference Sign",diff_col_len));

        /* second heading row: column names */
        fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
        fprintf(fp,"%s",center(block_id,max_len_block));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s",center(rank->trt_name[tptr[0]],max_len_treat));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s",center(rank->trt_name[tptr[1]],max_len_treat));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,"%s\n",center(diff_line,diff_col_len));

        /* NOTE(review): the body of this rule loop was rebuilt from a  */
        /* damaged span of the source text -- confirm against upstream. */
        fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
        for (i=0; i < rep_width; i++)
            fprintf(fp,"-");
        fprintf(fp,"\n");
    }

    /* classify every block as +, - or 0 and count each class */
    for (block=0; block < rank->n_blk; block++){
        double pct0 = rank->pcts[block][tptr[0]];
        double pct1 = rank->pcts[block][tptr[1]];

        if (verbose) {
            fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
            fprintf(fp,"%s",center(rank->blk_name[block],max_len_block));
            fprintf(fp,"%s",col_gap);
            fprintf(fp,pct_format,pct0);
            fprintf(fp,"%s",col_gap);
            fprintf(fp,pct_format,pct1);
            fprintf(fp,"%s",col_gap);
        }
        if (fabs(pct0 - pct1) <= equal_thresh){
            if (verbose) fprintf(fp,"%s\n",center("0",diff_col_len));
            sum_equal++;
        } else if (pct0 < pct1){
            if (verbose) fprintf(fp,"%s\n",center("-",diff_col_len));
            sum_minus++;
        } else {
            if (verbose) fprintf(fp,"%s\n",center("+",diff_col_len));
            sum_plus++;
        }
        sum_trt1 += pct0;
        sum_trt2 += pct1;
    }

    if (verbose){
        char *count_line;

        /* NOTE(review): the rule and the start of the averages row      */
        /* (including the "Avg." label) were rebuilt from a damaged span */
        /* of the source text -- confirm against the upstream file.      */
        fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
        for (i=0; i < rep_width; i++)
            fprintf(fp,"-");
        fprintf(fp,"\n");
        fprintf(fp,"%s",center("",(paper_width - rep_width)/2));
        fprintf(fp,"%s",center("Avg.",max_len_block));
        fprintf(fp,"%s",col_gap);
        fprintf(fp,pct_format,sum_trt1 / rank->n_blk);
        fprintf(fp,"%s",col_gap);
        fprintf(fp,pct_format,sum_trt2 / rank->n_blk);
        fprintf(fp,"\n\n");

        /* The count lines get their own buffer: title_line was sized   */
        /* for the table title and may be shorter than these strings.   */
        /* The fixed text plus a formatted int needs at most 60 chars.  */
        alloc_singarr(count_line, (int)strlen(formula_str) + 80, char);

        sprintf(count_line,"No. Speakers with Positive %s Differences = N(+) = %2d",formula_str,sum_plus);
        fprintf(fp,"%s\n",center(count_line,paper_width));
        sprintf(count_line,"No. Speakers with Negative %s Differences = N(-) = %2d",formula_str,sum_minus);
        fprintf(fp,"%s\n",center(count_line,paper_width));
        sprintf(count_line,"No. Speakers with No %s Differences = N(0) = %2d",formula_str,sum_equal);
        fprintf(fp,"%s\n",center(count_line,paper_width));
        fprintf(fp,"\n\n");

        free_singarr(count_line,char);
    }

    free_singarr(pct_format,char);
    free_singarr(diff_line,char);
    if (title_line != (TEXT *)0)
        free_singarr(title_line, TEXT);

    /* Analyze the Results */
    {
        int result;

        result = sign_test_analysis(sum_plus,sum_minus,sum_equal,"+","-",0,
                                    0.05,verbose,
                                    rank->trt_name[tptr[0]],
                                    rank->trt_name[tptr[1]],
                                    tptr,zero_is_best,fp,confidence);
        /* if the result is significant, system which is better depends on if */
        /* the treatments have been swapped, a negative result means tptr[0]  */
        /* is better, positive one tptr[1] is better                          */
        return(result * ((tptr[0] == treat1) ? 1 : (-1)));
    }
}

/****************************************************************/
/*   Given the vital numbers for computing a sign test,         */
/*   Compute it, and if requested, print a verbose analysis     */
/*                                                              */
/*   num_a / num_b / num_z are the counts of positive, negative */
/*   and zero differences.  Zero differences are split between  */
/*   the other two counts, the odd one going to num_b.          */
/*   *confidence receives the two-tailed test statistic, which  */
/*   is 1.0 when the adjusted counts are equal.                 */
/*                                                              */
/*   Returns TEST_DIFF (negated unless zero_is_best) when the   */
/*   statistic is below alpha, otherwise NO_DIFF.               */
/*   str_a, str_b, str_z and tptr are accepted but not used.    */
/****************************************************************/
int sign_test_analysis(int num_a, int num_b, int num_z, char *str_a, char *str_b, int str_z, double alpha, int verbose, char *treat1_str, char *treat2_str, int *tptr, int zero_is_best, FILE *fp, double *confidence)
{
    double test_stat, p=0.5;
    int i;

    /* parameters kept for interface compatibility only */
    (void)str_a;
    (void)str_b;
    (void)str_z;
    (void)tptr;

    /* distribute the ties: half each, the extra one to the negatives */
    num_b += (num_z / 2) + (num_z % 2);
    num_a += (num_z / 2);
    num_z = 0;

    /* multiplication by 2 means it's a two-tailed test */
    if (num_a != num_b)
        test_stat = 2.0 * compute_acc_binomial(MIN(num_a,num_b),num_a+num_b,p);
    else
        test_stat = 1.0;
    *confidence = test_stat;

    if (verbose){
        fprintf(fp,"The NULL Hypothesis:\n\n");
        fprintf(fp," The number of speakers for which the differences is positive\n");
        fprintf(fp," equals the number of speakers for which the differences is\n");
        fprintf(fp," negative.\n");
        fprintf(fp," P(N(+)) = P(N(-)) = 0.50\n\n");
        fprintf(fp,"Alternate Hypothesis:\n\n");
        fprintf(fp," The number of speakers for which the differences is positive \n");
        fprintf(fp," is NOT equal to the number of speakers for which the difference\n");
        fprintf(fp," is negative.\n\n");
        fprintf(fp,"Decision Analysis:\n\n");
        fprintf(fp," Assumptions:\n");
        fprintf(fp," A1: The distibution of positive and negative differences\n");
        fprintf(fp," follows the binomial distribution for N fair coin tosses.\n");
        fprintf(fp,"\n");
        fprintf(fp," A2: In order to resolve the complication caused by cases where the\n");
        fprintf(fp," difference in Word Accuracy is zero, half of the cases will\n");
        fprintf(fp," be assigned to N(+) and half to N(-). In the event of an\n");
        fprintf(fp," odd number of zero differences, N(-) will get one extra, this\n");
        fprintf(fp," reduces the probability of there being a difference between\n");
        fprintf(fp," the two systems.\n\n");
        fprintf(fp," Rejection criterion:\n");
        fprintf(fp," Reject the null hypothesis at the 95%% confidence level based\n");
        fprintf(fp," on the following critical values table. N is the number of\n");
        fprintf(fp," speakers being compared and N(-) is the number of negative\n");
        fprintf(fp," differences.\n\n");

        /* print a table of critical values */
        fprintf(fp," MIN(N(-),N(+)) P(MIN(N(-),N(+)) | N=%2d)\n",num_a+num_b);
        fprintf(fp," -------------- ------------------------\n");
        for (i=0; i <= (num_a+num_b)/2 && (i - 3) <= num_b; i++){
            double val = 2.0*compute_acc_binomial(i,num_a+num_b,p);
            double valp1 = 2.0*compute_acc_binomial(i+1,num_a+num_b,p);

            if (val >= 0.0005)
                fprintf(fp," %3d %5.3f", i,val);
            else
                fprintf(fp," %3d <0.001 ", i);

            /* mark the last row whose probability is still below alpha */
            if ((val < alpha) && (valp1 > alpha))
                fprintf(fp," <--- Null Hypothesis rejected at or below this point\n");
            else
                fprintf(fp,"\n");
        }
        fprintf(fp,"\n");
        fprintf(fp," Decision:\n");
        fprintf(fp," There were N(-)=%d negative differences , the probability of\n",num_b);
        fprintf(fp," it occuring is %5.3f, therefore the null hypothesis ",test_stat);
        if (test_stat < alpha){
            fprintf(fp,"is REJECTED\n");
            fprintf(fp," in favor of the Alternate Hypothesis. Further, %s is the\n",
                    (zero_is_best) ? treat2_str : treat1_str);
            fprintf(fp," better System.\n");
        } else {
            fprintf(fp,"is ACCEPTED\n");
            fprintf(fp," There is no statistical difference between %s and %s\n",treat1_str,treat2_str);
        }
        form_feed(fp);
    }

    if (test_stat < alpha)
        return(TEST_DIFF * ((zero_is_best) ? 1 : -1));
    return(NO_DIFF);
}