options pagesize=150 linesize=68;
filename grafout 'g03.ps';
goptions device=pslmono gsfname=grafout 
   rotate=landscape gsfmode=append;
/********************************************************/
/* Mark.A Tests of homogeneity.                         */
/********************************************************/
/********************************************************/
/* Test whether the ball frequency is the same across   */
/* all sets of balls.                                   */
/********************************************************/
/********************************************************/
/*          Lotto Example                               */
/* CA lotto data from Friedman Pisani and Purvis (1998).*/
/* Only one set of balls is used during each lottery    */
/* drawing.  Columns represent sets of balls.  Rows rep-*/
/* resent numbers 0-9.  Entries in table represent count*/
/* of how often each ball appears.  Each set of balls   */
/* was used 120 times, and so the expected number of    */
/* times each ball comes up is 12.                      */
/********************************************************/
data lotto; infile 'lotto.dat'; input seta setb setc setd;
   run;
* The data file is not formatted the way we will want to;
*  use them in this lecture.  Rather than having set    ;
* indicated by column, we want all of the counts in a   ;
* single column, with a separate variable constructed to;
* indicate set.                                         ;
data lotto; set lotto;
   count=seta; set=1; ball=_n_-1; output;
   count=setb; set=2; ball=_n_-1; output;
   count=setc; set=3; ball=_n_-1; output;
   count=setd; set=4; ball=_n_-1; output;
   keep count set ball; run;
* Calculations will be done using proc freq.  By default SAS;
* gives a table with row, column, and overall percentages.  ;
* Generally these won't be so useful for us and we will sup-;
* press them with norow nocol nopct.  The chisq option gives;
* Pearson and likelihood ratio chi sq., and a Mantel-       ;
* Haentzel chi square, which is the scored test for ordered ;
* data that we'll see later.  This ordered test is not      ;
* clearly relevant for this data set.  The expected option  ;
* prints out expected values in the table.                  ;
title 'Chi-square test of homogeneity for Lottery Data'; 
proc freq data=lotto; weight count; 
   table ball*set/norow nocol nopct chisq expected; run;
/*************************************************************/
/*  Prostate data                                            */
/* From Andrews and Herzberg (1985), Data: A Collection of   */
/* Problems from Many Fields for the Student and Research    */
/* Worker, Table 46.  Observations represent subjects in a   */
/* prostate cohort study, randomized to one of four dose     */
/* levels of diethylstilbestrol.  Rx records dose in four    */
/* ordered categories, with 1 being placebo.  Disease stage  */
/* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
/* alive after 50 mo, and codes cause of death otherwise.    */
/* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
/* The on-line version of the data set adds 3 fields before  */
/* the first field in the book.  Variables of interest are   */
/* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
/* online version, resp.  Causes of death are given by var-  */
/* ious positive integers in surv; I recode these to 1.  The */
/* data file has more columns than we are interested in.  Put*/
/* place-holding variables in for variables not of interest  */
/* between variables of interest.  Data were previously pub- */
/* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
/* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
/* mized dose begins with blank to make it alphabetized      */
/* before high.                                              */
/*************************************************************/
* If we omit the ones past where the data of interest stops   ;
*  out of the data step, SAS will ignore them.                ;
data prostate; infile 'T46.1';
   input tableno subtableno lineno patno stage rx beginmo 
      beginday beginyear monfol surv;
   if surv>0 then alive=0; if surv=0 then alive=1; 
   dose=" low"; if rx>1 then dose="high"; 
   keep stage rx monfol alive dose; run;
data stage4; set prostate; if stage<4 then delete; run;
/*******************************************************/
/* Test whether survival is the same across treatments.*/
/*******************************************************/
title 'Chi-square test of homogeneity for Prostate Data'; 
proc freq data=stage4;
   table rx*alive/norow nocol nopercent expected chisq; run;
/********************************************************/
/* Mark.B Fisher's exact test, and odds ratio CI.       */
/********************************************************/
* Adding the cmh option below, and applying this to a    ;
* 2x2 table, forces two variants of estimates and conf-  ;
* idence intervals for the odds ratios, marked Case-Con- ;
* trol.  These are defined generally for a series of tab-;
* les, and reduce to the same thing for a simgle table.  ;
* It also forces calculation of an additional chi-square ;
* test statistic, this one using the exact hypergeometric;
* variance formula with a slightly different denomonator.;
* Fisher's exact test is always done when asking for     ;
* chi-square test for 2x2 tables.  You can force it for  ;
* larger tables with fisher option, and general exact    ;
* calculation with exact option.  SAS/STAT doesn't do    ;
* exact CIs.  An extra-cost addon, LogXact, does these.  ;
title 'Fisher exact test for Prostate Data, dose 2 levels';
proc freq data=stage4;
   table dose*alive/norow nocol nopct expected chisq cmh; 
   run;
/*************************************************/
/* Mark.C Tests of Trend.                        */
/*************************************************/
title 'Cochran-Armitage Test for Prostate Data';
* Get more general trend test with cmh option, which;
* triggers the use of different scores.;
proc freq data=stage4; table rx*alive/noprint trend; run;