options pagesize=150 linesize=68;
filename grafout 'g04.ps';
goptions device=pslmono gsfname=grafout 
   rotate=landscape gsfmode=append;
/*************************************************************/
/*  Prostate data                                            */
/* From Andrews and Herzberg (1985), Data: A Collection of   */
/* Problems from Many Fields for the Student and Research    */
/* Worker, Table 46.  Observations represent subjects in a   */
/* prostate cohort study, randomized to one of four dose     */
/* levels of diethylstilbestrol.  Rx records dose in four    */
/* ordered categories, with 1 being placebo.  Disease stage  */
/* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
/* alive after 50 mo, and codes cause of death otherwise.    */
/* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
/* The on-line version of the data set adds 3 fields before  */
/* the first field in the book.  Variables of interest are   */
/* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
/* online version, resp.  Causes of death are given by var-  */
/* ious positive integers in surv; I recode these to 1.  The */
/* data file has more columns than we are interested in.  Put*/
/* place-holding variables in for variables not of interest  */
/* between variables of interest.  Data were previously pub- */
/* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
/* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
/* mized dose begins with blank to make it alphabetized      */
/* before high.                                              */
/*************************************************************/
* If we omit the ones past where the data of interest stops   ;
*  out of the data step, SAS will ignore them.                ;
data prostate; infile 'T46.1';
   input tableno subtableno lineno patno stage rx beginmo 
      beginday beginyear monfol surv;
   if surv>0 then alive=0; if surv=0 then alive=1; 
   dose=" low"; if rx>1 then dose="high"; 
   keep stage rx monfol alive dose; run;
data stage4; set prostate; if stage<4 then delete; run;
/*********************************************************/
/* Mark.A Ordered Association tests when both dimensions */
/* may exceed 2                                          */
/*********************************************************/
/*Test for trend in various ways.  The correlation stat-  */
/*istic generated by cmh uses the variance from the hyper-*/
/*geometric distribution, and the C-A test, triggered by  */
/*the trend keyword, uses the unconditional distribution. */
/*Hence chi-square statistics differ by a factor of       */
/*(n-1)/n.  Furthermore, you can use various scores.  The */
/*default are a sequence of integers.                     */
title 'Prostate Data, correlation trend test, Ridit scores';
proc freq data=stage4;
   table rx*alive/norow nocol nopercent expected 
     cmh scores=ridit; run;
title 'Prostate Data, Cochran-Armitage test of trend';
proc freq data=stage4;
   table rx*alive/norow nocol nopercent expected 
     trend scores=ridit; run;
/********************************************************/
/* Mark.B Testing in the presence of Confounding.       */
/* Compare odds ratio estimates ignoring and controling */
/* for a third variable.                                */
/********************************************************/
/**************************************************************/
/*          Death Penalty Example                             */
/* Death penalty data from Radelet, M. (1981), Racial         */
/* Characteristics and Imposition of the Death Penalty        */
/* American Sociological Review, 46, 918-927. Data reflect    */
/* outcomes of murder cases in which race of victim and       */
/* principle suspect are known.  1st column is indicator of   */
/* relationship between victim and suspect (P for close       */
/* relationship, N for not), Race of victim and defendant     */
/* (both either B or W; other races deleted), number of cases,*/
/* number of indictments, and number of death sentences.      */
/* Referenced by Moore (2000) Basic Practice of Statistics, p.*/
/* Following Moore, we will only use the non-primary cases.   */
/* http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat  */
/**************************************************************/
title1 'Death penalty data';
data death; infile 'floridadeath.dat';
   input rela $ rv $ rd $ nc ni nd ;
   rdc="White Defend.";if rd="B" then rdc="Black Defend.";
   rvc="White Victim"; if rv="B" then rvc="Black Victim";
   lc="Death"; we=nd; output ;
   lc="Live"; we=nc-nd; output;
run;
data nonprim; set death; if rela="P" then delete; run;
* SAS requires that one first sort by stratification var-;
* iables.  We'll then make three calls to freq, to       ;
* get aggregate, stratified, and controlled tests.  For  ;
* now ignore estimates.                                  ;
proc sort data=nonprim; by rvc; run;
title "Death Penalty Data";
proc freq data=nonprim; weight we; 
   table rdc*lc/norow nopct nocol chisq; run;
proc freq data=nonprim; weight we; by rvc;
   table rdc*lc/norow nopct nocol chisq; run;
proc freq data=nonprim; weight we; 
   table rvc*rdc*lc/noprint cmh; run;
/********************************************************/
/* Mark.C Estimation in the presence of Confounding.    */
/* Compare odds ratio estimates ignoring and controling */
/* for a third variable.                                */
/********************************************************/
* We'll make three more calls to freq, to get aggregate, ;
* stratified, and controlled estimates.  Suppress print- ;
* ing, but save estimates in an output data set to be    ;
* printed at the end.                                    ;
proc freq data=nonprim noprint; weight we;
   output out=o1 lgor mhor;
   table rdc*lc/chisq cmh; run;
data o1; set o1; rvc="Aggregated"; run;
proc freq data=nonprim noprint; weight we;
   output out=o2 lgor mhor; by rvc;
   table rdc*lc/chisq cmh; run;
proc freq data=nonprim noprint; weight we;
   output out=o3 lgor mhor;
   table rvc*rdc*lc/chisq cmh; run;
data o3; set o3; rvc="Adjusted"; run;
data out; set o1 o2 o3 ; run;
proc print data=out noobs; run;
/*********************************************************/
/* Mark.D: Test the strategy of testing for confounding, */
/* and stratifying only if test says to.                 */
/* Use h people at each level of the confounder, split   */
/* between exposed and unexposed r1, h-r1 and  h-r1,r1   */
/* The tables are                                        */
/*   confounder=0                   confounder=1         */
/*        Control Case             Control Case          */
/* Unexp. ii       r1-ii   | r1    kk     h-r1-kk | h-r1 */
/* Exp.   jj       h-r1-jj | h-r1  ll     r1-ll   | r1   */
/* Total  ii+jj    h-ii-jj         kk+ll  h-kk-ll        */
/*                                                       */
/* Then confounder-exposure table is                     */
/*            Confounder=0 Confounder=1                  */
/* Unexposed  r1                   h-r1                  */
/* Exposed    h-r1                 r1                    */
/* We'll compare two procedures: always doing Mantel-    */
/* Hanzel test, and only doing MH test when confounder   */
/* is demonstrated related to case/control.  Table is    */
/*       Control       Case                              */
/* 0     ii+jj         h-ii-jj       | h                 */
/* 1     kk+ll         h-kk-ll       | h                 */
/* Total ii+jj+kk+ll   h-ii-jj-kk-ll                     */
/* Do Chi-square test of H0:                             */
/* 1: unit odds ratio accross strata. (CMH)              */
/* 2: unit odds ratio, unstratified.                     */
/* 3: no association between case/control and confounder.*/
/* Arguments are:                                        */
/* q1=P[Disease|Unexposed] for confounder low            */
/* q2=P[Disease|  Exposed] for confounder low            */
/* q3=P[Disease|Unexposed] for confounder high           */
/* q4=P[Disease|  Exposed] for confounder high           */
/* r1=number unexposed for confounder low                */
/*********************************************************/

%macro stratsize(q1,q2,q3,q4,cut,r1,h);
data fake;
   do ii=0 to &r1;    p1=pmf('Binomial',ii,&q1,&r1);
   do jj=0 to &h-&r1; p2=pmf('Binomial',jj,&q2,&h-&r1);
   e1=&r1*(ii+jj)/&h;
   do kk=0 to &h-&r1; p3=pmf('Binomial',kk,&q3,&h-&r1);
   do ll=0 to &r1;    p4=pmf('Binomial',ll,&q4,&r1);
   tot=ii+jj+kk+ll;
   e2=(&h-&r1)*(kk+ll)/&h;
   e3=&h*tot/(2*&h);
   if (&h*ii)=(&r1*(ii+jj)) and (kk*&h)=((&h-&r1)*(kk+ll)) 
      then t1=0; else t1=((ii-e1)+(kk-e2))**2/(
      &r1*(&h-&r1)*(ii+jj)*(&h-ii-jj)/(&h**2*(&h-1)) 
      +&r1*(&h-&r1)*(kk+ll)*(&h-kk-ll)/(&h**2*(&h-1)));
   if (ii+kk-e3)=0 then t2=0; else t2=(ii+kk-e3)**2*
      (1/e3+1/(&h-e3)+1/(tot-e3)+1/(&h-tot+e3));
   if (ii+jj-e3)=0 then t3=0; else t3=(ii+jj-e3)**2*
      (1/e3+1/(&h-e3)+1/(tot-e3)+1/(&h-tot+e3));
   s1=p1*p2*p3*p4;
   s2=s1;
   if t1<1.96**2 then s1=0;
   if(t3<&cut) then do; if (t2<1.96**2) then s2=0; end;
   if(t3>=&cut) then do; if (t1<1.96**2) then s2=0; end;
   output;
   end; end; end; end;
run;
proc means data=fake sum noprint; var s1 s2; 
   output out=outs sum=; run;
data outs; set outs; q1=&q1; q2=&q2; q3=&q3; q4=&q4; 
   r1=&r1; run;
data outp; set outp outs; if q1=. then delete; run;
%mend stratsize;
data outp; run;
/* Table probabilities are .6 ,.4 ,.4 ,.6   */
%stratsize(.6,.6,.4,.4,3.84,15,40);
%stratsize(.6,.6,.4,.4,3.84,18,40);
%stratsize(.6,.6,.4,.4,3.84,10,40);
/* Table probabilities are .4 ,.6 ,27/35,8/35.  */
%stratsize(.6,.4,.4,.2286,3.84,15,40);
proc print data=outp noobs; var q1 q2 q3 q4 r1 s1 s2; run;