postscript("l04.ps");options(width=68)
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death otherwise.    */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp.  Causes of death are given by var-  */
# ious positive integers in surv; I recode these to 1.  The */
# data file has more columns than we are interested in.  Put*/
# place-holding variables in for variables not of interest  */
# between variables of interest.  Data were previously pub- */
# lished by Byar and Corle (1977 Chronic Disease) and Byar  */
# and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
# mized dose begins with blank to make it alphabetized      */
# before high.                                              */
#************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-as.data.frame(scan("T46.1",flush=T,what=        
list(tableno=0,subtableno=0,lineno=0,patno=0,stage=0,rx=0,
beginmo=0,beginday=0,beginyear=0,monfol=0,surv=0)))[      
 ,c("stage","rx","monfol","surv")]                        
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
#********************************************************/
# Mark.A Ordered Association tests when both dimensions */
# may exceed 2                                          */
#********************************************************/
#Test for trend in various ways.  The correlation stat-  */
#istic generated by cmh uses the variance from the hyper-*/
#geometric distribution, and the C-A test, triggered by  */
#the trend keyword, uses the unconditional distribution. */
#Hence chi-square statistics differ by a factor of       */
#(n-1)/n.  Furthermore, you can use various scores.  The */
#default are a sequence of integers.                     */
#R featuers a wide variety of tests associated with the    
#names Cochran, Mantel, and Haenszel.  The only one I found
#that does a general test of association between two       
#variables, both with three or more categories, is CMHtst  
#in library vcdExtra.  If you are going to use it, you must
#install the library.                                      
cat('\n Prostate Data, correlation trend test, Ridit scores  \n')
library(vcdExtra)
temp<-table(stage4$rx,stage4$alive)
rs<-apply(temp,1,sum); rs<-c(0,cumsum(rs[-length(rs)]))+rs/2
CMHtest(temp,rscore=rs)                                    
cat('\n Prostate Data, Cochran-Armitage test of trend  \n')
prop.trend.test(temp[,1],temp[,1]+temp[,2],score=rs)
#*******************************************************/
# Mark.B Testing in the presence of Confounding.       */
# Compare odds ratio estimates ignoring and controling */
# for a third variable.                                */
#*******************************************************/
#*************************************************************/
#          Death Penalty Example                             */
# Death penalty data from Radelet, M. (1981), Racial         */
# Characteristics and Imposition of the Death Penalty        */
# American Sociological Review, 46, 918-927. Data reflect    */
# outcomes of murder cases in which race of victim and       */
# principle suspect are known.  1st column is indicator of   */
# relationship between victim and suspect (P for close       */
# relationship, N for not), Race of victim and defendant     */
# (both either B or W; other races deleted), number of cases,*/
# number of indictments, and number of death sentences.      */
# Referenced by Moore (2000) Basic Practice of Statistics, p.*/
# Following Moore, we will only use the non-primary cases.   */
# http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat  */
#*************************************************************/
death1<-as.data.frame(scan("floridadeath.dat",         
  what=list(rela="",rv="",rd="",nc=0,ni=0,nd=0)))    
death2<-death1; death1$we<-death1$nd;death1$lc<-"Death" 
death2$we<-death1$nc-death1$nd;death2$lc<-"Live"
death<-rbind(death1,death2)
death$rdc<-c("White defend.","Black defend.")[(death$rd=="B")+1]
death$rvc<-c("White victim","Black victim")[(death$rv=="B")+1]
nonprim<-death[death$rela=="N",]                       
m<-xtabs(we~rdc+lc,nonprim);m
chisq.test(m)                                         
# mantelhaen.test requires the stratifier variable to 
# be the last dimension. See mantelhaen.test document-
# ation for nontabulated observations.                
m<-xtabs(we~rdc+lc+rvc,nonprim);m
mantelhaen.test(m,correct=F)                          
#In this case, note large effect of continuity corr.  
mantelhaen.test(m)                          
cat('\n Death Penalty Data  \n')
#*******************************************************/
# Mark.C Estimation in the presence of Confounding.    */
# Compare odds ratio estimates ignoring and controling */
# for a third variable.                                */
#*******************************************************/
#R does MH estimation whenever it does MH testing for a;
#2x2xK table, and doesn't obviously give the logit est.;
#I won't bother to rerun the code.                     ;
#library(DescTools)#To get the BreslowDayTest
#BreslowDayTest(m)
#********************************************************/
# Mark.D: Test the strategy of testing for confounding, */
# and stratifying only if test says to.                 */
# Use h people at each level of the confounder, split   */
# between exposed and unexposed r1, h-r1 and  h-r1,r1   */
# The tables are                                        */
#   confounder=0                   confounder=1         */
#        Control Case             Control Case          */
# Unexp. ii       r1-ii   | r1    kk     h-r1-kk | h-r1 */
# Exp.   jj       h-r1-jj | h-r1  ll     r1-ll   | r1   */
# Total  ii+jj    h-ii-jj         kk+ll  h-kk-ll        */
#                                                       */
# Then confounder-exposure table is                     */
#            Confounder=0 Confounder=1                  */
# Unexposed  r1                   h-r1                  */
# Exposed    h-r1                 r1                    */
# We'll compare two procedures: always doing Mantel-    */
# Hanzel test, and only doing MH test when confounder   */
# is demonstrated related to case/control.  Table is    */
#       Control       Case                              */
# 0     ii+jj         h-ii-jj       | h                 */
# 1     kk+ll         h-kk-ll       | h                 */
# Total ii+jj+kk+ll   h-ii-jj-kk-ll                     */
# Do Chi-square test of H0:                             */
# 1: unit odds ratio accross strata. (CMH)              */
# 2: unit odds ratio, unstratified.                     */
# 3: no association between case/control and confounder.*/
# Arguments are:                                        */
# q1=P[Disease|Unexposed] for confounder low            */
# q2=P[Disease|  Exposed] for confounder low            */
# q3=P[Disease|Unexposed] for confounder high           */
# q4=P[Disease|  Exposed] for confounder high           */
# r1=number unexposed for confounder low                */
#********************************************************/
# Table probabilities are .6 ,.4 ,.4 ,.6   */
# Table probabilities are .4 ,.6 ,27/35,8/35.  */
fun.stratsize<-function(qv,cut,r1,h){
  cv<-c(1.96^2,1.96^2,cut)
  pv<-ind<-rep(NA,4)
  tt<-rep(NA,3)
  s<-c(0,0)
  m1<-h^2*(h-1)/(r1*(h-r1))
  for(ii in 0:r1){
     pv[1]<-dbinom(ii,r1,qv[1])
     for(jj in 0:(h-r1)){
        pv[2]<-dbinom(jj,h-r1,qv[2])
        e1<-r1*(ii+jj)/h
        for(kk in 0:(h-r1)){
           pv[3]<-dbinom(kk,h-r1,qv[3])
           for(ll in 0:r1){
              pv[4]<-dbinom(ll,r1,qv[4])
              tot <- ii+jj+kk+ll
              e2<-(h-r1)*(kk+ll)/h
              e3<-h*tot/(2*h)
              bot<-((ii+jj)*(h-ii-jj) +(kk+ll)*(h-kk-ll))
              tt[1]<-if(bot>0){
                 m1*((ii-e1)+(kk-e2))^2/bot}else{0}
              bv<-c(e3,h-e3,tot-e3,h-tot+e3)
              if(any(bv<=0)){
                 tt[2:3]<-0
              }else{
                 m2<-sum(1/bv)
                 tt[2]<-(ii+kk-e3)^2*m2
                 tt[3]<-(ii+jj-e3)^2*m2
              }
              for(mm in seq(3)) ind[mm]<-tt[mm]>cv[mm]
              ind[4]<-(ind[3]&ind[1])|((!ind[3])&ind[2])
              for(mm in seq(2))
                 s[mm]=s[mm]+prod(pv)*ind[3*mm-2]
           }#end for ll
        }#end for kk
     }#end for ll
  }#end for kk
  return(c(qv,r1,s))
}
fun.stratsize(c(.6,.6,.4,.4),3.84,15,40)
fun.stratsize(c(.6,.6,.4,.4),3.84,18,40)
fun.stratsize(c(.6,.6,.4,.4),3.84,10,40)
fun.stratsize(c(.6,.4,.4,.2286),3.84,15,40)