postscript("l03.ps");options(width=68)
#*******************************************************/
# Mark.A Tests of homogeneity.                         */
#*******************************************************/
#*******************************************************/
# Test whether the ball frequency is the same across   */
# all sets of balls.                                   */
#*******************************************************/
#*******************************************************/
#          Lotto Example                               */
# CA lotto data from Friedman Pisani and Purvis (1998).*/
# Only one set of balls is used during each lottery    */
# drawing.  Columns represent sets of balls.  Rows rep-*/
# resent numbers 0-9.  Entries in table represent count*/
# of how often each ball appears.  Each set of balls   */
# was used 120 times, and so the expected number of    */
# times each ball comes up is 12.                      */
#*******************************************************/
lotto<-read.table("lotto.dat")                       
names(lotto)<-c("seta","setb","setc","setd")         
#R function test.chisq for contingency table testing 
#accepts data either as a matrix, or as two group    
#indicator variables.  It won't accept the SAS proc  
#freq format of two indicator variables and a fre-   
#quency variable.  To do this, you need to preprocess
#process with xtabs, to be exhibited later.          
chisq.test(as.matrix(lotto))                         
cat('\n Chi-square test of homogeneity for Lottery Data  \n') 
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death otherwise.    */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp.  Causes of death are given by var-  */
# ious positive integers in surv; I recode these to 1.  The */
# data file has more columns than we are interested in.  Put*/
# place-holding variables in for variables not of interest  */
# between variables of interest.  Data were previously pub- */
# lished by Byar and Corle (1977 Chronic Disease) and Byar  */
# and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
# mized dose begins with blank to make it alphabetized      */
# before high.                                              */
#************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-as.data.frame(scan("T46.1",flush=T,what=        
list(tableno=0,subtableno=0,lineno=0,patno=0,stage=0,rx=0,
beginmo=0,beginday=0,beginyear=0,monfol=0,surv=0)))[      
 ,c("stage","rx","monfol","surv")]                        
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
#******************************************************/
# Test whether survival is the same across treatments.*/
#******************************************************/
cat('\n Chi-square test of homogeneity for Prostate Data  \n') 
chisq.test(stage4$rx,stage4$alive)                   
#*******************************************************/
# Mark.B Fisher's exact test, and odds ratio CI.       */
#*******************************************************/
# fisher.test does exact test and gives exact CI.
cat('\n Fisher exact test for Prostate Data, dose 2 levels  \n')
fisher.test(stage4$dose,stage4$alive)                
mat<-table(stage4$dose,stage4$alive)    
# I don't think that there's a built-in that gives 
# the asymptotic confidence interval. 
exp(log((mat[1,1]*mat[2,2])/(mat[2,1]*mat[1,2]))+ 
   c(-1,1)*1.96*sqrt(sum(1/mat)))   
#************************************************/
# Mark.C Tests of Trend.                        */
#************************************************/
cat('\n Cochran-Armitage Test for Prostate Data  \n')
temp<-table(stage4$rx,stage4$alive)
prop.trend.test(temp[,1],temp[,1]+temp[,2])