postscript("l06.ps");options(width=68)
#********************************************************/
# Mark.A Multi-Way Analyses                             */
#********************************************************/
cat('\n Shipping example, 1 categorical and 2 cts. covs.  \n')
#*************************************************************/
# Shipping Data: McCullagh and Nelder (1989) Generalized     */
# Linear Models provide data on losses by a shipping insurer.*/
# Data are grouped into putatively homogeneous classes, based*/
# on ship type, start of construction 5 year period, star of */
# observation 5 year period, ship months at risk, and count  */
# of losses.  Empty categories are removed.                  */
#*************************************************************/
ships<-read.table("ships.dat",col.names=c("type","built",
  "period","smar","cases"))
#You can save the result of a complicated model in R just as
#you can save any other object.  Printing the result of a 
#complicated model in R is generally not informative.
glmo<-glm(cases~type+built,family=poisson,data=ships)
cat("Result of printing glm output\n")
glmo
#Instead, print the results of a summary.
cat("Result of printing glm summary\n")
summary(glmo)
cat('\n Shipping example, 2 categorical covs.  \n')
summary(glm(cases~type+factor(built)+
  offset(log(smar)),family=poisson,data=ships))
cat('\n Shipping example, 3 categorical covs.  \n')
summary(glm(cases~type+factor(built)+factor(period)+
  offset(log(smar)),family=poisson,data=ships))
#********************************************************/
# Mark.B Models with Interaction                        */
#********************************************************/
cat('\n Result with interaction.  Note infinite ets.  \n')
summary(glm(cases~type*factor(built)+offset(log(smar)),
  family=poisson,data=ships))
cat('\n Death penalty data  \n')
#*************************************************************/
#          Death Penalty Example                             */
# Death penalty data from Radelet, M. (1981), Racial         */
# Characteristics and Imposition of the Death Penalty        */
# American Sociological Review, 46, 918-927. Data reflect    */
# outcomes of murder cases in which race of victim and       */
# principle suspect are known.  1st column is indicator of   */
# relationship between victim and suspect (P for close       */
# relationship, N for not), Race of victim and defendant     */
# (both either B or W; other races deleted), number of cases,*/
# number of indictments, and number of death sentences.      */
# Referenced by Moore (2000) Basic Practice of Statistics, p.*/
# Following Moore, we will only use the non-primary cases.   */
# http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat  */
#*************************************************************/
death1<-as.data.frame(scan("floridadeath.dat",         
  what=list(rela="",rv="",rd="",nc=0,ni=0,nd=0)))    
death2<-death1; death1$we<-death1$nd;death1$lc<-"Death" 
death2$we<-death1$nc-death1$nd;death2$lc<-"Live"
death<-rbind(death1,death2)
death$rdc<-c("White defend.","Black defend.")[(death$rd=="B")+1]
death$rvc<-c("White victim","Black victim")[(death$rv=="B")+1]
nonprim<-death[death$rela=="N",]                       
cat('\n Two parameterizations for Model ignoring race of victem  \n')
out<-glm(we~rdc+lc+rdc*lc,data=nonprim,family=poisson)
summary(out)
cat('\n Simultaneously estimate all three common odds ratios  \n')
out<-glm(we~rdc*lc+lc*rvc+rdc*rvc,data=nonprim,
  family=poisson)
summary(out)
#In mantelhaen.test last entry in model gives strata, and so
#estimated OR measures association betw rdc and lc.  Est-   
#imator is MH estimator.                                    
#**********************************************************/
#Compare OR estimate to exp of rdc*lc interaction in      */
#Poisson regression with all 3-way interactions above.    */
#**********************************************************/
cat('\n Traditional estimates for comparison.  \n')
mantelhaen.test(xtabs(we~rdc+lc+rvc,data=nonprim))
#*************************************************************/
#          Montana Nickel Smelter Example                    */
# Causes of death for nickel smelters, from Breslow and Day  */
# (1987) Statistical Models in Cancer Research V. II, Inter- */
# national Agency for Research on Cancer.  The study is desc-*/
# ribled on pp. 349ff.  Variables are age group, calendar    */
# period, hire period, arsenic exposure, person-years at     */
# risk, deaths from all causes, deaths from respiratory can- */
# cer, deaths from all cancers, and deaths from cirulatory   */
# disease.                                                   */
# http://faculty.washington.edu/norm/BDdata/montana4.dat     */
#*************************************************************/
montana<-as.data.frame(scan("montana4.dat",
  what=list(ageg=0,period=0,hire=0,exp=0,pyar=0,alld=0,
     rcd=0,acd=0,cd=0)))
montana$lpyar<-log(montana$pyar)
#*************************************************************/
# Mark.C Proportional Mortality: Fit two Poisson regression  */
# models, with offset.                                       */
#*************************************************************/
cat('\n Poisson regression for Resp. Cancer Deaths  \n')
summary(glm(rcd~factor(period)+factor(hire)+factor(ageg)+exp+
  +offset(lpyar),family=poisson,data=montana))$coef
cat('\n Poisson regression for Circulatory Deaths  \n')
summary(glm(cd~factor(period)+factor(hire)+factor(ageg)+exp+
  +offset(lpyar),family=poisson,data=montana))$coef
#*************************************************************/
# Now add the two causes of death, and do prop. mortality.   */
#*************************************************************/
cat('\n Logistic regression for proportional mortality  \n')
summary(glm(cbind(rcd,cd)~factor(period)+factor(hire)+
  factor(ageg)+exp,family=binomial,data=montana))$coef
#*************************************************************/
# Mark.D Logistic regregression for 2xK table.               */
#*************************************************************/
cat('\n Logistic regression for prostate date, ungrouped  \n')
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death otherwise.    */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp.  Causes of death are given by var-  */
# ious positive integers in surv; I recode these to 1.  The */
# data file has more columns than we are interested in.  Put*/
# place-holding variables in for variables not of interest  */
# between variables of interest.  Data were previously pub- */
# lished by Byar and Corle (1977 Chronic Disease) and Byar  */
# and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
# mized dose begins with blank to make it alphabetized      */
# before high.                                              */
#************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-as.data.frame(scan("T46.1",flush=T,what=        
list(tableno=0,subtableno=0,lineno=0,patno=0,stage=0,rx=0,
beginmo=0,beginday=0,beginyear=0,monfol=0,surv=0)))[      
 ,c("stage","rx","monfol","surv")]                        
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
stage4$frx<-factor(stage4$rx)
cat('\n Logistic regression for prostate date, grouped  \n')
summary(glm(alive~frx,family=binomial,data=stage4))$coef
# Demonstraton of logistic regression with data grouped.*/
stage4a<-as.data.frame(xtabs(~rx+alive,data=stage4))
stage4a$frx<-factor(stage4a$rx)
summary(glm(alive~frx,family=binomial,weight=Freq,
  data=stage4a))$coef
#*************************************************************/
# Mark.F Probit Regression                                   */
#*************************************************************/
cat('\n Probit regression  \n')
summary(glmout<-glm(alive~as.factor(rx),
family=binomial(link=probit),data=stage4a,weight=Freq))$coef
#*************************************************************/
# Mark.E Stratified 2x2 tables using logistic regression     */
# Treat death penalty data as two 2x2 tables for lc vs rvc   */
# stratified on rdc, or as two 2x2 tables for lc vs rdc      */
# stratified on rvc.  The model without interactions presumes*/
# the same OR between lc and each of the stratifiers/cov-    */
# variates in both tables, and the coefficient of covariate  */
# is the log of the odds ratio.  So the Wald test for each   */
# covariate is an alternative to the Mantel-Hanszel test.    */
# The test of the interaction is an alternative to the Bres- */
# low-Day test for homogeity of odds ratios.  Note infinite  */
# MLE for model with interactions.                           */
#*************************************************************/
cat('\n Death penalty log. reg. w/o interactions to give MH.  \n')
nonprim$surv<-(nonprim$lc=="Live")+0
nonprim$rvf<-factor(nonprim$rvc,labels=c("White","Black"))
nonprim$rdf<-factor(nonprim$rdc,labels=c("White","Black"))
summary(glm(surv~rvf+rdf,
  family=binomial,data=nonprim,weight=we))$coef
cat('\n Death penalty data with interactions  \n')
summary(glmout<-glm(surv~rvf*rdf,
  family=binomial,data=nonprim,weight=we))$coef
anova(glmout)
#*************************************************************/
# Mark.a Link comparison                                     */
#*************************************************************/
uuu<-(0:90)/30;vvv<-cbind(pnorm(uuu),1/(1+exp(-1.702*uuu)))
plot(range(uuu),range(vvv),type="n",ylab="Probability",
  xlab="Linear Predictor",
  main="Comparison of Probit and Logistic Link Functions")
for(jj in 1:2) lines(uuu,vvv[,jj],lty=jj)
legend(1.5,.8,lty=1:2,legend=c("Probit","Logit rescaled"))