options pagesize=150 linesize=68;
filename grafout 'g06.ps';
goptions device=pslmono gsfname=grafout 
   rotate=landscape gsfmode=append;
/*********************************************************/
/* Mark.A Multi-Way Analyses                             */
/*********************************************************/
title 'Shipping example, 1 categorical and 2 cts. covs.';
/**************************************************************/
/* Shipping Data: McCullagh and Nelder (1989) Generalized     */
/* Linear Models provide data on losses by a shipping insurer.*/
/* Data are grouped into putatively homogeneous classes, based*/
/* on ship type, start of construction 5 year period, star of */
/* observation 5 year period, ship months at risk, and count  */
/* of losses.  Empty categories are removed.                  */
/**************************************************************/
data ships; infile 'ships.dat'; 
   input type $ built  period  smar cases; lsmar=log(smar); run;
* Apply a Poisson regression to shipping data.          ;
proc genmod data=ships; class type; 
   model cases=type built period/dist=poisson offset=lsmar 
      type1; run;
* The above treats build as numeric.  Treat them as ;
* categorical instead.;
title 'Shipping example, 2 categorical covs.';
proc genmod data=ships; class type built period; 
   model cases=type built/dist=poisson offset=lsmar type1; run;
* Extends easily to higher-way tables.;
title 'Shipping example, 3 categorical covs.';
proc genmod data=ships; class type built period; 
   model cases=type built period/dist=poisson offset=lsmar; run;
/*********************************************************/
/* Mark.B Models with Interaction                        */
/*********************************************************/
title 'Result with interaction.  Note infinite ets.';
proc genmod data=ships; class type built period; 
   model cases=type*built /dist=poisson offset=lsmar; run;
title 'Death penalty data';
/**************************************************************/
/*          Death Penalty Example                             */
/* Death penalty data from Radelet, M. (1981), Racial         */
/* Characteristics and Imposition of the Death Penalty        */
/* American Sociological Review, 46, 918-927. Data reflect    */
/* outcomes of murder cases in which race of victim and       */
/* principle suspect are known.  1st column is indicator of   */
/* relationship between victim and suspect (P for close       */
/* relationship, N for not), Race of victim and defendant     */
/* (both either B or W; other races deleted), number of cases,*/
/* number of indictments, and number of death sentences.      */
/* Referenced by Moore (2000) Basic Practice of Statistics, p.*/
/* Following Moore, we will only use the non-primary cases.   */
/* http://stat.rutgers.edu/~kolassa/960-553/floridadeath.dat  */
/**************************************************************/
title1 'Death penalty data';
data death; infile 'floridadeath.dat';
   input rela $ rv $ rd $ nc ni nd ;
   rdc="White Defend.";if rd="B" then rdc="Black Defend.";
   rvc="White Victim"; if rv="B" then rvc="Black Victim";
   lc="Death"; we=nd; output ;
   lc="Live"; we=nc-nd; output;
run;
data nonprim; set death; if rela="P" then delete; run;
title 'Two parameterizations for Model ignoring race of victem';
* Note that I can get away with abbreviating Poisson poi ;
proc genmod data=nonprim; class rdc lc;
   model we=rdc*lc/dist=poi; run;
proc genmod data=nonprim; class rdc lc;
   model we=lc rdc rdc*lc/dist=poi; run;
title 'Simultaneously estimate all three common odds ratios';
proc genmod data=nonprim; class rdc lc rvc;
   model we=lc rdc rvc rdc*lc rdc*rvc lc*rvc/dist=poi; run;
/***********************************************************/
/*Compare OR estimate to exp of rdc*lc interaction in      */
/*Poisson regression with all 3-way interactions above.    */
/***********************************************************/
title 'Traditional estimates for comparison.';
proc freq data=nonprim; weight we; 
   table rvc*lc*rdc/nofreq nocum norow nocol nopct cmh; run;
/**************************************************************/
/*          Montana Nickel Smelter Example                    */
/* Causes of death for nickel smelters, from Breslow and Day  */
/* (1987) Statistical Models in Cancer Research V. II, Inter- */
/* national Agency for Research on Cancer.  The study is desc-*/
/* ribled on pp. 349ff.  Variables are age group, calendar    */
/* period, hire period, arsenic exposure, person-years at     */
/* risk, deaths from all causes, deaths from respiratory can- */
/* cer, deaths from all cancers, and deaths from cirulatory   */
/* disease.                                                   */
/* http://faculty.washington.edu/norm/BDdata/montana4.dat     */
/**************************************************************/
title 'Montana Nickel Smelter Data';
data montana; infile 'montana4.dat';
   input ageg period hire exp pyar alld rcd acd cd; 
   lpyar=log(pyar); run;
/**************************************************************/
/* Mark.C Proportional Mortality: Fit two Poisson regression  */
/* models, with offset.                                       */
/**************************************************************/
title 'Poisson regression for Resp. Cancer Deaths';
proc genmod data=montana; class ageg period hire;
   model rcd=period hire ageg exp/dist=pois offset=lpyar; run;
title 'Poisson regression for Circulatory Deaths';
proc genmod data=montana; class ageg period hire;
   model cd=period hire ageg exp/dist=Poisson offset=lpyar; run;
/**************************************************************/
/* Now add the two causes of death, and do prop. mortality.   */
/**************************************************************/
title 'Logistic regression for proportional mortality';
data montana; set montana; rcorc=rcd+cd; run;
proc genmod data=montana; class ageg period hire;
   model rcd/rcorc=period hire ageg exp/dist=binomial; run;
/**************************************************************/
/* Mark.D Logistic regregression for 2xK table.               */
/**************************************************************/
title 'Logistic regression for prostate date, ungrouped';
/*************************************************************/
/*  Prostate data                                            */
/* From Andrews and Herzberg (1985), Data: A Collection of   */
/* Problems from Many Fields for the Student and Research    */
/* Worker, Table 46.  Observations represent subjects in a   */
/* prostate cohort study, randomized to one of four dose     */
/* levels of diethylstilbestrol.  Rx records dose in four    */
/* ordered categories, with 1 being placebo.  Disease stage  */
/* is 3 or 4.  monfol is months of followup.  surv is 0 if   */
/* alive after 50 mo, and codes cause of death otherwise.    */
/* http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
/* The on-line version of the data set adds 3 fields before  */
/* the first field in the book.  Variables of interest are   */
/* stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
/* online version, resp.  Causes of death are given by var-  */
/* ious positive integers in surv; I recode these to 1.  The */
/* data file has more columns than we are interested in.  Put*/
/* place-holding variables in for variables not of interest  */
/* between variables of interest.  Data were previously pub- */
/* lished by Byar and Corle (1977 Chronic Disease) and Byar  */
/* and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
/* mized dose begins with blank to make it alphabetized      */
/* before high.                                              */
/*************************************************************/
* If we omit the ones past where the data of interest stops   ;
*  out of the data step, SAS will ignore them.                ;
data prostate; infile 'T46.1';
   input tableno subtableno lineno patno stage rx beginmo 
      beginday beginyear monfol surv;
   if surv>0 then alive=0; if surv=0 then alive=1; 
   dose=" low"; if rx>1 then dose="high"; 
   keep stage rx monfol alive dose; run;
data stage4; set prostate; if stage<4 then delete; run;
proc genmod data=stage4; class rx;
   model alive=rx/dist=binomial; run;
title 'Logistic regression for prostate date, grouped';
/* Demonstraton of logistic regression with data grouped.*/
proc freq data=stage4 noprint;
   table rx*alive/nofreq nocum norow nopct nocol
      out=stage4a; run;
proc genmod data=stage4a; class rx; weight count;
   model alive=rx/dist=binomial; run;
/**************************************************************/
/* Mark.F Probit Regression                                   */
/**************************************************************/
title 'Probit regression';
proc genmod data=stage4a; class rx; weight count;
   model alive=rx/dist=binomial link=probit; run;
/**************************************************************/
/* Mark.E Stratified 2x2 tables using logistic regression     */
/* Treat death penalty data as two 2x2 tables for lc vs rvc   */
/* stratified on rdc, or as two 2x2 tables for lc vs rdc      */
/* stratified on rvc.  The model without interactions presumes*/
/* the same OR between lc and each of the stratifiers/cov-    */
/* variates in both tables, and the coefficient of covariate  */
/* is the log of the odds ratio.  So the Wald test for each   */
/* covariate is an alternative to the Mantel-Hanszel test.    */
/* The test of the interaction is an alternative to the Bres- */
/* low-Day test for homogeity of odds ratios.  Note infinite  */
/* MLE for model with interactions.                           */
/**************************************************************/
title 'Death penalty log. reg. w/o interactions to give MH.';
* Each main effect corr. to an interaction with response in;
* Poisson model, and conditioning on covariates means condit-;
* ioning on covariate interactions.                          ;
proc genmod data=nonprim; class rdc lc rvc; weight we;
   model lc=rvc rdc /dist=binomial; run;
title 'Death penalty data with interactions';
proc genmod data=nonprim; class rdc lc rvc; weight we;
   model lc=rvc rdc rdc*rvc/dist=binomial type3; run;