postscript("l11.ps");options(width=68)
#************************************************************/
#Results for 2017 NBA regular and pre season, to date, from */
#http://stats.nba.com/schedule/#!?PD=N .  Includes 1 pre-   */
#season game each vs. Brisbane Bullets, Melbourne United,   */
#Shanghai Sharks, Guanzhou Long-Lions, and 3 for Haifa      */
#Maccabi.  All resulted in NBA victory, although United was */
#close.                                                     */
#************************************************************/
nba<-read.csv("nba2017.dat",head=F,stringsAsFactors=F)
names(nba)<-c("type","v","vs","h","hs","Venue","st")
#*************************************************************/
# The model calls for as many regressors as there are compet-*/
# itors compared.  In this case we have 35 teams.  To auto-  */
# mate coding of the regressors, give each team a number. The*/
# number will be the rank of each alphabatized city name.    */
#*************************************************************/
count<-table(c(nba$h,nba$v))
ngames<-dim(nba)[1]
allteams<-sort(unique(c(nba$h,nba$v)))
nba$xmat<-array(0, c(ngames,length(allteams)))
dimnames(nba$xmat)<-list(NULL,allteams)
nba$exhibition<-rep(F,ngames)
exhibitionteams<-names(count[count<4])
for(jj in seq(ngames)){
  nba$xmat[jj,nba$h[jj]]<-1
  nba$xmat[jj,nba$v[jj]]<--1
  if(count[nba$h[jj]]<4) nba$exhibition[jj]<-T
  if(count[nba$v[jj]]<4) nba$exhibition[jj]<-T
}
glm((hs>vs)~xmat-1,data=nba,family=binomial)
glm((hs>vs)~xmat-1,family=binomial,
    subset=!nba$exhibition,data=nba)
#*******************************************************/
# Set up the logistic regression data set.  The resp-  */
# onse is 1 if the home team wins, and 0 if the visit- */
# ing team wins.  The covariate associated with the    */
# home team is +1, and that associated with the visit- */
# ors is -1.  We use the codes hom and vis created     */
# above, in conjunction with the array structure and   */
# looping, to avoid tedious if statements.             */
#*******************************************************/
ngames<-dim(nba)[1]
allteams<-sort(unique(c(nba$h,nba$v)))
nba$xmat<-array(0, c(ngames,length(allteams)))
dimnames(nba$xmat)<-list(NULL,allteams)
nba$exhibition<-rep(F,ngames)
exhibitionteams<-names(count[count<4])
for(jj in seq(ngames)){
  nba$xmat[jj,nba$h[jj]]<-1
  nba$xmat[jj,nba$v[jj]]<--1
  if(count[nba$h[jj]]<3) nba$exhibition[jj]<-T
  if(count[nba$v[jj]]<3) nba$exhibition[jj]<-T
}
#*************************************************************/
# Now run the models.  The first model is the basic pref     */
# -erence model.  The team with the largest fitted parameter */
# pis judged best by the model.  The team with the highest   */
# integer code is taken as baseline, and all other team      */
# results are relative to that one.                          */
#*************************************************************/
glm((hs>vs)~xmat,data=nba,family=binomial)
#Note that the exhibition teams don't change the fit
glm((hs>vs)~xmat,family=binomial,subset=!nba$exhibition,
  data=nba)
#************************************************************/
# The second model contains an intercept, allowing for home-*/
# field advantage.                                          */
#************************************************************/
glm((hs>vs)~xmat,data=nba,family=binomial)
#************************************************************/
#              Cow Mastitis Example                         */
# Data from Andrews and Herzberg (1985), Data: A Collection */
# of Problems from Many Fields for the Student and Research */
# Worker, Table 61.  Observations represent infections in   */
# udders of cows given a placebo or one of eight anti-      */
# biotics.  First two variables identify the table.  Third  */
# and fourth identify cow, fifth identifies herdd, sixth    */
# treatment, and next eight represent level of infection at */
# each of eight sites.  Treatment assignment is non-random. */
# http://lib.stat.cmu.edu/datasets/Andrews/T61.1            */
#************************************************************/
cows<-as.data.frame(scan("T61.1",flush=T,what=list(ex=0,   
  ta=0,c1=0,c2=0,herd=0,treaetment=0,q11=0,q12=0,q21=0))) 
#*************************************************************/
# Explore relation between the first four treatments and     */
# infection severity in udder 2 location 1 in two ways: both */
# variables treated as ordered, and both as unordered.       */
#*************************************************************/
cat('\n Exact Tests for Assoc. betw. treatment and infection  \n')
#*************************************************************/
#Do a "toy" example of logistic regression.                  */
#*************************************************************/
#R won't do exact logistic regression.
glm(c(1,1,0,0)~c(-1,1,-1,1),weight=c(7,3,3,7),family=binomial)
#*************************************************************/
#If we knew the intercept term, we would use it as an offset.*/
#The p-value depends on the value of this offset.            */
#*************************************************************/
#*************************************************************/
#Investigate actual size of unconditional test.              */
#*************************************************************/
cat('\n asy  \n')
glm(c(1,1,0,0)~c(-1,1,-1,1),weight=c(7,3,3,7),family=binomial)
fun.wald<- function(y1,y2,n1,n2){
  out<-glm(c(1,1,0,0)~c(-1,1,-1,1),weight=c(y1,y2,n1-y1,n2-y2),
     family=binomial)
  return(summary(out)$coef[2,4])
}
fun.allwald<- function (n1,n2) {
  out<-array(NA,c(n1,n2)+1)
  for(y1 in 0:n1) for(y2 in 0:n2) 
     out[y1+1,y2+1]<-fun.wald(y1,y2,n1,n2)
  return(out)
}
fun.probs<- function(npts,n1,n2) {
  out<-array(NA,c(npts,n1+1,n2+1))
  pi<-4*((1-npts)/2):((npts-1)/2)/(npts-1)
  pi<-exp(pi)/(1+exp(pi))
  for(y1 in 0:n1) for (y2 in 0:n2) 
     out[,y1+1,y2+1]<-dbinom(y1,n1,pi)*dbinom(y2,n2,pi)
  return(list(pi=pi,prob=out))
}
fun.size<-function(npts,n1,n2){
  wo<-fun.allwald(n1,n2)<=.05
  out<-rep(NA,npts)
  probs<-fun.probs(npts,n1,n2)
  for(i in seq(npts)) out[i]<-sum(probs$prob[i,,]*wo)
  return(list(pi=probs$pi,size=out))
}
out<-fun.size(201,10,10)
plot(out[[1]],out[[2]],xlab="Common Probability",
  ylab="Size of nominal .05 test",type="l",
  main="Size of Wald Test from Logistic Regression")