postscript("l01.ps");options(width=68)
#*********************************************************/
#Mark.a Graphical Demonstration of Continuity Correction */
#*********************************************************/
nn<-10
p<-dbinom(0:nn,nn,.5)
hp<-apply(cbind(c(p,0),c(0,p)),1,"max")
plot(c(0,11)-.5,range(p)*c(1,1.2),type="n",
xlab="Potential Binomial Value",
  ylab="Binomial Probabilities",
  main="Demonstration of Continuity Correction")
segments((0:11)-.5,rep(0,12),(0:11)-.5,hp)
segments((0:nn)-.5,p,(0:nn)+.5,p)
xxx<-(-5:(10*nn+5))/10
lines(xxx,dnorm(xxx,.5*nn,sqrt(.5*.5*nn)))
x<-4
xx<-c(rep(0:x,rep(2,(x+1)))-.5,x,x)
yy<-c(0,rep(p[1:(x+1)],rep(2,(x+1))),0)
polygon(xx,yy,border=FALSE,density=10)
polygon(c(x,x+.5,x+.5,x),c(p[x+1],p[x+1],0,0),border=FALSE,
 density=10,angle=-45)
legend(-.5,1.2*max(p),density=10,angle=c(45,-45),
  legend=c("Area Approximated without cont. corr.",
   "Additional area represented by cont. corr."))
#************************************************************/
#Mark.b General Continuous Confidence Interval Construction */
#************************************************************/
sp<-4
theta<-(0:150)/5
ql<-qbeta(.025,theta,sp);qu<-qbeta(.975,theta,sp)
plot(range(theta),range(c(ql,qu)),type="n",xlab="theta",
  ylab="Observation",
  main="Continuous Confidence Interval Example",
  sub=paste("beta(theta,",sp,") Example"))
lines(theta,ql,lty=1)
lines(theta,qu,lty=1)
typical=mean(range(c(ql,qu)))*1.2
abline(h=typical,lty=2)
ep<-c(approx(ql,theta,typical)$y, approx(qu,theta,typical)$y)
for(jj in 1:2) segments(ep[jj],typical,ep[jj],0,lty=3)
legend(mean(theta),.2,lty=1:3,legend=c(".025, .975 quantiles",
  "Typical observed value","Confidence interval endpoints"))
#**************************************************/
#Mark.c Discrete Confidence Interval Construction */
#**************************************************/
nn<-10
obsd<-5
epsilon<-.05
piv<-(1:9999)/10000
q<-cbind(qbinom(.025,10,piv),qbinom(.975,10,piv))
plot(range(piv),range(q),type="n",xlab="pi",ylab="Observed",
  main="Discrete Confidence Interval Example")
for(ii in 1:2){
  diffl<-diff(q[,ii])>.5
  begin<-seq(length(piv))[c(TRUE,diffl)]
  end<-seq(length(piv))[c(diffl,TRUE)]
  for(jj in seq(length(end))){
     segments(piv[begin[jj]],q[begin[jj],ii]+(2*ii-3)*epsilon,
     piv[end[jj]],q[end[jj],ii]+(2*ii-3)*epsilon)
  }
}
abline(h=obsd,lty=2)
cie<-c(min(piv[q[,2]==obsd]),max(piv[q[,1]==obsd]))
for(ii in c(1,2)) segments(cie[ii],obsd,cie[ii],0,lty=3)
legend(0,10,lty=c(1:2,NA,3,NA),legend=c(".025, .975 quantiles",
  "Typical Observed","value","Confidence Interval",
  "End Points"))
#************************************************************/
#Mark.d Table of Endpoints for Binomial Confidence Interval */
#************************************************************/
fun.approxnormalci<-function(x,nn,alpha=.05,
     alternative="two.sided"){
  se<-sqrt(x/nn*(1-x/nn)/nn)
  if(substring(alternative,1,1)=="t"){
     z<-qnorm(1-alpha/2)
     ciends<-c(max(x/nn-z*se,0),min(x/nn+z*se,1))
     sides<-""
  }
  if(substring(alternative,1,1)=="l"){
     z<-qnorm(1-alpha)
     ciends<-c(0,x/nn+z*se)
     sides<-"One-sided"
  }
  if(substring(alternative,1,1)=="g"){
     z<-qnorm(1-alpha)
     ciends<-c(x/nn-z*se,1)
     sides<-"One-sided"
  }
  return(ciends)
}
fun.coverageplot<-function(nn,alpha=.05,
     alternative="two.sided",plot=TRUE){
  pi<-(1:999)/1000
  cover<-array(NA,c(length(pi),2))
  ciends<-array(NA,c(2,2,nn+1))
  dimnames(ciends)<-list(c("Lower","Upper"),
     c("Normal","Exact"),as.character(0:nn))
  for(x in 0:nn){
     ciends[,1,x+1]<-fun.approxnormalci(x,nn,alpha=alpha,
        alternative=alternative)
     ciends[,2,x+1]<-binom.test(x,nn,
        alternative=alternative,conf.level=1-alpha)$conf.int
  }
  for(jj in seq(length(pi))){
     ps<-dbinom(0:nn,nn,pi[jj])
     hit<-array(FALSE,c(nn+1,2))
     for(x in 0:nn){
        for(ii in 1:2) if((ciends[1,ii,x+1]<=pi[jj])&(
           ciends[2,ii,x+1]>=pi[jj])) hit[x+1,ii]<-TRUE
     }
     for(ii in 1:2) cover[jj,ii]<-sum(ps*hit[,ii])
  }
  if(plot){
     plot(range(pi),range(cover),type="n",
        xlab="True Probability",ylab="Coverage",
        main=paste("Coverage for",1-alpha,
           if(substring(alternative,1,1)=="t") "" else 
             "one-sided",
        "Confidence Interval"),
        sub=paste("Sample Size",nn))
     lines(pi,cover[,1],lty=2); lines(pi,cover[,2],lty=1)
     abline(h=1-alpha,lty=3)
     legend(.3,.4,lty=1:3,
         legend=c("Naive Normal","Exact","Nominal Level"))
  }
  return(ciends)
}
ciends<-fun.coverageplot(10,plot=FALSE)
show<-round(rbind(ciends[,1,1:6],ciends[,2,1:6]),3)
for(j in 1:2)  dimnames(show)[[1]][2*j+(-1:0)]<-paste(
  dimnames(ciends)[[2]][j],dimnames(show)[[1]][2*j+(-1:0)])
show
#***********************************************************/
#Mark.e Coverage of One-Sided Binomial Confidence Interval */
#***********************************************************/
ciends<-fun.coverageplot(10,alpha=.1,alternative="g")
#**********************************************************/
#Mark.f Coverage of Two-Sided Binomial Confidence Interval*/
#**********************************************************/
ciends<-fun.coverageplot(10,alpha=.1)
#************************************************************/
# Mark.H Confidence interval for real data                  */
#************************************************************/
cat('\n Binomial Confidence Interval for cancer death  \n')
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death otherwise.    */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp.  Causes of death are given by var-  */
# ious positive integers in surv; I recode these to 1.  The */
# data file has more columns than we are interested in.  Put*/
# place-holding variables in for variables not of interest  */
# between variables of interest.  Data were previously pub- */
# lished by Byar and Corle (1977 Chronic Disease) and Byar  */
# and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
# mized dose begins with blank to make it alphabetized      */
# before high.                                              */
#************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them. 
prostate<-as.data.frame(scan("T46.1",flush=T,what=        
list(tableno=0,subtableno=0,lineno=0,patno=0,stage=0,rx=0,
beginmo=0,beginday=0,beginyear=0,monfol=0,surv=0)))[      
 ,c("stage","rx","monfol","surv")]                        
prostate$alive<-(prostate$surv==0)+0                      
prostate$dose<-c(" low","high")[1+(prostate$rx>1)]         
stage4<-prostate[prostate$stage==4,]
binom.test(sum(prostate$alive),length(prostate$alive))
#An alternate approach using the F distribution
fun.exactbin<-function(x0,x1,level=.95){
  alpha<-1-level
  fv<-if(x1==0) 1 else fv<-qf(alpha/2,2*x0+2,2*x1,
     lower.tail=FALSE)
  fu<-if(x0==0) 1 else fv<-qf(alpha/2,2*x1+2,2*x0,
     lower.tail=FALSE)
  return(c(x0/(x0+(x1+1)*fu),((x0+1)*fv)/(x1+(x0+1)*fv)))
}
fun.exactbin(sum(prostate$alive),sum(1-prostate$alive))
#*********************************************************/
#Mark.g Confidence Region for Two Multinomail Parameters */
#*********************************************************/
fun.elipse<-function(mu,A,cc,n=1000){
  pi<-4*atan(1)
  aaa<-rbind(cos(2*pi*(0:n)/n),sin(2*pi*(0:n)/n))
  return(mu+solve(chol(A/cc))%*%aaa)
}
fun.cscont<-function(n,piv,alpha,lty){
  mu<-piv*n
  if(!is.na(alpha)){
     A<-(diag(1/piv)+array(1,c(2,2))/(1-sum(piv)))*2/n
     cc<-qchisq(1-alpha,2)
     eo<-fun.elipse(mu,A,cc)
     lines(x=eo[1,],y=eo[2,],lty=lty)
  }else{
     points(x=mu[1],y=mu[2],pch=lty)
  }
}
fun.threecat<-function(n,piv,alpha=NA){
 if(!is.array(piv)) piv<-array(piv,c(1,2))
 plot(c(0,n),c(0,n),type="n",xlab="X1",ylab="X2",
    sub=paste("alpha=",alpha,
      "3 category multinomial,",n," items"),
    main="Rejection Region for Chi-Square Tests")
 xv<-outer(0:n,rep(1,n+1))
 yv<-outer(rep(1,n+1),0:n)
 sv<-(xv-n/3)^2/(n/3)+(yv-n/3)^2/(n/3)+(n-xv-yv-n/3)^2/(n/3)
 use<-xv+yv<=(n+.5)
 xv<-xv[use]
 yv<-yv[use]
 points(xv,yv)
 for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],alpha,jj+1)
 for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],NA   ,jj+1)
 levec<-c("Pi",apply(round(piv,3),1,paste,collapse=","))
 legend(.4*n,.95*n,legend=levec,pch=c(0,2:(dim(piv)[1]+1)))
 legend(.5*n,.75*n,legend=levec,lty=c(0,2:(dim(piv)[1]+1)))
}
#fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4))
fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4),.05)