```postscript("l01.ps");options(width=68)
#*********************************************************/
#Mark.a Graphical Demonstration of Continuity Correction */
#*********************************************************/
nn<-10
p<-dbinom(0:nn,nn,.5)
hp<-apply(cbind(c(p,0),c(0,p)),1,"max")
plot(c(0,11)-.5,range(p)*c(1,1.2),type="n",
xlab="Potential Binomial Value",
ylab="Binomial Probabilities",
main="Demonstration of Continuity Correction")
segments((0:11)-.5,rep(0,12),(0:11)-.5,hp)
segments((0:nn)-.5,p,(0:nn)+.5,p)
xxx<-(-5:(10*nn+5))/10
lines(xxx,dnorm(xxx,.5*nn,sqrt(.5*.5*nn)))
x<-4
xx<-c(rep(0:x,rep(2,(x+1)))-.5,x,x)
yy<-c(0,rep(p[1:(x+1)],rep(2,(x+1))),0)
polygon(xx,yy,border=FALSE,density=10)
polygon(c(x,x+.5,x+.5,x),c(p[x+1],p[x+1],0,0),border=FALSE,
density=10,angle=-45)
legend(-.5,1.2*max(p),density=10,angle=c(45,-45),
legend=c("Area Approximated without cont. corr.",
"Additional area represented by cont. corr."))
#************************************************************/
#Mark.b General Continuous Confidence Interval Construction */
#************************************************************/
sp<-4
theta<-(0:150)/5
ql<-qbeta(.025,theta,sp);qu<-qbeta(.975,theta,sp)
plot(range(theta),range(c(ql,qu)),type="n",xlab="theta",
ylab="Observation",
main="Continuous Confidence Interval Example",
sub=paste("beta(theta,",sp,") Example"))
lines(theta,ql,lty=1)
lines(theta,qu,lty=1)
typical=mean(range(c(ql,qu)))*1.2
abline(h=typical,lty=2)
ep<-c(approx(ql,theta,typical)\$y, approx(qu,theta,typical)\$y)
for(jj in 1:2) segments(ep[jj],typical,ep[jj],0,lty=3)
legend(mean(theta),.2,lty=1:3,legend=c(".025, .975 quantiles",
"Typical observed value","Confidence interval endpoints"))
#**************************************************/
#Mark.c Discrete Confidence Interval Construction */
#**************************************************/
nn<-10
obsd<-5
epsilon<-.05
piv<-(1:9999)/10000
q<-cbind(qbinom(.025,10,piv),qbinom(.975,10,piv))
plot(range(piv),range(q),type="n",xlab="pi",ylab="Observed",
main="Discrete Confidence Interval Example")
for(ii in 1:2){
diffl<-diff(q[,ii])>.5
begin<-seq(length(piv))[c(TRUE,diffl)]
end<-seq(length(piv))[c(diffl,TRUE)]
for(jj in seq(length(end))){
segments(piv[begin[jj]],q[begin[jj],ii]+(2*ii-3)*epsilon,
piv[end[jj]],q[end[jj],ii]+(2*ii-3)*epsilon)
}
}
abline(h=obsd,lty=2)
cie<-c(min(piv[q[,2]==obsd]),max(piv[q[,1]==obsd]))
for(ii in c(1,2)) segments(cie[ii],obsd,cie[ii],0,lty=3)
legend(0,10,lty=c(1:2,NA,3,NA),legend=c(".025, .975 quantiles",
"Typical Observed","value","Confidence Interval",
"End Points"))
#************************************************************/
#Mark.d Table of Endpoints for Binomial Confidence Interval */
#************************************************************/
fun.approxnormalci<-function(x,nn,alpha=.05,
alternative="two.sided"){
se<-sqrt(x/nn*(1-x/nn)/nn)
if(substring(alternative,1,1)=="t"){
z<-qnorm(1-alpha/2)
ciends<-c(max(x/nn-z*se,0),min(x/nn+z*se,1))
sides<-""
}
if(substring(alternative,1,1)=="l"){
z<-qnorm(1-alpha)
ciends<-c(0,x/nn+z*se)
sides<-"One-sided"
}
if(substring(alternative,1,1)=="g"){
z<-qnorm(1-alpha)
ciends<-c(x/nn-z*se,1)
sides<-"One-sided"
}
return(ciends)
}
fun.coverageplot<-function(nn,alpha=.05,
alternative="two.sided",plot=TRUE){
pi<-(1:999)/1000
cover<-array(NA,c(length(pi),2))
ciends<-array(NA,c(2,2,nn+1))
dimnames(ciends)<-list(c("Lower","Upper"),
c("Normal","Exact"),as.character(0:nn))
for(x in 0:nn){
ciends[,1,x+1]<-fun.approxnormalci(x,nn,alpha=alpha,
alternative=alternative)
ciends[,2,x+1]<-binom.test(x,nn,
alternative=alternative,conf.level=1-alpha)\$conf.int
}
for(jj in seq(length(pi))){
ps<-dbinom(0:nn,nn,pi[jj])
hit<-array(FALSE,c(nn+1,2))
for(x in 0:nn){
for(ii in 1:2) if((ciends[1,ii,x+1]<=pi[jj])&(
ciends[2,ii,x+1]>=pi[jj])) hit[x+1,ii]<-TRUE
}
for(ii in 1:2) cover[jj,ii]<-sum(ps*hit[,ii])
}
if(plot){
plot(range(pi),range(cover),type="n",
xlab="True Probability",ylab="Coverage",
main=paste("Coverage for",1-alpha,
if(substring(alternative,1,1)=="t") "" else
"one-sided",
"Confidence Interval"),
sub=paste("Sample Size",nn))
lines(pi,cover[,1],lty=2); lines(pi,cover[,2],lty=1)
abline(h=1-alpha,lty=3)
legend(.3,.4,lty=1:3,
legend=c("Naive Normal","Exact","Nominal Level"))
}
return(ciends)
}
ciends<-fun.coverageplot(10,plot=FALSE)
show<-round(rbind(ciends[,1,1:6],ciends[,2,1:6]),3)
for(j in 1:2)  dimnames(show)[[1]][2*j+(-1:0)]<-paste(
dimnames(ciends)[[2]][j],dimnames(show)[[1]][2*j+(-1:0)])
show
#***********************************************************/
#Mark.e Coverage of One-Sided Binomial Confidence Interval */
#***********************************************************/
ciends<-fun.coverageplot(10,alpha=.1,alternative="g")
#**********************************************************/
#Mark.f Coverage of Two-Sided Binomial Confidence Interval*/
#**********************************************************/
ciends<-fun.coverageplot(10,alpha=.1)
#************************************************************/
# Mark.H Confidence interval for real data                  */
#************************************************************/
cat('\n Binomial Confidence Interval for cancer death  \n')
#************************************************************/
#  Prostate data                                            */
# From Andrews and Herzberg (1985), Data: A Collection of   */
# Problems from Many Fields for the Student and Research    */
# Worker, Table 46.  Observations represent subjects in a   */
# prostate cohort study, randomized to one of four dose     */
# levels of diethylstilbestrol.  Rx records dose in four    */
# ordered categories, with 1 being placebo.  Disease stage  */
# is 3 or 4.  monfol is months of followup.  surv is 0 if   */
# alive after 50 mo, and codes cause of death otherwise.    */
# http://lib.stat.cmu.edu/datasets/Andrews/T46.1            */
# The on-line version of the data set adds 3 fields before  */
# the first field in the book.  Variables of interest are   */
# stage, rx, monfol, and surv in fields 5, 6, 10, 11 of the */
# online version, resp.  Causes of death are given by var-  */
# ious positive integers in surv; I recode these to 1.  The */
# data file has more columns than we are interested in.  Put*/
# place-holding variables in for variables not of interest  */
# between variables of interest.  Data were previously pub- */
# lished by Byar and Corle (1977 Chronic Disease) and Byar  */
# and Green (1980 Bull. Cancer).  Lower value of dichoto-   */
# mized dose begins with blank to make it alphabetized      */
# before high.                                              */
#************************************************************/
# If we omit the ones past where the data of interest stops
# in the "what" list, and use flush=T, R will ignore them.
prostate<-as.data.frame(scan("T46.1",flush=T,what=
list(tableno=0,subtableno=0,lineno=0,patno=0,stage=0,rx=0,
beginmo=0,beginday=0,beginyear=0,monfol=0,surv=0)))[
,c("stage","rx","monfol","surv")]
prostate\$alive<-(prostate\$surv==0)+0
prostate\$dose<-c(" low","high")[1+(prostate\$rx>1)]
stage4<-prostate[prostate\$stage==4,]
binom.test(sum(prostate\$alive),length(prostate\$alive))
#An alternate approach using the F distribution
fun.exactbin<-function(x0,x1,level=.95){
alpha<-1-level
fv<-if(x1==0) 1 else fv<-qf(alpha/2,2*x0+2,2*x1,
lower.tail=FALSE)
fu<-if(x0==0) 1 else fv<-qf(alpha/2,2*x1+2,2*x0,
lower.tail=FALSE)
return(c(x0/(x0+(x1+1)*fu),((x0+1)*fv)/(x1+(x0+1)*fv)))
}
fun.exactbin(sum(prostate\$alive),sum(1-prostate\$alive))
#*********************************************************/
#Mark.g Confidence Region for Two Multinomail Parameters */
#*********************************************************/
fun.elipse<-function(mu,A,cc,n=1000){
pi<-4*atan(1)
aaa<-rbind(cos(2*pi*(0:n)/n),sin(2*pi*(0:n)/n))
return(mu+solve(chol(A/cc))%*%aaa)
}
fun.cscont<-function(n,piv,alpha,lty){
mu<-piv*n
if(!is.na(alpha)){
A<-(diag(1/piv)+array(1,c(2,2))/(1-sum(piv)))*2/n
cc<-qchisq(1-alpha,2)
eo<-fun.elipse(mu,A,cc)
lines(x=eo[1,],y=eo[2,],lty=lty)
}else{
points(x=mu[1],y=mu[2],pch=lty)
}
}
fun.threecat<-function(n,piv,alpha=NA){
if(!is.array(piv)) piv<-array(piv,c(1,2))
plot(c(0,n),c(0,n),type="n",xlab="X1",ylab="X2",
sub=paste("alpha=",alpha,
"3 category multinomial,",n," items"),
main="Rejection Region for Chi-Square Tests")
xv<-outer(0:n,rep(1,n+1))
yv<-outer(rep(1,n+1),0:n)
sv<-(xv-n/3)^2/(n/3)+(yv-n/3)^2/(n/3)+(n-xv-yv-n/3)^2/(n/3)
use<-xv+yv<=(n+.5)
xv<-xv[use]
yv<-yv[use]
points(xv,yv)
for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],alpha,jj+1)
for(jj in seq(dim(piv)[1])) fun.cscont(n,piv[jj,],NA   ,jj+1)
levec<-c("Pi",apply(round(piv,3),1,paste,collapse=","))
legend(.4*n,.95*n,legend=levec,pch=c(0,2:(dim(piv)[1]+1)))
legend(.5*n,.75*n,legend=levec,lty=c(0,2:(dim(piv)[1]+1)))
}
#fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4))
fun.threecat(13,rbind(rep(1,2)/4,rep(1,2)/3,c(1,2)/4),.05)
```