options pagesize=150 linesize=68;
filename grafout 'g11.ps';
goptions device=pscolor gsfname=grafout 
   rotate=landscape gsfmode=append;
/*************************************************************/
/*http://stats.nba.com/schedule/#!?PD=N .  Includes 1 pre-   */
/*season game each vs. Brisbane Bullets, Melbourne United,   */
/*Shanghai Sharks, Guanzhou Long-Lions, and 3 for Haifa      */
/*Maccabi.  All resulted in NBA victory, although United was */
/*close.                                                     */
/*************************************************************/
data nba; infile "nba2017.dat" dlm=",";
    input season $ v $ vs h $ hs ve $ st$; run;
proc print data=nba; run;
/**************************************************************/
/* The model calls for as many regressors as there are compet-*/
/* itors compared.  In this case we have 35 teams.  To auto-  */
/* mate coding of the regressors, give each team a number. The*/
/* number will be the rank of each alphabatized city name.    */
/**************************************************************/
* Get a list of all names, by making separate lists of home;
* away names.  Then these together using a set statment and;
* sort and remove duplicates.  I could have done this more;
* easliy keeping in mind that the exhibition teams were all;
* visitors, but I didn't do this.  I store both home and ;
* away team names in the variable v.;
proc sort data=nba; by v; run;
data namesv; set nba; by v; if first.v then; else delete; 
   keep v; run;
proc sort data=nba; by h; run;
data namesh; set nba; by h; if first.h then; else delete; v=h; 
   keep v; run;
data allnames; set namesh namesv; run;
proc sort data=allnames; by v; run;
data allnames; set allnames ; by v; if first.v then; else delete; run;
* Number the teams in alphabetical order.  Note 76rs are first.;
data allnames; set allnames ; vi=_n_; run;
/********************************************************/
/* Set up the logistic regression data set.  The resp-  */
/* onse is 1 if the home team wins, and 0 if the visit- */
/* ing team wins.  The covariate associated with the    */
/* home team is +1, and that associated with the visit- */
/* ors is -1.  We use the codes hom and vis created     */
/* above, in conjunction with the array structure and   */
/* looping, to avoid tedious if statements.             */
/********************************************************/
* Add number to file by merging.  Note that nba first must be;
* sorted.  First the visitors;
proc sort data=nba; by v; run;
data nba; merge nba allnames; by v; run;
* Now add the number for the home team;
data allnames; set allnames; h=v; hi=vi; keep h hi; run;
proc sort data=nba; by h; run;
data nba; merge nba allnames; by h; if vs=. then delete; run;
* This macro grabs the name out of the allnames data set;
%macro vlabel();
   %local dsid getvalue i ;
   %let dsid=%sysfunc(open(allnames));
   %do i=1 %to 35;
   %let rc=%sysfunc(fetchobs(&dsid,&i));
   %let getvalue=%qsysfunc(getvarc(&dsid,1));
   x&i = "&getvalue"
   %end;
%mend;
data nba; set nba; result=(hs>vs); array x(1:35) x1-x35;
   do i=1 to 35;  x(i)=0; end;
   x(hi)=1; x(vi)=-1; label %vlabel(); run;
/**************************************************************/
/* Now run the models.  The first model is the basic pref     */
/* -erence model.  The team with the largest fitted parameter */
/* pis judged best by the model.  The team with the highest   */
/* integer code is taken as baseline, and all other team      */
/* results are relative to that one.                          */
/**************************************************************/
proc logistic data=nba descending; 
   model result=x1-x35/noint parmlabel; run;
/*************************************************************/
/* The second model contains an intercept, allowing for home-*/
/* field advantage.                                          */
/*************************************************************/
proc logistic data=nba descending; 
   model result=x1-x35/parmlabel; run;
/*************************************************************/
/*              Cow Mastitis Example                         */
/* Data from Andrews and Herzberg (1985), Data: A Collection */
/* of Problems from Many Fields for the Student and Research */
/* Worker, Table 61.  Observations represent infections in   */
/* udders of cows given a placebo or one of eight anti-      */
/* biotics.  First two variables identify the table.  Third  */
/* and fourth identify cow, fifth identifies herdd, sixth    */
/* treatment, and next eight represent level of infection at */
/* each of eight sites.  Treatment assignment is non-random. */
/* http://lib.stat.cmu.edu/datasets/Andrews/T61.1            */
/*************************************************************/
data cows; infile 'T61.1';
   input ex ta c1 c2 herd treatment q11 q12 q21; run;
/**************************************************************/
/* Explore relation between the first four treatments and     */
/* infection severity in udder 2 location 1 in two ways: both */
/* variables treated as ordered, and both as unordered.       */
/**************************************************************/
* The df=3 approach with one variable ordered is in principle  ;
* no harder, but SAS does not implement it.                    ;
title 'Exact Tests for Assoc. betw. treatment and infection';
data herd1; set cows; if herd=4 then; else delete;
   if treatment>4 then delete; run;
proc freq data=herd1; table treatment*q21/chisq norow nocol 
   nopct; exact chisq mhchi; run;
/**************************************************************/
/*Do a "toy" example of logistic regression.                  */
/**************************************************************/
data try; input x y n @@; cards;
 1 7 10 -1 3 10
; run;
* Don't use the weight version here if you want exact tests.;
proc logistic data=try; model y/n=x; exact x; run;
/**************************************************************/
/*If we knew the intercept term, we would use it as an offset.*/
/*The p-value depends on the value of this offset.            */
/**************************************************************/
data bigtry; set try; do i=-100 to 100; o=i/50; output; end;
proc sort data=bigtry; by i; run;
prog logistic data=bigtry outtest=test ;
   model y/n=x/noint offset=o; by i;
data plot; set plot; o=i/50;
  if Variable="x" then; else delete; run;
proc gplot data=plot; plot o*ProbChiSq; symbol i=join;
  title 'P-value as a function of offset'; run;
/**************************************************************/
/*Investigate actual size of unconditional test.              */
/**************************************************************/
%macro calcpow(n1,n2,lor);
data probs; do y1=0 to &n1; do y2=0 to &n2;
   do i=-100 to 100; 
   pi1=1/(1+exp(-i/50+&lor/2)); pi2=1/(1+exp(-i/50-&lor/2));
   p=PMF('Binomial',y1,pi1,&n1)*PMF('Binomial',y2,pi2,&n2); 
   output;
   end; end; end; run;
data hugetry; do y1=0 to &n1; do y2=0 to &n2;
      n=&n1;y=y1; x=-1; output; n=&n2; y=y2; x=1; output;
   end; end; run;
data hugetry1; do y1=0 to &n1; do y2=0 to &n2;
      a=0; b=0; cnt=y1; output; a=0; b=1; cnt=&n1-y1; output;
      a=1; b=0; cnt=y2; output; a=1; b=1; cnt=&n2-y2; output;
   end; end; run;
proc logistic data=hugetry; by y1 y2; model y/n=x; exact x;
proc freq data=hugetry1; by y1 y2; table a*b/chisq; weight cnt;
   exact pchi; run;
data asy; set asy; 
   if Statistic="Chi-Square" then; else delete; 
   keep y1 y2 Prob; run;
title 'asy';
data fet; set fet; if Name1="XP_PCHI" then; else delete; 
    keep y1 y2 nValue1; run; 
data et; set et; if Test="Score" then; else delete; 
   keep y1 y2 ExactPValue; run;
data test; set test; if Variable="x" then; else delete;
   keep y1 y2 ProbChiSq; run;
data add; merge probs test et asy; by y1 y2;
   if ProbChiSq=. then ProbChiSq=1;
   if ExactPValue=. then ExactPValue=1;
   if Prob=. then Prob=1;
   pp=p*(ProbChiSq<=.05); qq=p*(ExactPValue<=.05);
   rr=p*(Prob<=.05) ; run;
proc sort data=add; by i; run;
proc means data=add sum noprint; var  pp qq rr; by i; 
   output out=size sum=asymsize exactsize exactpsize; run;
proc gplot data=size;
   plot asymsize*i exactsize*i exactpsize*i/overlay legend=legend1;
   title 'Test size as a function of the common probability';
   legend1 label=("Test") value=("Wald" "Exact" "Pearson");
   symbol1 i=join line=1; symbol2 i=join line=2;
   symbol3 i=join line=3; run;
%mend;
%calcpow(10,10,0);
%calcpow(10,10,1);