options pagesize=150 linesize=68;
filename grafout 'g02.ps';
goptions device=pslmono gsfname=grafout 
   rotate=landscape gsfmode=append;
/*          Lotto Example                               */
/* CA lotto data from Friedman Pisani and Purvis (1998).*/
/* Only one set of balls is used during each lottery    */
/* drawing.  Columns represent sets of balls.  Rows rep-*/
/* resent numbers 0-9.  Entries in table represent count*/
/* of how often each ball appears.  Each set of balls   */
/* was used 120 times, and so the expected number of    */
/* times each ball comes up is 12.                      */
data lotto; infile 'lotto.dat'; input seta setb setc setd;
/* Mark.A Perform the chi-square test of the null hypothesis*/
/* that each ball is equally likely on each draw.           */ 
* Do a separate chi-square calculation for each set of balls;
* 'manually'.                                               ;
/* The data step calculates the contribution for each ball.*/
data tests; set lotto;
   setad=(seta-12)**2/12; setbd=(setb-12)**2/12;
   setcd=(setc-12)**2/12; setdd=(setd-12)**2/12; run;
proc print data=tests noobs; run;
* Proc means adds these contributions, to get the sum for   ;
* each set.;
proc means data=tests sum noprint; var setad setbd setcd setdd;
   output out=chisq sum=; run;
* The data step applies the chi-square cdf to get p-values.;
data chisq; set chisq; drop _freq_ _type_;
   lab="Pearson Statistic"; output; 
   setad=1-CDF('chisq',setad,9); setbd=1-CDF('chisq',setbd,9);
   setcd=1-CDF('chisq',setcd,9); setdd=1-CDF('chisq',setdd,9); 
   lab="p-value"; output; run;
proc print data=chisq noobs; run;
title 'Using built-in routines for goodness of fit test';
* We could also do this using proc freq.  Note that we  ;
* need to manually enter the percentages of the expected;
* frequencies.  This is unattractive.                   ;
data lotto; set lotto; digit=_n_-1; run;
proc freq data=lotto ; weight seta; table digit/
   testp=(.1 .1 .1 .1 .1 .1 .1 .1 .1 .1 ) noprint ; run;
/* Number of accidental deaths from falls, 1979        */
/* World Almanac and Book of Facts 1984 (New York:     */
/* Newspaper Enterprise Association, 1984), and quoted */
/* in the Minitab Handbook, 3rd Edn. (Belmont, CA: Dux-*/
/* bury, 1994).  Falls are by month.  Columns are num- */
/* bers of falls, numbers of days in the month, and    */
/* month abbreviation.  Deaths may be influence by the */
/* weather; scorebelow scores months based on icyness. */
title 'Falls data';
data falls; infile 'falls.dat'; input falls days month $; 
  score=0; _freq_=12;
  if _n_=1 or _n_=2 then score=1;
  if _n_=11 or _n_=12 or _n_=3 then score=2;
/* Test to see whether expected number of falls is prop-*/
/* ortional to the number of days in the month.  In this*/
/* case the expected number of deaths from falls in a   */
/* given month, under the null hypothesis, is the total */
/* number of deaths times the number of days in the     */
/* month divided by the number of days in the year.     */
* Proc means gives the totals.  A data step renames the  ;
* totals, and a second data step with a merge puts the   ;
* totals and untotaled falls and days together.  The     ;
* variable _freq_ that I merged by attaches the totals to;
* each observation.                                      ;
proc means data=falls noprint; var days falls;
   output out=tot sum=tdays tfalls ; run;
data falls; merge falls tot; by _freq_; run;
data falls; set falls; exp=tfalls*days/tdays; run;
proc print data=falls noobs; run;
* Calculate Pearson's goodness of fit statistic.         ;
* I could have started this in the last data step.       ;
data tests; set falls;
  keep pears;
proc means data=tests noprint; output out=tests sum=; run;
data tests; merge tests tot;
  keep pears pearspv; run;
proc print data=tests noobs; run;
proc freq data=falls ; weight falls; table month/ testp=( 
   0.0849 0.0767 0.0849 0.0822 0.0849 0.0822 0.0849 0.0849 
   0.0822 0.0849 0.0822 0.0849) noprint ; run;
/* Mark.B Perform the test of trend on falls.               */
* Calculate the test of trend from Armitage (1955)        ;
data tests; set falls; trend=score*(falls-exp);
  v1=score**2*exp; v2=score*exp; keep trend v1 v2; run;
proc means data=tests noprint;output out=tests sum=; run;
data tests; merge tests tot; 
  keep trend pearspv trendpv; run;
proc print data=tests noobs; run;