*****************************************************************;
*** Chapter 10                                               ***;
*** Problem from Neter, Kutner, Nachtsheim, & Wasserman 1996 ***;
****************************************************************;
OPTIONS LS=99 PS=256 NOCENTER NODATE NONUMBER;
 
DATA ONE; INFILE CARDS MISSOVER;
   TITLE1 'EXST7034 - NKNW Table 10.11';
   INPUT state $ 1-25 mathprof parents homelib reading tvwatch absences;
      weight = 1;
*   label mathprof = 'proficiency in math';
*   label parents = '% 8th grade with both parents at home';
*   label homelib = ''% 8th grade with 3 or more types of reference materials';
*   label reading = ''% 8th grade who read more than 10 pages/day';
*   label TVWatch = ''% 8th grade who watch TV 6 hrs or more daily';
*   label Absences = ''% 8th grade absent 3 or more days last month';

*---+----1----+----2----+----3----+----4----+----5----+----6;
CARDS; RUN;
 Alabama                 252  75  78  34  18  18
 Arizona                 259  75  73  41  12  26
 Arkansas                256  77  77  28  20  23
 California              256  78  68  42  11  28
 Colorado                267  78  85  38   9  25
 Connecticut             270  79  86  43  12  22
 Delaware                261  75  83  32  18  28
 Distric of Columbia     231  47  76  24  33  37
 Florida                 255  75  73  31  19  27
 Georgia                 258  73  80  36  17  22
 Guam                    231  81  64  32  20  28
 Hawaii                  251  78  69  36  23  26
 Idaho                   272  84  84  48   7  21
 Illinois                260  78  82  43  14  21
 Indiana                 267  81  84  37  11  23
 Iowa                    278  83  88  43   8  20
 Kentucky                256  79  78  36  14  23
 Louisiana               246  73  76  36  19  27
 Maryland                260  75  83  34  19  27
 Michigan                264  77  84  31  14  25
 Minnesota               276  83  88  36   7  20
 Montana                 280  83  88  44   6  21
 Nebraska                276  85  88  42   9  19
 New Hampshire           273  83  88  40   7  22
 New Jersey              269  79  84  41  13  23
 New Mexico              256  77  72  40  11  27
 New York                261  76  79  35  17  29
 North Carolina          250  74  78  37  21  25
 North Dakota            281  85  90  41   6  14
 Ohio                    264  79  84  36  11  22
 Oklahoma                263  78  78  37  14  22
 Oregon                  271  81  82  41   9  31
 Pennsylvania            266  80  86  34  10  24
 Rhode Island            260  78  80  38  12  28
 Texas                   258  77  70  34  15  18
 Virgin Islands          218  63  76  23  27  22
 Virginia                264  78  82  33  16  24
 West Virginia           256  82  80  36  16  25
 Wisconsin               274  81  86  38   8  21
 Wyoming                 272  85  86  43   7  23
;
Title2 'Robust regression';
OPTIONS LS=99 PS=56;
PROC REG DATA=ONE lineprinter; weight weight; id state;
   MODEL mathprof = homelib / influence; 
   output out=next1a r=e p=yhat; 
   plot residual.*homelib; 
run;
data next1a; set next1a; abse = abs(e); run;
proc univariate data=next1a noprint; var abse; 
   output out=next1b median=median; run;
data next1c; set next1b next1a; 
   if mathprof eq . then mad = (1/0.6745)*median; 
   mu = e / mad; 
   weight = 1; 
   if abs(mu) gt 1.346 then weight = 1.345 / abs(mu);
   retain mad; 
run; 
OPTIONS LS=99 PS=256;
proc print data=next1c; run;
data next1c; set next1c; if mathprof eq . then delete; 
   drop mad mu yhat e abse; run; 

PROC REG DATA=next1c; weight weight; MODEL mathprof = homelib; 
   output out=next2a r=e p=yhat; run;
data next2a; set next2a; abse = abs(e); run;
proc univariate data=next2a noprint; var abse; 
   output out=next2b median=median; run;
data next2c; set next2b next2a; 
   if mathprof eq . then mad = (1/0.6745)*median; 
   mu = e / mad; 
   weight = 1; 
   if abs(mu) gt 1.346 then weight = 1.345 / abs(mu);
   retain mad; 
run; 
proc print data=next2c; run;
data next2c; set next2c; if mathprof eq . then delete; 
   drop mad mu yhat e abse; run; 

PROC REG DATA=next2c; weight weight; MODEL mathprof = homelib; 
   output out=next3a r=e p=yhat; run;
data next3a; set next3a; abse = abs(e); run;
proc univariate data=next3a noprint; var abse; 
   output out=next3b median=median; run;
data next3c; set next3b next3a; 
   if mathprof eq . then mad = (1/0.6745)*median; 
   mu = e / mad; 
   weight = 1; 
   if abs(mu) gt 1.346 then weight = 1.345 / abs(mu);
   retain mad; 
run; 
proc print data=next3c; run;
data next3c; set next3c; if mathprof eq . then delete; 
   drop mad mu yhat e abse; run; 

PROC REG DATA=next3c; weight weight; MODEL mathprof = homelib; 
   output out=next4a r=e p=yhat; run;
data next4a; set next4a; abse = abs(e); run;
proc univariate data=next4a noprint; var abse; 
   output out=next4b median=median; run;
data next4c; set next4b next4a; 
   if mathprof eq . then mad = (1/0.6745)*median; 
   mu = e / mad; 
   weight = 1; 
   if abs(mu) gt 1.346 then weight = 1.345 / abs(mu);
   retain mad; 
run; 
proc print data=next4c; run;
data next4c; set next4c; if mathprof eq . then delete; 
   drop mad mu yhat e abse; run; 

OPTIONS LS=99 PS=56;
PROC REG DATA=next4c lineprinter; weight weight; 
   MODEL mathprof = homelib; 
   plot residual.*homelib; 
run;



PROC NLIN DATA=ONE CONVERGE=10E-12 MAXITER=200;
  TITLE2 'Robust regression';
   PARAMETERS B0 = 136 b1=1.56; *** Estimates from OLS ***;
   MODEL mathprof = b0 + b1*homelib;
RUN;
   resid=mathprof-model.mathprof;
      sigma=2;
      b=4.685;
      r=abs(resid/sigma);
      if r<=b then _weight_=(1-(r/b)**2)**2;
      else _weight_=0;
      output out=c r=rbi;
   data c;
   set c;
      sigma=2;
      b=4.685;
      r=abs(rbi/sigma);
      if r<=b then _weight_=(1-(r/b)**2)**2;
      else _weight_=0;
   proc print;
   run;