*****************************************************************; *** Chapter 10 ***; *** Problem from Neter, Kutner, Nachtsheim, & Wasserman 1996 ***; ****************************************************************; OPTIONS LS=99 PS=256 NOCENTER NODATE NONUMBER; DATA ONE; INFILE CARDS MISSOVER; TITLE1 'EXST7034 - NKNW Table 10.11'; INPUT state $ 1-25 mathprof parents homelib reading tvwatch absences; weight = 1; * label mathprof = 'proficiency in math'; * label parents = '% 8th grade with both parents at home'; * label homelib = ''% 8th grade with 3 or more types of reference materials'; * label reading = ''% 8th grade who read more than 10 pages/day'; * label TVWatch = ''% 8th grade who watch TV 6 hrs or more daily'; * label Absences = ''% 8th grade absent 3 or more days last month'; *---+----1----+----2----+----3----+----4----+----5----+----6; CARDS; RUN; Alabama 252 75 78 34 18 18 Arizona 259 75 73 41 12 26 Arkansas 256 77 77 28 20 23 California 256 78 68 42 11 28 Colorado 267 78 85 38 9 25 Connecticut 270 79 86 43 12 22 Delaware 261 75 83 32 18 28 Distric of Columbia 231 47 76 24 33 37 Florida 255 75 73 31 19 27 Georgia 258 73 80 36 17 22 Guam 231 81 64 32 20 28 Hawaii 251 78 69 36 23 26 Idaho 272 84 84 48 7 21 Illinois 260 78 82 43 14 21 Indiana 267 81 84 37 11 23 Iowa 278 83 88 43 8 20 Kentucky 256 79 78 36 14 23 Louisiana 246 73 76 36 19 27 Maryland 260 75 83 34 19 27 Michigan 264 77 84 31 14 25 Minnesota 276 83 88 36 7 20 Montana 280 83 88 44 6 21 Nebraska 276 85 88 42 9 19 New Hampshire 273 83 88 40 7 22 New Jersey 269 79 84 41 13 23 New Mexico 256 77 72 40 11 27 New York 261 76 79 35 17 29 North Carolina 250 74 78 37 21 25 North Dakota 281 85 90 41 6 14 Ohio 264 79 84 36 11 22 Oklahoma 263 78 78 37 14 22 Oregon 271 81 82 41 9 31 Pennsylvania 266 80 86 34 10 24 Rhode Island 260 78 80 38 12 28 Texas 258 77 70 34 15 18 Virgin Islands 218 63 76 23 27 22 Virginia 264 78 82 33 16 24 West Virginia 256 82 80 36 16 25 Wisconsin 274 81 86 38 8 21 Wyoming 272 85 86 43 7 23 ; Title2 'Robust regression'; OPTIONS LS=99 PS=56; PROC REG DATA=ONE lineprinter; weight weight; id state; MODEL mathprof = homelib / influence; output out=next1a r=e p=yhat; plot residual.*homelib; run; data next1a; set next1a; abse = abs(e); run; proc univariate data=next1a noprint; var abse; output out=next1b median=median; run; data next1c; set next1b next1a; if mathprof eq . then mad = (1/0.6745)*median; mu = e / mad; weight = 1; if abs(mu) gt 1.346 then weight = 1.345 / abs(mu); retain mad; run; OPTIONS LS=99 PS=256; proc print data=next1c; run; data next1c; set next1c; if mathprof eq . then delete; drop mad mu yhat e abse; run; PROC REG DATA=next1c; weight weight; MODEL mathprof = homelib; output out=next2a r=e p=yhat; run; data next2a; set next2a; abse = abs(e); run; proc univariate data=next2a noprint; var abse; output out=next2b median=median; run; data next2c; set next2b next2a; if mathprof eq . then mad = (1/0.6745)*median; mu = e / mad; weight = 1; if abs(mu) gt 1.346 then weight = 1.345 / abs(mu); retain mad; run; proc print data=next2c; run; data next2c; set next2c; if mathprof eq . then delete; drop mad mu yhat e abse; run; PROC REG DATA=next2c; weight weight; MODEL mathprof = homelib; output out=next3a r=e p=yhat; run; data next3a; set next3a; abse = abs(e); run; proc univariate data=next3a noprint; var abse; output out=next3b median=median; run; data next3c; set next3b next3a; if mathprof eq . then mad = (1/0.6745)*median; mu = e / mad; weight = 1; if abs(mu) gt 1.346 then weight = 1.345 / abs(mu); retain mad; run; proc print data=next3c; run; data next3c; set next3c; if mathprof eq . then delete; drop mad mu yhat e abse; run; PROC REG DATA=next3c; weight weight; MODEL mathprof = homelib; output out=next4a r=e p=yhat; run; data next4a; set next4a; abse = abs(e); run; proc univariate data=next4a noprint; var abse; output out=next4b median=median; run; data next4c; set next4b next4a; if mathprof eq . then mad = (1/0.6745)*median; mu = e / mad; weight = 1; if abs(mu) gt 1.346 then weight = 1.345 / abs(mu); retain mad; run; proc print data=next4c; run; data next4c; set next4c; if mathprof eq . then delete; drop mad mu yhat e abse; run; OPTIONS LS=99 PS=56; PROC REG DATA=next4c lineprinter; weight weight; MODEL mathprof = homelib; plot residual.*homelib; run; PROC NLIN DATA=ONE CONVERGE=10E-12 MAXITER=200; TITLE2 'Robust regression'; PARAMETERS B0 = 136 b1=1.56; *** Estimates from OLS ***; MODEL mathprof = b0 + b1*homelib; RUN; resid=mathprof-model.mathprof; sigma=2; b=4.685; r=abs(resid/sigma); if r<=b then _weight_=(1-(r/b)**2)**2; else _weight_=0; output out=c r=rbi; data c; set c; sigma=2; b=4.685; r=abs(rbi/sigma); if r<=b then _weight_=(1-(r/b)**2)**2; else _weight_=0; proc print; run;