Chapter 10 homeworkBelow are partial answers to the problems. 10.7 a. > #Read in the data> patient<-read.table(file = "C:\\chris\\UNL\\STAT870\\Instructor_CD\\Data Sets\\Chapter 6 Data Sets\\CH06PR15.txt", header = FALSE, col.names = c("satisfaction", "age", "illness", "anxiety"), sep = "")> head(patient) satisfaction age illness anxiety1 48 50 51 2.32 57 36 46 2.33 66 40 48 2.24 70 41 44 1.85 89 28 43 1.86 36 49 54 2.9> mod.fit<-lm(formula = satisfaction ~ age + illness + anxiety, data = patient)> sum.fit<-summary(mod.fit)> sum.fitCall:lm(formula = satisfaction ~ age + illness + anxiety, data = patient)Residuals: Min 1Q Median 3Q Max -18.3524 -6.4230 0.5196 8.3715 17.1601 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 158.4913 18.1259 8.744 5.26e-11 ***age -1.1416 0.2148 -5.315 3.81e-06 ***illness -0.4420 0.4920 -0.898 0.3741 anxiety -13.4702 7.0997 -1.897 0.0647 . ---Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 10.06 on 42 degrees of freedomMultiple R-Squared: 0.6822, Adjusted R-squared: 0.6595 F-statistic: 30.05 on 3 and 42 DF, p-value: 1.542e-10 > ############################################################################> # Part a> > #Added variable plots> library(car)> av.plots(model = mod.fit, ask=FALSE, one.page = TRUE, identify.points=FALSE) 1-0.2 -0.1 0.0 0.1-40 -20 0 20Added-Variable Plot(Intercept) | otherssatisfaction | others-15 -10 -5 0 5 10 15-20 0 10 20 30Added-Variable Plotage | otherssatisfaction | others-6 -4 -2 0 2 4 6 8-20 -10 0 10Added-Variable Plotillness | otherssatisfaction | others-0.4 -0.2 0.0 0.2 0.4-15 -5 5 15Added-Variable Plotanxiety | otherssatisfaction | othersb. No – the plots indicate linear relationships. 210.11 Answer key:a. Conclude outlier if greater than 3.27. > #Critical value> n<-length(mod.fit$fitted.values)> qt(p = 1-0.10/(2*n), df = mod.fit$df.residual-1)1] 3.271524> #t.i vs. Y.hat.i> plot(x = mod.fit$fitted.values, y = t.i, xlab = "Estimated mean response", ylab = "Studentized deleted residuals", main = expression(paste(t[i], " vs. estimated mean response")), panel.first = grid(col = "gray", lty = "dotted"), ylim = c(min(qt(p = 0.05/(2*n), df = mod.fit$df.residual-1), min(t.i)), max(qt(p = 1-0.05/(2*n), df = mod.fit$df.residual-1), max(t.i))))> abline(h = 0, col = "darkgreen")> abline(h = c(qt(p = 0.01/2, df = mod.fit$df.residual-1),qt(p = 1-0.01/2, df = mod.fit$df.residual-1)), col = "red", lwd = 2)> abline(h = c(qt(p = 0.10/(2*n), df = mod.fit$df.residual-1),qt(p = 1- 0.10/(2*n), df = mod.fit$df.residual-1)), col = "darkred", lwd = 2)> identify(x = mod.fit$fitted.values, y = t.i)340 50 60 70 80 90-3 -2 -1 0 1 2 3ti vs. estimated mean responseEstimated mean responseStudentized deleted residualsNo values outside of the limits. > #Fox's outlier.test() function > outlier.test(mod.fit)max|rstudent| = 1.974202, degrees of freedom = 41,unadjusted p = 0.05512149, Bonferroni p > 1Observation: 27 The p-value is > 1 due to the adjustment made by the Bonferroni procedure. Of course, probability values can not be greater than 1!b.> h.ii<-hatvalues(model = mod.fit)> p<-length(mod.fit$coefficients)> n<-length(mod.fit$residuals)> round(h.ii[h.ii>2*p/n],2) 9 28 39 0.18 0.19 0.18 > plot(x = 1:n, y = h.ii, ylab = expression(h[ii]), xlab = "Observation number", main = expression(paste("Index plot of ", h[ii])))> abline(h = 2*p/n, col = "red", lty = "dashed")> abline(h = c(0.2,0.5), col = "blue", lty = "dotted")> identify(x = 1:n, y =h.ii)[1] 9 28 3940 10 20 30 400.05 0.10 0.15Index plot of hiiObservation numberhii92839c. Possibly yes due to hnew,new being much larger than the other hii values for those observations in the data set. > #See p. 400> X.new<-c(1, 30, 58, 2)> X<-as.matrix(cbind(1, patient[,2:4])) #Need as.matrix() so that R treats it like a matrix for its calculations> h.new.new<-t(X.new)%*%solve(t(X)%*%X)%*%X.new> h.new.new [,1][1,] 0.3267004> summary(h.ii) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.02723 0.04808 0.08067 0.08696 0.11890 0.18600d.DFFITS are not too far past the “large” data sets borderline. Cook’s D does not have any values past the quantile from the F-distribution. Some DFBETAS are passed the borderline forthe “large” data set criterion. 50 10 20 30 40-1.0 -0.5 0.0 0.5 1.0DFFITS vs. observation numberObservation numberDFFITS172731 0 10 20 30 400.0 0.2 0.4 0.6 0.8Cook's D vs. observation numberObservation numberCook's D0 10 20 30 40-1.0 -0.5 0.0 0.5 1.0DFBETAS for variable 1 vs. observation numberObservation numberDFBETAS111517270 10 20 30 40-1.0 -0.5 0.0 0.5 1.0DFBETAS for variable 2 vs. observation numberObservation numberDFBETAS131731390 10 20 30 40-1.0 -0.5 0.0 0.5 1.0DFBETAS for variable 3 vs. observation numberObservation numberDFBETAS11153237e.> #Individually – kind of like a DFFIT measure> mod.fit.wo11<-lm(formula = satisfaction ~ age + illness + anxiety, data = patient[-11,])> pred.wo11<-predict(object = mod.fit.wo11, newdata = patient[11,])> pred<-predict(object = mod.fit, newdata = patient[11,])> abs(pred.wo11-pred)/pred[1] 0.02293143 > mod.fit.wo17<-lm(formula = satisfaction ~ age + illness + anxiety, data = patient[-17,])6> pred.wo17<-predict(object = mod.fit.wo17, newdata = patient[17,])> pred<-predict(object = mod.fit, newdata = patient[17,])> abs(pred.wo17-pred)/pred[1] 0.03614547 > mod.fit.wo27<-lm(formula = satisfaction ~ age + illness + anxiety, data = patient[-27,])> pred.wo27<-predict(object = mod.fit.wo27, newdata = patient[27,])> pred<-predict(object = mod.fit, newdata = patient[27,])> abs(pred.wo27-pred)/pred[1] 0.0214488 > #For all observations – this is what the problem is asking for, kind of like a Cook's D measure> mod.fit.wo11<-lm(formula = satisfaction ~ age + illness + anxiety, data = patient[-11,])> pred.wo11<-predict(object = mod.fit.wo11, newdata = patient)> pred<-predict(object = mod.fit, newdata = patient)> mean(abs(pred.wo11-pred)/pred)[1]
View Full Document