Chapter 11 homeworkBelow are partial answers to the problems. Reproduce the example on p. 427 of KNN. Below is my R code and output. > set1<-read.table(file = "C:\\chris\\UNL\\STAT870\\Instructor_CD\\Data Sets\\Chapter 11 Data Sets\\CH11TA01.txt", header = FALSE, col.names = c("X", "Y"), sep = "")> #Check first few observations> head(set1) X Y1 27 732 21 663 22 634 24 755 25 716 23 70 > #Regular least squares> mod.fit<-lm(formula = Y ~ X, data = set1)> summary(mod.fit)Call:lm(formula = Y ~ X, data = set1)Residuals: Min 1Q Median 3Q Max -16.47859 -5.78765 -0.07844 5.61173 19.78132 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 56.15693 3.99367 14.061 < 2e-16 ***X 0.58003 0.09695 5.983 2.05e-07 ***---Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 8.146 on 52 degrees of freedomMultiple R-Squared: 0.4077, Adjusted R-squared: 0.3963 F-statistic: 35.79 on 1 and 52 DF, p-value: 2.050e-07 > par(mfrow = c(1,3), pty = "s")> #Y vs. X with sample model> plot(x = set1$X, y = set1$Y, xlab = "X", ylab = "Y", main = "Y vs. X", panel.first = grid(col = "gray", lty = "dotted"))> curve(expr = mod.fit$coefficients[1] + mod.fit$coefficients[2]*x, col = "red", lty = "solid", lwd = 2, add = TRUE, from = min(set1$X), to = max(set1$X))> #Residuals vs. Y^> plot(x = mod.fit$fitted.values, y = mod.fit$residuals, xlab = expression(hat(Y)), ylab = "Residuals", main = "Residuals vs. estimated mean response", panel.first = grid(col = "gray", lty = "dotted"))> abline(h = 0, col = "darkgreen") > #abs(residuals) vs. X> plot(x = set1$X, y = abs(mod.fit$residuals), xlab = "X", ylab = "Residuals", main = "Residuals vs. X", panel.first = grid(col = "gray", lty = "dotted"))> abline(lm(abs(mod.fit$residuals)~set1$X), col = "red") #Quick way to get line on 1plot20 30 40 50 6070 80 90 100 110Y vs. XXY70 75 80 85 90-10 0 10 20Residuals vs. estimated mean responseY^Residuals20 30 40 50 600 5 10 15 20Residuals vs. XXResiduals> #Examine how abs(e) is a function of X> mod.fit.s<-lm(abs(mod.fit$residuals)~set1$X)> mod.fit.s$coefficients(Intercept) set1$X -1.5494776 0.1981723 > #Table 11> table.11<-data.frame(set1, e = mod.fit$residuals, abs.e = abs(mod.fit$residuals), s = mod.fit.s$fitted.values, w = 1/mod.fit.s$fitted.values^2) > head(table.11) X Y e abs.e s w1 27 73 1.1822391 1.1822391 3.801175 0.069209282 21 66 -2.3375761 2.3375761 2.612141 0.146557083 22 63 -5.9176069 5.9176069 2.810313 0.126616574 24 75 4.9223315 4.9223315 3.206658 0.097251155 25 71 0.3423007 0.3423007 3.404830 0.086259936 23 70 0.5023623 0.5023623 3.008485 0.11048521> tail(table.11) X Y e abs.e s w49 50 71 -14.1584692 14.1584692 8.359138 0.01431123250 59 90 -0.3787464 0.3787464 10.142689 0.00972061751 50 91 5.8415308 5.8415308 8.359138 0.01431123252 52 100 13.6814692 13.6814692 8.755482 0.01304487253 58 80 -9.7987156 9.7987156 9.944516 0.01011189854 57 109 19.7813152 19.7813152 9.746344 0.010527289> #Weighted least squares> mod.fit.wls<-lm(formula = Y ~ X, data = set1, weight = table.11$w)> summary(mod.fit.wls)Call:lm(formula = Y ~ X, data = set1, weights = table.11$w)Residuals: Min 1Q Median 3Q Max -2.0230 -0.9939 -0.0327 0.9250 2.2008 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 55.56577 2.52092 22.042 < 2e-16 ***X 0.59634 0.07924 7.526 7.19e-10 ***2---Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.213 on 52 degrees of freedomMultiple R-Squared: 0.5214, Adjusted R-squared: 0.5122 F-statistic: 56.64 on 1 and 52 DF, p-value: 7.187e-10 > confint(mod.fit.wls, level = 0.95) 2.5 % 97.5 %(Intercept) 50.507175 60.6243577X 0.437339 0.7553445Now, try putting the data into 5 groups. > #################################################################################> # Try putting the data into 5 groups > #Find quantiles for Y^'s> quant5<-quantile(x = mod.fit$fitted.values, probs = c(0.2, 0.4, 0.6, 0.8), type = 1) > quant5 20% 40% 60% 80% 71.81776 77.61807 81.67828 86.31853 > #Put Y^'s into groups based upon quantiles> groups<-ifelse(mod.fit$fitted.values < quant5[1], 1, ifelse(mod.fit$fitted.values < quant5[2], 2, ifelse(mod.fit$fitted.values < quant5[3], 3, ifelse(mod.fit$fitted.values < quant5[4], 4, 5))))> var.eps<-tapply(X = mod.fit$residuals, INDEX = groups, FUN = var)> var.eps 1 2 3 4 5 16.34516 22.13168 67.99866 113.24704 126.25498 > #Visualization of creating the groups> par(mfrow = c(1,1), pty = "m")> plot(x = mod.fit$fitted.values, y = mod.fit$residuals, xlab = expression(hat(Y)), ylab = "Residuals", main = "Residuals vs. estimated mean response", panel.first = grid(col = "gray", lty = "dotted"))> abline(h = 0, col = "darkgreen")> abline(v = quant5, col = "red", lwd = 3)370 75 80 85 90-10 0 10 20Residuals vs. estimated mean responseY^Residuals> #Put the group variances into a vector corresponding to each observation> group.var<-ifelse(groups == 1, var.eps[1], ifelse(groups == 2, var.eps[2], ifelse(groups == 3, var.eps[3], ifelse(groups == 4, var.eps[4], var.eps[5])))) > mod.fit1<-lm(formula = Y ~ X, data = set1, weight = 1/group.var^2)> summary(mod.fit1)Call:lm(formula = Y ~ X, data = set1, weights = 1/group.var^2)Residuals: Min 1Q Median 3Q Max -0.370761 -0.117747 -0.001656 0.101663 0.471864 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 56.11229 2.77571 20.22 < 2e-16 ***X 0.58365 0.09943 5.87 3.08e-07 ***---Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 0.1599 on 52 degrees of freedomMultiple R-Squared: 0.3986, Adjusted R-squared: 0.387 F-statistic: 34.46 on 1 and 52 DF, p-value: 3.084e-07 Overall, we can see this model is similar to the previous model. The standard errors are a littlelarger here. 411.11 Below is my plot of the models2 4 6 8 100 50 100 150Minutes spent vs. Number of copiersNumber of copiersMinutes spent by service personLS w/ 46-47LS w/o
View Full Document