Regresi linear merupakan metode yang telah dikenal sejak abad ke-19. Regresi linear berganda dapat dinyatakan sebagai $y = f(x)$, dengan $f(x) = \beta_0 + \beta_1 x_1 + \cdots + \beta_p x_p$.
Contoh Regresi dengan R
>
library(ISLR)
> head(Auto)
  mpg cylinders displacement horsepower weight acceleration year origin                      name
1  18         8          307        130   3504         12.0   70      1 chevrolet chevelle malibu
2  15         8          350        165   3693         11.5   70      1         buick skylark 320
3  18         8          318        150   3436         11.0   70      1        plymouth satellite
4  16         8          304        150   3433         12.0   70      1             amc rebel sst
5  17         8          302        140   3449         10.5   70      1               ford torino
6  15         8          429        198   4341         10.0   70      1          ford galaxie 500
>
attach(Auto)
> plot(cylinders, mpg)
> plot(displacement, mpg)
> plot(horsepower,mpg)
> plot(weight,mpg)
> plot(acceleration,mpg)
> plot(year,mpg)
> plot(origin,mpg)
>
#regresi linear ganda
> m1 <- lm(mpg~cylinders+displacement+horsepower+weight+acceleration+year+origin, data = Auto)
> summary(m1)

Call:
lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year + origin, data = Auto)

Residuals:
    Min      1Q  Median      3Q     Max
-9.5903 -2.1565 -0.1169  1.8690 13.0604

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
cylinders     -0.493376   0.323282  -1.526  0.12780
displacement   0.019896   0.007515   2.647  0.00844 **
horsepower    -0.016951   0.013787  -1.230  0.21963
weight        -0.006474   0.000652  -9.929  < 2e-16 ***
acceleration   0.080576   0.098845   0.815  0.41548
year           0.750773   0.050973  14.729  < 2e-16 ***
origin         1.426141   0.278136   5.127 4.67e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.328 on 384 degrees of freedom
Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182
F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16
> m2 <- lm(mpg~cylinders+displacement+horsepower+weight+year+origin, data = Auto)
>
summary(m2)

Call:
lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
    year + origin, data = Auto)

Residuals:
    Min      1Q  Median      3Q     Max
-9.7604 -2.1791 -0.1535  1.8524 13.1209

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)  -1.556e+01  4.175e+00  -3.728 0.000222 ***
cylinders    -5.067e-01  3.227e-01  -1.570 0.117236
displacement  1.927e-02  7.472e-03   2.579 0.010287 *
horsepower   -2.389e-02  1.084e-02  -2.205 0.028031 *
weight       -6.218e-03  5.714e-04 -10.883  < 2e-16 ***
year          7.475e-01  5.079e-02  14.717  < 2e-16 ***
origin        1.428e+00  2.780e-01   5.138 4.43e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.326 on 385 degrees of freedom
Multiple R-squared:  0.8212, Adjusted R-squared:  0.8184
F-statistic: 294.6 on 6 and 385 DF,  p-value: < 2.2e-16
> m3 <- lm(mpg~displacement+horsepower+weight+year+origin, data = Auto)
>
summary(m3)

Call:
lm(formula = mpg ~ displacement + horsepower + weight + year +
    origin, data = Auto)

Residuals:
    Min      1Q  Median      3Q     Max
-9.4882 -2.1157 -0.1645  1.8650 13.0544

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)  -1.669e+01  4.120e+00  -4.051 6.16e-05 ***
displacement  1.137e-02  5.536e-03   2.054   0.0406 *
horsepower   -2.192e-02  1.078e-02  -2.033   0.0428 *
weight       -6.324e-03  5.685e-04 -11.124  < 2e-16 ***
year          7.484e-01  5.089e-02  14.707  < 2e-16 ***
origin        1.385e+00  2.772e-01   4.998 8.80e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.333 on 386 degrees of freedom
Multiple R-squared:  0.82, Adjusted R-squared:  0.8177
F-statistic: 351.7 on 5 and 386 DF,  p-value: < 2.2e-16
> hors2 <- horsepower^2
> weig2 <- weight^2
> Auto2 <- cbind(Auto, hors2, weig2)
> m4 <- lm(mpg~horsepower+hors2+weight+weig2+year+origin, data = Auto2)
>
summary(m4)

Call:
lm(formula = mpg ~ horsepower + hors2 + weight + weig2 + year +
    origin, data = Auto2)

Residuals:
    Min      1Q  Median      3Q     Max
-8.8841 -1.7292 -0.1211  1.5860 12.1360

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  2.602e+00  4.082e+00   0.637  0.52426
horsepower  -1.593e-01  2.960e-02  -5.383 1.27e-07 ***
hors2        5.088e-04  1.042e-04   4.882 1.55e-06 ***
weight      -1.481e-02  1.838e-03  -8.056 1.00e-14 ***
weig2        1.568e-06  2.699e-07   5.809 1.32e-08 ***
year         7.792e-01  4.471e-02  17.426  < 2e-16 ***
origin       6.685e-01  2.372e-01   2.818  0.00508 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.915 on 385 degrees of freedom
Multiple R-squared:  0.8626, Adjusted R-squared:  0.8605
F-statistic: 403 on 6 and 385 DF,  p-value: < 2.2e-16
>
>
#validasi model regresi
> nrow(Auto)
[1] 392
> set.seed(10)
> train1 <- sample(392,196)
> set.seed(20)
> train2 <- sample(392,196)
> head(train1)
[1] 137 330 368  72 211 344
> head(train2)
[1] 166 191 363 376 130 249
> m31 <- lm(mpg~displacement+horsepower+weight+year+origin, data = Auto, subset = train1)
>
summary(m31)

Call:
lm(formula = mpg ~ displacement + horsepower + weight + year +
    origin, data = Auto, subset = train1)

Residuals:
    Min      1Q  Median      3Q     Max
-8.8324 -2.1154 -0.0379  1.8378 11.9800

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)  -1.454e+01  5.379e+00  -2.703  0.00749 **
displacement  8.726e-03  7.790e-03   1.120  0.26409
horsepower   -3.019e-02  1.491e-02  -2.024  0.04432 *
weight       -5.556e-03  7.915e-04  -7.020 3.82e-11 ***
year          7.103e-01  6.748e-02  10.526  < 2e-16 ***
origin        1.144e+00  3.908e-01   2.927  0.00384 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.278 on 190 degrees of freedom
Multiple R-squared:  0.8166, Adjusted R-squared:  0.8118
F-statistic: 169.2 on 5 and 190 DF,  p-value: < 2.2e-16
> m32 <- lm(mpg~displacement+horsepower+weight+year+origin, data = Auto, subset = train2)
>
summary(m32)

Call:
lm(formula = mpg ~ displacement + horsepower + weight + year +
    origin, data = Auto, subset = train2)

Residuals:
   Min     1Q Median     3Q    Max
-8.053 -2.345 -0.156  1.829 13.245

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)  -1.364e+01  6.288e+00  -2.170  0.03126 *
displacement  7.400e-03  7.475e-03   0.990  0.32345
horsepower   -1.319e-02  1.491e-02  -0.884  0.37758
weight       -6.474e-03  7.569e-04  -8.554 3.92e-15 ***
year          7.115e-01  7.950e-02   8.949 3.24e-16 ***
origin        1.321e+00  4.165e-01   3.172  0.00177 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.34 on 190 degrees of freedom
Multiple R-squared:  0.8238, Adjusted R-squared:  0.8192
F-statistic: 177.7 on 5 and 190 DF,  p-value: < 2.2e-16
> m41 <- lm(mpg~poly(horsepower,2)+poly(weight,2)+year+origin, data = Auto, subset = train1)
>
summary(m41)

Call:
lm(formula = mpg ~ poly(horsepower, 2) + poly(weight, 2) + year +
    origin, data = Auto, subset = train1)

Residuals:
    Min      1Q  Median      3Q     Max
-8.3721 -1.8389 -0.1336  1.4726 11.0388

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)
(Intercept)          -38.57462    4.72545  -8.163 4.51e-14 ***
poly(horsepower, 2)1 -27.63378    9.03225  -3.059 0.002540 **
poly(horsepower, 2)2  13.19552    5.41544   2.437 0.015751 *
poly(weight, 2)1     -76.66090   10.14388  -7.557 1.72e-12 ***
poly(weight, 2)2      21.36900    5.69954   3.749 0.000236 ***
year                   0.80114    0.06267  12.784  < 2e-16 ***
origin                 0.56215    0.34823   1.614 0.108124
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.955 on 189 degrees of freedom
Multiple R-squared:  0.8517, Adjusted R-squared:  0.847
F-statistic: 181 on 6 and 189 DF,  p-value: < 2.2e-16
> m42 <- lm(mpg~poly(horsepower,2)+poly(weight,2)+year+origin, data = Auto, subset = train2)
>
summary(m42)

Call:
lm(formula = mpg ~ poly(horsepower, 2) + poly(weight, 2) + year +
    origin, data = Auto, subset = train2)

Residuals:
    Min      1Q  Median      3Q     Max
-6.4428 -1.6895 -0.0122  1.4802 12.2928

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)
(Intercept)          -35.19914    5.28107  -6.665 2.82e-10 ***
poly(horsepower, 2)1 -19.47293    9.01539  -2.160 0.032035 *
poly(horsepower, 2)2  17.91697    5.17801   3.460 0.000667 ***
poly(weight, 2)1     -88.38118   10.19010  -8.673 1.90e-15 ***
poly(weight, 2)2      22.67798    5.33780   4.249 3.37e-05 ***
year                   0.76087    0.07059  10.778  < 2e-16 ***
origin                 0.50389    0.36643   1.375 0.170719
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.943 on 189 degrees of freedom
Multiple R-squared:  0.8639, Adjusted R-squared:  0.8596
F-statistic: 199.9 on 6 and 189 DF,  p-value: < 2.2e-16
>
>
mean((mpg-predict(m31, Auto))[-train1]^2)
[1] 11.84881
> mean((mpg-predict(m32, Auto))[-train2]^2)
[1] 11.24764
> mean((mpg-predict(m41, Auto))[-train1]^2)
[1] 8.651271
> mean((mpg-predict(m42, Auto))[-train2]^2)
[1] 8.428948
>
>
#validasi LOOCV
> library(boot)
> m3a <- glm(mpg~displacement+horsepower+weight+year+origin, data = Auto)
> m4a <- glm(mpg~poly(horsepower,2)+poly(weight,2)+year+origin, data = Auto)
> cv3 <- cv.glm(Auto, m3a)
> cv4 <- cv.glm(Auto, m4a)
> cv3$delta
[1] 11.30361 11.30313
> cv4$delta
[1] 8.681069 8.680636
>
> #validasi k-cross validation
> cv.glm(Auto, m3a, K=5)$delta
[1] 11.24819 11.21356
> cv.glm(Auto, m4a, K=5)$delta
[1] 8.756626 8.710914
> cv.glm(Auto, m3a, K=10)$delta
[1] 11.20207 11.18776
> cv.glm(Auto, m4a, K=10)$delta
[1] 8.749589 8.728033
Tidak ada komentar:
Write komentar