library(ISLR)
# Load the Hitters data and build the predictor matrix `x` and response `y`.
data(Hitters)
# Salary may contain NAs. Drop incomplete rows once, up front, so that `x` and
# `y` are guaranteed to come from exactly the same rows. Previously the rows
# of `x` were removed implicitly by model.matrix() (default na.action), and
# the follow-up filter on `y` was a no-op because `y` was already NA-free.
hitters_complete <- na.omit(Hitters)
# model.matrix() expands factors into dummy columns; [, -1] removes the
# intercept column.
x <- model.matrix(Salary ~ ., hitters_complete)[, -1]
y <- hitters_complete$Salary
# Guard against any future misalignment between predictors and response.
stopifnot(nrow(x) == length(y))
# Requires the glmnet package; install it once with install.packages("glmnet").
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 3.0-1
# Candidate penalties: 100 values, log-spaced from 10^10 down to 10^-2.
grid <- 10^seq(10, -2, length.out = 100)
# alpha = 0 requests the ridge penalty; one fit is produced per value in grid.
ridge.mod <- glmnet(x, y, alpha = 0, lambda = grid)
# Coefficient matrix: one row per term (incl. intercept), one column per lambda.
print(dim(coef(ridge.mod)))
## [1] 20 100
# Inspect the fit stored at the 50th lambda of the grid.
print(ridge.mod[["lambda"]][50])
## [1] 11497.57
coef(ridge.mod)[, 50]
## (Intercept) AtBat Hits HmRun Runs
## 407.356050200 0.036957182 0.138180344 0.524629976 0.230701523
## RBI Walks Years CAtBat CHits
## 0.239841459 0.289618741 1.107702929 0.003131815 0.011653637
## CHmRun CRuns CRBI CWalks LeagueN
## 0.087545670 0.023379882 0.024138320 0.025015421 0.085028114
## DivisionW PutOuts Assists Errors NewLeagueN
## -6.215440973 0.016482577 0.002612988 -0.020502690 0.301433531
# Euclidean (L2) norm of the coefficients, leaving out the intercept (row 1).
print(sqrt(sum(coef(ridge.mod)[-1, 50]^2)))
## [1] 6.360612
# Same inspection at the 60th lambda of the grid, for comparison.
print(ridge.mod[["lambda"]][60])
## [1] 705.4802
coef(ridge.mod)[, 60]
## (Intercept) AtBat Hits HmRun Runs
## 54.32519950 0.11211115 0.65622409 1.17980910 0.93769713
## RBI Walks Years CAtBat CHits
## 0.84718546 1.31987948 2.59640425 0.01083413 0.04674557
## CHmRun CRuns CRBI CWalks LeagueN
## 0.33777318 0.09355528 0.09780402 0.07189612 13.68370191
## DivisionW PutOuts Assists Errors NewLeagueN
## -54.65877750 0.11852289 0.01606037 -0.70358655 8.61181213
# Euclidean (L2) norm of the coefficients, leaving out the intercept (row 1).
print(sqrt(sum(coef(ridge.mod)[-1, 60]^2)))
## [1] 57.11001
# Coefficients at lambda = 50, requested through predict() with
# type = "coefficients"; the first 20 rows are shown.
predict(ridge.mod, s = 50, type = "coefficients")[seq_len(20), ]
## (Intercept) AtBat Hits HmRun Runs
## 4.876610e+01 -3.580999e-01 1.969359e+00 -1.278248e+00 1.145892e+00
## RBI Walks Years CAtBat CHits
## 8.038292e-01 2.716186e+00 -6.218319e+00 5.447837e-03 1.064895e-01
## CHmRun CRuns CRBI CWalks LeagueN
## 6.244860e-01 2.214985e-01 2.186914e-01 -1.500245e-01 4.592589e+01
## DivisionW PutOuts Assists Errors NewLeagueN
## -1.182011e+02 2.502322e-01 1.215665e-01 -3.278600e+00 -9.496680e+00
# Reproducible random split of the observations into training and test halves.
set.seed(1)
# seq_len() is safe for a zero-row matrix, and %/% makes the sample size an
# explicit integer instead of relying on sample() truncating nrow(x) / 2.
train <- sample(seq_len(nrow(x)), nrow(x) %/% 2)
# Negative indices select every row that is not in the training set.
test <- -train
y.test <- y[test]
# Refit ridge regression on the training rows only, with a tighter
# convergence threshold than the default.
ridge.mod <- glmnet(
  x[train, ], y[train],
  alpha = 0, lambda = grid, thresh = 1e-12
)
# Test-set mean squared error at lambda = 4.
ridge.pred <- predict(ridge.mod, s = 4, newx = x[test, ])
print(mean((ridge.pred - y.test)^2))
## [1] 142199.2
# Baseline for comparison: predict the training-set mean for every test row.
print(mean((mean(y[train]) - y.test)^2))
## [1] 224669.9
# Test-set MSE under a very large penalty, where the fit is close to the
# intercept-only model. 1e10 is the largest value in `grid`; the previous
# literal 10e10 (= 1e11) lay outside the fitted lambda range.
ridge.pred <- predict(ridge.mod, s = 1e10, newx = x[test, ])
print(mean((ridge.pred - y.test)^2))
## [1] 224669.8
# Test-set MSE requested at lambda = 0.
# NOTE(review): s = 0 is below the smallest value in `grid` (0.01) and no
# exact = TRUE is passed, so this is presumably not an exact least-squares
# refit -- confirm against the predict.glmnet documentation.
ridge.pred <- predict(ridge.mod, s = 0, newx = x[test, ])
print(mean((ridge.pred - y.test)^2))
## [1] 167789.8
# Choose lambda by cross-validation on the training rows; the seed is fixed
# so the result is repeatable.
set.seed(1)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)
plot(cv.out)
# lambda.min is the lambda with the smallest cross-validated error.
bestlam <- cv.out[["lambda.min"]]
# Test-set MSE of the training-set ridge fit at the selected lambda.
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])
print(mean((ridge.pred - y.test)^2))
## [1] 139856.6
print(bestlam)
## [1] 326.0828
# Refit ridge on all observations and report the coefficients at the
# cross-validated lambda; the first 20 rows are shown.
out <- glmnet(x, y, alpha = 0)
predict(out, s = bestlam, type = "coefficients")[seq_len(20), ]
## (Intercept) AtBat Hits HmRun Runs
## 15.44383135 0.07715547 0.85911581 0.60103107 1.06369007
## RBI Walks Years CAtBat CHits
## 0.87936105 1.62444616 1.35254780 0.01134999 0.05746654
## CHmRun CRuns CRBI CWalks LeagueN
## 0.40680157 0.11456224 0.12116504 0.05299202 22.09143189
## DivisionW PutOuts Assists Errors NewLeagueN
## -79.04032637 0.16619903 0.02941950 -1.36092945 9.12487767
# alpha = 1 requests the lasso penalty; fit on the training rows over the
# same lambda grid, then plot the fitted model.
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda = grid)
plot(lasso.mod)
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to
## unique 'x' values
# Choose the lasso lambda by cross-validation on the training rows; the seed
# is fixed so the result is repeatable.
set.seed(1)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 1)
plot(cv.out)
# lambda.min is the lambda with the smallest cross-validated error.
bestlam <- cv.out[["lambda.min"]]
# Predict the held-out rows at the selected lambda and report the test MSE.
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x[test, ])
print(bestlam)
## [1] 9.286955
print(mean((lasso.pred - y.test)^2))
## [1] 143673.6
# Refit the lasso on all observations and extract the coefficients at the
# cross-validated lambda. The recorded output below shows several
# coefficients that are exactly zero.
out <- glmnet(x, y, alpha = 1, lambda = grid)
lasso.coef <- predict(out, s = bestlam, type = "coefficients")[seq_len(20), ]
lasso.coef
## (Intercept) AtBat Hits HmRun Runs
## 1.27479059 -0.05497143 2.18034583 0.00000000 0.00000000
## RBI Walks Years CAtBat CHits
## 0.00000000 2.29192406 -0.33806109 0.00000000 0.00000000
## CHmRun CRuns CRBI CWalks LeagueN
## 0.02825013 0.21628385 0.41712537 0.00000000 20.28615023
## DivisionW PutOuts Assists Errors NewLeagueN
## -116.16755870 0.23752385 0.00000000 -0.85629148 0.00000000