Based on this tutorial: https://nicercode.github.io/guides/functions/
Example of code that does not use functions: https://nicercode.github.io/guides/functions/before.html
# Return the sum of the squares of x and y (computed element-wise when the
# arguments are vectors).
sum.of.squares <- function(x, y) {
  x_squared <- x^2
  y_squared <- y^2
  x_squared + y_squared
}
# Example: (-3)^2 + 4^2
sum.of.squares(-3, 4)
## [1] 25
Procedure for writing functions:
# Template showing the general shape of a function definition:
#   - arg1, arg2: required arguments
#   - arg3: optional argument, defaults to 2
#   - ...: accepted, but not used inside this body
# The value of the last evaluated expression is what the function returns,
# so an explicit return() call is optional.
function.name <- function(arg1, arg2, arg3 = 2, ...) {
  sine_total <- sin(arg1) + sin(arg2)  # do some stuff
  # Equivalent explicit form: return(sine_total / arg3)
  sine_total / arg3
}
function.name(1, 2)
## [1] 0.8753842
# Load function definitions from an external script, then call one of them.
# NOTE(review): exp_absolute_diff is presumably defined in examplefunction.R;
# that file is not shown here -- confirm.
source("examplefunction.R")
exp_absolute_diff(10, 15)
## [1] 148.4132
# Scatter plot drawn in red. Any further arguments supplied through `...`
# are forwarded unchanged to plot().
red.plot <- function(x, y, ...) {
  plot(x = x, y = y, col = "red", ...)
}
# red.plot with only the named arguments
red.plot(1:10, 1:10)
# red.plot forwarding extra arguments through the ellipsis (`...`)
red.plot(1:10, 1:10, xlab = "My X Axis", ylab = "My Y Axis")
exp_sin_plot <- function(x, y, color = par("col")) {
  # PURPOSE: This function takes a vector x and plots the function
  #          exp(y*sin(x)) against x, as points joined by lines.
  #
  # INPUT:  x, a vector of points at which the function is evaluated
  #         y, a scalar multiplier applied to sin(x)
  #         color, the colour used for the points and connecting lines;
  #                defaults to the current graphics setting, par("col"),
  #                so calls that omit it draw exactly as before
  # OUTPUT: a plot of exp(y*sin(x))
  f_x <- exp(y * sin(x))
  plot(x, f_x,
       xlab = "X", ylab = paste("exp(", y, "*sin(X))"),
       type = "o", col = color)
}
# Spell out length.out rather than relying on partial argument matching.
exp_sin_plot(seq(1, 20, length.out = 200), 2)
# A top-level variable named p; the function below has its own parameter p,
# so calls to logit() use the value passed in rather than this one.
p <- 0.2
# Log-odds of a probability p: log(p) - log(1 - p).
logit <- function(p) {
  log_p <- log(p)
  log_complement <- log(1 - p)
  log_p - log_complement
}
logit(0.3)
## [1] -0.8472979
Example code calculating the squares of the even numbers in a sequence:
# Baseline: collect the squares of the even numbers in 1..1000 with an
# explicit loop, and time how long it takes.
x <- seq(1, 1000)
even_x_square <- c()
# Use a for loop
start_time <- Sys.time()
for (i in x) {
  if (i %% 2 == 0) {
    # The result vector grows by one element for every even number found.
    even_x_square <- c(even_x_square, i^2)
  }
}
end_time <- Sys.time()
print(end_time - start_time)
## Time difference of 0.005947828 secs
In general, when working with large datasets you should avoid explicit loops wherever a vectorized alternative exists; vectorized code is much quicker.
# Same result as the loop version, computed in one vectorised expression:
# keep the even elements of x, then square them all at once.
start_time <- Sys.time()
even_x_square <- x[x %% 2 == 0]^2
end_time <- Sys.time()
print(end_time - start_time)
## Time difference of 0.001984119 secs
# Stock market data from last class, read from a CSV file in the working
# directory.
Smarket <- read.csv(file = "Smarket.csv")
run_smarket_analysis <- function(dataset, x_variables, split_year = 2005) {
  # PURPOSE: Fit a logistic regression of Direction on the given predictors
  #          using the rows before split_year, then measure how often the
  #          fitted model predicts Direction correctly on the remaining rows.
  #
  # INPUT:  dataset, a data frame with columns Year and Direction plus every
  #                  column named in x_variables
  #         x_variables, a character vector of predictor column names
  #         split_year, rows with Year < split_year are used for fitting and
  #                     all other rows for evaluation (default 2005)
  # OUTPUT: the proportion of evaluation rows whose predicted label ("Up"
  #         when the predicted probability exceeds 0.5, otherwise "Down")
  #         equals Direction
  regression_formula <- reformulate(x_variables, response = "Direction")

  train <- dataset$Year < split_year
  dataset_test <- dataset[!train, ]
  direction_test <- dataset$Direction[!train]

  glm_fit <- glm(regression_formula,
                 data = dataset,
                 family = binomial,
                 subset = train)
  glm_probs <- predict(glm_fit, dataset_test, type = "response")

  # Size the prediction vector from the evaluation set instead of a fixed
  # row count, so the accuracy is correct for any dataset or split.
  glm_pred <- rep("Down", length(glm_probs))
  glm_pred[glm_probs > 0.5] <- "Up"
  mean(glm_pred == direction_test)
}
# Predictor sets to compare; one model is fitted per entry.
variable_groups <- list(
  "Lag1",
  "Lag2",
  c("Lag1", "Lag2"),
  c("Lag1", "Lag2", "Lag3", "Volume")
)
# Print each predictor set followed by the test accuracy it achieves.
for (vars in variable_groups) {
  cat("\n\n")
  print(vars)
  cat("Test Accuracy", run_smarket_analysis(Smarket, vars))
}
##
##
## [1] "Lag1"
## Test Accuracy 0.5396825
##
## [1] "Lag2"
## Test Accuracy 0.5912698
##
## [1] "Lag1" "Lag2"
## Test Accuracy 0.5595238
##
## [1] "Lag1" "Lag2" "Lag3" "Volume"
## Test Accuracy 0.4761905