rm(list=ls())
library(parcor)
####################################################################################
#
# applyAdalasso: A function that, given a model matrix and response values,
# returns the model identified by adaptive lasso in the package
# parcor (Kraemer and Schafer, 2015) that satisfies effect heredity
#
# Requires: library(parcor)
#
# Inputs:
# X: Model matrix
# Y: Vector of response values
#
# Output:
# model: Best model identified by adaptive lasso that obeys effect heredity
#
# Example:
# castFatigue = read.table("http://www2.isye.gatech.edu/~jeffwu/book/data/CastFatigue.dat")
# data = castFatigue[,-1]
#
# Y = data[,ncol(data)]
#
# X = NULL
# X[data=="+"] = 1
# X[data=="-"] = -1
# X = matrix(X,nrow = 12)
# X = X[,-ncol(X)]
#
# applyAdalasso(Y,X)
#
# model
# [1] "6" "6 X 7"
#
###################################################################################
applyAdalasso = function(Y,X)
{
# If X does not have names, assign each column its number as its name
if (length(colnames(X)) == 0) colnames(X) = as.character(c(1:ncol(X)))
# Used to identify interaction effects
# Breaks up strings into a vector of characters
separateBytes <- function(x)
{
j = NULL
for (i in 1:nchar(x))
{
j[i] <- substr(x,i,i)
}
return(j)
}
# Given a model matrix with K columns, generates an exanded model
# matrix of size K(K-1) which contains all main and two-level
# interaction effects for the model matrix
interactionMaker = function(X)
{
k = ncol(X)
names = colnames(X)
# For each main effect
for (i in 1:(k-1))
{
# For each subsequent main effect
for (j in (i+1):k)
{
# Compute their interaction term
X = cbind(X,X[,i]*X[,j])
# Name the interaction effect of I and J "I X J"
colnames(X)[ncol(X)] = paste(colnames(X)[i],"X",colnames(X)[j])
}
}
return(X)
}
# Create the expanded model matrix
XInter = interactionMaker(X)
# Given a list of models, calculates the model with the lowest Akaike Information Criterion
# value, and returns that model
findLowestAIC = function(G,Y,X)
{
AICgrid = matrix(c(1:10),ncol = 2)
sigEffects = list(NULL)
# For each of the 5 models found in the main function
for (i in 1:5)
{
# Stores a vector of the effects found significant in the Lasso regression
sigEffects[[i]] = colnames(X)[as.numeric(names(G[[i]]$coefficients.adalasso[abs(G[[i]]$coefficients.adalasso)>0]))]
# Running linear model with significant effects to get AIC
if (sum(abs(G[[i]]$coefficients.adalasso)>0) == 0) AICgrid[i,2] = 999999
else
{
g = lm(Y~X[,as.numeric(names(G[[i]]$coefficients.adalasso[abs(G[[i]]$coefficients.adalasso)>0]))])
AICgrid[i,2] = AIC(g)
}
}
# Extracting the model with that gives the lowest AIC value
model = (AICgrid[AICgrid[,2] == min(AICgrid[,2]),])[1]
return(sigEffects[[model]])
}
# Runs adalasso 5 times and selects model with lowest AIC
bestModel = function(Y,X)
{
G = list(NULL)
# Need to make the number of folds for adalasso less than nrow(X)-1
if (nrow(X) < 12)
{
nfolds = nrow(X) - 2
}
else
{
nfolds = 10
}
# Runs adalasso 5 times on the same data set
# This reduces the importance of the randomness in the cross-validation algorithm
# in adalasso.
for (k in 1:5)
{
G[[k]] = adalasso(X,Y,k=nfolds, use.Gram=T)
}
model = findLowestAIC(G,Y,X)
return(model)
}
# Best model found by adalasso according to AIC
# Still need to ensure the model obeys the effect heredity principle
model = bestModel(Y,XInter)
# Isolating significant main effects in the best model
main = c()
for (i in 1:length(model))
{
# If 'X' appears in the effect name it is an interaction and not included in main
if ("X" %in% separateBytes(model[i]) == 0) main = c(main,model[i])
}
####################################################################################
#
# effecHeredityMaker: A function that, given a vector of significant main effects,
# returns all ME and two way effects that obey effect heredity
# Inputs:
# X: Model matrix
# sigME: Vector of significant main effects
#
# Output:
# candidates: Effects that obey effect heredity
#
# Example:
# X = matrix(rep(c(0,1),times = 10),nrow = 5,ncol = 4)
# sigME = c("3")
#
# candidates
# [1] "3" "1 X 3" "2 X 3" "3 X 4"
#
####################################################################################
effectHeredityMaker = function(X,sigME)
{
names = colnames(X)
# Will contain models that obey effect heredity
candidates = c(NULL)
# For each significant main effect
for (i in 1:length(sigME))
{
# For each main effect in the model matrix
for (j in 1:length(names))
{
# delete significant main effect from names to avoid redundancies
if (sigME[i] %in% names[j]) names = names[-j]
}
for (j in 1:length(names))
{
# Creates a vector of interaction effects that contain at least one significant effect
# If statements purely cosmetic: maintain the original order of the columns of the model matrix
if (as.numeric(match(sigME[i],colnames(X))) < as.numeric(match(names[j],colnames(X))))
{
candidates = c(candidates,paste(sigME[i], "X", names[j]))
}
else
{
candidates = c(candidates,paste(names[j], "X", sigME[i]))
}
}
}
# Combine significant main effects with interaction effects that contain them
# The effects in candidates are the only effects that obey effect heredity
candidates = c(sigME,candidates)
return(candidates)
}
# Identify effects that obey effect heredity
candidates = effectHeredityMaker(X,main)
# Deleting effects identified by adaptive lasso that do not obey effect heredity
for (i in 1:length(model))
{
# If an effect in the model is not listed in candidates, delete it
if (model[i] %in% candidates == 0) model = model[-i]
}
return(model)
}