library(nlme) # In R there are user-contributed "packages" of function
# that do extend the basic capabilities of R with all sorts of
# different statistical methods. Some packages are built into the
# basic distribution of R (nlme is one such package), others
# have to be downloaded and installed. Either way, the functions
# and capabilities in a given package are not available in an R
# session until the pakcage has been loaded with the command
# library(package_name).
# The nlme package is a big one with lots of tools for
# exploring grouped or clustered data, and for fitting
# linear and nonlinear mixed effect models. For this example,
# we are just fitting a simple one-way analysis of variance
# model, which is a linear model (not a mixed model at all)
# but there are couple of capabilities in the nlme package
# that are still convenient and useful here.
# other useful packages used in this script:
library(MASS)
library(car)
library(lsmeans)
gasdata <- read.table(file="gasadd.dat",header=T) # read the data from an
# external file and grab the
# variable names from the
# 1st line of that file.
# The data are placed into
# a "data frame", which I've
# called gasdata
head(gasdata) #look at the first few rows of gasdata
gasdata$addfac <- factor(gasdata$add, ordered=T)
# by default a numeric variable is not treated when
# read in, so make a copy of the add variable that
# R will now treat as a factor, with levels that
# are assumed to be ordered (rather than qualitative)
attach(gasdata) # this makes the variables within gasdata available
# so you can refer to them as add or octane rather than
# gasadd$add or gasadd$octane
dotchart(octane,groups=addfac) # plot the octane values with a dotplot
title(main="Dotplot of Gasoline Additive Data",xlab="Octane",
ylab="Amt of Additive") # titles and x and y axis labels can be added
# with the title() function or the main, xlab
# and ylab arguments can be added to the original
# dotchart() function call
# I like the dotplot produced by default when you plot the data after they
# have been turned into a groupedData object. Such an object is a data frame
# but with extra information attached to it that comes in the form of a
# formula that identifies the response variable, (octane below), the
# covariate or explanatory variable that we want to regard as the "primary"
# covariate (for plotting and modeling purposes) and any grouping structure.
# Below the formula is "octane~1|addfac". Here the "~" plays the role of an
# equal sign, octane is the response (or y variable), 1 represents an
# intercept or constant term, and what comes after the vertical bar is
# the factor that identifies groups.
gasdata2 <- groupedData(octane~1|addfac, gasdata)
plot(gasdata2) # R is an "object-oriented" computing language. That means that
# quantities that we specify are treated as one of many
# different types of objects, and functions that operate
# on those quantities figure out what to do based upon what
# kind of object that quantity is. E.g., the plot function
# figure out how to plot its argument based upon the argument's
# object type. In this case, gasdata2 is a groupedData
# object, and the default way to handle it is to produce
# a dotplot.
title(main="Dotplot of Gasoline Additive Data")
# side-by-side boxplots are also useful:
plot(octane~addfac,data=gasdata) # this shows another use of the plot function
# it can take a formula as argument and
# figure out what to do
title(main="Boxplots of octane for each level of additive")
# Now fit a one way anova model using the aov() function. The aov() function
# is just a wrapper for the lm() function, which fits linear models of all
# sorts of varieties (regression models, anova models, ancova models).
# That is, aov() calls lm() where the model fitting is actually done. But
# this is convenient because the default reporting of a fitted lm is not as
# suitable for our purposes (not as suitable for an anova model) as the
# default reporting of a fitted aov model
m1 <- aov(octane~addfac,gasdata)
m1 # this "prints" the fitted model
summary(m1) # this summarizes the fitted model.
coefficients(m1) # this gives the actual fitted model parameters
summary.lm(m1) # this summarizes the fitted model treating it as generic lm
# not necessarily an aov model. This is an easy way to get some
# additional useful output.
# now estimate the mean for each level of additive:
lsmeans(m1, specs = ~ addfac)
# a contrast can be tested using the linearHypothesis function from the
# car package. We need to define contrasts we are interested and then
# stack them up as the rows of a matrix which we'll call hypmat1
# A command like
# x <- 3
# assigns 3 to the variable x and produces no output
# If we put parentheses around that assignment, the assignment is done
# but in addition, R prints out the value of the variable after the
# assignment has been executed:
# (x <- 3)
# Still assigns 3 to x, but also prints out 3
( hypmat1 <- matrix( c(0,1,0,0,0, 0,0,1,0,0, 0,0,0,1,0, 0,0,0,0,1),
nrow=4,byrow=T ) )
linearHypothesis(m1,hypothesis.matrix=hypmat1) # main effect of additive
( hypmat2 <- hypmat1[2:4,] )
linearHypothesis(m1,hypothesis.matrix=hypmat2) # nonlinear effects of add
# fit simple linear regression of octane on amount of additive
m2 <- lm (octane~add,data=gasdata)
summary(m2)