# qr1.R shows the perils of the (X'X)inv *X'y formula
# and illustrates how to obtain the qr decomposition
longley <- read.table(file="longley.dat",header=T)
# for info on this data set,
# see http://itl.nist.gov/div898/strd/lls/data/Longley.shtml
# this dataset is illconditioned so that the (X'X)inv X'y formula
# works poorly.
longley[1:3,]
nrow(longley)
library(MASS) #needed for the ginv() function
#(generalized inverse of a matrix)
X <-model.matrix(~x1+x2+x3+x4+x5+x6,data=longley)
# the above statement could also be accomplished via the command
# X <- as.matrix(longley[,2:7])
y <- longley$y
# a bad way to get betahat
betahat1 <- ginv(t(X)%*%X)%*%t(X)%*%y
# a good way to get betahat. The lm() function actually uses the qr
# decomposition to do the calculations
betahat2 <- coef(lm(y~x1+x2+x3+x4+x5+x6,data=longley))
# alternatively, here are the steps in the qr decomposition method
# of fitting the linear model that are "behind the scenes" in the
# lm() function
Xqr <- qr(X)
X.Q1 <- qr.Q(Xqr)
X.R1 <- qr.R(Xqr)
X.Q1
X.R1
betahat3 <- backsolve(X.R1,t(X.Q1)%*%y)
# compare betahats
cbind(betahat1,betahat2,betahat3)
# compare X*betahats (least squares fitted values, i.e., estimated means)
cbind(X%*%betahat1,X%*%betahat2,X%*%betahat3)
# for a more detailed comparison of algorithms for least-squares calculations
# in R, see the following webpage:
# http://cran.r-project.org/web/packages/Matrix/vignettes/Comparisons.pdf