***************************
* onecell1.sas ************
***************************;
* Example involving a four-way layout with a one-cel interaction;
* Considered in Lab 8;
*ods pdf file="mypath\onecell1.pdf";
ods pdf file="U:\Documents\courses\sta820\Fall13\onecell1.pdf";
options ls=78 nodate pageno=1 formdlim="*";
data one;
input A B C D y;
if (a=1 and b=1 and c=1 and d=1) then all_low=1;
else all_low=0;
cards;
1 1 1 1 26.1
1 1 1 1 27.5
1 1 1 2 23.5
1 1 1 2 21.1
1 1 2 1 22.8
1 1 2 1 23.8
1 1 2 2 30.6
1 1 2 2 32.5
1 2 1 1 22
1 2 1 1 20.2
1 2 1 2 28.1
1 2 1 2 29.9
1 2 2 1 30
1 2 2 1 29.3
1 2 2 2 38.3
1 2 2 2 38.5
2 1 1 1 11.4
2 1 1 1 11
2 1 1 2 20.4
2 1 1 2 22
2 1 2 1 22.3
2 1 2 1 20.2
2 1 2 2 28.7
2 1 2 2 28.8
2 2 1 1 18.9
2 2 1 1 16.4
2 2 1 2 26.6
2 2 1 2 26.5
2 2 2 1 29.6
2 2 2 1 29.8
2 2 2 2 34.5
2 2 2 2 34.9
;
run;
* First just analyze data from the three-way experiment that you get by omitting the data from
the D=2 (high) condition. This is just to keep things relatively simple (to have a 3-way layout
rather than a 4-way);
title 'Full 3-way ANOVA Model to data from D=1 (low)';
proc glm data=one;
where D=1; * the WHERE statement restricts the PROC to only the data satisfying the specified condition;
class A B C;
model y=A|B|C;
lsmeans a*b*c/out=trtmeans;
run;
* There's a significant three-way interaction.
This means the B*C profile plot looks different when A=1 than when A=2.
Let's look at those plots to see:
;
proc sort data=trtmeans;
by A;
run;
goptions reset=all;
symbol1 value=square color=black interpol=join;
symbol2 value=circle color=red interpol=join;
symbol3 value=star color=blue interpol=join;
symbol4 value=triangle color=green interpol=join;
title 'Profile plots (aka interaction plots) of joint means';
proc gplot data=trtmeans;
by A;
plot lsmean*B=C/vaxis=10 to 30 by 5;;
run;quit;
* OK. Now that we've learned something about 3-way interactions,
let's tackle the full data set
;
title 'Full 4-way ANOVA Model to entire data set';
proc glm data=one;
class A B C D;
model y=A|B|C|D;
lsmeans a*b*c*d/out=trtmeans2;
run;
*The significant 4-way interaction here means that the three-way interaction
between A, B and C looks different when D=low than when D=high. That's really
hard to wrap your mind around.
One thing that can be helpful is to collapse two factors down to one.
E.g, instead of thinking of A and B as two separate factors with 2 and 2 levels
respectively, think of the 4 combinations of A and B as four levels of a single
factor "AB". Do the same for a factor "CD". Then draw an "AB" by "CD" profile
plot to try and see what is going on in our 4 way interaction.
;
data trtmeans3;
set trtmeans2;
AB=10*A+B; * AB now has levels 11, 12, 21, 22;
CD=10*C+D; * CD now has levels 11, 12, 21, 22;
if ab>19 then ab=ab-18;
else ab=ab-10; * AB now has levels 1, 2, 3, 4;
if cd>19 then cd=cd-18;
else cd=cd-10; * CD now has levels 1, 2, 3, 4;
run;
title 'Estimated joint means from 4-way model';
proc print data=trtmeans3;
run;
goptions reset=all;
symbol1 value=square color=black interpol=join;
symbol2 value=circle color=red interpol=join;
symbol3 value=star color=blue interpol=join;
symbol4 value=triangle color=green interpol=join;
title 'Profile plots (aka interaction plots) of joint means';
proc gplot data=trtmeans3;
plot lsmean*AB=CD;
run;quit;
*
Notice that there is a one-cell interaction here.
If it weren't for the data from AB=CD=1 (A=1, B=1, C=1, D=1)
then the profiles look like they'd be parallel.
It looks like a model which assumes the factor effects are additive except
for that one treatment (all factors at their low level) would fit
the data addequately.
Fit that model, then the adequacy of the aditive plus one cell model versus
the full 4-way model can be compared via the principle of conditional
error (F test of nested models);
title 'Additive main effects model plus once cell interaction - all treatments';
proc glm data=one;
class a b c d;
model y=a b c d all_low; * all_low is an indicator (dummy variable) = 1 if A=B=C=D=1, = 0 otherwise;
output out=pred p=pred stdp=sepred;
run;
*
Principle of conditional error F test:
F= [(SSE(partial)-SSE(full))/(dfE(partial)-dfE(full))]/MSE(full)
= [(33.21-16.47)/(26-16)]/1.029
=1.63
Compare against F_.05(10,16)=2.49
Simpler model is adequate.
Factor effects are additive except for the all factors at low level treatment;
title 'Predicted values (treatment means) from one-cell model';
proc print data=pred;
run;
data pred2;
set pred;
if _N_/2 ne floor(_N_/2); *delete duplicate predictions, so we have one estimated mean/treatment;
AB=10*A+B; * AB now has levels 11, 12, 21, 22;
CD=10*C+D; * CD now has levels 11, 12, 21, 22;
if ab>19 then ab=ab-18;
else ab=ab-10; * AB now has levels 1, 2, 3, 4;
if cd>19 then cd=cd-18;
else cd=cd-10; * CD now has levels 1, 2, 3, 4;
run;
title 'Treatment means (again) from one-cell model.'; * same data as in pred, but I've now deleted extra rows
so that there's just one estimated treatment mean per treatment;
proc print data=pred2;
run;
goptions reset=all;
symbol1 value=square color=black interpol=join;
symbol2 value=circle color=red interpol=join;
symbol3 value=star color=blue interpol=join;
symbol4 value=triangle color=green interpol=join;
title 'Profile plots from one-cell interaction model';
proc gplot data=pred2;
plot pred*AB=CD;
run;quit;
ods pdf close;