setwd("C:/Documents and Settings/William Christensen/My Documents/stat230")

smallfish <- cbind( c(5.4, 5.2, 6.1, 4.8, 5.0, 4.8, 3.9, 4.0, 4.8, 5.4, 4.9, 5.7),
               c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3))
smallfish
tapply(smallfish[,1],smallfish[,2],mean)
tapply(smallfish[,1],smallfish[,2],sd)

boxplot(smallfish[,1] ~ smallfish[,2])


mean(smallfish[,1])
flavormat <- matrix( smallfish[,1], nrow=3, ncol=4, byrow=TRUE)
resids <- flavormat - c(tapply(smallfish[,1],smallfish[,2],mean))  # Using R's vectorizing property
hist(resids)

#OR#
smallfish.lm <- lm(smallfish[,1] ~ factor(smallfish[,2]))
residuals(smallfish.lm)
hist(residuals(smallfish.lm))




## Same EDA, but on the complete fish data set
fish <- read.table("fish.txt", header=TRUE)
# OR #
fish <- read.table(header=TRUE, text="
                   method     aroma flavor texture moisture
                   1         5.4  6.0  6.3  6.7 
                   1         5.2  6.5  6.0  5.8 
                   1         6.1  5.9  6.0  7.0 
                   1         4.8  5.0  4.9  5.0 
                   1         5.0  5.7  5.0  6.5 
                   1         5.7  6.1  6.0  6.6 
                   1         6.0  6.0  5.8  6.0 
                   1         4.0  5.0  4.0  5.0 
                   1         5.7  5.4  4.9  5.0 
                   1         5.6  5.2  5.4  5.8 
                   1         5.8  6.1  5.2  6.4 
                   1         5.3  5.9  5.8  6.0 
                   2         5.0  5.3  5.3  6.5 
                   2         4.8  4.9  4.2  5.6 
                   2         3.9  4.0  4.4  5.0 
                   2         4.0  5.1  4.8  5.8 
                   2         5.6  5.4  5.1  6.2 
                   2         6.0  5.5  5.7  6.0 
                   2         5.2  4.8  5.4  6.0 
                   2         5.3  5.1  5.8  6.4 
                   2         5.9  6.1  5.7  6.0 
                   2         6.1  6.0  6.1  6.2 
                   2         6.2  5.7  5.9  6.0 
                   2         5.1  4.9  5.3  4.8 
                   3         4.8  5.0  6.5  7.0 
                   3         5.4  5.0  6.0  6.4 
                   3         4.9  5.1  5.9  6.5 
                   3         5.7  5.2  6.4  6.4 
                   3         4.2  4.6  5.3  6.3 
                   3         6.0  5.3  5.8  6.4 
                   3         5.1  5.2  6.2  6.5 
                   3         4.8  4.6  5.7  5.7 
                   3         5.3  5.4  6.8  6.6 
                   3         4.6  4.4  5.7  5.6 
                   3         4.5  4.0  5.0  5.9 
                   3         4.4  4.2  5.6  5.5")
fish[,3] ## these are the flavor scores...also called fish$flavor
fish[,1] ## this is the method variable...also called fish$method

mean(fish[,3])
apply(fish[,2:5],2,mean)

head(fish)
tapply(fish[,3],fish[,1],mean)
tapply(fish[,3],fish[,1],sd)
boxplot(fish$flavor)
boxplot(fish[,3] ~ fish[,1])

fish.lm <- lm(fish[,3] ~ factor(fish[,1]))
anova(fish.lm)

hist(fish.lm$residuals)
# this next line does the same thing
hist(residuals(fish.lm))







## FRUIT FLY DATA

fly <- read.table("http://tofu.byu.edu/stat230/fruitfly3.txt",
   col.names=c("fly","companions","type","lifespan","thorax","sleeppercent","group"))
# OR #
fly <- read.table( col.names=c("fly","companions","type","lifespan","thorax","sleeppercent","group"),
                   text="
                   1 8 0 35 0.64 22 preg8
                   2 8 0 37 0.68  9 preg8
                   3 8 0 49 0.68 49 preg8
                   4 8 0 46 0.72  1 preg8
                   5 8 0 63 0.72 23 preg8
                   6 8 0 39 0.76 83 preg8
                   7 8 0 46 0.76 23 preg8
                   8 8 0 56 0.76 15 preg8
                   9 8 0 63 0.76  9 preg8
                   10 8 0 65 0.76 81 preg8
                   11 8 0 56 0.80 12 preg8
                   12 8 0 65 0.80 15 preg8
                   13 8 0 70 0.80 37 preg8
                   14 8 0 63 0.84 24 preg8
                   15 8 0 65 0.84 26 preg8
                   16 8 0 70 0.84 17 preg8
                   17 8 0 77 0.84 14 preg8
                   18 8 0 81 0.84 14 preg8
                   19 8 0 86 0.84  6 preg8
                   20 8 0 70 0.88 25 preg8
                   21 8 0 70 0.88 18 preg8
                   22 8 0 77 0.92 26 preg8
                   23 8 0 77 0.92 24 preg8
                   24 8 0 81 0.92 29 preg8
                   25 8 0 77 0.94 27 preg8
                   1 0 9 40 0.64 18 nocomp
                   2 0 9 37 0.70  6 nocomp
                   3 0 9 44 0.72 19 nocomp
                   4 0 9 47 0.72  7 nocomp
                   5 0 9 47 0.72 16 nocomp
                   6 0 9 47 0.76 13 nocomp
                   7 0 9 68 0.78 35 nocomp
                   8 0 9 47 0.80  2 nocomp
                   9 0 9 54 0.84 35 nocomp
                   10 0 9 61 0.84  6 nocomp
                   11 0 9 71 0.84 15 nocomp
                   12 0 9 75 0.84 14 nocomp
                   13 0 9 89 0.84 18 nocomp
                   14 0 9 58 0.88 50 nocomp
                   15 0 9 59 0.88 25 nocomp
                   16 0 9 62 0.88 10 nocomp
                   17 0 9 79 0.88 33 nocomp
                   18 0 9 96 0.88 43 nocomp
                   19 0 9 58 0.92 35 nocomp
                   20 0 9 62 0.92 17 nocomp
                   21 0 9 70 0.92 27 nocomp
                   22 0 9 72 0.92 22 nocomp
                   23 0 9 75 0.92 16 nocomp
                   24 0 9 96 0.92 20 nocomp
                   25 0 9 75 0.94 37 nocomp
                   1 1 0 46 0.64 23 preg1
                   2 1 0 42 0.68  4 preg1
                   3 1 0 65 0.72 20 preg1
                   4 1 0 46 0.76 42 preg1
                   5 1 0 58 0.76  9 preg1
                   6 1 0 42 0.80 32 preg1
                   7 1 0 48 0.80 66 preg1
                   8 1 0 58 0.80 28 preg1
                   9 1 0 50 0.82 10 preg1
                   10 1 0 80 0.82  4 preg1
                   11 1 0 63 0.84 12 preg1
                   12 1 0 65 0.84 17 preg1
                   13 1 0 70 0.84 12 preg1
                   14 1 0 70 0.84 23 preg1
                   15 1 0 72 0.84 40 preg1
                   16 1 0 97 0.84 18 preg1
                   17 1 0 46 0.88 10 preg1
                   18 1 0 56 0.88 38 preg1
                   19 1 0 70 0.88  7 preg1
                   20 1 0 70 0.88 23 preg1
                   21 1 0 72 0.88 36 preg1
                   22 1 0 76 0.88  9 preg1
                   23 1 0 90 0.88 21 preg1
                   24 1 0 76 0.92 62 preg1
                   25 1 0 92 0.92 36 preg1
                   1 1 1 21 0.68 23 virgin1
                   2 1 1 40 0.68 62 virgin1
                   3 1 1 44 0.72 28 virgin1
                   4 1 1 54 0.76 18 virgin1
                   5 1 1 36 0.78 10 virgin1
                   6 1 1 40 0.80 28 virgin1
                   7 1 1 56 0.80 22 virgin1
                   8 1 1 60 0.80 29 virgin1
                   9 1 1 48 0.84 15 virgin1
                   10 1 1 53 0.84 73 virgin1
                   11 1 1 60 0.84 10 virgin1
                   12 1 1 60 0.84  5 virgin1
                   13 1 1 65 0.84 13 virgin1
                   14 1 1 68 0.84 27 virgin1
                   15 1 1 60 0.88 20 virgin1
                   16 1 1 81 0.88 21 virgin1
                   17 1 1 81 0.88 12 virgin1
                   18 1 1 48 0.90 49 virgin1
                   19 1 1 48 0.90 17 virgin1
                   20 1 1 56 0.90 22 virgin1
                   21 1 1 68 0.90 71 virgin1
                   22 1 1 75 0.90 17 virgin1
                   23 1 1 81 0.90 10 virgin1
                   24 1 1 48 0.92 24 virgin1
                   25 1 1 68 0.92 18 virgin1
                   1 8 1 16 0.64 34 virgin8
                   2 8 1 19 0.64  6 virgin8
                   3 8 1 19 0.68  4 virgin8
                   4 8 1 32 0.72 22 virgin8
                   5 8 1 33 0.72 28 virgin8
                   6 8 1 33 0.74 31 virgin8
                   7 8 1 30 0.76 16 virgin8
                   8 8 1 42 0.76 27 virgin8
                   9 8 1 42 0.76  8 virgin8
                   10 8 1 33 0.78 32 virgin8
                   11 8 1 26 0.80 20 virgin8
                   12 8 1 30 0.80 35 virgin8
                   13 8 1 40 0.82 12 virgin8
                   14 8 1 54 0.82 14 virgin8
                   15 8 1 34 0.84 17 virgin8
                   16 8 1 34 0.84 29 virgin8
                   17 8 1 47 0.84 31 virgin8
                   18 8 1 47 0.84  6 virgin8
                   19 8 1 42 0.88 30 virgin8
                   20 8 1 47 0.88 27 virgin8
                   21 8 1 54 0.88 40 virgin8
                   22 8 1 54 0.88 19 virgin8
                   23 8 1 56 0.88  8 virgin8
                   24 8 1 60 0.88  8 virgin8
                   25 8 1 44 0.92 15 virgin8                   
                   ") 


mean(fly$lifespan)
tapply(fly$lifespan,fly$group,mean)
tapply(fly$lifespan,fly$group,sd)
plot( tapply(fly$lifespan,fly$group,mean) , tapply(fly$lifespan,fly$group,sd) , type ="n", ylim=c(0,20) )  
         ## Above plots the group means against the group sds, with "n" indicating no points plotted....
text( tapply(fly$lifespan,fly$group,mean) , tapply(fly$lifespan,fly$group,sd) , 
   names(tapply(fly$lifespan,fly$group,mean)) , cex=0.75 )
         ## ....instead, put the group labels at each point with font size .75 times the normal size

boxplot(fly$lifespan ~ fly$group, main="Lifespan in days")
boxplot(lifespan ~ group, data=fly)
points( 1:5, tapply(fly$lifespan,fly$group,mean), pch="x", col="red")




## ANOVA

fly.lm <- lm(lifespan ~ group, data=fly)
## OR ##
fly.lm <- lm(fly$lifespan ~ fly$group)
fly.lm
names(fly.lm)

anova(fly.lm)
#summary(fly.lm)



## Quiz Question
xpts <- seq(0,20,.01)
plot(xpts,df(xpts,3-1, 3*10 - 3),
     main="F(2,27) [black] and F(2,27,Phi=1.8619) [red]
     critical value for alpha=.05 noted in green",cex.main=.85,type="l",ylab="density")
abline(h=0,v=0)
abline(v=qf(.95,2,27),col="green")
lines(xpts,df(xpts,2,27, 3*1.8619^2),col="red")



## POWER & SAMPLE SIZE
# This is the laborious way to calculate power
n <- 30
siglevel <- 0.05
numgroups <- 5
alphai <- c(-2,-2,-2,-2,8)    
   # notice that sum(alphai^2) is the same as (5-1)*var(c(0,0,0,0,10))
sigma <- 11.67
Phi2 <- n * sum(alphai^2) / (numgroups * sigma^2)
Phi2
 
fcrit <- qf(1 - siglevel, numgroups-1, numgroups*n - numgroups)
fcrit
power <- 1 - pf(fcrit, numgroups-1, numgroups*n - numgroups, numgroups*Phi2)  
  # Notice that the non-centrality parameter
  #   required by R is numgroups*Phi2
power

xpts <- seq(0,20,.01)
plot(xpts,df(xpts,numgroups-1, numgroups*n - numgroups),
     main="F(4,120) [black] and F(4,120,Phi) [red]",type="l",ylab="density")
lines(xpts,df(xpts,numgroups-1, numgroups*n - numgroups, numgroups*Phi2),col="red")
#lines(density(rf(100000,numgroups-1, numgroups*n - numgroups, 2*numgroups*Phi2)),col="green")
abline(v=fcrit,col="green")
abline(h=0,v=0)


# This is the EASY way to calculate power or sample size #

## Give n and leave power out of list of arguments: get power calculated
power.anova.test(groups=5, between.var=var(c(0,0,0,0,10)), within.var=11.67^2, 
                  sig.level=0.05, n=25)

## Give power and leave n out of list of arguments: get n calculated
power.anova.test(groups=5, between.var=var(c(0,0,0,0,10)), within.var=(11.67)^2, 
    sig.level=0.05, power=.869)

power.anova.test(groups=5, between.var=var(c(0,0,0,0,10)), within.var=11.67^2, 
    sig.level=0.05, power=.995)


## Make a power curve (plot n on the x-axis and power on the y-axis)
junk <- power.anova.test(groups=5, between.var=var(c(0,0,0,0,10)), 
         within.var=11.67^2, sig.level=0.05, n=25)
names(junk)
junk$power  


# Power analysis for example in class
ns <- 2:50
powers <- power.anova.test(groups=5, between.var=var(c(0,0,0,0,10)), 
                           within.var=11.67^2, sig.level=0.05, n=ns)$power
plot(ns,powers,type="l",ylim=c(0,1))

# Power analysis...if we expect one of the groups to be 20 units from the other 4 groups
powersc <- power.anova.test(groups=5, between.var=var(c(0,0,0,0,20)), 
                            within.var=11.67^2, sig.level=0.05, n=ns)$power
lines(ns,powersc,col="brown",lty="dotted")

# Power analysis...if we expect alphas to be -5,0,0,0,5
powersd <- power.anova.test(groups=5, between.var=var(c(-5,0,0,0,5)), 
                            within.var=11.67^2, sig.level=0.05, n=ns)$power
lines(ns,powersd,col="green")

# Power analysis...if we expect copulation groups to have means 10 days longer than non-cop groups
powerse <- power.anova.test(groups=5, between.var=var(c(0,0,0,10,10)), 
                            within.var=11.67^2, sig.level=0.05, n=ns)$power
lines(ns,powerse,col="orange",lwd=2)


