-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path000_gxeonly_models.R
More file actions
165 lines (145 loc) · 8.25 KB
/
000_gxeonly_models.R
File metadata and controls
165 lines (145 loc) · 8.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
##Fit models with random effects of mom, population, and mom and population in soil treatment
##Testing also whether adding fixed effects of environmental variation explains population effects
library(MCMCglmm)
range01=function(x){
newnums <- sapply(1:length(x), function(z) ifelse(!is.na(x[z]), (x[z]-min(x,na.rm=T))/(max(x,na.rm=T)-min(x,na.rm=T)), NA ))
return(newnums)
}
fitmodelset <- function(traitcols,mainvarformula,dataframe) { #names of trait variables in data frame, fixed effects formula in text e.g. "~ 1", "~elevation", and data
r2fp <- list()
rbase <- list()
rpxs <- list()
rfxs <- list()
rfxspxs <- list()
rand2fp <- as.formula("~ dam + dam.pop")
randbase <- as.formula("~ dam.pop + dam + soiltrt")
randpxs <- as.formula("~ dam.pop + dam + soiltrt + dam.pop:soiltrt")
randfxs <- as.formula("~ dam.pop + dam + soiltrt + dam:soiltrt")
randfxspxs <- as.formula("~ dam.pop + dam + soiltrt + dam:soiltrt + dam.pop:soiltrt")
for(i in 1:length(traitcols)){
mainform <- as.formula(paste(traitcols[i], mainvarformula,sep=""))
r2fp[[i]] <- lapply(1:10, function(z)
MCMCglmm(mainform, random=rand2fp, family="gaussian", data=dataframe,verbose=FALSE,pr=TRUE,nitt=100000, thin=100, burnin=20000))
rbase[[i]] <- lapply(1:10, function(z)
MCMCglmm(mainform, random=randbase, family="gaussian", data=dataframe,verbose=FALSE,pr=TRUE,nitt=100000, thin=100, burnin=20000))
rpxs[[i]] <- lapply(1:10, function(z)
MCMCglmm(mainform, random=randpxs, family="gaussian", data=dataframe,verbose=FALSE,pr=TRUE,nitt=100000, thin=100, burnin=20000))
rfxs[[i]] <- lapply(1:10, function(z)
MCMCglmm(mainform, random=randfxs, family="gaussian", data=dataframe,verbose=FALSE,pr=TRUE,nitt=100000, thin=100, burnin=20000))
rfxspxs[[i]] <- lapply(1:10, function(z)
MCMCglmm(mainform, random=randfxspxs, family="gaussian",data=dataframe,verbose=FALSE,pr=TRUE,nitt=100000, thin=100, burnin=20000))
}
clusterlist <- list(r2fp,rbase,rpxs,rfxs,rfxspxs)
return(clusterlist)
}
mod.summs <- function(filepath,ntraits,nmodtype,nreps) { #lazy issue in that filepath is to saved rdata file with "clusterlist" object, structured exactly as the environmental ones
load(filepath)
diccl <- c()
pval <- c()
slope <- c()
counter <- 1
for(k in 1:ntraits){#traits inside lists of model type
for(j in 1:nmodtype){#model type is the first level of list
for(i in 1:nreps){#repeats innermost level
diccl[counter] <- clusterlist[[j]][[k]][[i]]$DIC
pval[counter] <- summary(clusterlist[[j]][[k]][[i]])$solutions[2,5]
slope[counter] <- summary(clusterlist[[j]][[k]][[i]])$solutions[2,1]
counter <- counter +1
}
}
}
arraydiccl<-array(diccl,dim=c(nreps,nmodtype,ntraits))#fills first array first(last dimension), first row first(first dimension)
arraypval <-array(pval,dim=c(nreps,nmodtype,ntraits))
arrayslp <-array(slope,dim=c(nreps,nmodtype,ntraits))
rm(clusterlist)
return(list(arraydiccl,arraypval,arrayslp))
}
####READ IN DATA
soils <- read.csv("~/GenoPheno/covmat.csv",stringsAsFactors=F,header=T)
allpheno <- read.csv("~/GenoPheno/phenomat.csv",stringsAsFactors=F,header=T)
allped <- read.csv("~/GenoPheno/pedmat.csv",stringsAsFactors=F,header=T)
Apopenvdat <-read.csv("AlphabeticalPopEnvDat.csv",stringsAsFactors=F,header=T)
greens <- (allpheno$green)/(allpheno$red + allpheno$blue + allpheno$green)
#swaps colors to ratios with total brightness
allpheno <- allpheno[,-c(3,10:12,14,16)]#cuts some traits (some colinear, some non-normal, some invariant), leaves 9 traits
allpheno$green <- greens
allpheno$germday <- allpheno$germday - 16 # is in days since july 1st, and later want it in Days to Germination, so subtract planting date
colnames(allpheno)[1]<- "ID"
cpheno <- scale(allpheno[,-1]) #center and scale
#soil treatment varible
soiltrt <- c()
soiltrt[which(soils$treatment==3)] <- "mt"
soiltrt[which(soils$treatment==5)] <- "mc"
soiltrt[which(soils$treatment==7)] <- "tc"
#soil trt and ped numbers reflect order in which populations were SAMPLED, not alphabet.
#redefining levels in pedigree file to match other files
allped$sire.pop[which(allped$sire.pop=="ml")]="m"
allped$dam.pop[which(allped$dam.pop=="ml")]="m"#"m"is 5th in the factorized, malinalco in the coancestry matrix is row/column 5. "ML" would come out 6th.
allped$dam.pop<-as.numeric(as.factor(allped$dam.pop))
allped$sire.pop<- as.numeric(as.factor(allped$sire.pop))
#now ped pop numbers reflect alphabet, although dams still reflect field sample order, and sire #s are unrelated and unique for all
# seeds in the same biota are extremely unlikely to share a father, since they come from different infructescences
# note that between biota, seeds sharing a mom may also share a father, since they often come from the same set of infructescences in each biota, but this is not reflected in the pedigree
#all analyses downstream that rely on relatedness are conducted separately across biota treatments for this reason.
envdat <- Apopenvdat[allped$dam.pop,]
cenvdat <- scale(envdat)
#since dam.pop now reflects alphabetical order, can pull out env info this way
# ####MODELS
alldat <- cbind(cpheno,allped,soiltrt,cenvdat)
alldat$dam <- as.character(alldat$dam)
alldat$dam.pop <- as.character(alldat$dam.pop)
traitcols <- c("flowerday","germday","espigalength","shoot","root","finalheight","finalstemw","width3leaf","green")#functional names, matching phenotype data frame
##INTERCEPT
clusterlist <- fitmodelset(traitcols,"~1",alldat)
save(clusterlist,file="~/GenoPheno/traitgxemodelsINT.Rdata")
# ELEVATION
clusterlist <- fitmodelset(traitcols,"~ elevation",alldat)
save(clusterlist,file="~/GenoPheno/traitgxemodelsELEV.Rdata")
#MAT
clusterlist <- fitmodelset(traitcols,"~ TAnn",alldat)
save(clusterlist,file="~/GenoPheno/traitgxemodelsTAnn.Rdata")
# Pann
clusterlist <- fitmodelset(traitcols,"~ Pann",alldat)
save(clusterlist,file="~/GenoPheno/traitgxemodelsPann.Rdata")
#SWC
clusterlist <- fitmodelset(traitcols,"~ CapacidadDeCampoPercent",alldat)
save(clusterlist,file="~/GenoPheno/traitgxemodelsSWC.Rdata")
traitcols <- c("flowerday","germday","espigalength","shoot","root","finalheight","finalstemw","width3leaf","green")#functional names, matching phenotype data frame
load("~/GenoPheno/traitgxemodelsINT.Rdata")
diccl <- c()
counter <- 1
for(k in 1:9){#traits inside lists of model type
for(j in 1:5){#model type is the first level of list
for(i in 1:10){#repeats innermost level
diccl[counter] <- clusterlist[[j]][[k]][[i]]$DIC
counter <- counter +1
}
}
}
rm(clusterlist)
arraydicclI<-array(diccl,dim=c(10,5,9))#fills first array first(last dimension), first row first(first dimension)
MATmods <- mod.summs("~/GenoPheno/traitgxemodelsTAnn.Rdata",9,5,10)
TAPmods <- mod.summs("~/GenoPheno/traitgxemodelsPann.Rdata",9,5,10)
SWCmods <- mod.summs("~/GenoPheno/traitgxemodelsSWC.Rdata",9,5,10)
ELVmods <- mod.summs("~/GenoPheno/traitgxemodelsELEV.Rdata",9,5,10)
traitcols <- c("flowerday","germday","espigalength","shoot","root","finalheight","finalstemw","width3leaf","green")#functional names, matching phenotype data frame
# ##SUMMARY
randtypes <- c("fp","fps","fpspxs","fpsfxs","fpspxsfxs")#same order as listed in for the cluster list
dicdf <- rbind( data.frame(apply(arraydicclI,c(2,3),mean)), data.frame(apply(MATmods[[1]],c(2,3),mean)), data.frame(apply(TAPmods[[1]],c(2,3),mean)),
data.frame(apply(SWCmods[[1]],c(2,3),mean)), data.frame(apply(ELVmods[[1]],c(2,3),mean)) )
colnames(dicdf) <- paste(traitcols,"DIC",sep=".")
pvaldf <- rbind(data.frame(matrix(NA,ncol=9,nrow=5)), data.frame(apply(MATmods[[2]],c(2,3),mean)), data.frame(apply(TAPmods[[2]],c(2,3),mean)),
data.frame(apply(SWCmods[[2]],c(2,3),mean)), data.frame(apply(ELVmods[[2]],c(2,3),mean)) )
colnames(pvaldf) <- paste(traitcols,"pval",sep=".")
slopedf <- rbind(data.frame(matrix(NA,ncol=9,nrow=5)), data.frame(apply(MATmods[[3]],c(2,3),mean)), data.frame(apply(TAPmods[[3]],c(2,3),mean)),
data.frame(apply(SWCmods[[3]],c(2,3),mean)), data.frame(apply(ELVmods[[3]],c(2,3),mean)) )
colnames(slopedf) <- paste(traitcols,"slp",sep=".")
resdf <- data.frame(cbind(dicdf,pvaldf,slopedf))
resdf$envvar <- rep(c("I","MAT","TAP","SWC","ELV"), each = nrow(resdf)/5 )
resdf$randtype <- rep(randtypes, times = 5 )
write.csv(resdf,"~/GenoPheno/04_02_gxeresults.csv",row.names=F)
sink("~/GenoPheno/04_02_gxeresults.txt")
for(i in 1:9){
print(head(resdf[order(resdf[,i]),c(i,i+9,i+18,ncol(resdf)-1,ncol(resdf))]))
}
sink()