-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyRF.Rmd
146 lines (117 loc) · 5.01 KB
/
myRF.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
```{r include=FALSE}
library(tidyverse)
library(readxl)
library(lme4)
library(cowplot)
library(philentropy)
library(dabestr)
library(rogme)
library(lqmm)
```
# Goals
We have a dataset contain axon diameters of neurons from the optic nerve of control and mutant zebrafish. We'd like to know if the mean axon diameter, or the distribution of axon diameters, differs between groups. We want to implement tests that take account of within and between animal variance.
Here, we focus on the myRF data set.
# Load and format data
```{r message = FALSE}
df <- read_excel("MyRF Axon Diameter Measurements 20240709.xlsx", range = "A3:H328") %>%
pivot_longer(cols = 1:8) %>%
drop_na() %>%
mutate(group = as_factor(ifelse(name %in% c("ON7", "ON12", "ON30", "ON55"), "control", "mutant")),
litter = as_factor(ifelse(name %in% c("ON7", "ON12"), "litter1",
ifelse(name %in% c("ON24-M", "ON25-M", "ON30"), "litter2",
ifelse(name %in% c("ON32-M"), "litter3",
ifelse(name %in% c("ON48-M"), "litter4",
ifelse(name %in% c("ON55"), "litter5", "error")))))),
processed = as_factor(ifelse(litter %in% c("litter1", "litter2"),"processed1", "processed2")))
df$group <- as.factor(df$group)
df$name <- as.factor(df$name)
```
# Plot the data
Focus here on plot of individual mice, colour coded by group.
```{r}
(plot_by_id <- ggplot(data = df, aes(name, value)) +
geom_violin(aes(colour = group), draw_quantiles = c(0.25, 0.5, 0.75)) +
theme_cowplot(font_size = 14) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = 'Animal', y = 'Diameter (\u00B5 m)', colour = "Group"))
ggsave('Plots/violins.jpeg', plot_by_id)
```
# Tests for differences in means
```{r}
mm <- lmer(value ~ group + (1 | name), data = df)
mm_null <- lmer(value ~ (1 | name), data = df)
summary(mm)
anova(mm, mm_null)
```
```{r}
mm_t <- lmer(log(value) ~ group + (1 | name), data = df)
mm_t_null <- lmer(log(value) ~ (1 | name), data = df)
summary(mm_t)
anova(mm_t, mm_t_null)
```
# Test again with more complicated random effects stuctures
```{r}
mm_t_2 <- lmer(log(value) ~ group + (1 | litter/name), data = df)
mm_t_null_2 <- lmer(log(value) ~ (1 | litter/name), data = df)
summary(mm_t_2)
anova(mm_t_2, mm_t_null_2)
mm_t_3 <- lmer(log(value) ~ group + (1 | processed/name), data = df)
mm_t_null_3 <- lmer(log(value) ~ (1 | processed/name), data = df)
summary(mm_t_3)
anova(mm_t_2, mm_t_null_3)
```
# Evaluate residuals
```{r fig.width = 7, fig.height = 2}
df$residuals_mm_t <- resid(mm_t)
df$fitted_mm_t <- fitted(mm_t)
(res_t_plot <- ggplot(data = df, aes(x = fitted_mm_t, y = residuals_mm_t)) +
geom_point(aes(colour = group)) +
theme_cowplot())
```
# Compare residual distributions between groups, for the untransformed data first and then for the log-transformed data.
```{r Compare distribtions}
### We could use a KS test:
ks.test(residuals_mm_t ~ group, data = df)
```
This suggests differences between groups in the variance, but doesn't account for within vs between subject effects in the residuals. But worry here is that we don't treat animals as indepwndent. Instead calculate SD for each animal and compare groups:
```{r}
gr_t <- df %>% group_by(group, name) %>% summarize(avg_r = sd(residuals_mm_t))
t.test(avg_r ~ group, data = gr_t)
```
Hard to interpret because group sizes are small. No firm evidence for difference in distributions.
Generate histograms for all animals
```{r make histograms}
### Make histograms for each animal
### Use log transformed data
names = unique(df$name)
### If submean = 1 then will substract means before making histograms
histfun <- function(name, df, submean = 0) {
sub <- ifelse(submean == 1, mean(df$value[df$name==name]), 0)
hist(log(df$value[df$name==name]-sub), seq(-2,2,0.1), plot = FALSE)
}
hists <- sapply(names, histfun, df, submean=0)
### Convert results to tibble for use with tidyverse functions
rns <- rownames(hists)
hists_tib <- as_tibble(hists) %>%
rownames_to_column(var = "rowname") %>%
pivot_longer(-rowname, names_to = "column", values_to = "value") %>%
pivot_wider(names_from = rowname, values_from = value)
colnames(hists_tib) <- c("animal", rns)
ggconvfun <- function(mids, density) {
gghist <- cbind(mids, density)
ggplot(gghist, aes(mids, density)) +
geom_col() +
theme_cowplot()
}
gghists <- map2(hists_tib$mids, hists_tib$density, ggconvfun)
plot_grid(plotlist = gghists)
```
# Evaluate distributions
```{r}
fit.lqmm <- lqmm(fixed = value ~ group, random = ~1, group = name, tau = c(0.25,0.5, 0.75), nK = 7, type = "normal", data = df, control = c(method = "df", LP_max_iter = 5000))
summary(fit.lqmm, R = 500, seed = 2)
```
```{r}
fit.log.lqmm <- lqmm(fixed = log(value) ~ group, random = ~1, group = name, tau = c(0.25,0.5, 0.75), nK = 7, type = "normal", data = df, control = c(method = "df", LP_max_iter = 5000))
summary(fit.log.lqmm, R = 500, seed = 2)
```