myRF.Rmd

```{r include=FALSE}
library(tidyverse)
library(readxl)
library(lme4)
library(cowplot)
library(philentropy)
library(dabestr)
library(rogme)
library(lqmm)
```

# Goals
We have a dataset contain axon diameters of neurons from the optic nerve of control and mutant zebrafish. We'd like to know if the mean axon diameter, or the distribution of axon diameters, differs between groups. We want to implement tests that take account of within and between animal variance.

Here, we focus on the myRF data set.

# Load and format data
```{r message = FALSE}
df <- read_excel("MyRF Axon Diameter Measurements 20240709.xlsx", range = "A3:H328") %>%
    pivot_longer(cols = 1:8) %>%
    drop_na() %>%
    mutate(group = as_factor(ifelse(name %in% c("ON7", "ON12", "ON30", "ON55"), "control", "mutant")),
           litter = as_factor(ifelse(name %in% c("ON7", "ON12"), "litter1",
                              ifelse(name %in% c("ON24-M", "ON25-M", "ON30"), "litter2",
                                                 ifelse(name %in% c("ON32-M"), "litter3",
                                                 ifelse(name %in% c("ON48-M"), "litter4",
                                                 ifelse(name %in% c("ON55"), "litter5", "error")))))),
           processed = as_factor(ifelse(litter %in% c("litter1", "litter2"),"processed1", "processed2")))

df$group <- as.factor(df$group)
df$name <- as.factor(df$name)
```

# Plot the data
Focus here on plot of individual mice, colour coded by group.
```{r}
(plot_by_id <- ggplot(data = df, aes(name, value)) +
     geom_violin(aes(colour = group), draw_quantiles = c(0.25, 0.5, 0.75)) +
     theme_cowplot(font_size = 14) +
     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(x = 'Animal', y = 'Diameter (\u00B5 m)', colour = "Group"))

ggsave('Plots/violins.jpeg', plot_by_id)
```


# Tests for differences in means
```{r}
mm <- lmer(value ~ group + (1 | name), data = df)
mm_null <- lmer(value ~ (1 | name), data = df)
summary(mm)
anova(mm, mm_null)
```

```{r}
mm_t <- lmer(log(value) ~ group + (1 | name), data = df)
mm_t_null <- lmer(log(value) ~ (1 | name), data = df)
summary(mm_t)
anova(mm_t, mm_t_null)
```

# Test again with more complicated random effects stuctures
```{r}
mm_t_2 <- lmer(log(value) ~ group + (1 | litter/name), data = df)
mm_t_null_2 <- lmer(log(value) ~ (1 | litter/name), data = df)
summary(mm_t_2)
anova(mm_t_2, mm_t_null_2)

mm_t_3 <- lmer(log(value) ~ group + (1 | processed/name), data = df)
mm_t_null_3 <- lmer(log(value) ~ (1 | processed/name), data = df)
summary(mm_t_3)
anova(mm_t_2, mm_t_null_3)
```

# Evaluate residuals
```{r fig.width = 7, fig.height = 2}
df$residuals_mm_t <- resid(mm_t)
df$fitted_mm_t <- fitted(mm_t)
(res_t_plot <- ggplot(data = df, aes(x = fitted_mm_t, y = residuals_mm_t)) +
    geom_point(aes(colour = group)) +
    theme_cowplot())
```

# Compare residual distributions between groups, for the untransformed data first and then for the log-transformed data. 
```{r Compare distribtions}
### We could use a KS test:
ks.test(residuals_mm_t ~ group, data = df)
```
This suggests differences between groups in the variance, but doesn't account for within vs between subject effects in the residuals.  But worry here is that we don't treat animals as indepwndent.  Instead calculate SD for each animal and compare groups:

```{r}
gr_t <- df %>% group_by(group, name) %>% summarize(avg_r = sd(residuals_mm_t))
t.test(avg_r ~ group, data = gr_t)
```
Hard to interpret because group sizes are small. No firm evidence for difference in distributions.


Generate histograms for all animals
```{r make histograms}
### Make histograms for each animal
### Use log transformed data
names = unique(df$name)
### If submean = 1 then will substract means before making histograms 
histfun <- function(name, df, submean = 0) {
    sub <- ifelse(submean == 1, mean(df$value[df$name==name]), 0)
    hist(log(df$value[df$name==name]-sub), seq(-2,2,0.1), plot = FALSE)
}
hists <- sapply(names, histfun, df, submean=0)

### Convert results to tibble for use with tidyverse functions
rns <- rownames(hists)
hists_tib <- as_tibble(hists) %>%
    rownames_to_column(var = "rowname") %>%
    pivot_longer(-rowname, names_to = "column", values_to = "value") %>%
    pivot_wider(names_from = rowname, values_from = value)
colnames(hists_tib) <- c("animal", rns)

    
ggconvfun <- function(mids, density) {
    gghist <- cbind(mids, density)
    ggplot(gghist, aes(mids, density)) +
        geom_col() +
        theme_cowplot()
}

gghists <- map2(hists_tib$mids, hists_tib$density, ggconvfun)

plot_grid(plotlist = gghists)
```


# Evaluate distributions
```{r}
fit.lqmm <- lqmm(fixed = value ~ group, random = ~1, group = name, tau = c(0.25,0.5, 0.75), nK = 7, type = "normal", data = df, control = c(method = "df", LP_max_iter = 5000))
summary(fit.lqmm, R = 500, seed = 2)
```

```{r}
fit.log.lqmm <- lqmm(fixed = log(value) ~ group, random = ~1, group = name, tau = c(0.25,0.5, 0.75), nK = 7, type = "normal", data = df, control = c(method = "df", LP_max_iter = 5000))
summary(fit.log.lqmm, R = 500, seed = 2)
```