-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfigure_3.R
155 lines (124 loc) · 6.95 KB
/
figure_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#read in the census table
if(!exists("rebuilt_amino")){
rebuilt_amino <- fread("~/mahmood/binom/analyzed data/codon_substitution_table.csv")
}
rebuilt_amino_observed = rebuilt_amino[AminoCountCosmicPan != 0 & SubType %in% c("Missense","Nonsense", "Silent")]
#make the boxplot for all
all <- ggplot(subset(rebuilt_amino,SubType %in% c("Missense","Nonsense", "Silent")), aes(x=group, y=log10(AminoMutabilityPan),fill = SubType)) +
geom_boxplot() +
facet_grid(. ~ SubType) +
theme(axis.title = element_blank(),legend.position = "none") +
scale_fill_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
#make the labes be positive
scale_y_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5")) +
#set the size of the axis label
theme(axis.title = element_text(face = "bold")) +
#et the size of the axis tick mark font
theme(axis.title.x = element_blank()) +
ylab(expression(bold(paste("-Log"[10], "Mutability"))))
#make a cdf plot of all the possible mutations splitting it by the SubType either missense, nonsense or silent
type_cdf <- ggplot(rebuilt_amino_observed, aes(x = log10(AminoMutabilityPan), color = SubType)) +
#set the color manuall so that miss == Blue, Nonsense == Red, and silent == green
scale_color_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
#set the size of the axis label
theme(axis.title = element_text(face = "bold")) +
#et the size of the axis tick mark font
ylab("Cumulative Probability") +
#xlab(expression(bold(paste("-Log"[10], "Mutability")))) +
#put the legend in the bottom right corner and set the size of the text
theme(
legend.position = c(.95, .25),
legend.justification = c("right", "top"),
legend.box.just = "right",
legend.title = element_blank()) +
#change the legend title
#lims(x = c(0,max(log10(rebuilt_amino$mutability)))) +
#make the ecdf and make the size of the lines a bit thicker
stat_ecdf(size = 3) +
scale_x_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5")) +
#forces the legend samples to be much larger
guides(colour = guide_legend(override.aes = list(size=5))) + theme(axis.title.x = element_blank())
#this is for where to draw the lines
y1 = -4.9
y2 = - 4.78
mbp <- ggplot(rebuilt_amino_observed, aes(y = log10(AminoMutabilityPan),x = SubType, fill = SubType)) +
geom_violin() +
scale_fill_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
theme(legend.position="none",axis.text.x=element_blank(), axis.title.x = element_blank()) +
#betwen silent and missense
geom_segment(aes(x = 1, y = y2, xend = 3, yend = y2),size = 1) +
annotate("text", x = 2, y = y2 + 0.01, label = "**", size = 5) +
#between missense and nonsense
geom_segment(aes(x = 1.1, y = y1, xend = 1.9, yend = y1),size = 1) +
annotate("text", x = 1.5, y = y1 + 0.01, label = "**", size = 5) +
#between nonsense and silent
geom_segment(aes(x = 2.1, y = y1, xend = 2.9, yend = y1),size = 1) +
annotate("text", x = 2.5, y = y1 + 0.01, label = "**", size = 5) +
ylab(expression(bold(paste("-Log"[10], "Mutability")))) +
xlab(NULL) +
scale_y_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5")) + theme(aspect.ratio = 1)
#add the mutability boxplot as a custom grob
type_cdf <- type_cdf + annotation_custom(ggplotGrob(mbp),xmin = -6.9, xmax = -5.8,ymin = 0.35)
aminoacid_plot <- plot_grid(type_cdf,all,align = "h",axis = 'b',labels = c("A","B"))
# ###for tests
#correlation between mutability and frequency
rebuilt_amino_observed$SubType <- as.factor(rebuilt_amino_observed$SubType)
kruskal.test(rebuilt_amino_observed$AminoMutabilityPan, rebuilt_amino_observed$SubType)
f <- dunn.test::dunn.test(rebuilt_amino_observed$AminoMutabilityPan, rebuilt_amino_observed$SubType)
#read in the census table
if(!exists("rebuilt_nuc")){
rebuilt_nuc = fread("/Users/browna6/mahmood/binom/analyzed\ data/nucleotide_mutation_table.csv")
}
rebuilt_nuc_observed <- rebuilt_nuc[CosmicPanCount != 0 & SubType %in% c("Missense","Nonsense", "Silent")]
#make the boxplot for all
all_nuc <- ggplot(subset(rebuilt_nuc,SubType %in% c("Missense","Nonsense", "Silent")),aes(x=CosmicPanGroup, y=log10(mutabilityPan),fill = SubType)) +
geom_boxplot() +
facet_grid(. ~ SubType) +
theme(axis.title = element_blank(),legend.position = "none") +
scale_fill_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
scale_y_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5")) +
#set the size of the axis label
theme(axis.title = element_text(face = "bold")) +
#et the size of the axis tick mark font
xlab("Observed Mutation Frequency") +
ylab(expression(bold(paste("-Log"[10], "Mutability"))))
#make a cdf plot of all the observed mutations splitting it by the SubType either missense, nonsense or silent
type_cdf_nuc <- ggplot(rebuilt_nuc_observed, aes(x = log10(mutabilityPan), color = SubType)) +
#set the color manuall so that miss == Blue, Nonsense == Red, and silent == green
scale_color_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
#set the size of the axis label
theme(axis.title = element_text(face = "bold")) +
#et the size of the axis tick mark font
ylab("Cumulative Probability") +
xlab(expression(bold(paste("-Log"[10], "Mutability")))) +
theme(legend.position = "none") +
#lims(x = c(0,max(log10(rebuilt_amino$mutability)))) +
#make the ecdf and make the size of the lines a bit thicker
stat_ecdf(size = 3) +
scale_x_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5"))
y1 = -4.9
y2 = - 4.7
mbp_nuc <- ggplot(rebuilt_nuc_observed, aes(y = log10(mutabilityPan),x = SubType, fill = SubType)) +
geom_violin() +
scale_fill_manual(values = c("#00BFC4","#F8766D","#7CAE00")) +
theme(legend.position="none",axis.text.x=element_blank(), axis.title.x = element_blank()) +
#betwen silent and missense
geom_segment(aes(x = 1, y = y2, xend = 3, yend = y2),size = 1) +
annotate("text", x = 2, y = y2 + 0.01, label = "**", size = 5) +
#between missense and nonsense
geom_segment(aes(x = 1.1, y = y1, xend = 1.9, yend = y1),size = 1) +
annotate("text", x = 1.5, y = y1 + 0.01, label = "**", size = 5) +
#between nonsense and silent
geom_segment(aes(x = 2.1, y = y1, xend = 2.9, yend = y1),size = 1) +
annotate("text", x = 2.5, y = y1 + 0.01, label = "**", size = 5) +
ylab(expression(bold(paste("-Log"[10], "Mutability")))) +
xlab(NULL) +
scale_y_continuous(breaks=c(-6.5, -6,-5.5,-5),labels = c("6.5","6","5.5","5")) + theme(aspect.ratio = 1)
#add the boxplot as a custom grob
type_cdf_nuc <- type_cdf_nuc + annotation_custom(ggplotGrob(mbp_nuc),xmin = -6.9, xmax = -5.8,ymin = 0.35)
nucleotide_plot <- plot_grid(type_cdf_nuc,all_nuc,align = "h",axis = 'b',labels = c("C","D"))
ggarrange(aminoacid_plot,nucleotide_plot, nrow = 2)
# ###for tests
# rebuilt_nuc_observed$SubType <- as.factor(rebuilt_nuc_observed$SubType)
# kruskal.test(rebuilt_nuc_observed$mutabilityPan, rebuilt_nuc_observed$SubType)
# test_result = dunn.test::dunn.test(rebuilt_nuc_observed$mutabilityPan, rebuilt_nuc_observed$SubType)