forked from xialuoke4062/FCBB2019Spring
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject_lc.Rmd
128 lines (113 loc) · 3.91 KB
/
project_lc.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
## fcbb project
```{r message=F}
#preprocess
library(tidyverse)
library(magrittr)
dat = read.csv('C:/Users/lcqi/OneDrive/Desktop/Courses/foundations_of_computational_biology_and_bioinformatics/project/updated_encoding_BLCA.csv',header = 1,row.names = 1)
b = apply(dat,1,sum)
a = length(dat[1,])-b
rawdat = as.data.frame(cbind(a,b) %>% `colnames<-`(c('0','1')))
```
```{r}
#mutually co occurrence test
combinations = combn(1:length(dat[,1]),2)
fisher_results = c()
chisq_results = c()
for (i in 1:length(combinations[1,])){
id = combinations[,i]
ts = rawdat[id,]
if(fisher.test(ts)$p == 1){
fisher_results = rbind(fisher_results,c(row.names(ts),fisher.test(ts)$p))
}
if(chisq.test(ts,correct = FALSE)$p.value == 1){
chisq_results=rbind(chisq_results,c(row.names(ts),chisq.test(ts,correct = FALSE)$p.value))
}
}
# binomial_test_co_result = c()
# for(i in 1:length(combinations[1,])){
# id = combinations[,i]
# mtx = dat[id,]
# ts = apply(mtx,2,function(r){
# if(r[1]==r[2]&r[1]==1){return(1)}
# else if(sum(r[1]+r[2])==1){return(0)}
# })%>%unlist()
# # # bootstrap
# # ts = sample(ts,100,replace = T)
# n1 = sum(ts)
# n2 = length(ts)
# if(pbinom(n1-1,n2,0.5,lower.tail = F)<1){binomial_test_co_result = rbind(binomial_test_co_result,c(rownames(mtx),pbinom(n1,n2,0.5,lower.tail = F)))}
# }
# chisq_test_co = function(mtx){
# # expected = matrix(c(411,0,0,411),2,byrow = T)
# TS = (mtx[1,1]-mtx[1,1])^2/mtx[1,1] + (mtx[1,2]-mtx[1,2])^2/(mtx[1,2]) +
# (mtx[2,1]-mtx[1,1])^2/mtx[1,1] +(mtx[2,2]-mtx[1,2])^2/mtx[1,2]
# return(pchisq(TS,1,lower.tail = FALSE))
# }
#
# chisq_test_co_results=c()
# for (i in 1:length(combinations[1,])){
# id = combinations[,i]
# ts = rawdat[id,]
# # exclusive_results=c(exclusive_results,chisq_test_exclusive(ts))
# if(chisq_test_co(ts)<0.05){
# chisq_test_co_results=rbind(chisq_test_co_results,c(row.names(ts),chisq_test_co(ts)))
# }
# # exclusive_results[as.numeric(exclusive_results[,3])>1e-100,]
# }
# #mutually exclusive test
# chisq_test_exclusive = function(mtx){
# # expected = matrix(c(411,0,0,411),2,byrow = T)
# TS = (mtx[1,1]-mtx[1,1])^2/mtx[1,1] + (mtx[1,2]-mtx[1,2])^2/(mtx[1,2]) +
# (mtx[2,1]-(411-mtx[1,1]))^2/(411-mtx[1,1]) +(mtx[2,2]-mtx[1,1])^2/mtx[1,1]
# return(pchisq(TS,1,lower.tail = FALSE))
# }
#
# exclusive_results=c()
# for (i in 1:length(combinations[1,])){
# id = combinations[,i]
# ts = rawdat[id,]
# # exclusive_results=c(exclusive_results,chisq_test_exclusive(ts))
# if(chisq_test_exclusive(ts)>0){
# exclusive_results=rbind(exclusive_results,c(row.names(ts),chisq_test_exclusive(ts)))
# }
# # exclusive_results[as.numeric(exclusive_results[,3])>1e-100,]
# }
```
```{r message=F}
#heatmap
#fisher
library(data.table)
fisher_names = unique(c(fisher_results[,1],fisher_results[,2]))
fisher_map = melt(setDT(
dat[rownames(dat) %in% fisher_names,],
keep.rownames = T),'rn')
ggplot( fisher_map, aes(x = variable,y = factor(rn,levels = unique(rn))) )+
geom_tile(aes(fill = value))+
scale_fill_gradient(low="grey90", high="red") +
labs(x= 'Tumors',y = 'Genes')+
theme(
# axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
# axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y=element_blank()
)
#chi
chisq_names = unique(c(chisq_results[,1],chisq_results[,2]))
chisq_map = melt(setDT(
dat[rownames(dat) %in% chisq_names,],
keep.rownames = T),'rn')
ggplot( chisq_map, aes(x = variable,y = factor(rn,levels = unique(rn))) )+
geom_tile(aes(fill = value))+
scale_fill_gradient(low="grey90", high="red") +
labs(x= 'Tumors',y = 'Genes')+
theme(
# axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
# axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y=element_blank()
)
```