-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCHIS.R
71 lines (60 loc) · 2.21 KB
/
CHIS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#Download CHISAdult 2015 Data set from UCLS site
# Load SAS data into R
library(Hmisc)
mydata <- sasxport.get("ADULT.xpt")
mydata2 <- sasxport.get("ADULTf.xpt")
#NOte: DAta is not the same is in DataCamp Course!!
# Load all packages
library(ggplot2)
library(reshape2)
library(dplyr)
library(ggthemes)
# Script generalized into a function
mosaicGG <- function(data, X, FILL) {
# Proportions in raw data
DF <- as.data.frame.matrix(table(data[[X]], data[[FILL]]))
DF$groupSum <- rowSums(DF)
DF$xmax <- cumsum(DF$groupSum)
DF$xmin <- DF$xmax - DF$groupSum
DF$X <- row.names(DF)
DF$groupSum <- NULL
DF_melted <- melt(DF, id = c("X", "xmin", "xmax"), variable.name = "FILL")
library(dplyr)
DF_melted <- DF_melted %>%
group_by(X) %>%
mutate(ymax = cumsum(value/sum(value)),
ymin = ymax - value/sum(value))
# Chi-sq test
results <- chisq.test(table(data[[FILL]], data[[X]])) # fill and then x
resid <- melt(results$residuals)
names(resid) <- c("FILL", "X", "residual")
# Merge data
DF_all <- merge(DF_melted, resid)
# Positions for labels
DF_all$xtext <- DF_all$xmin + (DF_all$xmax - DF_all$xmin)/2
index <- DF_all$xmax == max(DF_all$xmax)
DF_all$ytext <- DF_all$ymin[index] + (DF_all$ymax[index] - DF_all$ymin[index])/2
# plot:
g <- ggplot(DF_all, aes(ymin = ymin, ymax = ymax, xmin = xmin,
xmax = xmax, fill = residual)) +
geom_rect(col = "white") +
geom_text(aes(x = xtext, label = X),
y = 1, size = 3, angle = 90, hjust = 1, show.legend = FALSE) +
geom_text(aes(x = max(xmax), y = ytext, label = FILL),
size = 3, hjust = 1, show.legend = FALSE) +
scale_fill_gradient2("Residuals") +
scale_x_continuous("Individuals", expand = c(0,0)) +
scale_y_continuous("Proportion", expand = c(0,0)) +
theme_tufte() +
theme(legend.position = "bottom")
print(g)
}
# BMI described by age
mosaicGG(adult, "SRAGE_P","RBMI")
# Poverty described by age
mosaicGG(adult, "SRAGE_P","POVLL")
# mtcars: am described by cyl
mosaicGG(mtcars, "cyl","am")
# Vocab: vocabulary described by education
library(car)
mosaicGG(Vocab, "education","vocabulary")