forked from pjsio/ME114
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori_example.R
38 lines (31 loc) · 1.3 KB
/
apriori_example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
## Association Rules example
## book data can be found from https://github.com/WinVector/zmPDSwR/tree/master/Bookdata
# Load the book purchase data as an arules "transactions" object.
# library() (rather than require()) stops immediately with a clear error if
# arules is not installed, instead of failing later at read.transactions().
library(arules)
# "single" format: one (userid, title) pair per line of the tab-separated file.
# Each userid becomes one basket; rm.duplicates = TRUE drops repeated titles
# within the same basket.
# NOTE(review): recent arules releases document a `header` argument for
# read.transactions(); confirm whether `header = TRUE` is needed there when
# `cols` is given by column name.
bookbaskets <- read.transactions(
  "bookdata.tsv",
  format = "single",
  sep = "\t",
  cols = c("userid", "title"),
  rm.duplicates = TRUE
)
# print a short overview of the transactions object
bookbaskets
# Summarize basket sizes: size() gives the number of items in each basket.
basketSizes <- size(bookbaskets)
summary(basketSizes)
# Plot the distribution of basket sizes on a log10 x-axis.
# library() fails fast if ggplot2 is missing (require() would only return FALSE).
library(ggplot2)
# geom_density() has no `binwidth` parameter (that belongs to histograms), so
# it is not passed here; use `bw` or `adjust` to tune the kernel smoothing.
ggplot(data.frame(count = basketSizes)) +
  geom_density(aes(x = count)) +
  scale_x_log10()
# Which books are people reading?
# bookFreq: relative frequency (support) of each title across all baskets.
bookFreq <- itemFrequency(bookbaskets)
# bookCount: absolute number of baskets containing each title. Asking arules
# for the counts directly replaces the previous manual rescaling of bookFreq
# (normalize, then multiply by the total number of purchases), which yields
# the same numbers but goes through a floating-point round trip.
bookCount <- itemFrequency(bookbaskets, type = "absolute")
# show the ten most frequently occurring titles
orderedBooks <- sort(bookCount, decreasing = TRUE)
head(orderedBooks, 10)
# Keep only the baskets that contain at least two books.
# The dimensions are printed before and after so the effect of the filter on
# the number of baskets is visible.
dim(bookbaskets)
bookbaskets_use <- bookbaskets[basketSizes >= 2]
dim(bookbaskets_use)
# Mine association rules from the filtered baskets with the apriori algorithm,
# using a minimum support of 0.002 and a minimum confidence of 0.75.
rules <- apriori(
  bookbaskets_use,
  parameter = list(support = 0.002, confidence = 0.75)
)
summary(rules)
# Show the five rules with the highest confidence.
inspect(head(sort(rules, decreasing = TRUE, by = "confidence"), n = 5))
## Note: "lift" compares how often a pattern is actually observed with how
## often we would expect to observe it by chance if its items were
## independent. The larger the lift, the less likely it is that the pattern
## arose by chance.