We need the 'arules' and 'arulesViz' packages for this test; you may need to install them first.
> library(arules)
> library(arulesViz)
Association rule models work with transaction data. An example of transaction data is a customer's shopping history. I prefer to use simulated data rather than real data, because the first step in data analysis is to understand the structure of the target data and its content. For learning purposes, I think it is better to use a simple dataset; once we know the model, we can easily apply it to real data sets. Here, we generate random transaction data for an imaginary grocery store. First, we create a data frame, and then we convert it to the transaction type.
# Simulation settings for the imaginary grocery store.
n <- 500 # number of transactions to generate

# Catalogue of items that a transaction can contain.
goods <- c(
  "apple", "melon", "banana", "strawberry",
  "tomato", "cabbage", "carrot"
)

# Empty data frame that will collect the generated rows.
trans <- data.frame()
Randomly creating data and collecting it into trans data frame.
# Simulate n grocery transactions in long format: one row per (item, tid)
# pair. Each item must sit on its own row, because the later
# split(trans[, "items"], trans[, "tid"]) treats every row value as one item
# label; a comma-joined string such as "apple,melon" would be counted as a
# single item instead of two.
rows <- vector("list", n) # preallocated; avoids rbind() on every iteration
for (i in seq_len(n)) {
  count <- sample(1:3, 1) # basket size: 1 to 3 distinct items
  selected <- sample(goods, count)
  if (i %% 2 == 1) {
    # Odd-numbered transactions get one extra random item; union() keeps the
    # basket free of duplicates when that item was already picked.
    add_product <- sample(goods, 1)
    selected <- union(selected, add_product)
  }
  rows[[i]] <- data.frame(items = selected, tid = i, stringsAsFactors = FALSE)
}
# Bind all baskets at once; fall back to an empty frame when n is 0.
if (n > 0) {
  trans <- do.call(rbind, rows)
} else {
  trans <- data.frame(items = character(0), tid = integer(0))
}
Checking data in trans data frame.
> head(trans, 10) items tid 1 tomato 1 2 cabbage 1 3 tomato 2 4 strawberry,apple,tomato 3 5 melon 4 6 banana,carrot,apple 5 7 strawberry 5 8 apple 6 9 tomato 7 10 melon 7
Next, we need to convert the generated data frame into the transaction data type.
# Group the item labels in trans by transaction id (tid), then coerce the
# resulting list to the "transactions" class.
> grocery_trans <- as(split(trans[, "items"], trans[, "tid"]), "transactions")
> grocery_trans transactions in sparse format with 500 transactions (rows) and 170 items (columns)
To check the content of the transaction data, we use the inspect() command.
> inspect(grocery_trans)
items transactionID [1] {cabbage,tomato,banana} 1 [2] {melon,cabbage,banana} 2 [3] {banana,melon} 3 [4] {melon} 4 [5] {banana,carrot,tomato} 5 [6] {tomato,melon} 6
Mining the rules
> rules_1 <- apriori(grocery_trans, parameter = list(supp = 0.001, conf = 0.1)) Apriori Parameter specification: confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext 0.1 0.1 1 none FALSE TRUE 5 0.001 1 10 rules FALSE Algorithmic control: filter tree heap memopt load sort verbose 0.1 TRUE TRUE FALSE TRUE 2 TRUE Absolute minimum support count: 0 set item appearances ...[0 item(s)] done [0.00s]. set transactions ...[158 item(s), 500 transaction(s)] done [0.00s]. sorting and recoding items ... [158 item(s)] done [0.00s]. creating transaction tree ... done [0.00s]. checking subsets of size 1 2 done [0.00s]. writing ... [105 rule(s)] done [0.00s]. creating S4 object ... done [0.00s].
> rules_1 set of 105 rules
> rules_1 <- sort(rules_1, decreasing = TRUE, by = "confidence") > inspect(rules_1) lhs rhs support confidence lift count [1] {apple,strawberry,banana} => {melon} 0.002 1.0000000 10.4166667 1 [2] {banana,apple,melon} => {cabbage} 0.002 1.0000000 11.9047619 1 [3] {tomato,strawberry,cabbage} => {carrot} 0.002 1.0000000 8.1967213 1 [4] {apple,banana,melon} => {carrot} 0.002 1.0000000 8.1967213 1 [5] {carrot,apple,melon} => {banana} 0.002 1.0000000 10.0000000 1 [6] {strawberry,apple,banana} => {cabbage} 0.002 1.0000000 11.9047619 1 [7] {carrot,banana} => {strawberry} 0.002 1.0000000 9.2592593 1 [8] {tomato,apple,cabbage} => {carrot} 0.002 1.0000000 8.1967213 1 [9] {melon,cabbage,carrot} => {apple} 0.002 1.0000000 10.2040816 1 [10] {carrot,strawberry,tomato} => {cabbage} 0.002 1.0000000 11.9047619 1 [11] {apple,tomato,melon} => {banana} 0.002 1.0000000 10.0000000 1 [12] {cabbage,tomato,apple} => {melon} 0.002 1.0000000 10.4166667 1 [13] {tomato,apple,strawberry} => {banana} 0.002 1.0000000 10.0000000 1 [14] {banana,tomato,cabbage} => {apple} 0.002 1.0000000 10.2040816 1 [15] {carrot,apple} => {tomato} 0.002 1.0000000 9.8039216 1 [16] {melon,banana,tomato} => {carrot} 0.002 1.0000000 8.1967213 1 [17] {carrot,melon,strawberry} => {banana} 0.002 1.0000000 10.0000000 1 [18] {banana,tomato,carrot} => {cabbage} 0.002 1.0000000 11.9047619 1 [19] {carrot,tomato,strawberry} => {cabbage} 0.002 1.0000000 11.9047619 1 [20] {banana,cabbage,tomato} => {carrot} 0.002 1.0000000 8.1967213 1 [21] {apple,tomato,carrot} => {banana} 0.002 1.0000000 10.0000000 1 [22] {banana,cabbage,apple} => {carrot} 0.002 1.0000000 8.1967213 1 [23] {strawberry,cabbage,carrot} => {apple} 0.002 1.0000000 10.2040816 1 [24] {tomato,banana,melon} => {cabbage} 0.002 1.0000000 11.9047619 1 [25] {cabbage,banana} => {strawberry} 0.002 1.0000000 9.2592593 1 [26] {apple,strawberry,melon} => {carrot} 0.002 1.0000000 8.1967213 1 [27] {banana,carrot,apple} => {tomato} 0.002 1.0000000 9.8039216 1 [28] 
{cabbage,banana,carrot} => {melon} 0.002 1.0000000 10.4166667 1 [29] {cabbage,apple,carrot} => {strawberry} 0.002 1.0000000 9.2592593 1 [30] {strawberry,carrot} => {banana} 0.004 0.6666667 6.6666667 2 [31] {melon,apple,carrot} => {tomato} 0.004 0.6666667 6.5359477 2 [32] {strawberry,cabbage,banana} => {melon} 0.004 0.6666667 6.9444444 2 [33] {melon,banana} => {carrot} 0.004 0.6666667 5.4644809 2 [34] {melon,cabbage,tomato} => {strawberry} 0.002 0.5000000 4.6296296 1 [35] {strawberry,banana,carrot} => {cabbage} 0.002 0.5000000 5.9523810 1
..............
We take the first rhs item from the list above and check the rules for that item. Here, I extract it dynamically, but if you already know the target item, you can simply write rhs = "melon" in the appearance parameter of the apriori function.
> rhs <- inspect(rules_1@rhs[1]) items [1] {melon} > rhs_item <- gsub("\\{","", as.character(rhs$items[1])) > rhs_item <- gsub("\\}","", rhs_item)
> rhs_item [1] "melon"
We build the rules for our rhs_item.
# Mine rules again with the same support and confidence thresholds, this time
# passing an appearance list with default = "lhs" and rhs = rhs_item.
# NOTE(review): presumably this restricts the consequent to rhs_item and lets
# every other item appear only in the antecedent; confirm against the arules
# documentation.
> rules_2 <- apriori(data = grocery_trans, parameter = list(supp = 0.001, conf = 0.1), + appearance = list(default = "lhs", rhs = rhs_item))
Sorting by "confidence" and inspecting the rule
> rules_2<-sort(rules_2, decreasing = TRUE, by = "confidence") > inspect(rules_2) lhs rhs support confidence lift count [1] {apple,strawberry,banana} => {melon} 0.002 1.0000000 10.416667 1 [2] {cabbage,tomato,apple} => {melon} 0.002 1.0000000 10.416667 1 [3] {cabbage,banana,carrot} => {melon} 0.002 1.0000000 10.416667 1 [4] {strawberry,cabbage,banana} => {melon} 0.004 0.6666667 6.944444 2 [5] {carrot,banana,cabbage} => {melon} 0.002 0.5000000 5.208333 1 [6] {apple,strawberry,tomato} => {melon} 0.002 0.5000000 5.208333 1 [7] {banana,strawberry} => {melon} 0.004 0.3333333 3.472222 2 [8] {banana,cabbage,strawberry} => {melon} 0.002 0.2500000 2.604167 1 [9] {tomato,cabbage} => {melon} 0.002 0.2500000 2.604167 1 [10] {strawberry,tomato} => {melon} 0.002 0.2000000 2.083333 1 [11] {banana} => {melon} 0.010 0.1000000 1.041667 5
Finally, we plot the top 5 rules from rules_2.
> plot(rules_2[1:5], method = "graph")
Thank you for reading! Leave your comment about the post below.
No comments:
Post a Comment