Everyday R code (11)-Association rule/Market basket

##Association rule or Market basket
library(“arules”)
removewords=c(names(termFrequency)[which(termFrequency==1)],’en’,’f’,’nicht’,’es’,’luck’,’giving’,’thought’,’value’,’indeed’,’almost’,’apparently’,’exist’,’d’,’net’,’ture’,’dans’,’des’,’et’,’ne’,’une’,’le’)

VerbList=sapply(DATA_.input, function(x){strsplit(x[[1]],’ ‘)})
VerbList=sapply(VerbList, function(x){
Idx=which(x==”” | x %in% removewords)
if(length(Idx)>0)x=x[-Idx]
else x=x
x=unique(x)}
)
VerbList=sapply(VerbList,function(x){
paste(x,collapse=’,’)})

temp=which(VerbList==”)
VerbList=VerbList[-temp]
head(VerbList)
write(VerbList,file=’C:\\Users\\folder\\Desktop\\VerbList_a’)
verbWordList<- read.transactions(“C:\\Users\\folder\\Desktop\\VerbList_a”,
format=”basket”,sep=”,”)

rules <- apriori(verbWordList, parameter = list(support = 0.01,confidence = 0.01,minlen=2))

rules.sorted <- sort(rules, by=”support”)
inspect(rules.sorted)
#inspect(rules.sorted[1:5])

if(length(rules.sorted)>0){
rules.table=list(Keywords=lapply(1:length(rules.sorted), function(i){
wlist=do.call(‘c’,c(LIST(lhs(rules.sorted[i])),LIST(rhs(rules.sorted[i]))))
}),
quality=quality(rules.sorted))
}

#changed to rules.sorted, up part

ruleReduction <- function(ruleList,support,confidence) {
rlength <- length(ruleList)
keep <- rep(TRUE,rlength)
if (rlength > 1) {
for (i in 1:(rlength-1)) {
for (j in (i+1):rlength) {
if (support[i] == support[j]) {
if (length(ruleList[[i]]) < length(ruleList[[j]])) {
if (all(ruleList[[i]] %in% ruleList[[j]])) keep[i] <- FALSE
} else if (length(ruleList[[i]]) > length(ruleList[[j]])) {
if (all(ruleList[[j]] %in% ruleList[[i]])) keep[j] <- FALSE
} else if (length(ruleList[[i]]) == length(ruleList[[j]])) {
if (all(ruleList[[i]] %in% ruleList[[j]])) {
if (confidence[i] >= confidence[j]) keep[j] <- FALSE
else keep[i] <- FALSE
}
}
}
}
}
}
return(keep)
}
keep <- ruleReduction(rules.table$Keywords,
rules.table$quality[,”support”],
rules.table$quality[,”confidence”])

rule.rtn=data.frame(Keywords=sapply(rules.table$Keywords[which(keep)],
function(x){paste(x, collapse=’ ‘)}),
rules.table$quality[which(keep),],stringsAsFactors=FALSE)

##– quality(rules) shows the support, confidence and lift

write.csv(rule.rtn,”C:\\Users\\folder\\Desktop\\boxcomments5.csv”)

library(xlsx)
write.xlsx(rule.rtn,file=’C:\\Users\\folder\\Desktop\\2089ruleMA.xlsx’,sheetName=’Rules’,row.names=FALSE,append=T)

##–Extract Descriptions # need to change the file name to the original file name every time.

VerbList=sapply(DATA_.input, function(x){strsplit(x[[1]],’ ‘)})
VerbList=sapply(VerbList, function(x){
Idx=which(x==””)
if(length(Idx)>0)x=x[-Idx]
else x=x
x=unique(x)}
)

DATA_.inputCopy=DATA_.input
dData <- list(IssueWordList=VerbList,
IssueDescription=DATA_.comments)

library(rJava)

source(‘C:\\Users\\folder\\Desktop\\Rcodefiles\\rule2Description.R’)

idx.byRule <- idxByRules(rule.rtn$Keywords,dData$IssueWordList)

ruleData=rule.rtn
descriptionData=dData
idname=NULL
append=TRUE
rule2Desc <-
do.call(“rbind”,lapply(1:length(ruleData$Keywords),function(i) {
if (is.null(idname)) {
ans <- data.frame(ruleData[i,],
description=descriptionData$IssueDescription[idx.byRule[[i]]])
} else {
ans <- data.frame(ruleData[i,],
ID=descriptionData[[idname]][idx.byRule[[i]]],
description=descriptionData$IssueDescription[idx.byRule[[i]]])
}
ans$IncidentCount <- length(idx.byRule[[i]])
return(ans)
}))

library(xlsx)

write.xlsx(rule2Desc,
file=’C:\\Users\\folder\\Desktop\\rulesa.xlsx’,
sheetName=”Description_short”,
col.names=TRUE,row.names=FALSE,
append=T)

Leave a Comment