-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathapriori.py
83 lines (74 loc) · 3.14 KB
/
apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class Apriori:
    """Apriori frequent-itemset mining and association-rule generation.

    Itemsets are represented as ``frozenset`` objects.  A rule is a tuple
    ``((antecedent, consequent), confidence)``.
    """

    def __init__(self, min_support=0.618, min_conf=0.618):
        """Store the thresholds.

        min_support -- minimum fraction of transactions an itemset must
                       appear in to be kept (inclusive).
        min_conf    -- minimum confidence a rule must reach to be kept
                       (inclusive).
        """
        self.min_conf = min_conf
        self.min_support = min_support

    def find_freqsets(self, T):
        """Find every frequent itemset in the transactions ``T``.

        ``T`` must be a sized, re-iterable collection of transactions, each
        an iterable of hashable, mutually orderable items.

        Returns ``(freqsets_lst, support_counter)``:
        ``freqsets_lst[i]`` is the list of frequent itemsets of size
        ``i + 1`` (the last entry is always an empty list, marking the level
        at which the search stopped); ``support_counter`` maps each frequent
        itemset to the number of transactions containing it.
        """
        condidates = self._init_condidates(T)
        freqsets, support_counter = self._select_freq_sets(condidates, T)
        freqsets_lst = [freqsets]
        k = 2
        # freqsets_lst[k - 2] holds the (k-1)-itemsets; stop once a level
        # comes back empty.
        while len(freqsets_lst[k - 2]) > 0:
            condidates = self._generate_condidates(freqsets_lst[k - 2], k)
            freqsets, counter = self._select_freq_sets(condidates, T)
            freqsets_lst.append(freqsets)
            support_counter.update(counter)
            k += 1
        return freqsets_lst, support_counter

    def generate_rules(self, freqsets_lst, support_counter):
        """Derive association rules from the output of ``find_freqsets``.

        Returns a list of ``((antecedent, consequent), confidence)`` tuples
        whose confidence is at least ``self.min_conf``.
        """
        rules = []
        # Level 0 holds single items, which cannot be split into a rule.
        # Level i holds itemsets of size i + 1.
        for i in range(1, len(freqsets_lst)):
            for freqset in freqsets_lst[i]:
                conseq_lst = [frozenset([item]) for item in freqset]
                len_conseq = 1
                while True:
                    rule_conf_lst = self._cal_rule_conf(
                        freqset, conseq_lst, support_counter)
                    conseq_lst = []
                    for rule_conf in rule_conf_lst:
                        # Inclusive, like the support test: a rule sitting
                        # exactly on the threshold qualifies (otherwise
                        # min_conf=1.0 could never produce a rule).
                        if rule_conf[1] >= self.min_conf:
                            rules.append(rule_conf)
                            conseq_lst.append(rule_conf[0][1])
                    # Stop when the consequent has reached its maximum size
                    # (antecedent would become empty) or when too few
                    # consequents survive to be merged into larger ones.
                    if len_conseq == i or len(conseq_lst) <= 1:
                        break
                    len_conseq += 1
                    conseq_lst = self._generate_condidates(
                        conseq_lst, len_conseq)
        return rules

    def _init_condidates(self, T):
        """Return one single-item ``frozenset`` per distinct item in ``T``."""
        items = set()
        for t in T:
            items.update(t)
        return [frozenset([item]) for item in items]

    def _select_freq_sets(self, condidates, T):
        """Keep the candidates whose support reaches ``self.min_support``.

        Returns ``(freqsets, support_counter)`` where ``freqsets`` preserves
        the order of ``condidates`` and ``support_counter`` maps each kept
        itemset to its absolute transaction count.
        """
        support_counter = {}
        for t in T:
            # Convert once per transaction instead of once per candidate.
            transaction = frozenset(t)
            for c in condidates:
                if c.issubset(transaction):
                    support_counter[c] = support_counter.get(c, 0) + 1
        total = len(T)
        # The membership test short-circuits, so no division happens for
        # candidates that never occurred (including the empty-T case).
        freqsets = [
            c for c in condidates
            if c in support_counter
            and float(support_counter[c]) / total >= self.min_support]
        support_counter = {c: support_counter[c] for c in freqsets}
        return freqsets, support_counter

    def _generate_condidates(self, freqsets, k):
        """Join (k-1)-itemsets into candidate k-itemsets.

        Two itemsets are merged when their first ``k - 2`` items, taken in
        sorted order, are identical.  Given distinct input itemsets of size
        ``k - 1`` this yields every possible candidate exactly once.
        """
        # Sort BEFORE slicing: a set has no defined iteration order, so
        # slicing first would compare arbitrary items and could miss or
        # duplicate candidates.
        prefixes = [sorted(freqset)[:k - 2] for freqset in freqsets]
        condidates = []
        for i in range(len(freqsets)):
            for j in range(i + 1, len(freqsets)):
                if prefixes[i] == prefixes[j]:
                    condidates.append(freqsets[i] | freqsets[j])
        return condidates

    def _cal_rule_conf(self, freqset, conseq_lst, support_counter):
        """Compute the confidence of ``freqset - conseq -> conseq`` rules.

        Returns a list of ``((antecedent, consequent), confidence)`` tuples,
        one per entry of ``conseq_lst`` and in the same order.  Raises
        ``KeyError`` if ``freqset`` or an antecedent is missing from
        ``support_counter``.
        """
        rule_conf_lst = []
        for conseq in conseq_lst:
            antecedent = freqset - conseq
            conf = float(support_counter[freqset]) / support_counter[antecedent]
            rule_conf_lst.append(((antecedent, conseq), conf))
        return rule_conf_lst