from typing import *
from typing import List, Tuple
from itertools import combinations
def loadDataSet():
    """Return the small demo transaction database used by the script.

    Returns:
        A new list of four transactions, each a set of integer item ids.
    """
    return [{1, 2, 4, 5}, {2, 3, 5}, {1, 2, 3, 5}, {2, 5}]
def loadCharDataSet():
    """Return a demo transaction database written with letters.

    Each transaction is spelled as a string of item letters and converted to
    the set of the letters' code points (``ord``), so the result is a list of
    sets of ints.

    Returns:
        A new list of ten transactions.
    """
    baskets = (
        "abde",
        "bcd",
        "abde",
        "acde",
        "bcde",
        "bde",
        "cd",
        "abc",
        "ade",
        "bd",
    )
    # A str iterates over its characters, so no list() conversion is needed.
    return [{ord(letter) for letter in basket} for basket in baskets]
def get_k_item(data: List[Set[int]], k: int) -> List[Set[int]]:
    """Enumerate every candidate k-itemset over the items found in *data*.

    Args:
        data: Itemsets (transactions or frequent itemsets) whose union forms
            the pool of single items to combine.
        k: Size of the itemsets to generate.

    Returns:
        One set per k-combination of the distinct items, generated from the
        items in ascending order. Empty when ``k`` exceeds the number of
        distinct items.
    """
    # Sorting the pool makes the candidate order deterministic instead of
    # depending on set iteration order.
    single_elements = sorted({element for itemset in data for element in itemset})
    return [set(combo) for combo in combinations(single_elements, k)]
def get_one_support_rate(dataset: List[Set[int]], item: Set[int]) -> float:
    """Return the support of *item*: the share of transactions containing it.

    Args:
        dataset: Transactions, each a set of items.
        item: The itemset to look for.

    Returns:
        The number of transactions that contain every element of *item*
        divided by the number of transactions, or ``0.0`` when *dataset* is
        empty.
    """
    if not dataset:
        # No transactions: report zero support rather than dividing by zero.
        return 0.0
    hits = sum(1 for transaction in dataset if item <= transaction)
    return hits / len(dataset)
def get_support_rate(
    dataset: List[Set[int]], data: List[Set[int]]
) -> List[Tuple[tuple, float]]:
    """Compute the support of each given itemset.

    Args:
        dataset: Transactions, each a set of items.
        data: Candidate itemsets to score.

    Returns:
        One ``(itemset_as_tuple, support)`` pair per candidate, in input
        order. The support comes from ``get_one_support_rate``.
    """
    return [
        (tuple(candidate), get_one_support_rate(dataset, candidate))
        for candidate in data
    ]
def get_filter_items(data: List[Tuple[tuple, float]], rate: float) -> List[Set[int]]:
    """Keep the itemsets whose support reaches the threshold.

    Args:
        data: ``(itemset_as_tuple, support)`` pairs.
        rate: Minimum support; a pair is kept when its support is greater
            than or equal to this value.

    Returns:
        The surviving itemsets converted to sets, in input order.
    """
    return [set(itemset) for itemset, support in data if support >= rate]
def get_item_confidence(items_after):
    """Build every association rule from the given itemsets and score it.

    For each itemset, every non-empty proper subset becomes an antecedent and
    the remaining items become the consequent. The confidence of the rule is
    ``support(itemset) / support(antecedent)``.

    NOTE: supports are measured against the module-level ``dataset``
    variable, which must be defined before this function is called with an
    itemset of two or more items.

    Args:
        items_after: Itemsets (sets of items) to derive rules from.

    Returns:
        A list of ``[antecedent, consequent, confidence]`` lists. Itemsets
        with fewer than two items yield no rules.
    """
    result = []
    for frequent_itemset in items_after:
        size = len(frequent_itemset)
        if size < 2:
            # No non-empty proper subset exists, so there is no rule to build.
            continue
        whole = set(frequent_itemset)
        # Antecedent | consequent is always the whole itemset, so its support
        # is the same for every rule derived from it: compute it once.
        whole_support = get_one_support_rate(dataset, whole)
        for antecedent_size in range(1, size):
            for combo in combinations(frequent_itemset, antecedent_size):
                antecedent = set(combo)
                antecedent_support = get_one_support_rate(dataset, antecedent)
                # An antecedent that never occurs cannot imply anything;
                # report 0.0 instead of dividing by zero.
                if antecedent_support:
                    confidence = whole_support / antecedent_support
                else:
                    confidence = 0.0
                result.append([antecedent, whole - antecedent, confidence])
    return result
def get_filter_relation(
    data: List[List[Union[Set[int], float]]], rate: float, is_log=True
) -> List[List[Set[int]]]:
    """Keep the association rules whose confidence reaches the threshold.

    Args:
        data: Rules as ``[antecedent, consequent, confidence]`` lists.
        rate: Minimum confidence; a rule is kept when its confidence is
            greater than or equal to this value.
        is_log: When true, print each kept rule as
            ``antecedent -> consequent``.

    Returns:
        The kept rules as ``[antecedent, consequent]`` lists, in input order.
    """
    relations = [[rule[0], rule[1]] for rule in data if rule[2] >= rate]
    if is_log:
        for antecedent, consequent in relations:
            print(f"{antecedent} -> {consequent}")
    return relations
# Driver script: run a level-wise frequent-itemset search on the demo data
# and print the association rules found at each level.
dataset = loadDataSet()
print(f"dataset: {dataset}")
k = 0
MIN_SUPPORT = 0.5
MIN_CONFIDENCE = 0.8
print(f"最小支持度: {MIN_SUPPORT}")
print(f"最小置信度: {MIN_CONFIDENCE}")
items_after = dataset  # seed the item pool with the raw transactions
while True:
    k += 1
    # Candidates are built only from items that survived the previous level.
    item_or_not = get_k_item(items_after, k)
    if not item_or_not:
        print("结束")
        break
    items = item_or_not
    print(f"候选{k}项集: {items}")
    items_pre = get_support_rate(dataset, items)
    print(f"候选{k}项集的支持度: {items_pre}")
    items_after_or_not = get_filter_items(items_pre, rate=MIN_SUPPORT)
    if not items_after_or_not:
        # No candidate of this size is frequent, so the search stops here.
        print("结束")
        break
    items_after = items_after_or_not
    print(f"频繁{k}项集: {items_after}")
    # Derive and report the rules for this level's frequent itemsets.
    relations = get_item_confidence(items_after)
    print(f"关联规则及置信度: {relations}")
    print("筛选后的关联规则: ")
    get_filter_relation(relations, rate=MIN_CONFIDENCE, is_log=True)
output:
dataset: [{1, 2, 4, 5}, {2, 3, 5}, {1, 2, 3, 5}, {2, 5}]
最小支持度: 0.5
最小置信度: 0.8
候选1项集: [{1}, {2}, {3}, {4}, {5}]
候选1项集的支持度: [((1,), 0.5), ((2,), 1.0), ((3,), 0.5), ((4,), 0.25), ((5,), 1.0)]
频繁1项集: [{1}, {2}, {3}, {5}]
关联规则及置信度: []
筛选后的关联规则:
候选2项集: [{1, 2}, {1, 3}, {1, 5}, {2, 3}, {2, 5}, {3, 5}]
候选2项集的支持度: [((1, 2), 0.5), ((1, 3), 0.25), ((1, 5), 0.5), ((2, 3), 0.5), ((2, 5), 1.0), ((3, 5), 0.5)]
频繁2项集: [{1, 2}, {1, 5}, {2, 3}, {2, 5}, {3, 5}]
关联规则及置信度: [[{1}, {2}, 1.0], [{2}, {1}, 0.5], [{1}, {5}, 1.0], [{5}, {1}, 0.5], [{2}, {3}, 0.5], [{3}, {2}, 1.0], [{2}, {5}, 1.0], [{5}, {2}, 1.0], [{3}, {5}, 1.0], [{5}, {3}, 0.5]]
筛选后的关联规则:
{1} -> {2}
{1} -> {5}
{3} -> {2}
{2} -> {5}
{5} -> {2}
{3} -> {5}
候选3项集: [{1, 2, 3}, {1, 2, 5}, {1, 3, 5}, {2, 3, 5}]
候选3项集的支持度: [((1, 2, 3), 0.25), ((1, 2, 5), 0.5), ((1, 3, 5), 0.25), ((2, 3, 5), 0.5)]
频繁3项集: [{1, 2, 5}, {2, 3, 5}]
关联规则及置信度: [[{1}, {2, 5}, 1.0], [{2}, {1, 5}, 0.5], [{5}, {1, 2}, 0.5], [{1, 2}, {5}, 1.0], [{1, 5}, {2}, 1.0], [{2, 5}, {1}, 0.5], [{2}, {3, 5}, 0.5], [{3}, {2, 5}, 1.0], [{5}, {2, 3}, 0.5], [{2, 3}, {5}, 1.0], [{2, 5}, {3}, 0.5], [{3, 5}, {2}, 1.0]]
筛选后的关联规则:
{1} -> {2, 5}
{1, 2} -> {5}
{1, 5} -> {2}
{3} -> {2, 5}
{2, 3} -> {5}
{3, 5} -> {2}
候选4项集: [{1, 2, 3, 5}]
候选4项集的支持度: [((1, 2, 3, 5), 0.25)]
结束
标签:1.0,python,items,apriori,0.5,项集,item,算法,dataset
From: https://www.cnblogs.com/aminor/p/17126214.html