首页 > 其他分享 >第八章

第八章

时间:2023-03-19 22:13:59 | 浏览次数:41  
标签:第八章 freqSet tt Lk len supportData sup

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

from numpy import *
 
def loadDataSet():
    """Return the built-in sample data: ten transactions of single-letter items.

    Each transaction is a fresh list of one-character strings, so callers may
    mutate the result without affecting later calls.
    """
    # One string per transaction; every character is one item.
    baskets = ('ace', 'bd', 'bc', 'abcd', 'ab', 'bc', 'ab', 'abce', 'abc', 'ace')
    return [list(basket) for basket in baskets]
 
def createC1(dataSet):
    """Build the list of candidate 1-itemsets.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list with one ``frozenset`` per distinct item, ordered by item
        value (ascending).
    """
    # A set gives O(1) de-duplication; the previous list-membership test
    # rescanned every collected item for each item of each transaction.
    unique_items = {item for transaction in dataSet for item in transaction}
    return [frozenset([item]) for item in sorted(unique_items)]
    

def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (sets).
        Ck: iterable of candidate itemsets (hashable sets, e.g. frozensets).
        minSupport: minimum fraction of transactions a candidate must occur in.

    Returns:
        ``(retList, supportData)`` where ``supportData`` maps every kept
        candidate to its support and ``retList`` lists the kept candidates in
        the reverse of the order in which they were first encountered.
    """
    hits = {}
    for transaction in D:
        matching = [cand for cand in Ck if cand.issubset(transaction)]
        for cand in matching:
            hits[cand] = hits.get(cand, 0) + 1

    total = float(len(D))
    supportData = {}
    for cand, count in hits.items():
        ratio = count / total
        if ratio >= minSupport:
            supportData[cand] = ratio

    # Equivalent to inserting each kept candidate at the front of the list.
    retList = list(supportData)
    retList.reverse()
    return retList, supportData
 
def calSupport(D, Ck, min_support):
    """Compute the support of each candidate and keep those meeting the threshold.

    Args:
        D: sized collection of transactions (sets).
        Ck: iterable of candidate itemsets (hashable sets, e.g. frozensets).
        min_support: minimum fraction of transactions a candidate must occur in.

    Returns:
        ``(relist, supportData)``: the kept candidates in first-encountered
        order, and a dict mapping each kept candidate to its support.
    """
    tally = {}
    for record in D:
        for cand in Ck:
            if not cand.issubset(record):
                continue
            tally[cand] = tally.get(cand, 0) + 1

    n_records = float(len(D))
    supportData = {}
    for cand, occurrences in tally.items():
        fraction = occurrences / n_records
        if fraction >= min_support:
            supportData[cand] = fraction
    # Dict insertion order is exactly the order the kept candidates were added.
    relist = list(supportData)
    return relist, supportData
 

def aprioriGen(Lk, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Join step: two itemsets are merged when their first ``k - 2`` items, taken
    in sorted order, are equal. Prune step: a merged candidate is kept only if
    every subset obtained by removing one item is itself in ``Lk``.

    Args:
        Lk: list of frequent itemsets (frozensets of mutually orderable
            items), normally all of size ``k - 1``.
        k: size of the candidates to generate.

    Returns:
        List of candidate frozensets, without duplicates, in the order the
        joining pairs are visited.
    """
    frequent = set(Lk)  # O(1) lookups for the prune step
    # Sort BEFORE taking the prefix: a frozenset has no defined order, so
    # slicing first compared arbitrary items, which could emit the same
    # candidate from several pairs or miss a valid join.
    ordered = [sorted(itemset) for itemset in Lk]
    prefix_len = k - 2

    retList = []
    emitted = set()
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if ordered[i][:prefix_len] != ordered[j][:prefix_len]:
                continue
            candidate = Lk[i] | Lk[j]
            if candidate in emitted:
                continue
            # Explicit loop rather than all(): this file star-imports numpy,
            # which rebinds the name `all`.
            for item in candidate:
                if candidate - {item} not in frequent:
                    break
            else:
                emitted.add(candidate)
                retList.append(candidate)
    return retList

def apriori(dataSet, minSupport=0.2):
    """Mine all frequent itemsets of ``dataSet`` level by level.

    Args:
        dataSet: list of transactions (iterables of items).
        minSupport: minimum support for an itemset to count as frequent.

    Returns:
        ``(L, supportData)``: ``L[i]`` holds the frequent itemsets of size
        ``i + 1`` (the trailing empty level is removed), and ``supportData``
        maps each frequent itemset to its support.
    """
    singles = createC1(dataSet)
    transactions = [set(transaction) for transaction in dataSet]
    first_level, supportData = calSupport(transactions, singles, minSupport)

    L = [first_level]
    size = 2
    # Keep growing while the most recent level found anything.
    while L[-1]:
        candidates = aprioriGen(L[-1], size)
        frequent, level_support = scanD(transactions, candidates, minSupport)
        supportData.update(level_support)
        L.append(frequent)
        size += 1

    # The loop always ends by appending an empty level; drop it.
    L.pop()
    return L, supportData


def getSubset(fromList, toList):
    """Collect, recursively, the subsets obtained by dropping items from ``fromList``.

    For each item, the set of the remaining items is appended to ``toList``
    (as a frozenset) unless it is already there; subsets with more than one
    item are then expanded the same way.

    Args:
        fromList: list of hashable items.
        toList: list extended in place with the discovered frozensets.

    Returns:
        None; results are accumulated in ``toList``.
    """
    for dropped in fromList:
        remainder = frozenset(set(fromList) - set([dropped]))
        if remainder in toList:
            # Already recorded (and expanded) via another path.
            continue
        toList.append(remainder)
        if len(remainder) > 1:
            getSubset(list(remainder), toList)
def calcConf(freqSet, H, supportData, ruleList, minConf=0.7):
    """Evaluate the rules ``(freqSet - conseq) --> conseq`` for each ``conseq`` in ``H``.

    A rule is printed and appended to ``ruleList`` when its confidence is at
    least ``minConf`` and its lift is greater than 1.

    Args:
        freqSet: the frequent itemset the rules are derived from.
        H: iterable of consequent itemsets (subsets of ``freqSet``).
        supportData: dict mapping itemsets to their support; must contain
            ``freqSet``, each consequent and each antecedent.
        ruleList: list extended in place with ``(antecedent, consequent,
            confidence)`` tuples.
        minConf: minimum confidence threshold.

    Returns:
        None.
    """
    for conseq in H:
        antecedent = freqSet - conseq
        joint_support = supportData[freqSet]
        antecedent_support = supportData[antecedent]

        conf = joint_support / antecedent_support
        lift = joint_support / (supportData[conseq] * antecedent_support)

        accepted = conf >= minConf and lift > 1
        if accepted:
            print(antecedent, '-->', conseq, '支持度', round(joint_support, 6), '置信度:', round(conf, 6),
                  'lift值为:', round(lift, 6))
            ruleList.append((antecedent, conseq, conf))
 
# Generate association rules
def gen_rule(L, supportData, minConf=0.7):
    """Derive association rules from the frequent itemsets.

    Args:
        L: list of levels as returned by ``apriori``; ``L[0]`` (the
            single-item level) is skipped.
        supportData: dict mapping itemsets to their support.
        minConf: minimum confidence passed through to ``calcConf``.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples collected by
        ``calcConf``.
    """
    bigRuleList = []
    for level in L[1:]:
        for freqSet in level:
            # Every subset found by getSubset is tried as a rule consequent.
            consequents = []
            getSubset(list(freqSet), consequents)
            calcConf(freqSet, consequents, supportData, bigRuleList, minConf)
    return bigRuleList
 
if __name__ == '__main__':
    # `data_translation` is not defined anywhere in this file (presumably it is
    # the transaction list prepared by an earlier step -- confirm). Use it when
    # it exists; otherwise fall back to the bundled sample data so the script
    # runs standalone instead of raising NameError.
    try:
        dataSet = data_translation
    except NameError:
        dataSet = loadDataSet()
    L, supportData = apriori(dataSet, minSupport = 0.02)
    rule = gen_rule(L, supportData, minConf = 0.35)

 

 

 

 

 

 

 

标签:第八章,freqSet,tt,Lk,len,supportData,sup
From: https://www.cnblogs.com/cl3109/p/17234513.html

相关文章

  • 第八章随笔
    第一部分——商品零售购物篮分析 代码一:查看数据特征 import pandas as pd; import numpy as np; import seaborn as sns; inputfile='D:\JupyterLab-Portable-3.1.0-3.9\新建......
  • 第七第八章
    第7章安全信道7.1安全信道的性质大致定义:A与B间安全的连接7.1.1角色双向连接,同时有不对称(区别)存在攻击者,可以读取并操纵内容存储可看作向未来发送数据7.1.2密钥......
  • 第八章 商品零售购物篮分析
     #代码8-1 查看数据特征 import numpy as np; import pandas as pd; inputfile="E:\\anaconda3\\jupyterFile\\数据分析\\data\\GoodsOrder.csv" #输入的数据文件 dat......
  • python数据分析与挖掘实战第八章
    #8-1 import numpy as np; import pandas as pd; inputfile='data4/GoodsOrder.csv'; data=pd.read_csv(inputfile,encoding='gbk'); data.info(); data=data['id']; de......
  • 第八章 多级反馈队列调度
    1.限制作业长度并关闭IOpython3mlfq.py-j2-n2-cpython3mlfq.py--jlist0,10,0:0,5,0-n2-c2.实现书上示例python3mlfq.py-l0,200,0-cpython3mlfq.py-......
  • 第八章假设检验
    ......
  • 《程序是怎样跑起来的》第八章读后感
    在第八章中主要讲述了源文件与可执行文件及其转换,内容有下:计算机只能运行本地代码,程序员键入的源代码,计算机无法直接识别运行。编译器负责转换源代码,每个编写源代码的编程......
  • 《程序是怎样跑起来的》第八章
        这章讲了从源代码到可执行代码,CPU能运行的只是转化成本地代码的程序内容,用任何编程语言编写的源代码最后都要翻译成本地代码。    用某种编程语言编写......
  • 第八章:仙人归,刺晶成
    岳阳还在震惊于这箭矢的威力,身后忽传来一阵掌声。“不错,一介凡人,又没有神之眼,单凭借自身武艺便可击杀一只螭,着实难得。”岳阳回头一看,并没有人影,却看见了一只大鸟——哦不,......
  • 第八章 从源文件到可执行文件
        机器运行的是本地代码(NativeCode)用某种编程语言编写出来的程序是源代码,保存源代码的文件是源文件。源文件只是文本文件,并不能直接运行,因为CPU只能运行本地代......