
Product Analysis (Association Rules)

Posted: 2023-03-19 19:56:57 · Views: 45

# -*- coding: utf-8 -*-
"""
Created on Wed Feb 22 10:56:39 2023

@author: admin
"""

# Code 8-1: descriptive statistics of the order data
import numpy as np
import pandas as pd

inputfile = 'D:/anaconda/data/GoodsOrder.csv'
data = pd.read_csv(inputfile, encoding='gbk')
data.info()

# describe the order-id column: number of records, smallest and largest order id
data = data['id']
description = [data.count(), data.min(), data.max()]
description = pd.DataFrame(description, index=['Count', 'Min', 'Max']).T
print('描述性统计结果:\n', np.round(description))

# Code 8-2: sales ranking of the top 10 goods
import pandas as pd
inputfile = 'D:/anaconda/data/GoodsOrder.csv'
data = pd.read_csv(inputfile, encoding='gbk')
group = data.groupby(['Goods']).count().reset_index()
sorted_goods = group.sort_values('id', ascending=False)  # renamed to avoid shadowing the built-in sorted()
print('销售排行前10商品的销量:\n', sorted_goods[:10])

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = 'SimHei'  # set a Chinese-capable font before drawing
x = sorted_goods[:10]['Goods']
y = sorted_goods[:10]['id']
plt.figure(figsize=(8, 4))
plt.barh(x, y)
plt.xlabel('销量')
plt.ylabel('商品类别')
plt.title('商品的销量TOP10学号2020310143040')
plt.savefig('D:/anaconda/data/top10.png')
plt.show()

# sales volume and share of each of the top 10 goods
data_nums = data.shape[0]
for index, row in sorted_goods[:10].iterrows():
    print(row['Goods'], row['id'], row['id'] / data_nums)

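For the counting step above, pandas' value_counts gives a more direct equivalent. A minimal sketch, assuming the same GoodsOrder.csv file and column names; the variable name orders is only illustrative:

import pandas as pd
orders = pd.read_csv('D:/anaconda/data/GoodsOrder.csv', encoding='gbk')
top10 = orders['Goods'].value_counts().head(10)   # counts per goods, already sorted in descending order
print(top10)
print(top10 / len(orders))                        # share of each goods among all order lines
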
# Code 8-3: sales volume and share of each goods category
import pandas as pd
inputfile1 = 'D:/anaconda/data/GoodsOrder.csv'
inputfile2 = 'D:/anaconda/data/GoodsTypes.csv'
data = pd.read_csv(inputfile1, encoding='gbk')
types = pd.read_csv(inputfile2, encoding='gbk')

group = data.groupby(['Goods']).count().reset_index()
sort = group.sort_values('id', ascending=False).reset_index()
data_nums = data.shape[0]
del sort['index']

sort_links = pd.merge(sort, types)  # join the per-goods counts with the goods-type table on 'Goods'

# aggregate the counts by category and sort in descending order
sort_link = sort_links.groupby(['Types']).sum().reset_index()
sort_link = sort_link.sort_values('id', ascending=False).reset_index()
del sort_link['index']

# share of each category among all order lines
sort_link['count'] = sort_link.apply(lambda line: line['id'] / data_nums, axis=1)
sort_link.rename(columns={'count': 'percent'}, inplace=True)
print('各类别商品的销量及其占比学号2020310143040:\n', sort_link)
outfile1 = 'D:/anaconda/data/percent.csv'
sort_link.to_csv(outfile1, index=False, header=True, encoding='gbk')

# pie chart of the per-category sales share
import matplotlib.pyplot as plt
data = sort_link['percent']
labels = sort_link['Types']
plt.figure(figsize=(8, 6))
plt.pie(data, labels=labels, autopct='%1.2f%%')
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.title('每类商品销售占比学号2020310143040')
plt.savefig('D:/anaconda/data/percent.png')
plt.show()

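As an aside, the reset_index / del-'index' bookkeeping above can be avoided. A minimal hedged alternative that builds the same per-category table, assuming the sort_links frame and data_nums from Code 8-3:

category_share = (sort_links.groupby('Types')['id'].sum()
                            .sort_values(ascending=False)
                            .rename('count')
                            .reset_index())
category_share['percent'] = category_share['count'] / data_nums  # same denominator: total order lines
print(category_share)
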
# Code 8-4: sales volume and share of each goods inside the '非酒精饮料' (non-alcoholic drinks) category
selected = sort_links.loc[sort_links['Types'] == '非酒精饮料'].copy()  # copy to avoid SettingWithCopyWarning
child_nums = selected['id'].sum()
selected['child_percent'] = selected.apply(lambda line: line['id'] / child_nums, axis=1)
selected.rename(columns={'id': 'count'}, inplace=True)
print('非酒精饮料内部商品的销量及其占比:\n', selected)
outfile2 = 'D:/anaconda/data/child_percent.csv'
selected.to_csv(outfile2, index=False, header=True, encoding='gbk')  # save the within-category table (the original wrote sort_link here)

# pie chart of the within-category shares
import matplotlib.pyplot as plt
data = selected['child_percent']
labels = selected['Goods']
plt.figure(figsize=(8, 6))
explode = (0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.08, 0.3, 0.1, 0.3)  # one offset per pie slice

plt.pie(data, explode=explode, labels=labels, autopct='%1.2f%%', pctdistance=1.1, labeldistance=1.2)
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.title("非酒精饮料内部各商品的销售占比学号2020310143040")
plt.axis('equal')
plt.savefig('D:/anaconda/data/child_persent.png')
plt.show()

# Data transformation: build one transaction (the list of goods bought together) per order id
import pandas as pd
inputfile = 'D:/anaconda/data/GoodsOrder.csv'
data = pd.read_csv(inputfile, encoding='gbk')

data['Goods'] = data['Goods'].apply(lambda x: ',' + x)  # prefix every goods name with ','
data = data.groupby('id').sum().reset_index()           # group by order id (not by 'Goods') so the goods of one order concatenate into a single string
print(data.head())

data['Goods'] = data['Goods'].apply(lambda x: [x[1:]])  # drop the leading ',' and wrap the string in a list
data_list = list(data['Goods'])
print(data_list[:5])

# split each concatenated string into a list of goods
data_translation = []
for i in data_list:
    p = i[0].split(',')
    data_translation.append(p)
print('数据转换结果的前5个元素:\n', data_translation[0:5])

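A tiny worked example may make the ','-prefix / groupby / split trick clearer. The sketch below uses a hypothetical two-order frame, not the real GoodsOrder.csv:

import pandas as pd

toy = pd.DataFrame({'id': [1, 1, 1, 2, 2],
                    'Goods': ['milk', 'bread', 'eggs', 'milk', 'beer']})
toy['Goods'] = ',' + toy['Goods']                  # ',milk', ',bread', ...
toy = toy.groupby('id').sum().reset_index()        # string sum concatenates: ',milk,bread,eggs'
transactions = [g[1:].split(',') for g in toy['Goods']]
print(transactions)                                # [['milk', 'bread', 'eggs'], ['milk', 'beer']]
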
from numpy import *  # (numpy is not actually used by the functions below)

def loadDataSet():
    # small toy transaction set for testing the functions below
    return [['a', 'c', 'e'], ['b', 'd'], ['b', 'c'], ['a', 'b', 'c', 'd'], ['a', 'b'], ['b', 'c'], ['a', 'b'],
            ['a', 'b', 'c', 'e'], ['a', 'b', 'c'], ['a', 'c', 'e']]

def createC1(dataSet):
    # build the candidate 1-itemsets C1
    C1 = []
    for transaction in dataSet:
        for item in transaction:
            if not [item] in C1:
                C1.append([item])
    C1.sort()
    # map to frozenset so the itemsets are hashable and can be used as dict keys
    return list(map(frozenset, C1))

# from candidate k-itemsets to frequent k-itemsets (support computation)
def scanD(D, Ck, minSupport):
    ssCnt = {}
    for tid in D:  # iterate over the transactions
        for can in Ck:  # iterate over the candidates
            if can.issubset(tid):  # does the transaction contain the candidate?
                if not can in ssCnt:
                    ssCnt[can] = 1  # first occurrence
                else:
                    ssCnt[can] += 1  # otherwise increment the count
    numItems = float(len(D))  # number of transactions
    retList = []  # the frequent itemsets
    supportData = {}  # support of each candidate
    for key in ssCnt:
        support = ssCnt[key] / numItems  # compute the support
        if support >= minSupport:
            retList.insert(0, key)  # keep the candidates that meet the threshold
            supportData[key] = support
    return retList, supportData

def calSupport(D, Ck, min_support):
    # same idea as scanD, used here for the 1-itemsets
    dict_sup = {}
    for i in D:
        for j in Ck:
            if j.issubset(i):
                if not j in dict_sup:
                    dict_sup[j] = 1
                else:
                    dict_sup[j] += 1
    sumCount = float(len(D))
    supportData = {}
    relist = []
    for i in dict_sup:
        temp_sup = dict_sup[i] / sumCount
        if temp_sup >= min_support:
            relist.append(i)
            # one may choose to record the support of all candidates, or only of the frequent itemsets
            supportData[i] = temp_sup
    return relist, supportData

# candidate generation with improved pruning
def aprioriGen(Lk, k):
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):  # pairwise combination
            L1 = list(Lk[i])[:k - 2]
            L2 = list(Lk[j])[:k - 2]
            L1.sort()
            L2.sort()
            if L1 == L2:  # if the first k-2 items agree, the union has exactly k items; this avoids duplicate candidates
                # pruning: a is a candidate k-itemset, b collects all of its (k-1)-item subsets
                a = Lk[i] | Lk[j]  # a is a frozenset
                a1 = list(a)
                b = []
                # remove each element in turn to obtain every (k-1)-item subset
                for q in range(len(a1)):
                    t = [a1[q]]
                    tt = frozenset(set(a1) - set(t))
                    b.append(tt)
                t = 0
                for w in b:
                    # keep the candidate only if every (k-1)-item subset is itself frequent, i.e. appears in Lk
                    if w in Lk:
                        t += 1
                if t == len(b):
                    retList.append(b[0] | b[1])
    return retList

def apriori(dataSet, minSupport=0.2):
    # the first three statements compute the frequent 1-itemsets
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))  # convert each transaction to a set
    L1, supportData = calSupport(D, C1, minSupport)
    L = [L1]  # wrap in a list so the 1-itemsets form a single element of L
    k = 2
    while (len(L[k - 2]) > 0):  # keep going while the previous level still produced frequent itemsets
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)  # scan the transactions to get Lk
        supportData.update(supK)  # merge the new support values into supportData
        L.append(Lk)  # the last element of L ends up empty
        k += 1
    del L[-1]  # drop the trailing empty level
    return L, supportData  # L is a list of frequent itemsets, one element per itemset size

# generate all non-empty proper subsets of a set
def getSubset(fromList, toList):
    for i in range(len(fromList)):
        t = [fromList[i]]
        tt = frozenset(set(fromList) - set(t))
        if not tt in toList:
            toList.append(tt)
            tt = list(tt)
            if len(tt) > 1:
                getSubset(tt, toList)

def calcConf(freqSet, H, supportData, ruleList, minConf=0.7):
    for conseq in H:  # iterate over the candidate consequents and compute their confidence
        conf = supportData[freqSet] / supportData[freqSet - conseq]  # confidence from the stored supports
        # lift = p(a & b) / (p(a) * p(b))
        lift = supportData[freqSet] / (supportData[conseq] * supportData[freqSet - conseq])

        if conf >= minConf and lift > 1:
            print(freqSet - conseq, '-->', conseq, '支持度', round(supportData[freqSet], 6), '置信度:', round(conf, 6),
                  'lift值为:', round(lift, 6))
            ruleList.append((freqSet - conseq, conseq, conf))

# generate the association rules
def gen_rule(L, supportData, minConf=0.7):
    bigRuleList = []
    for i in range(1, len(L)):  # start from the 2-itemsets
        for freqSet in L[i]:  # freqSet runs over every frequent k-itemset
            # all_subset collects every non-empty proper subset of freqSet (1-itemsets up to (k-1)-itemsets) as frozensets
            H1 = list(freqSet)
            all_subset = []
            getSubset(H1, all_subset)  # generate all subsets
            calcConf(freqSet, all_subset, supportData, bigRuleList, minConf)
    return bigRuleList

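As a quick sanity check before mining the real transactions, the functions above can be exercised on the built-in loadDataSet() toy data. The thresholds 0.2 / 0.7 are arbitrary choices for the toy set:

toy_L, toy_support = apriori(loadDataSet(), minSupport=0.2)
print('toy frequent itemsets:', toy_L)
toy_rules = gen_rule(toy_L, toy_support, minConf=0.7)
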
if __name__ == '__main__':
    # mine the real transactions: minimum support 0.02, minimum confidence 0.35
    dataSet = data_translation
    L, supportData = apriori(dataSet, minSupport=0.02)
    rule = gen_rule(L, supportData, minConf=0.35)

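The hand-written Apriori can be cross-checked against a library implementation. The sketch below assumes the third-party mlxtend package is installed (pip install mlxtend) and reuses the data_translation transaction list built above; it is a verification aid, not part of the original analysis:

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori as mlx_apriori, association_rules  # aliased to avoid clashing with the apriori() defined above

te = TransactionEncoder()
te_ary = te.fit(data_translation).transform(data_translation)  # one-hot encode the transactions
basket = pd.DataFrame(te_ary, columns=te.columns_)

freq_itemsets = mlx_apriori(basket, min_support=0.02, use_colnames=True)
rules = association_rules(freq_itemsets, metric='confidence', min_threshold=0.35)
print(rules[rules['lift'] > 1][['antecedents', 'consequents', 'support', 'confidence', 'lift']])
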
# Within-category analysis for '西点' (pastries), analogous to Code 8-4
selected = sort_links.loc[sort_links['Types'] == '西点'].copy()
child_nums = selected['id'].sum()
selected['child_percent'] = selected.apply(lambda line: line['id'] / child_nums, axis=1)
selected.rename(columns={'id': 'count'}, inplace=True)
print('西点类商品的销量及其占比:\n', selected)
outfile3 = 'D:/anaconda/data/child_percent.csv'
selected.to_csv(outfile3, index=False, header=True, encoding='gbk')  # save the within-category table (the original wrote sort_link; note this path overwrites the Code 8-4 output)

import matplotlib.pyplot as plt
data = selected['child_percent']
labels = selected['Goods']
plt.figure(figsize=(8, 6))
explode = (0.03,) * 21  # one offset per pie slice, all equal

plt.pie(data, explode=explode, labels=labels, autopct='%1.2f%%', pctdistance=1.1, labeldistance=1.2)
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.title("西点类各商品的销售占比学号2020310143040")
plt.axis('equal')
plt.savefig('D:/anaconda/data/child_persent_西点.png')
plt.show()

From: https://www.cnblogs.com/i3wood/p/17234039.html
