"""
数据挖掘的流程:
1- 获取数据
1- 从固有的本地获取
2- 互联网动态获取
2- 存储数据
1- 大量数据 数据库
2- 部署太大数据--使用文件存储:
1- txt
2- excel
3- csv
3- 清洗数据
4- 算法的介入
5- 结果展示
6- 分析汇总
"""
import requests,time
import pprint,csv
import threading
# 1- Create the output file.
# newline='' is what the csv module expects of the file object it writes to,
# so that it can control the line endings itself.
fo = open('股票数据.csv', mode='w', encoding='utf-8', newline='')
csv_write = csv.DictWriter(fo, fieldnames=['股票名称', '股票代码', '当前价格', '成交量'])
# Write the column names as the first row of the file.
csv_write.writeheader()
# One quote-list URL per page (pages 1 and 2, 30 entries each); the trailing
# "_" query parameter is the current time in milliseconds.
urlList = [
    "https://xueqiu.com/service/v5/stock/screener/quote/list?page={}&size=30&order=desc&orderby=percent&order_by=percent&market=CN&type=sh_sz&_={}".format(page, round(time.time() * 1000))
    for page in range(1, 3)
]
# Serialises writes to the shared csv writer: request_data is run on several
# threads at once and they all append to the same output file.
_csv_write_lock = threading.Lock()


def request_data(url, timeout=10):
    """Download one page of stock quotes and append its rows to the CSV.

    The JSON response is expected to hold the stocks under
    ``['data']['list']``; for each stock the ``name``, ``symbol``,
    ``current`` and ``volume`` fields are written through the module-level
    ``csv_write`` writer.

    Args:
        url: Address of the quote-list page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block this thread, and the
            ``join()`` waiting on it, indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout or an
            HTTP error status.
        KeyError: If the response JSON lacks one of the expected fields.
    """
    # 1- Request headers: present a browser User-Agent to get past basic
    #    anti-crawler checks.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # 2- Send the request and fail loudly on an HTTP error status instead of
    #    trying to parse an error page as quote data.
    resp = requests.get(url, headers=header, timeout=timeout)
    resp.raise_for_status()
    dataList = resp.json()['data']['list']
    # 3- Keep only the fields we need; each item of dataList is one stock.
    rows = [
        {
            '股票名称': one['name'],  # stock name
            '股票代码': one['symbol'],  # stock code
            '当前价格': one['current'],  # current price
            '成交量': one['volume'],  # trading volume
        }
        for one in dataList
    ]
    # Write the whole page under the lock so rows produced by different
    # threads cannot interleave inside the file.
    with _csv_write_lock:
        csv_write.writerows(rows)
# 2- Run the crawler with multiple threads
def request_threads():
    """Fetch every URL in ``urlList`` on its own thread and wait for all of them.

    One ``threading.Thread`` running ``request_data`` is created per URL.
    The threads are started one second apart and the function returns only
    after every thread has finished.
    """
    workers = [
        threading.Thread(target=request_data, args=(url,))
        for url in urlList
    ]
    # 1- Start the threads, pausing one second after each start.
    for worker in workers:
        worker.start()
        time.sleep(1)
    # 2- Block until every thread is done.
    for worker in workers:
        worker.join()
import pandas
import matplotlib.pyplot as plt
from pylab import mpl
# Use the SimHei font for sans-serif text; presumably so that the Chinese
# labels drawn by the chart below render instead of showing as empty boxes.
mpl.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen rather than the Unicode minus sign
# (a glyph the chosen font may not provide -- TODO confirm).
mpl.rcParams['axes.unicode_minus']=False
def show_data(fileName='股票数据.csv'):
    """Print and plot the current price of the first ten stocks in a CSV file.

    Args:
        fileName: Path of the CSV file to read; it must contain the
            '股票名称' and '当前价格' columns.
    """
    # 4- Data cleaning: load the CSV, drop rows with missing values, then
    #    keep the two needed columns of the first ten remaining rows.
    cleaned = pandas.read_csv(fileName).dropna()
    top_rows = cleaned[['股票名称', '当前价格']].iloc[:10]
    print(top_rows)

    # 5- Algorithm step: nothing is applied here.
    # 6- Presentation: draw a bar chart (x = stock name, y = current price).
    names = top_rows['股票名称'].values
    prices = top_rows['当前价格'].values
    plt.bar(names, prices, label='股票分析结果')
    for name, price in zip(names, prices):
        print(name, price)
        # Put the price as a text label 5 units above the top of each bar.
        plt.text(
            name,
            price + 5,
            price,
            horizontalalignment='center',
            verticalalignment='bottom',
            fontsize=10,
            rotation=0,
        )
    plt.legend()  # show the legend for the bar label
    plt.xticks(rotation=-90)  # rotate the x-axis tick labels
    plt.xlabel('股票名称')
    plt.ylabel('当前价格')
    plt.show()
# Script entry point: crawl the data into the CSV, then plot it.
if __name__ == '__main__':
    try:
        request_threads()  # crawl the data
    finally:
        # Close the CSV even if crawling is interrupted, so the rows written
        # so far are flushed to disk and the handle is not leaked.
        fo.close()
    # The file is read back only after it has been closed.
    show_data()