开始之前,要先在 MySQL 中创建一个名为 spider 的数据库,并在其中创建一个名为 caipiao 的表,表中包含三个字段:data、red、blue。
点击查看代码
import requests
import pymysql
from lxml import etree
# Scrape the draw table from the 500.com chart page and store one row per
# draw in the `caipiao` table (columns: data, red, blue) of the `spider`
# MySQL database.

# Connect to the database.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
conn = pymysql.connect(
    host='localhost', port=3306, user='root', password='root', database='spider'
)
try:
    # The cursor is closed automatically when the `with` block exits.
    with conn.cursor() as cursor:
        url = 'http://datachart.500.com/ssq/'
        # A timeout keeps the script from hanging on an unresponsive server;
        # raise_for_status() stops us from parsing an HTTP error page.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        resp = response.text

        # Parse the page and select every row of the element with id "tdata".
        tree = etree.HTML(resp)
        trs = tree.xpath('//*[@id="tdata"]/tr')

        # Parameterized insert statement; loop-invariant, so built once.
        sql = "insert into caipiao(data, red, blue) values(%s, %s, %s)"

        for tr in trs:
            # Rows with exactly one child element carry no draw data
            # (presumably spacer/separator rows), so skip them.
            if len(tr) == 1:
                continue

            date_cells = tr.xpath('./td[@align="center"]/text()')
            red_cells = tr.xpath('./td[@class="chartBall01"]/text()')
            blue_cells = tr.xpath('./td[@class="chartBall02"]/text()')
            # Skip rows missing the first or last column instead of
            # crashing with IndexError on the [0] lookups below.
            if not date_cells or not blue_cells:
                continue

            data = date_cells[0].strip()
            red = '_'.join(red_cells)
            blue = blue_cells[0]
            cursor.execute(sql, (data, red, blue))

    # PyMySQL does not autocommit by default; without this the inserted rows
    # are discarded when the connection is closed.
    conn.commit()
    print("OK!")
finally:
    # Always release the database connection, even if scraping or an insert
    # failed part-way (uncommitted rows are then dropped).
    conn.close()