Scrapy: Saving Data to Multiple Databases
Target site: China Welfare Lottery network, past Shuangseqiu (double-color-ball) draw data
阳光开奖 section (cwl.gov.cn): http://www.cwl.gov.cn/ygkj/wqkjgg/
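Both pipelines below expect each scraped item to carry three fields: code (the draw number), red, and blue. A minimal sketch of that shape, with placeholder ball values (the spider that produces the items is not shown in the original):

item = {
    'code': '2022086',           # draw number
    'red': '01,02,03,04,05,06',  # red balls, placeholder values
    'blue': '07',                # blue ball, placeholder value
}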
Code
import pymongo

class MongoPipeline:
    def open_spider(self, spider):
        # Connect to local MongoDB and grab the bjsxt.ssq collection
        self.client = pymongo.MongoClient()
        self.ssq = self.client.bjsxt.ssq

    def process_item(self, item, spider):
        # Only draw 2022086 is written to MongoDB; everything else passes through
        if item.get('code') == '2022086':
            self.ssq.insert_one(item)
        return item

    def close_spider(self, spider):
        self.client.close()
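To confirm the write, you can query the collection directly with pymongo after the crawl; a quick check (database and collection names taken from the pipeline above):

import pymongo

client = pymongo.MongoClient()
# Fetch the document stored by MongoPipeline, if any
print(client.bjsxt.ssq.find_one({'code': '2022086'}))
client.close()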
# pip install pymysql==1.0.2
import pymysql
from scrapy.exceptions import DropItem

class MySQLPipeline:
    def open_spider(self, spider):
        # Open the database connection
        self.client = pymysql.connect(host='192.168.31.151', port=3306, user='root',
                                      password='123', db='bjsxt', charset='utf8')
        # Get a cursor
        self.cursor = self.client.cursor()

    def process_item(self, item, spider):
        if item.get('code') == '2022086':
            # Draw 2022086 was already saved to MongoDB, so drop it here
            raise DropItem('2022086 has already been saved to MongoDB')
        # INSERT statement (passing 0 lets an AUTO_INCREMENT id column assign the real id)
        sql = 'insert into t_ssq (id,code,red,blue) values (0,%s,%s,%s)'
        # Parameters for the statement
        args = (item['code'], item['red'], item['blue'])
        # Execute the SQL
        self.cursor.execute(sql, args)
        # Commit the transaction
        self.client.commit()
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.client.close()
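The pipeline assumes a t_ssq table already exists in the bjsxt database. A one-time setup sketch, reusing the connection parameters above; the column types are assumptions inferred from the INSERT statement:

import pymysql

# One-time setup: create the t_ssq table the pipeline writes to.
client = pymysql.connect(host='192.168.31.151', port=3306, user='root',
                         password='123', db='bjsxt', charset='utf8')
with client.cursor() as cursor:
    cursor.execute('''
        create table if not exists t_ssq (
            id   int auto_increment primary key,
            code varchar(16),
            red  varchar(32),
            blue varchar(8)
        )
    ''')
client.commit()
client.close()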
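For both pipelines to run, they must be registered in the project's settings.py. The package name ssq below is an assumption; the priorities matter because MongoPipeline must run first (lower number = earlier) so that draw 2022086 reaches MongoDB before MySQLPipeline drops it:

# settings.py -- 'ssq' as the project package name is an assumption
ITEM_PIPELINES = {
    'ssq.pipelines.MongoPipeline': 300,  # runs first
    'ssq.pipelines.MySQLPipeline': 400,  # runs second, may raise DropItem
}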