Python爬虫爬取网站之后,如何将数据写入并保存?今天我将利用所学的知识,把数据写入的一些代码教程整理出来,供大家参考。
Python爬虫之数据写入
# Write data to an Excel file
import xlsxwriter

# Create the file and add a worksheet to it
workbook = xlsxwriter.Workbook('demo.xlsx')
worksheet = workbook.add_worksheet()

# Write each value at its target cell (cell address -> text)
for cell, text in (("A1", "这是A1的数据"), ("A2", "这是A2的数据")):
    worksheet.write(cell, text)

# Close the workbook file
workbook.close()
# Scrape the common phone numbers from the convenience-lookup site and write them to Excel
import re

URL = "http://jshk.com.cn/mb/reg.asp?kefu=xjy"

# The User-Agent is assembled from adjacent literals so no stray backslash or
# space ends up inside "AppleWebKit" / "Safari".
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}

# A single pattern captures both cells of a row, so every name is paired with
# the phone number found in the same match (two independent patterns could
# return lists of different lengths).
ROW_PATTERN = re.compile(
    r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>(.*?)</td>'
    r'[\s\S]*?<td>(.*?)</td>[\s\S]*?</tr>'
)


def parse_phone_rows(html):
    """Return the ``(name, phone)`` pairs found in *html*.

    Only rows opened with ``<tr bgcolor="#EFF7F0">`` are considered; the first
    two single-line ``<td>`` cells after it become the pair. An input without
    such rows yields an empty list.
    """
    return ROW_PATTERN.findall(html)


def fetch_html(url=URL, timeout=10):
    """Download *url* with the browser-like headers and return the page text.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer instead of letting an
    error page be parsed as if it were data.
    """
    # Imported lazily so parse_phone_rows() stays usable without requests.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def scrape_to_excel(path="demo2.xlsx"):
    """Scrape the page and store one (name, phone) pair per row in *path*.

    Column A receives the name and column B the phone number. Returns the
    list of pairs that were written.
    """
    # Imported lazily so parse_phone_rows() stays usable without xlsxwriter.
    import xlsxwriter

    rows = parse_phone_rows(fetch_html())

    # The context manager closes the workbook (which is what actually creates
    # the file) even if writing a cell raises.
    with xlsxwriter.Workbook(path) as workbook:
        worksheet = workbook.add_worksheet()
        for row_index, (name, phone) in enumerate(rows):
            # Zero-based (row, col) addressing: (0, 0) is A1, (0, 1) is B1.
            worksheet.write(row_index, 0, name)
            worksheet.write(row_index, 1, phone)

    print([name + phone for name, phone in rows])
    return rows


if __name__ == "__main__":
    scrape_to_excel()
# Scrape the common phone numbers from the convenience-lookup site and write them to MySQL
# Note: the database and a table with the matching fields must be created beforehand
import re

URL = "http://jshk.com.cn/mb/reg.asp?kefu=xjy"

# The User-Agent is assembled from adjacent literals so no stray backslash or
# space ends up inside "AppleWebKit" / "Safari".
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}

# A single pattern captures both cells of a row, so every name is paired with
# the phone number found in the same match (two independent patterns could
# return lists of different lengths).
ROW_PATTERN = re.compile(
    r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>(.*?)</td>'
    r'[\s\S]*?<td>(.*?)</td>[\s\S]*?</tr>'
)

DELETE_SQL = "delete from tel"
# Placeholders instead of string concatenation: the scraped text is untrusted
# and may contain quotes, so the driver must do the escaping.
INSERT_SQL = "insert into tel(name,phone) values(%s,%s)"


def parse_phone_rows(html):
    """Return the ``(name, phone)`` pairs found in *html*.

    Only rows opened with ``<tr bgcolor="#EFF7F0">`` are considered; the first
    two single-line ``<td>`` cells after it become the pair. An input without
    such rows yields an empty list.
    """
    return ROW_PATTERN.findall(html)


def fetch_html(url=URL, timeout=10):
    """Download *url* with the browser-like headers and return the page text.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer instead of letting an
    error page be parsed as if it were data.
    """
    # Imported lazily so parse_phone_rows() stays usable without requests.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def scrape_to_mysql():
    """Scrape the page and replace the contents of table ``tel`` with it.

    The old rows are deleted and the new ones inserted inside one
    transaction, so a failure part-way through rolls back and leaves the
    previous contents in place. Returns the list of pairs that were stored.
    """
    # Imported lazily so parse_phone_rows() stays usable without pymysql.
    import pymysql

    # Fetch and parse first: no connection is held open during the download.
    rows = parse_phone_rows(fetch_html())

    # Open the database connection
    db = pymysql.Connect(host="localhost", port=3306, user="root",
                         passwd="AA123456", db="spider_test", charset="utf8")
    try:
        with db.cursor() as cursor:
            # Clear the previous contents of the table
            cursor.execute(DELETE_SQL)
            cursor.executemany(INSERT_SQL, rows)
        db.commit()
    except Exception:
        # Undo the delete as well, then let the caller see the real error.
        db.rollback()
        raise
    finally:
        db.close()

    print([name + phone for name, phone in rows])
    return rows


if __name__ == "__main__":
    scrape_to_mysql()
标签:Python,worksheet,写入,爬虫,resultlist,re,import,data1 From: https://www.cnblogs.com/q-q56731526/p/17282432.html