Python 操作 MySQL —— 使用 pymysql 的最小示例（连接、查询、关闭）
import pymysql

# Open the database connection.
# Bug fix: the original passed `datebase='...'` (typo), which raises
# TypeError because pymysql.connect() has no such keyword — it must be
# `database=`. Also use `password=` instead of the legacy `passwd` alias.
db = pymysql.connect(host='localhost', user='root', password='...',
                     port=3306, database='...')
print('连接成功!')

# Create a cursor object with cursor().
cursor = db.cursor()
try:
    # Run a SQL query with execute().
    cursor.execute("SELECT VERSION()")
    # Fetch a single row with fetchone().
    data = cursor.fetchone()
    print("Database version : %s " % data)
finally:
    # Always release the cursor and the connection, even if the query fails.
    cursor.close()
    db.close()
class 版本 —— 把上面的连接 / 查询 / 关闭逻辑封装成一个 DBHandler 类
import pymysql
class DBHandler:
    """Thin wrapper around a pymysql connection.

    Opens the connection and a DictCursor at construction time; ``query``
    runs one statement and returns either a single row or all rows;
    ``close`` releases the cursor and the connection.
    """

    def __init__(self, host, port, user, password,
                 database, charset, **kwargs):
        # Connect to the database server; DictCursor makes rows come
        # back as dicts keyed by column name.
        self.conn = pymysql.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            cursorclass=pymysql.cursors.DictCursor,
            charset=charset,
            **kwargs,
        )
        # Cursor reused by every subsequent query.
        self.cursor = self.conn.cursor()

    def query(self, sql, args=None, one=True):
        """Execute *sql* (with optional parameters *args*) and return a
        single row when *one* is true, otherwise every matching row."""
        self.cursor.execute(sql, args)
        fetch = self.cursor.fetchone if one else self.cursor.fetchall
        return fetch()

    def close(self):
        """Release the cursor, then the underlying connection."""
        self.cursor.close()
        self.conn.close()
if __name__ == "__main__":
    # Smoke test: connect to a local MySQL server, ask for its version,
    # and print the resulting row.
    handler = DBHandler(host='127.0.0.1', port=3306,
                        user='root', password='....',
                        database='uric', charset='utf8')
    data = handler.query('SELECT VERSION()')
    print(data)
爬虫小说存储案例 —— 登录 17k.com，爬取书架上的书籍及章节内容并存入 MySQL
import requests
from lxml import etree
import pymysql
class Spider(object):
    """Crawl the logged-in user's bookshelf on 17k.com and store each book
    plus its chapter texts into MySQL (tables ``book`` and ``chapter``)."""

    def __init__(self, host, port, user, password,
                 database, charset, **kwargs):
        # 连接数据库服务器 (connect to the database server; rows come back
        # as dicts via DictCursor).
        self.conn = pymysql.connect(host=host, port=port, user=user, password=password,
                                    database=database, cursorclass=pymysql.cursors.DictCursor,
                                    charset=charset, **kwargs)
        # 获取游标 (cursor shared by every statement this spider runs).
        self.cursor = self.conn.cursor()
        # Make sure the target tables exist before crawling starts.
        self.init_table()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        }
        # One session so the login cookie is carried by later requests.
        self.session = requests.session()

    def exec(self, sql, args=None, one=True):
        """Execute *sql* with optional parameters *args*, commit, and
        return one row (default) or all rows."""
        self.cursor.execute(sql, args)
        self.conn.commit()
        if one:
            return self.cursor.fetchone()
        return self.cursor.fetchall()

    def close(self):
        """Release the cursor and the underlying connection."""
        self.cursor.close()
        self.conn.close()

    def init_table(self):
        """Create the ``book`` and ``chapter`` tables if they don't exist."""
        sql1 = """
        create table if not exists book(
            id int primary key auto_increment,
            bookName varchar(32),
            coverImg varchar(255),
            authorPenName varchar(32)
        )character set=utf8;
        """
        self.exec(sql1)
        sql2 = """
        create table if not exists chapter(
            id int primary key auto_increment ,
            chapter_name varchar(32),
            chapter_content text,
            book_id INT NOT NULL
        )character set=utf8;
        """
        self.exec(sql2)

    def login(self):
        # Log in so the session holds the auth cookie used by the shelf API.
        self.session.post("https://passport.17k.com/ck/user/login", data={
            "loginName": ".....",
            "password": "...."
        }, headers=self.headers)

    def get_shelf_books(self):
        '''
        Fetch the bookshelf listing (JSON) and process each book.
        params:
        :return: data [{},{},{}]
        '''
        res = self.session.get("https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919")
        res.encoding = "utf8"
        # print(res.text)
        data = res.json().get("data")
        for book_dict in data:
            self.handle_one_book(book_dict)

    def handle_one_book(self, book_dict):
        # 循环处理每一本书 (store one book row, then crawl its chapters).
        print(book_dict)
        bookId = book_dict.get("bookId")
        bookName = book_dict.get("bookName")
        coverImg = book_dict.get("coverImg")
        authorPenName = book_dict.get("authorPenName")
        # FIX: the original interpolated crawled text into the SQL with an
        # f-string — that breaks on any quote in a title and is SQL-injectable.
        # Use a parameterized query; pymysql escapes the values safely.
        sql = "insert into book (bookName,coverImg,authorPenName) values (%s,%s,%s);"
        print("sql:::", sql)
        self.exec(sql, (bookName, coverImg, authorPenName))
        # 爬取每一章的文本信息并下载 (crawl and store every chapter).
        self.get_chapters(bookId)

    def get_chapters(self, book_id):
        # 爬虫每一本书架书籍的章节页面 — use the logged-in session so the
        # cookies from login() are reused (the original used bare requests.get).
        res = self.session.get(f"https://www.17k.com/list/{book_id}.html")
        res.encoding = "utf8"
        # 解析该书籍的章节页面中章节链接 (skip the first volume block).
        selector = etree.HTML(res.text)
        items = selector.xpath('//dl[@class="Volume"][position()>1]/dd/a')
        for item in items:
            self.handle_one_chapter(item, book_id)

    def handle_one_chapter(self, item, book_id):
        # 每一本书籍的每一章节的信息 (fetch one chapter page and store it).
        chapter_href = item.xpath("./@href")[0]
        chapter_name = item.xpath("./span/text()")[0].strip()
        # 爬取章节内容 — via the session for consistency with login().
        res = self.session.get("https://www.17k.com" + chapter_href)
        res.encoding = "utf8"
        chapter_html = res.text
        print(chapter_html)
        selector = etree.HTML(res.text)
        chapter_content_list = selector.xpath(
            '//div[contains(@class,"content")]/div[@class="p"]/p[position()<last()]/text()')
        chapter_content_str = "\n".join(chapter_content_list)
        # FIX: parameterized insert — chapter text regularly contains quotes,
        # which made the original f-string SQL fail or be injectable.
        sql = "insert into chapter (chapter_name,chapter_content,book_id) values (%s,%s,%s);"
        print("sql:::", sql)
        self.exec(sql, (chapter_name, chapter_content_str, book_id))

    def run(self):
        # (1) 模拟登录,获取Cookie
        self.login()
        # (2) 爬取书架上的书籍信息
        self.get_shelf_books()
if __name__ == "__main__":
    # Entry point: connect to the local MySQL instance and start crawling.
    spider = Spider(host='127.0.0.1', port=3306,
                    user='root', password='.....',
                    database='xiaoshuo', charset='utf8')
    spider.run()
标签: Python, pymysql, MySQL, 爬虫
From: https://www.cnblogs.com/dream-ze/p/17281187.html