import requests
import bs4
import re
import pandas as pd
import xlwt
def heavy_recommendation():
    """Scrape the '重点推荐' (heavy recommendation) list on 17k.com.

    Fetches the first 16 book detail pages linked from the first
    'BJTJ_CONT Top1' list, collects title, author, fan count, reader
    count, word count and recommendation votes, then writes two Excel
    files: one sorted by word count, one by fan count.

    Side effects: HTTP requests to 17k.com; writes
    'heavy_recommendation1.xls' and 'heavy_recommendation2.xls'.
    """
    def _parse_count(text):
        # '3.5万' -> 35000; plain digit strings pass through as int.
        # The old replace('万', '0000') trick inflated any value with a
        # decimal point by a factor of 10 (e.g. '3.5万' -> 350000).
        text = text.strip()
        if '万' in text:
            return int(float(text.replace('万', '')) * 10000)
        return int(text)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    response = requests.get(url='https://www.17k.com/quanben/', headers=headers)
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Collect the book detail-page URLs from the first ranking <ul>.
    book_urls = []
    ul = soup.find('ul', attrs={'class': 'BJTJ_CONT Top1'})
    for item in ul.find_all('li'):
        href = item.find('a').get('href')
        # Links are protocol-relative ('//www.17k.com/...'); make absolute
        # and drop stray spaces.
        book_urls.append(href.replace('//', 'https://').replace(' ', ''))

    records = []
    for book_url in book_urls[:16]:
        response = requests.get(url=book_url, headers=headers)
        response.encoding = 'utf-8'
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        records.append({
            '小说名字': soup.find('h1').find('a').text,
            '作者': soup.find('a', attrs={'class': 'name'}).text,
            '粉丝数': _parse_count(soup.find('span', attrs={'id': 'fansScore'}).text),
            '阅读数': soup.find('em', attrs={'class': 'blue'}).text,
            '小说字数': int(soup.find('em', attrs={'class': 'red'}).text),
            '推荐票数': soup.find('span', attrs={'id': 'recommentCount'}).text,
        })

    df = pd.DataFrame(records)
    # Stable sorts keep the site's original ordering for equal keys.
    df.sort_values(by=["小说字数"], ascending=[False], kind="stable").to_excel('heavy_recommendation1.xls')
    df.sort_values(by=["粉丝数"], ascending=[False], kind='stable').to_excel('heavy_recommendation2.xls')
def Great_potential():
    """Scrape the '潜力' (great potential) list on 17k.com.

    Uses the SECOND 'BJTJ_CONT Top1' <ul> on the quanben page, fetches
    the first 16 book detail pages, and writes three Excel files: the
    raw table, then the table sorted by word count and by fan count.

    Side effects: HTTP requests to 17k.com; writes 'Great_potential.xls',
    'Great_potential11.xls' and 'Great_potential12.xls'.
    """
    def _parse_count(text):
        # '3.5万' -> 35000; plain digit strings pass through as int.
        # The old ".replace('.','')/.replace('万','0000')" sequence
        # inflated any value with a decimal point by a factor of 10.
        text = text.strip()
        if '万' in text:
            return int(float(text.replace('万', '')) * 10000)
        return int(text)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    response = requests.get(url='https://www.17k.com/quanben/', headers=headers)
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # The potential list is the second ranking <ul> on the page.
    book_urls = []
    ul = soup.find_all('ul', attrs={'class': 'BJTJ_CONT Top1'})[1]
    for item in ul.find_all('li'):
        href = item.find('a').get('href')
        # Protocol-relative links -> absolute; drop stray spaces.
        book_urls.append(href.replace('//', 'https://').replace(' ', ''))

    records = []
    for book_url in book_urls[:16]:
        response = requests.get(url=book_url, headers=headers)
        response.encoding = 'utf-8'
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        records.append({
            '小说名字': soup.find('h1').find('a').text,
            '阅读数': soup.find('em', attrs={'class': 'blue'}).text,
            '小说字数': int(soup.find('em', attrs={'class': 'red'}).text),
            '作者': soup.find('a', attrs={'class': 'name'}).text,
            '粉丝数': _parse_count(soup.find('span', attrs={'id': 'fansScore'}).text),
            '推荐票数': soup.find('span', attrs={'id': 'recommentCount'}).text,
        })

    df = pd.DataFrame(records)
    df.to_excel('Great_potential.xls')
    # Stable sorts keep the site's original ordering for equal keys.
    df.sort_values(by=["小说字数"], ascending=[False], kind="stable").to_excel('Great_potential11.xls')
    df.sort_values(by=["粉丝数"], ascending=[False], kind='stable').to_excel('Great_potential12.xls')
def Boys_finished_the_book():
    """Scrape the boys' finished-book Top-100 popularity list on 17k.com.

    Follows the 100 book links (anchors with class 'red') on the ranking
    page, collects per-book stats, and writes two Excel files: one
    sorted by word count, one by fan count.

    Side effects: HTTP requests to 17k.com; writes
    'Boys_finished_the_book1.xls' and 'Boys_finished_the_book2.xls'.
    """
    def _parse_count(text):
        # '3.5万' -> 35000; plain digit strings pass through as int.
        # The old ".replace('.','')/.replace('万','0000')" sequence
        # inflated any value with a decimal point by a factor of 10.
        text = text.strip()
        if '万' in text:
            return int(float(text.replace('万', '')) * 10000)
        return int(text)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    url = 'https://www.17k.com/top/refactor/top100/18_popularityListScore/18_popularityListScore_finishBook_top_100_pc.html?TabIndex=1&typeIndex=0'
    response = requests.get(url=url, headers=headers)
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Every ranked title links from an <a class="red">; make the
    # protocol-relative hrefs absolute.
    book_urls = []
    for anchor in soup.find_all('a', attrs={'class': 'red'}):
        book_urls.append(anchor.get('href').replace('//', 'https://'))

    records = []
    for book_url in book_urls[:100]:
        response = requests.get(url=book_url, headers=headers)
        response.encoding = 'utf-8'
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        records.append({
            '小说名称': soup.find('a', attrs={'class': 'red'}).text,
            '阅读数': soup.find('em', attrs={'class': 'blue'}).text,
            '小说字数': int(soup.find('em', attrs={'class': 'red'}).text),
            '作者': soup.find('a', attrs={'class': 'name'}).text,
            '粉丝数': _parse_count(soup.find('span', attrs={'id': 'fansScore'}).text),
            '推荐票数': soup.find('span', attrs={'id': 'recommentCount'}).text,
        })

    df = pd.DataFrame(records)
    # Stable sorts keep the site's original ordering for equal keys.
    df.sort_values(by=["小说字数"], ascending=[False], kind="stable").to_excel('Boys_finished_the_book1.xls')
    df.sort_values(by=["粉丝数"], ascending=[False], kind='stable').to_excel('Boys_finished_the_book2.xls')
def Girls_finished_the_book():
    """Scrape the girls' finished-book Top-100 popularity list on 17k.com.

    Follows the 100 book links (anchors with class 'red') on the ranking
    page, collects per-book stats, and writes two Excel files: one
    sorted by word count, one by fan count.

    Side effects: HTTP requests to 17k.com; writes
    'Girls_finished_the_book1.xls' and 'Girls_finished_the_book2.xls'.
    (The original wrote to the Boys_* filenames, clobbering the output
    of Boys_finished_the_book.)
    """
    def _parse_count(text):
        # '3.5万' -> 35000; plain digit strings pass through as int.
        # The old ".replace('.','')/.replace('万','0000')" sequence
        # inflated any value with a decimal point by a factor of 10.
        text = text.strip()
        if '万' in text:
            return int(float(text.replace('万', '')) * 10000)
        return int(text)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    # TODO(review): this URL is byte-identical to the one in
    # Boys_finished_the_book, so this scrapes the boys' list again.
    # Replace with the girls'-channel ranking URL once confirmed.
    url = 'https://www.17k.com/top/refactor/top100/18_popularityListScore/18_popularityListScore_finishBook_top_100_pc.html?TabIndex=1&typeIndex=0'
    response = requests.get(url=url, headers=headers)
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Every ranked title links from an <a class="red">; make the
    # protocol-relative hrefs absolute.
    book_urls = []
    for anchor in soup.find_all('a', attrs={'class': 'red'}):
        book_urls.append(anchor.get('href').replace('//', 'https://'))

    records = []
    for book_url in book_urls[:100]:
        response = requests.get(url=book_url, headers=headers)
        response.encoding = 'utf-8'
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        records.append({
            '小说名称': soup.find('a', attrs={'class': 'red'}).text,
            '阅读数': soup.find('em', attrs={'class': 'blue'}).text,
            '小说字数': int(soup.find('em', attrs={'class': 'red'}).text),
            '作者': soup.find('a', attrs={'class': 'name'}).text,
            '粉丝数': _parse_count(soup.find('span', attrs={'id': 'fansScore'}).text),
            '推荐票数': soup.find('span', attrs={'id': 'recommentCount'}).text,
        })

    df = pd.DataFrame(records)
    # Stable sorts keep the site's original ordering for equal keys.
    df.sort_values(by=["小说字数"], ascending=[False], kind="stable").to_excel('Girls_finished_the_book1.xls')
    df.sort_values(by=["粉丝数"], ascending=[False], kind='stable').to_excel('Girls_finished_the_book2.xls')
def Finish_this_list():
    """Scrape the finished-book Top-100 popularity list on 17k.com.

    Follows the 100 book links (anchors with class 'red') on the ranking
    page, collects per-book stats, and writes two Excel files: one
    sorted by word count, one by fan count.

    Side effects: HTTP requests to 17k.com; writes
    'Finish_this_list1.xls' and 'Finish_this_list2.xls'.
    """
    def _parse_count(text):
        # '3.5万' -> 35000; plain digit strings pass through as int.
        # The old ".replace('.','')/.replace('万','0000')" sequence
        # inflated any value with a decimal point by a factor of 10.
        text = text.strip()
        if '万' in text:
            return int(float(text.replace('万', '')) * 10000)
        return int(text)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    url = 'https://www.17k.com/top/refactor/top100/18_popularityListScore/18_popularityListScore_finishBook_top_100_pc.html?TabIndex=1&typeIndex=0'
    response = requests.get(url=url, headers=headers)
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Every ranked title links from an <a class="red">; make the
    # protocol-relative hrefs absolute.
    book_urls = []
    for anchor in soup.find_all('a', attrs={'class': 'red'}):
        book_urls.append(anchor.get('href').replace('//', 'https://'))

    records = []
    for book_url in book_urls[:100]:
        response = requests.get(url=book_url, headers=headers)
        response.encoding = 'utf-8'
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        records.append({
            '小说名字': soup.find('a', attrs={'class': 'red'}).text,
            '作者': soup.find('a', attrs={'class': 'name'}).text,
            '粉丝数': _parse_count(soup.find('span', attrs={'id': 'fansScore'}).text),
            '阅读数': soup.find('em', attrs={'class': 'blue'}).text,
            '小说字数': int(soup.find('em', attrs={'class': 'red'}).text),
            '推荐票数': soup.find('span', attrs={'id': 'recommentCount'}).text,
        })

    df = pd.DataFrame(records)
    # Stable sorts keep the site's original ordering for equal keys.
    df.sort_values(by=["小说字数"], ascending=[False], kind="stable").to_excel('Finish_this_list1.xls')
    df.sort_values(by=["粉丝数"], ascending=[False], kind='stable').to_excel('Finish_this_list2.xls')
if __name__ == '__main__':
    # Run every scraper in turn; each writes its own Excel output.
    # (The unused `li = []` local was removed.)
    heavy_recommendation()
    Great_potential()
    Girls_finished_the_book()
    Finish_this_list()
    Boys_finished_the_book()