from bs4 import BeautifulSoup
from openpyxl import load_workbook
import pandas as pd
import requests
# League page that is scraped for fixtures and standings.
URL = "https://www.dszuqiu.com/league/252"

# Workbook that receives the scraped data.
FILE_NAME = "/Users/shaomengchen/bet/美职联.xlsx"

# Column titles for the standings sheet.
RANK_HEAD = [
    "排名",
    "球队",
    "赛",
    "进球",
    "失球",
    "场均进球",
    "场均失球",
]

# Column titles for the betting sheet.
BET_HEAD = [
    "时间",
    "赛事",
    "主队",
    "客队",
    "实时",
    "比分",
    "类型",
    "描述",
    "赔率",
    "本金",
    "返还",
]

# Column titles for the per-team sheets.
TEAM_HEAD = [
    "比赛时间",
    "赛事",
    "主队",
    "客队",
    "让球",
    "大小球",
    "角球",
    "比赛结果",
]
def requestUrl(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    head = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }
    # NOTE(review): verify=False turns off TLS certificate validation, which
    # exposes the request to man-in-the-middle tampering. It is kept to
    # preserve existing behaviour; confirm whether it is still required.
    response = requests.get(url, headers=head, verify=False, timeout=timeout)
    # Fail loudly on an error page instead of handing its HTML to the parsers.
    response.raise_for_status()
    return response.content
# Fixtures
def initGame(requestData, fileName):
    """Parse the fixture table of the league page and store it in the workbook.

    The workbook at *fileName* is loaded, its "比赛" sheet is replaced with the
    freshly parsed rows (created if absent), and the workbook is saved back to
    the same path. The sheet is written with openpyxl directly because
    assigning ``ExcelWriter.book`` is not possible on pandas 2.x.

    Args:
        requestData: HTML of the league page, in any form BeautifulSoup
            accepts.
        fileName: Path of an existing ``.xlsx`` workbook.

    Returns:
        A list with one row per fixture, each row being
        ``[time, competition, home, away, handicap, goals, corners]`` as
        strings.

    Raises:
        IndexError: If the fixture table is missing or a row has fewer cells
            than expected.
        AttributeError: If a team cell contains no link.
    """
    sheet_name = "比赛"
    game_head = ["比赛时间","赛事","主队","客队","让球","大小球","角球"]

    soup = BeautifulSoup(requestData, "html.parser")
    table = soup.select('table[class="live-list-table diary-table"]')[0]

    game_rows = []
    for tr in table.select("tbody>tr"):
        # Query the cells once per row instead of once per field.
        cells = tr.select('td')

        game_time = cells[2].get_text().strip()
        game_name = cells[0].get_text().strip()
        team_home = cells[3].find("a").get_text().strip()
        team_away = cells[5].find("a").get_text().strip()

        game_rang_gt = cells[7].get_text().strip()
        game_rang_num = cells[8].get_text().strip()
        game_rang_lt = cells[9].get_text().strip()
        game_rang = game_rang_gt + " " + game_rang_num + " " + game_rang_lt

        game_goals_gt = cells[10].get_text().strip()
        game_goals_num = cells[11].get_text().strip()
        game_goals_lt = cells[12].get_text().strip()
        game_goals = game_goals_gt + " " + game_goals_num + " " + game_goals_lt

        # NOTE(review): the corner columns read cells 10-12, exactly like the
        # goal columns, so both fields always hold the same text. This looks
        # like a copy-paste slip; confirm the real corner cell indexes
        # against the page markup before changing them.
        game_corner = game_goals

        game_rows.append(
            [game_time, game_name, team_home, team_away,
             game_rang, game_goals, game_corner]
        )

    # Read and write the same workbook: the path given by the caller.
    book = load_workbook(fileName)
    if sheet_name in book.sheetnames:
        # Replace the stale sheet rather than accumulating suffixed copies.
        book.remove(book[sheet_name])
    sheet = book.create_sheet(sheet_name)
    sheet.append(game_head)
    for row in game_rows:
        sheet.append(row)
    book.save(fileName)

    return game_rows
# Bets
def initBet(requestData):
    """Placeholder for bet handling; ignores *requestData* and does nothing."""
    return None
# Standings
def initRank(requestData, fileName, book):
    """Parse the standings table and store it in the "排名" sheet.

    The rows are written into *book* (an existing "排名" sheet is replaced)
    and the workbook is then saved to *fileName*. The sheet is written with
    openpyxl directly because assigning ``ExcelWriter.book`` is not possible
    on pandas 2.x.

    Args:
        requestData: HTML of the league page, in any form BeautifulSoup
            accepts.
        fileName: Path the workbook is saved to.
        book: openpyxl workbook that receives the sheet; it is modified in
            place.

    Returns:
        The team names in table order.

    Raises:
        IndexError: If the standings table is missing, a row has fewer cells
            than expected, or the goals cell holds no ``":"`` separator.
        ValueError: If the games-played or goal figures are not integers.
    """
    sheet_name = "排名"

    soup = BeautifulSoup(requestData, "html.parser")
    table = soup.select('table[class="responsive live-list-table MB0"]')[0]

    team_rows = []
    team_names = []
    for tr in table.select("tbody>tr"):
        cells = tr.select('td')

        team_rank = cells[0].get_text().strip()
        team_name = cells[3].get_text().strip()
        team_round = cells[4].get_text().strip()

        # The goals cell has the form "scored:conceded"; split it once.
        goal_parts = cells[6].get_text().strip().split(":")
        team_gs = goal_parts[0].strip()
        team_ga = goal_parts[1].strip()

        # A team that has not played yet would otherwise divide by zero.
        played = int(team_round)
        team_avg_gs = round(int(team_gs) / played, 2) if played else 0.0
        team_avg_ga = round(int(team_ga) / played, 2) if played else 0.0

        team_names.append(team_name)
        team_rows.append(
            [team_rank, team_name, team_round, team_gs, team_ga,
             team_avg_gs, team_avg_ga]
        )

    if sheet_name in book.sheetnames:
        # Replace the stale sheet rather than accumulating suffixed copies.
        book.remove(book[sheet_name])
    sheet = book.create_sheet(sheet_name)
    # RANK_HEAD is the module-level header whose titles match the row layout
    # built above (rank, team, played, goals for/against, averages).
    sheet.append(RANK_HEAD)
    for row in team_rows:
        sheet.append(row)
    book.save(fileName)

    return team_names
# Teams
def initTeam(teams, games, fileName, book):
    """Write one sheet per team listing the fixtures that team takes part in.

    For every name in *teams*, the rows of *games* in which the team is the
    home side (index 2) or the away side (index 3) are copied, extended with
    a placeholder result ``"0:0"``, and written to a sheet titled with the
    team name. The caller's *games* rows are left untouched. The workbook is
    saved once, after all sheets have been written, and only if at least one
    sheet was produced.

    NOTE(review): the sheet is titled with the team name on the assumption
    that one sheet per team is intended; writing every team to "排名" would
    overwrite the standings sheet. Confirm with the workbook's owner.

    Args:
        teams: Iterable of team names.
        games: Fixture rows; each row holds the home team at index 2 and the
            away team at index 3.
        fileName: Path the workbook is saved to.
        book: openpyxl workbook that receives the sheets; it is modified in
            place.
    """
    game_head = ["比赛时间","赛事","主队","客队","让球","大小球","角球","比赛结果"]

    changed = False
    for name in teams:
        # Copy each matching row so the shared input is never mutated.
        team_games = [
            list(game) + ["0:0"]
            for game in games
            if name == game[2] or name == game[3]
        ]
        if not team_games:
            continue

        if name in book.sheetnames:
            # Replace the stale sheet rather than accumulating copies.
            book.remove(book[name])
        sheet = book.create_sheet(name)
        sheet.append(game_head)
        for row in team_games:
            sheet.append(row)
        changed = True

    if changed:
        book.save(fileName)
def main():
    """Fetch the league page and refresh the fixtures sheet of the workbook."""
    page = requestUrl(URL)
    initGame(page, FILE_NAME)
    # Disabled steps: load the workbook with load_workbook(FILE_NAME), write
    # the standings with initRank(page, FILE_NAME, book), then pass the team
    # names it returns, together with the rows returned by initGame, to
    # initTeam(team_names, rows, FILE_NAME, book).
if __name__ == "__main__":
    # Run the scrape only when executed as a script, then report completion.
    main()
    print("execute over")