import requests
from bs4 import BeautifulSoup
import bs4
def getedhtml(url, code='utf-8'):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        code: Encoding assigned to the response before it is decoded;
            defaults to ``'utf-8'``.

    Returns:
        The decoded page text, or a single space (``' '``) when the request
        fails (connection problem, timeout after 30 seconds, or an HTTP
        error status reported by ``raise_for_status``).
    """
    # Send a browser-like User-Agent header with the request.
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Only request-level failures are turned into the blank fallback;
        # KeyboardInterrupt and programming errors now propagate.
        return ' '
def returned(html, list, num):
    """Extract up to *num* ranking rows from *html* and append them to *list*.

    The rows are read from the ``<tbody>`` element with CSS class
    ``hidden_zhpm``. For each row, a three-item list is appended:
    the first child node of cell 0, the string of cell 1 and the string of
    cell 3.

    Args:
        html: Page markup to parse.
        list: Output list, extended in place (the name shadows the builtin
            but is kept so existing keyword callers keep working).
        num: Maximum number of rows to collect.

    Returns:
        None. Results are delivered through *list*. If the table is not
        present in *html*, *list* is left untouched.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody', 'hidden_zhpm')
    if tbody is None:
        # No ranking table (e.g. the download failed and html is blank).
        return

    count = 0
    for tr in tbody.children:
        if count >= num:
            break
        # .children can yield non-tag nodes (such as text between rows);
        # only real tags are treated as table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all('td')
        # Skip rows that do not have the cells we index below.
        if len(tds) < 4 or not tds[0].contents:
            continue
        count += 1
        list.append([tds[0].contents[0], tds[1].string, tds[3].string])
def printed(list, num):
    """Print a header line followed by at most *num* rows taken from *list*.

    Args:
        list: Sequence of rows; each row supplies three values at indexes
            0, 1 and 2 (printed under the rank, school and score headers).
        num: Maximum number of rows to print. If *list* holds fewer rows,
            only the available ones are printed; a negative value prints
            the header only.

    Returns:
        None. Output goes to standard output.
    """
    # Columns are centred in width 10 and separated by tabs. The middle
    # column is padded with chr(12288), the full-width (ideographic) space,
    # presumably so CJK names line up better than with ASCII spaces.
    row_format = '{0:^10}\t{1:{3}^10}\t{2:^10}'
    fill = chr(12288)

    print(row_format.format('排名', '高校', '分数', fill))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; clamp so a negative num does not slice from the end.
    for row in list[:max(num, 0)]:
        print(row_format.format(row[0], row[1], row[2], fill))
def main():
    """Prompt for a row count, then fetch, parse and print the 2019 ranking.

    Reads an integer from standard input, downloads the ranking page with
    ``getedhtml``, collects rows with ``returned`` and displays them with
    ``printed``.
    """
    ranking_url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html'
    rows = []
    # The prompt is shown before any network traffic happens.
    limit = int(input('请问要查询2019前多少名的高校呢: '))
    page = getedhtml(ranking_url)
    returned(page, rows, limit)
    printed(rows, limit)
# Run the scraper only when the file is executed as a script, so that
# importing the module does not prompt for input or touch the network.
if __name__ == "__main__":
    main()
# Tags: 10, num, url, list, 爬虫 (web crawler), html, tds
# From: https://www.cnblogs.com/Sjt2023/p/17892405.html