# 爬豆瓣 (Scraping Douban)
import requests
# Browser-like User-Agent header so the request is presented to the server as
# coming from a regular browser rather than the default python-requests client.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/98.0.4758.139 Safari/537.36"
    )
}

# For practice, http://books.toscrape.com/ (a site meant for scraping
# exercises) can be requested instead of the Douban page below.
# A timeout is passed because requests.get otherwise waits without limit
# when the server never answers.
response = requests.get(
    "https://movie.douban.com/top250",
    headers=head,
    timeout=10,
)

if response.ok:
    # Success (status code below 400): dump the raw HTML of the page.
    print(response.text)
else:
    print("请求失败")
    # NOTE(review): the original indentation was lost; the status code is
    # assumed to be diagnostic output for the failure branch - confirm.
    print(response.status_code)
# 爬价格和书名 (Scraping prices and book titles)
from bs4 import BeautifulSoup
import requests
# Browser-like User-Agent header so the request is presented to the server as
# coming from a regular browser rather than the default python-requests client.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/98.0.4758.139 Safari/537.36"
    )
}

# books.toscrape.com is a site intended for scraping practice.
# A timeout is passed because requests.get otherwise waits without limit
# when the server never answers.
response = requests.get("http://books.toscrape.com/", headers=head, timeout=10)
# Stop here on an HTTP error instead of parsing an error page.
response.raise_for_status()

# Hand BeautifulSoup the raw bytes so it detects the encoding from the
# document itself; response.text depends on requests' header-based guess,
# which can garble non-ASCII characters such as the pound sign in the prices
# when the server declares no charset.
content = response.content
# "html.parser" selects Python's built-in HTML parser.
soup = BeautifulSoup(content, "html.parser")

# Print every price: <p> tags whose class is "price_color".
all_prices = soup.find_all("p", attrs={"class": "price_color"})
for price in all_prices:
    print(price.string)

# Print every book title: the text of each <a> nested inside an <h3>.
all_titles = soup.find_all("h3")
for title in all_titles:
    all_links = title.find_all("a")
    for link in all_links:
        print(link.string)
标签:head,入门,price,爬虫,537.36,print,requests,response
From: https://www.cnblogs.com/wljss/p/18133155