# Download one page of the Douban movie chart JSON endpoint (start=0,
# limit=20) and save the raw response text to movie.json.
import urllib.request

url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20'

# Browser-like User-Agent; presumably needed so the server does not reject
# the default urllib client -- TODO confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}

request = urllib.request.Request(url=url, headers=headers)

# Close the HTTP response as soon as the body has been read.
with urllib.request.urlopen(request) as res:
    content = res.read().decode('utf-8')

# The context manager guarantees the file is flushed and closed even if the
# write fails; the original never closed the handle.
with open('movie.json', 'w', encoding='utf-8') as file:
    file.write(content)
获取豆瓣电影排行榜第 1-10 页的数据(每页 20 条)
import urllib.request
import urllib.parse


def getMovieTest(page):
    """Download one page of the Douban movie chart and save it as JSON text.

    The page number is converted to a ``start`` offset of
    ``(page - 1) * 20`` with a fixed ``limit`` of 20. The request URL is
    printed, and the decoded response body is written to
    ``moviessqw<page>.json`` in the current working directory.

    Args:
        page: 1-based page number to fetch.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        OSError: If the output file cannot be written.
    """
    data = {
        'start': (page - 1) * 20,
        'limit': 20,
    }
    reqData = urllib.parse.urlencode(data)
    url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&' + reqData
    print(url)

    # Browser-like User-Agent; presumably needed so the server does not
    # reject the default urllib client -- TODO confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
    }
    request = urllib.request.Request(url=url, headers=headers)

    # Close the HTTP response once the body is read; the original leaked it.
    with urllib.request.urlopen(request) as res:
        content = res.read().decode('utf-8')

    # Context manager closes the file even if write() raises.
    with open('moviessqw' + str(page) + '.json', 'w', encoding='utf-8') as file:
        file.write(content)


# Fetch pages 1 through 10, one output file per page.
for i in range(1, 11):
    getMovieTest(i)
标签:url,res,request,urllib,爬取,headers,json,豆瓣,file From: https://www.cnblogs.com/sgj191024/p/17738154.html