import os
import time
import requests
from lxml import etree
from fake_useragent import UserAgent
headers = {
    'user-agent': UserAgent().random
}
url = 'https://www.ivsky.com/tupian/'
res = requests.get(url=url, headers=headers)
# Grab the cookies set by the first request to the site
dt = res.cookies.get_dict()
# Cookies to carry on the second request
'''
How the rule below was derived: first clear the site's cookies under the Application
tab of the browser dev tools and refresh the page. Two document requests with the same
name (here tupian/) show up as the first and the third entries. Open the first one and
check the Set-Cookie field in its Response Headers, then take the cookie the third
request actually carries as the correct one. Comparing the cookie issued on the first
request with the correct one reveals the transformation rule the site expects.
'''
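# Sanity check (a sketch, not in the original post; the cookie names 'token' and
# 'secret' used below are specific to this site at the time of writing, so fail early
# with a readable message if the anti-bot scheme has changed):
for key in ('token', 'secret'):
    if key not in dt:
        raise SystemExit(f'Unexpected cookie layout, got keys: {list(dt)}')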
cookies = {
    # The rule derived above: reuse the issued token as 't', and send the issued
    # secret minus 100 as 'r'
    't': dt['token'],
    'r': str(int(dt['secret']) - 100)
}
# Second request, now carrying the adjusted cookies
res = requests.get(url=url, headers=headers, cookies=cookies)
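# Optional check (a sketch, assuming the real listing page contains the "il_img"
# image containers targeted by the XPath below, while the anti-bot page does not):
if res.status_code != 200 or 'il_img' not in res.text:
    raise SystemExit('Adjusted cookies were rejected; re-derive the rule above.')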
html = res.text
e = etree.HTML(html)
lst = e.xpath('//div[@class="il_img"]/a/img/@src')
# Download every image found on the listing page; create the output directory first
os.makedirs('./img', exist_ok=True)
for i in range(len(lst)):
    url = 'https:' + lst[i]
    res = requests.get(url=url, headers=headers)
    code = res.content
    with open('./img/' + str(i) + '.jpg', 'wb') as f:
        f.write(code)
    # Be polite to the server between downloads
    time.sleep(1)
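# The cookie trick above, factored into a small reusable helper (a sketch under the
# same assumptions: the cookie names 'token'/'secret', the keys 't'/'r' and the -100
# offset are what the site expected when this was written and may change):
def solve_cookies(first_response):
    issued = first_response.cookies.get_dict()
    return {
        't': issued['token'],
        'r': str(int(issued['secret']) - 100),
    }
# Usage: cookies = solve_cookies(requests.get(url, headers=headers))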
From: https://www.cnblogs.com/hacker-dvd/p/17568322.html