登录常用手段就是:
- requests.post请求
- selenium等自动化工具
这里讲一下使用requests实现自动登录
网站
现在网站登录基本上都有验证码,requests登录的难点在于将验证码与账号联系起来,而联系二者的纽带就是cookie和referer。
def login_dianxiaomi(account='xx', password='xxx', max_tries=3):
    """Log in to Dianxiaomi (店小秘) using plain ``requests`` calls.

    The captcha is tied to the account through the cookies and the Referer
    header, so three requests have to agree with each other:

    1. the home page, whose URL is reused as the Referer of the later
       requests and whose response supplies a JSESSIONID;
    2. the captcha image, requested with that JSESSIONID, whose response
       supplies the JSESSIONID and dxm_vc cookies used for the login;
    3. the login AJAX call, sent with the captcha cookies and the captcha
       text returned by ``get_code``.

    Side effects: the captcha image is written to ``dianxiaomi.png`` in the
    current working directory, and progress is printed to stdout.

    Args:
        account: Account name posted in the login form.
        password: Password posted in the login form.
        max_tries: Maximum number of login attempts before giving up.

    Returns:
        The cookies set by the successful login response, formatted as
        ``name=value;`` pairs concatenated into one string, or ``None`` when
        every attempt failed.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36')
    captcha_path = 'dianxiaomi.png'
    login_url = 'https://www.dianxiaomi.com/user/userLoginNew2.json'

    for attempt in range(1, max_tries + 1):
        ts = int(time.time())
        # Home page; the same URL is sent as the Referer of the later requests.
        homepage = f'https://www.dianxiaomi.com/index.htm?ts={ts}'
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,'
                      'image/avif,image/webp,image/apng,*/*;q=0.8,'
                      'application/signed-exchange;v=b3;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # Cookie value copied directly from the browser.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; '
                      'Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      'JSESSIONID=857D325C586104913A42E668792A8870; '
                      'dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ; '
                      'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670403714',
            'Host': 'www.dianxiaomi.com',
            'Referer': 'https://www.dianxiaomi.com/home.htm',
            'User-Agent': user_agent,
        }
        res = requests.get(homepage, headers=headers, timeout=30)
        # Look the cookie up by name: its position in the jar is not
        # guaranteed, and an empty jar must not raise IndexError.
        jsessionid = dict(res.cookies.items()).get('JSESSIONID')
        print('homepage jsessionid:', jsessionid)
        if not jsessionid:
            print('homepage cookie not right', f'try {attempt} times')
            continue

        # Fetch the captcha image.
        url = f'https://www.dianxiaomi.com/verify/code.htm?t={int(time.time() * 1000)}'
        headers = {
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # The JSESSIONID here comes from the home page response.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; '
                      'Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951;'
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={jsessionid}; '
                      'dxm_vc=NjlhZDZiOGYwMzk0YzdiYjRlOWJlY2M2OWJiMGY4Y2MhMTY3MDQwMzcxMzg2NQ',
            'Host': 'www.dianxiaomi.com',
            'Referer': homepage,
            'User-Agent': user_agent,
        }
        res = requests.get(url, headers=headers, timeout=30)
        cookie = res.cookies.items()
        print('code cookie:', cookie)
        code_cookies = dict(cookie)
        code_jsessionid = code_cookies.get('JSESSIONID')
        code_dxm_vc = code_cookies.get('dxm_vc')
        # Both cookies are required for the login request; retry when the
        # captcha response did not set them.
        if not code_jsessionid or not code_dxm_vc:
            print('code url cookie not right', f'try {attempt} times')
            continue

        with open(captcha_path, 'wb') as f:
            f.write(res.content)

        # Convert the captcha to 8-bit grayscale ('L' mode) in place before
        # handing it to get_code.
        with Image.open(captcha_path) as im:
            img_gray = im.convert('L')
        img_gray.save(captcha_path)

        # get_code is defined elsewhere; its result is posted as the captcha
        # answer.
        code = get_code(captcha_path)
        print('code:', code)

        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': user_agent,
            'Referer': homepage,
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # JSESSIONID and dxm_vc here come from the captcha response.
            'Cookie': '_dxm_ad_client_id=C480E8334FD936CCDEB3BA1AD1F98E49F; '
                      'Hm_lvt_f8001a3f3d9bf5923f780580eb550c0b=1670383137,1670400951; '
                      f'Hm_lpvt_f8001a3f3d9bf5923f780580eb550c0b=1670402174; JSESSIONID={code_jsessionid}; '
                      f'dxm_vc={code_dxm_vc}',
        }
        data = {
            'account': account,
            'password': password,
            'dxmVerify': code,
            'loginVerifyCode': '',
            # NOTE(review): 'remeber' is kept exactly as in the original
            # request; presumably the server expects this spelling.
            'remeber': 'remeber',
            'url': '',
        }
        response = requests.post(login_url, data=data, headers=headers, timeout=30)
        print(response.text)

        res = response.json()
        if res['code'] == -1:
            # A code of -1 is treated as a failed login; report and retry.
            print(res['error'], f'try {attempt} times')
            continue

        # Collect the cookies returned by the login response.
        cookie = response.cookies.items()
        print(cookie)
        cookies = ''.join(f'{name}={value};' for name, value in cookie)
        print(11, cookies)
        return cookies

    # Every attempt failed.
    return None
欢迎关注,爬虫王者
标签:www,code,店小秘,验证码,headers,cookie,dianxiaomi,print,requests From: https://www.cnblogs.com/crawler-king/p/16990128.html