刚开始学习爬虫时,参考网上的素材自己写了下面这个脚本,比较简单:主要使用 requests 库抓取页面,再用正则表达式解析出图片链接。
import requests
import re
import os
from urllib import error
def main():
    """Download every .jpg image referenced on listing pages 1-9.

    Each listing page is fetched with ``requests``, the ``src="...jpg"``
    attributes are extracted with a regular expression, and every image is
    written to the output directory as ``girl_image_<n>.jpg``, where ``<n>``
    counts the images saved so far (starting at 0).

    Pages or images that cannot be fetched are reported and skipped so a
    single network failure does not abort the whole run.
    """
    # Raw string: "\p" and "\g" are not valid escape sequences in a normal
    # string literal, so the backslashes must be kept literally.
    dir_path = r"E:\python\girl-images"
    base_url = "https://www.dbmeinv.com/?pager_offset="
    # Escape the dot before "jpg" and stop at the closing quote so one match
    # can never span several attributes or lines.
    image_pattern = re.compile(r'src="([^"]+\.jpg)"')

    # open() does not create missing directories.
    os.makedirs(dir_path, exist_ok=True)

    saved = 0
    for page in range(1, 10):
        # Build the page URL from the unchanged base each time; appending to
        # the same variable would produce "...=1", "...=12", "...=123", ...
        page_url = base_url + str(page)
        try:
            response = requests.get(page_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            # requests raises its own exception hierarchy, not urllib's.
            print("skipping page", page, "-", exc)
            continue

        for image_url in image_pattern.findall(response.text):
            try:
                image = requests.get(image_url, timeout=7)
                image.raise_for_status()
            except requests.RequestException as exc:
                print("skipping image", image_url, "-", exc)
                continue

            file_path = os.path.join(
                dir_path, "girl_image_" + str(saved) + ".jpg"
            )
            # The context manager closes the file even if the write fails.
            with open(file_path, "wb") as image_file:
                image_file.write(image.content)
            saved += 1
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
运行效果:
标签:__,python,text,image,list,爬虫,url,妹子,main From: https://blog.51cto.com/u_13946099/6081452