# -*- coding: utf-8 -*-
"""Crawler that downloads the images found on a qiushibaike.com "imgrank" page."""
import logging
import os
import re
from urllib.parse import urljoin, urlparse

import requests

logger = logging.getLogger(__name__)

# Seconds to wait on the server before a request is abandoned; requests
# applies no timeout unless one is passed explicitly.
REQUEST_TIMEOUT = 10

# Number of bytes pulled from the response stream per write.
CHUNK_SIZE = 8192

# Compiled once at import time. re.S lets "." span newlines, because a
# thumbnail <div> may be spread over several lines of HTML.
_THUMB_DIV_RE = re.compile(r'<div class="thumb">(.*?)</div>', re.S)
_IMG_SRC_RE = re.compile(r'<img src="(.*?)"')


def crawl_image(image_url, image_local_path):
    """Download one image and store it on disk.

    Args:
        image_url: Absolute URL of the image to fetch.
        image_local_path: File path the image bytes are written to. Missing
            parent directories are created.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure or a timeout.
        OSError: The target file could not be created or written.
    """
    parent_dir = os.path.dirname(image_local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The context manager releases the connection even when writing fails.
    with requests.get(image_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        # Check the status before opening the file so that an error page is
        # never saved under an image file name.
        response.raise_for_status()
        with open(image_local_path, "wb") as image_file:  # "wb" = write binary
            # Stream in chunks instead of loading the whole body into memory.
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                image_file.write(chunk)


def crwal(page, image_dir="./images"):
    """Download every thumbnail image listed on one "imgrank" page.

    The function name keeps its original spelling so existing callers keep
    working.

    Args:
        page: Page number appended to the listing URL.
        image_dir: Directory the images are saved into; created on demand.

    Raises:
        requests.HTTPError: The listing page answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure or a timeout.
        UnicodeDecodeError: The listing page is not valid UTF-8.
    """
    url = "http://www.qiushibaike.com/imgrank/page/" + str(page)
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    # res.content holds the raw response bytes; decode them explicitly as
    # UTF-8 to obtain the text that the regular expressions operate on.
    html = res.content.decode("utf-8")

    for thumb_html in _THUMB_DIV_RE.findall(html):
        for image_src in _IMG_SRC_RE.findall(thumb_html):
            # urljoin resolves scheme-relative ("//host/a.jpg"), absolute and
            # path-relative src values against the page URL.
            image_url = urljoin(url, image_src.strip())

            # Name the file after the last path segment only, so a query
            # string never ends up in the file name.
            file_name = urlparse(image_url).path.rsplit("/", 1)[-1]
            if not file_name:
                logger.warning("Skipping %s: URL has no file name", image_url)
                continue

            try:
                crawl_image(image_url, os.path.join(image_dir, file_name))
            except requests.HTTPError as err:
                # One missing image should not abort the rest of the page.
                logger.warning("Skipping %s: %s", image_url, err)


if __name__ == '__main__':
    crwal(1)
分类: Python
0 条评论