第二十讲：爬取图片

# -*- coding: utf-8 -*-
# 爬虫爬取图片
import requests
import re
def crawl_image(image_url, image_local_path):
    """Download one image and save it to a local file.

    Args:
        image_url: Absolute URL of the image to fetch.
        image_local_path: Destination file path; missing parent directories
            are created.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        OSError: If the destination file cannot be written.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import header.
    import os

    # The context manager releases the connection even if writing fails.
    # A timeout keeps a stalled server from hanging the crawl forever.
    with requests.get(image_url, stream=True, timeout=10) as r:
        # Fail before touching the disk so an error page is never saved
        # under an image file name.
        r.raise_for_status()

        parent_dir = os.path.dirname(image_local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # "wb" = write binary; the with-block guarantees the file is closed.
        with open(image_local_path, "wb") as f:
            # stream=True only pays off when the body is consumed in chunks;
            # reading r.content would load the entire image into memory.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
def crwal(page):
    """Download every thumbnail image found on one listing page.

    Fetches the "imgrank" listing page with the given number, extracts the
    ``src`` of each ``<img>`` inside a ``<div class="thumb">`` element and
    saves the image under ``./images/`` using the last path segment of the
    ``src`` as the file name.

    Args:
        page: Page number appended to the listing URL.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import header.
    from urllib.parse import urljoin

    url = "http://www.qiushibaike.com/imgrank/page/" + str(page)
    # A timeout keeps a stalled server from hanging the crawl forever.
    res = requests.get(url, timeout=10)
    # res.text is already-decoded text while res.content is the raw bytes;
    # decode the bytes explicitly as UTF-8 here.
    html = res.content.decode("utf-8")
    # re.S lets "." match newlines, since a thumb <div> spans several lines.
    thumb_blocks = re.findall(r'<div class="thumb">(.*?)</div>', html, re.S)
    for block in thumb_blocks:
        for src in re.findall(r'<img src="(.*?)"', block):
            src = src.strip()
            file_name = src.split("/")[-1]
            if not file_name:
                # Empty src or one ending in "/": there is no usable file
                # name, and "./images/" itself cannot be opened for writing.
                continue
            # urljoin resolves protocol-relative ("//host/a.jpg"), absolute
            # and site-relative src values against the page URL; blindly
            # prefixing "http:" is only right for the first form.
            crawl_image(urljoin(url, src), "./images/" + file_name)
if __name__ == "__main__":
    # Run as a script: crawl only the first listing page.
    crwal(page=1)

第二十讲：爬取图片

于2018-07-17由admin发布

0 条评论

发表回复 取消回复

Python

【Python】python内置函数与装饰器汇总

Python

【python】协程Coroutines

Python

【python】一些好玩又有用的库合集

第二十讲：爬取图片

于2018-07-17由admin发布

0 条评论

发表回复 取消回复

相关文章

Python

【Python】python内置函数与装饰器汇总

Python

【python】协程Coroutines

Python

【python】一些好玩又有用的库合集

发表回复 取消回复