第二十一讲：爬虫实战

# -*- coding: utf-8 -*-
# beautifulsoup
from bs4 import BeautifulSoup
import html
import time
import requests
# def crowlJokeListUseBs4(page=1):
# url = "http://www.qiushibaike.com/8hr/page/" + str(page)
# res = requests.get(url)
# soup = BeautifulSoup(res.text, "html5lib")
# jokeList = soup.find_all("div", class_="article block untagged mb15")
# for child in jokeList:
# print(child.find("h2").string + "\t" + "".join(child.find("div",class_="content").stripped_strings))
# time.sleep(1)
def crawl_joke_list_use_bs4(page=1):
    """Download the qiushibaike listing page and print each 'main-text' div.

    The page is fetched over HTTP, parsed with the ``html5lib`` parser, and
    every ``<div>`` matched by ``find_all('div', 'main-text')`` is printed
    as-is (markup included).

    Args:
        page: Page number supplied by the caller.
            NOTE(review): the URL below is fixed, so this value is currently
            not used -- confirm whether it should become part of the URL.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get("http://www.qiushibaike.com/shr", timeout=10)
    soup = BeautifulSoup(response.text, "html5lib")
    for block in soup.find_all('div', 'main-text'):
        # Alternative output kept from the original: only the <span> content
        # with "<br/>" removed and the surrounding span tags sliced off.
        # print(str(block.span).replace("<br/>", "")[6:-7]+"\n")
        print(block)
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this pause ran once per call or once per printed item; it is
    # placed here as a per-call throttle -- confirm.
    time.sleep(1)
if __name__ == "__main__":
    # Script entry point: crawl and print a single page (page 1).
    crawl_joke_list_use_bs4(1)

第二十一讲：爬虫实战

于2018-07-17由admin发布

0 条评论

发表回复 取消回复

Python

【Python】python内置函数与装饰器汇总

Python

【python】协程Coroutines

Python

【python】一些好玩又有用的库合集

第二十一讲：爬虫实战

于2018-07-17由admin发布

0 条评论

发表回复 取消回复

相关文章

Python

【Python】python内置函数与装饰器汇总

Python

【python】协程Coroutines

Python

【python】一些好玩又有用的库合集

发表回复 取消回复