# -*- coding: utf-8 -*-
# beautifulsoup
from bs4 import BeautifulSoup
import html
import time
import requests
# def crowlJokeListUseBs4(page=1):
# url = "http://www.qiushibaike.com/8hr/page/" + str(page)
# res = requests.get(url)
# soup = BeautifulSoup(res.text, "html5lib")
# jokeList = soup.find_all("div", class_="article block untagged mb15")
# for child in jokeList:
# print(child.find("h2").string + "\t" + "".join(child.find("div",class_="content").stripped_strings))
# time.sleep(1)
def crawl_joke_list_use_bs4(page=1):
    """Fetch the qiushibaike listing page and print each joke container.

    Downloads ``http://www.qiushibaike.com/shr``, parses the response with
    the ``html5lib`` parser and prints every ``<div>`` element whose CSS
    class is ``main-text``.

    Args:
        page: Page number requested by the caller.
            NOTE(review): the URL below is fixed and does not use this
            value -- confirm the site's paging scheme before wiring it in.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            the timeout.
    """
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get("http://www.qiushibaike.com/shr", timeout=10)
    soup = BeautifulSoup(response.text, "html5lib")

    # A bare string as the second positional argument filters by CSS class.
    for block in soup.find_all('div', 'main-text'):
        print(block)

    # Pause so that repeated calls do not hammer the server.
    # NOTE(review): the original indentation was lost; the pause is assumed
    # to apply once per call rather than once per printed element.
    time.sleep(1)
if __name__ == "__main__":
    # Script entry point: crawl and print the first page only.
    crawl_joke_list_use_bs4(page=1)
分类: Python

0 条评论

发表回复

Avatar placeholder

您的电子邮箱地址不会被公开。 必填项已用*标注