# -*- coding: utf-8 -*-
"""Fetch a qiushibaike.com listing page and print its joke blocks (BeautifulSoup)."""
import html
import time

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` may block forever on an unresponsive host.
_REQUEST_TIMEOUT_SECONDS = 10

_LISTING_URL = "http://www.qiushibaike.com/shr"

# Legacy implementation, kept for reference only. It requested the paginated
# "/8hr/page/<page>" URL and used different markup selectors.
#
# def crowlJokeListUseBs4(page=1):
#     url = "http://www.qiushibaike.com/8hr/page/" + str(page)
#     res = requests.get(url)
#     soup = BeautifulSoup(res.text, "html5lib")
#     jokeList = soup.find_all("div", class_="article block untagged mb15")
#     for child in jokeList:
#         print(child.find("h2").string + "\t" + "".join(child.find("div",class_="content").stripped_strings))
#     time.sleep(1)


def crawl_joke_list_use_bs4(page=1):
    """Download the listing page and print every ``<div class="main-text">``.

    Args:
        page: Accepted for backward compatibility. The URL requested here has
            no page component, so the value is currently not used.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(_LISTING_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html5lib")
    # A string passed as the second positional argument filters by CSS class.
    for block in soup.find_all('div', 'main-text'):
        # Alternative output that keeps only the inner <span> text:
        # print(str(block.span).replace("<br/>", "")[6:-7]+"\n")
        print(block)

    # NOTE(review): the original line layout was lost; this pause is assumed
    # to follow the loop (one pause per fetched page) -- confirm.
    time.sleep(1)


if __name__ == '__main__':
    # for i in range(1, 1):
    crawl_joke_list_use_bs4(1)
# Category: Python
# 0 comments