# -*- coding:utf-8 -*-
"""Regular expressions 01: a short tour of ``re.findall``.

Each call below rebinds the module-level name ``m`` to the list of matches
returned by ``re.findall``. Only the first result is printed; the expected
value of every other call is given in the comment next to it.
"""
import re  # regular-expression package

# A literal pattern returns every non-overlapping occurrence.
m = re.findall("abc", "aaaaabcccabcc")
print(m)  # ['abc', 'abc']

# Patterns are written as raw strings (r"...") so that the backslash reaches
# the regex engine untouched; "\d" in a normal string literal is an invalid
# escape sequence and triggers a warning on modern Python versions.
m = re.findall(r"\d", "123abc1234abc")  # ['1', '2', '3', '1', '2', '3', '4']

# Four consecutive digits: only "1234" qualifies.
m = re.findall(r"\d\d\d\d", "123abc1234abc")  # ['1234']

# With one capture group, findall returns the group's text, not the whole match.
m = re.findall(r"<div>(.*)</div>", "<div>hello</div>")  # ['hello']

# Greedy matching: ".*" runs to the LAST "</div>", swallowing the inner tags.
m = re.findall(
    r"<div>(.*)</div>", "<div>hello</div><div>world</div>"
)  # ['hello</div><div>world']

# Non-greedy matching (append "?"): ".*?" stops at the FIRST "</div>".
m = re.findall(
    r"<div>(.*?)</div>", "<div>hello</div><div>world</div>"
)  # ['hello', 'world']

# Disabled crawler example, kept for reference only. NOTE(review): it is
# written for Python 2 (urllib2, print statement) and will not run unchanged
# on Python 3.
#
# import urllib2  # crawler package
#
# res = urllib2.urlopen('http://blog.leanote.com/qq-alan')
# ret = res.read()
#
# m = re.findall(r'title="全文">(.*?)</a>', ret)
# # m = re.findall(r'title="全文">(.*)</a>', ret,re.S)
# # re.S lets "." match newlines too, so the page is treated as one string.
# m = re.findall(r'<div class="title">.*?title="全文">(.*?)</a>', ret, re.S)
# for t in m:
#     # print t.strip()
分类: Python
0 条评论