# -*- coding:utf-8 -*-
# 正则表达式01
import re # 正则表达式包
# Walk-through of re.findall(): each call returns a list of all
# non-overlapping matches, and `m` is rebound to the latest result.
# Every pattern is a raw string (r"...") so that regex escapes such as \d
# reach the regex engine untouched instead of being parsed as (invalid)
# string escapes, which newer Python versions warn about.

# Literal match: every occurrence of "abc" in the subject string.
m = re.findall(r"abc", "aaaaabcccabcc")
print(m)  # ['abc', 'abc']

# \d matches a single digit, so each digit is returned separately.
m = re.findall(r"\d", "123abc1234abc")
# m == ['1', '2', '3', '1', '2', '3', '4']

# Four consecutive digits: only "1234" qualifies ("123" is too short).
m = re.findall(r"\d\d\d\d", "123abc1234abc")
# m == ['1234']

# With one capture group, findall returns the group's text, not the
# whole match.
m = re.findall(r"<div>(.*)</div>", "<div>hello</div>")
# m == ['hello']

# Greedy matching: ".*" grabs as much as possible, so the capture runs
# from the first <div> all the way to the last </div>.
m = re.findall(r"<div>(.*)</div>", "<div>hello</div><div>world</div>")
# m == ['hello</div><div>world']

# Non-greedy matching (append "?"): ".*?" stops at the first </div>, so
# each element is captured on its own.
m = re.findall(r"<div>(.*?)</div>", "<div>hello</div><div>world</div>")
# m == ['hello', 'world']
# import urllib2 # 爬虫的包
#
# res = urllib2.urlopen('http://blog.leanote.com/qq-alan')
# ret = res.read()
# # m = re.findall(r'title="全文">(.*?)</a>', ret)
# # m = re.findall(r'title="全文">(.*)</a>', ret,re.S)
# m = re.findall(r'<div class="title">.*?title="全文">(.*?)</a>', ret, re.S)  # re.S (DOTALL): "." also matches "\n", so one match can span multiple lines
# for t in m:
# # print t.strip()
# Category: Python
# Comments: 0