# -*- coding:utf-8 -*-
# 正则表达式01
import re  # 正则表达式包
# Walk-through of re.findall: each call rebinds ``m`` to the list of matches.
# Every pattern is a raw string so that backslashes reach the regex engine
# untouched instead of being parsed as (invalid) string escapes.

# Literal pattern: every non-overlapping occurrence is returned.
m = re.findall(r"abc", "aaaaabcccabcc")
print(m)  # ['abc', 'abc']

# \d matches one digit at a time.
m = re.findall(r"\d", "123abc1234abc")
# m == ['1', '2', '3', '1', '2', '3', '4']

# Four consecutive digits; the leading "123" is too short to match.
m = re.findall(r"\d\d\d\d", "123abc1234abc")
# m == ['1234']

# With one capture group, findall returns the group's text, not the whole match.
m = re.findall(r"<div>(.*)</div>", "<div>hello</div>")
# m == ['hello']

# Greedy: ".*" runs to the LAST "</div>", swallowing the markup in between.
m = re.findall(r"<div>(.*)</div>", "<div>hello</div><div>world</div>")
# m == ['hello</div><div>world']

# Non-greedy: the trailing "?" makes ".*?" stop at the FIRST "</div>".
m = re.findall(r"<div>(.*?)</div>", "<div>hello</div><div>world</div>")
# m == ['hello', 'world']
# import urllib2  # 爬虫的包
#
# res = urllib2.urlopen('http://blog.leanote.com/qq-alan')
# ret = res.read()
# # m = re.findall(r'title="全文">(.*?)</a>', ret)
# # m = re.findall(r'title="全文">(.*)</a>', ret,re.S)
# m = re.findall(r'<div class="title">.*?title="全文">(.*?)</a>', ret, re.S)  # re.S (DOTALL) lets "." match newlines too, so multi-line text is matched as one string
# for t in m:
# # print t.strip()
分类: Python

0 条评论

发表回复

Avatar placeholder

您的电子邮箱地址不会被公开。 必填项已用 * 标注