{"id":934,"date":"2018-07-21T22:21:17","date_gmt":"2018-07-21T14:21:17","guid":{"rendered":"http:\/\/www.sniper97.cn\/?p=934"},"modified":"2018-07-21T22:21:17","modified_gmt":"2018-07-21T14:21:17","slug":"%e7%ac%ac%e4%b8%89%e8%8a%82%ef%bc%9a%e5%9f%ba%e6%9c%ac%e5%ba%93%e4%bd%bf%e7%94%a8","status":"publish","type":"post","link":"http:\/\/www.sniper97.cn\/index.php\/note\/carwler\/934\/","title":{"rendered":"\u7b2c\u4e09\u8282\uff1a\u57fa\u672c\u5e93\u4f7f\u7528"},"content":{"rendered":"<p>\u672c\u9875\u6e90\u7801\u53ef\u4ee5\u5728<a href=\"https:\/\/github.com\/Sniper970119\/Spider\/tree\/master\/20180721\" target=\"_blank\" rel=\"noopener\">\u8fd9\u91cc<\/a>\u67e5\u770b\u6216\u4e0b\u8f7d\u3002<\/p>\n<h3>1.urllib\u5e93<\/h3>\n<h4>1.\u53d1\u9001\u8bf7\u6c42<\/h4>\n<p>\uff081\uff09request\uff1a\u53ef\u4ee5\u65b9\u4fbf\u5730\u5b9e\u73b0\u8bf7\u6c42\u7684\u53d1\u9001\u5e76\u5f97\u5230\u54cd\u5e94\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlopen test\nimport urllib.request\nresponce = urllib.request.urlopen('https:\/\/www.python.org')\nprint(responce.read().decode('utf-8'))\n<\/pre>\n<p>\u6211\u4eec\u53ef\u4ee5\u770b\u89c1\u5728\u63a7\u5236\u53f0\u4e0a\u8f93\u51fa\u4e86\u7f51\u9875\u7684\u6e90\u7801\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/1.png\" alt=\"\" class=\"alignnone size-full wp-image-935\" width=\"1226\" height=\"609\" \/><br \/>\n\u6211\u4eec\u5728\u4e0a\u9762\u4ee3\u7801\u540e\u52a0\u4e0a\u4e00\u53e5<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">print(responce.status)\nprint(responce.getheaders())\nprint(responce.getheader('Server'))\n<\/pre>\n<p>\u8f93\u51fa<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/2-1.png\" alt=\"\" class=\"alignnone size-full wp-image-937\" width=\"1858\" height=\"113\" \/><br 
\/>\n\u53ef\u4ee5\u770b\u5230responce\u4e2d\u7684\u72b6\u6001\u7801\u3001\u62a5\u5934\u3001\u62a5\u5934\u4e2d\u7684\u67d0\u4e00\u9879\u3002<br \/>\n\u6211\u4eec\u8fd8\u53ef\u4ee5\u4f20\u9012\u4e00\u4e9b\u53c2\u6570<br \/>\nurllib.request.urlopen(url,data=None,[timeout, ]*,cafile=None,capath=None,cadefault=False,context=None)<br \/>\n\u5176\u4e2d\uff1a<br \/>\ndata\uff1a<br \/>\n\u53ef\u9009\u53c2\u6570\uff0c\u4ee5POST\u65b9\u5f0f\u63d0\u4ea4\u8868\u5355\u3002<br \/>\n\u6dfb\u52a0\u4ee3\u7801\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">import urllib.parse\ndata = bytes(urllib.parse.urlencode({'word': 'hello'}),encoding= 'utf-8')\nresponce = urllib.request.urlopen(\"http:\/\/httpbin.org\/post\",data=data)\nprint(responce.read())<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">b'{\n\"args\":{},\n\"data\":\"\",\n\"files\":{},\n\"form\":{\n\"word\":\"hello\"\n},\n\"headers\":{\"Accept-Encoding\":\"identity\",\n\"Connection\":\"close\",\n\"Content-Length\":\"10\",\n\"Content-Type\":\"application\/x-www-form-urlencoded\",\n\"Host\":\"httpbin.org\",\n\"User-Agent\":\"Python-urllib\/3.6\"\n},\n\"json\":null,\n\"origin\":\"124.93.197.137\",\n\"url\":\"http:\/\/httpbin.org\/post\"\n}\\n'<\/pre>\n<p>\u8fd9\u91cc\u6211\u4eec\u4f20\u9012\u4e86\u4e00\u4e2a\u53c2\u6570word\uff0c\u503c\u662fhello\uff0c\u5b83\u88ab\u8f6c\u7801\u6210bytes\uff08\u5b57\u8282\u6d41\uff09\u7c7b\u578b\u3002<br \/>\n\u8fd9\u91cc\u7684\u8bf7\u6c42\u7ad9\u70b9\u662fhttpbin.org\uff0c\u63d0\u4f9bHTTP\u8bf7\u6c42\u6d4b\u8bd5\uff0c\u6211\u4eec\u770b\u5230\u8868\u5355\u4e2d\u5df2\u7ecf\u6709 &#8220;word&#8221;:&#8221;hello&#8221;\u3002<br \/>\n&nbsp;<br \/>\ntimeout\uff1a<br \/>\n\u7528\u4e8e\u8bbe\u7f6e\u8d85\u65f6\u65f6\u95f4\uff0c\u5355\u4f4d\u4e3a\u79d2\uff0c\u5230\u65f6\u6ca1\u5f97\u5230\u54cd\u5e94\u5219\u629b\u51fa\u5f02\u5e38\uff0c\u5982\u679c\u4e0d\u6307\u5b9a\u9ed8\u8ba4\u5168\u5c40\u53c2\u6570\u3002<br 
\/>\n\u6dfb\u52a0\u4ee3\u7801\uff1a<br \/>\n\u6211\u778e\u6253\u7684\u7f51\u7ad9\uff0c\u8fd9\u662f\u4e2a\u6839\u672c\u4e0d\u5b58\u5728\u7684\u7f51\u7ad9\uff0c\u6240\u4ee5\u80af\u5b9a\u4f1a\u8d85\u65f6\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlopen-timeout test\nresponce = urllib.request.urlopen('http:\/\/www.google.com', timeout=1)\nprint(responce.read())<\/pre>\n<p>\u4e8e\u662f\u6211\u4eec\u610f\u6599\u4e4b\u4e2d\u7684\u5f97\u5230\u4e86\u62a5\u9519\u3002<br \/>\nurllib.error.URLError: &lt;urlopen error timed out&gt;<br \/>\ncontext\uff1a\u5fc5\u987b\u662fssl.SSLContext\u7c7b\u578b\uff0c\u7528\u6765\u6307\u5b9aSSL\u8bbe\u7f6e\u3002<br \/>\ncafile\u548ccapath\uff1a\u6307\u5b9aCA\u8bc1\u4e66\u548c\u5b83\u7684\u8def\u5f84\u3002<br \/>\ncadefault\uff1a\u5df2\u7ecf\u5f03\u7528\uff0c\u9ed8\u8ba4\u4e3aFalse\u3002<br \/>\n&nbsp;<br \/>\n\uff082\uff09Request<br \/>\nRequest\u5bf9\u8c61\u53ef\u4ee5\u66f4\u52a0\u7075\u6d3b\u7684\u914d\u7f6e\u53c2\u6570<br \/>\nclass urllib.request.Request(url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None)<br \/>\n\u5176\u4e2d\uff1a<br \/>\nurl\uff1a\u8bf7\u6c42\u7684URL\uff0c\u5fc5\u9009\u53c2\u6570<br \/>\ndata:\u53ef\u9009\u53c2\u6570\uff0c\u4e00\u5b9a\u662fbytes\u7c7b\u578b\uff0c\u5982\u679c\u662f\u5b57\u5178\uff0c\u9700\u8981\u5148\u7528urllib.parse\u6a21\u5757\u91cc\u7684urlencode\uff08\uff09\u7f16\u7801\u3002<br \/>\nheaders\uff1a\u8bf7\u6c42\u5934\uff0c\u53ef\u4ee5\u5728\u6784\u9020\u8bf7\u6c42\u65f6\u76f4\u63a5\u6784\u9020\uff0c\u4e5f\u53ef\u4ee5\u8c03\u7528add_header()\u65b9\u6cd5\u6dfb\u52a0\uff0c\u901a\u5e38\u7528\u6765\u4f2a\u88c5\u6210\u6d4f\u89c8\u5668\u3002<br \/>\norigin_req_host\uff1a\u8bf7\u6c42\u65b9\u7684host\u540d\u79f0\u6216\u8005ip\u5730\u5740\u3002<br 
\/>\nunverifiable\uff1a\u8868\u793a\u8bf7\u6c42\u662f\u5426\u662f\u65e0\u6cd5\u9a8c\u8bc1\u7684\uff0c\u9ed8\u8ba4\u662fFalse\uff0c\u610f\u601d\u662f\u7528\u6237\u6ca1\u6709\u8db3\u591f\u7684\u6743\u9650\u6765\u9009\u62e9\u63a5\u6536\u8fd9\u4e2a\u8bf7\u6c42\u7684\u7ed3\u679c\u3002<br \/>\nmethod\uff1a\u8bf7\u6c42\u7684\u65b9\u6cd5\u3002POST\u3001GET\u3001PUT\u7b49\u3002<br \/>\n\u6dfb\u52a0\u4ee3\u7801\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># request test with factor\nurl = 'http:\/\/httpbin.org\/post'\nheaders = {\n    \"User-Agent\": 'Mozilla\/4.0(compatible;MSIE 5.5;Windows NT)',\n    'Host': 'httpbin.org'\n}\ndict = {\n    'name': 'Sniper'\n}\ndata = bytes(urllib.parse.urlencode(dict), encoding='utf-8')\nreq = urllib.request.Request(url=url, data=data, headers=headers, method='POST')\nresponce = urllib.request.urlopen(req)\nprint(responce.read().decode('utf-8'))\n<\/pre>\n<p>\u53d1\u73b0\u8fd4\u56de\u7684\u6570\u636e\u5df2\u7ecf\u6839\u636e\u6211\u4eec\u53d1\u8fc7\u53bb\u7684\u62a5\u5934\u53d8\u5316\u4e86\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">{\n\"args\":{},\n\"data\":\"\",\n\"files\":{},\n\"form\":{\n\"name\":\"Sniper\"\n},\n\"headers\":{\n\"Accept-Encoding\":\"identity\",\n\"Connection\":\"close\",\n\"Content-Length\":\"11\",\n\"Content-Type\":\"application\/x-www-form-urlencoded\",\n\"Host\":\"httpbin.org\",\n\"User-Agent\":\"Mozilla\/4.0(compatible;MSIE 5.5;Windows NT)\"\n},\n\"json\":null,\n\"origin\":\"124.93.197.193\",\n\"url\":\"http:\/\/httpbin.org\/post\"\n}\n<\/pre>\n<p>&nbsp;<br \/>\n\u6211\u4eec\u8fd8\u53ef\u4ee5\u4f7f\u7528\u66f4\u9ad8\u7ea7\u7684\u5de5\u5177Handler\uff0c\u53ef\u4ee5\u7406\u89e3\u4e3a\u5404\u79cd\u5904\u7406\u5668\uff0c\u6709\u4e13\u95e8\u5904\u7406\u767b\u5f55\u8bf7\u6c42\u7684\uff0c\u6709\u4e13\u95e8\u5904\u7406Cookies\u7684\uff0c\u6709\u4e13\u95e8\u5904\u7406\u4ee3\u7406\u7684\u7b49\u3002<br 
\/>\nBaseHandler\u7c7b\uff0c\u5b83\u662f\u6240\u6709\u5176\u4ed6Handler\u7684\u7236\u7c7b\uff0c\u5b83\u63d0\u4f9b\u4e86\u6700\u57fa\u672c\u7684\u65b9\u6cd5\uff0c\u4f8b\u5982default_open()\u3001protocol_request\u7b49\u3002<br \/>\n<strong>HTTPDefaultErrorHandler<\/strong>\uff1a\u7528\u4e8e\u5904\u7406HTTP\u54cd\u5e94\u9519\u8bef\uff0c\u9519\u8bef\u90fd\u4f1a\u629b\u51fa<code>HTTPError<\/code>\u7c7b\u578b\u7684\u5f02\u5e38\u3002<br \/>\n<strong>HTTPRedirectHandler<\/strong>\uff1a\u7528\u4e8e\u5904\u7406\u91cd\u5b9a\u5411\u3002<br \/>\n<strong>HTTPCookieProcessor<\/strong>\uff1a\u7528\u4e8e\u5904\u7406Cookies\u3002<br \/>\n<strong>ProxyHandler<\/strong>\uff1a\u7528\u4e8e\u8bbe\u7f6e\u4ee3\u7406\uff0c\u9ed8\u8ba4\u4ee3\u7406\u4e3a\u7a7a\u3002<br \/>\n<strong>HTTPPasswordMgr<\/strong>\uff1a\u7528\u4e8e\u7ba1\u7406\u5bc6\u7801\uff0c\u5b83\u7ef4\u62a4\u4e86\u7528\u6237\u540d\u548c\u5bc6\u7801\u7684\u8868\u3002<br \/>\n<strong>HTTPBasicAuthHandler<\/strong>\uff1a\u7528\u4e8e\u7ba1\u7406\u8ba4\u8bc1\uff0c\u5982\u679c\u4e00\u4e2a\u94fe\u63a5\u6253\u5f00\u65f6\u9700\u8981\u8ba4\u8bc1\uff0c\u90a3\u4e48\u53ef\u4ee5\u7528\u5b83\u6765\u89e3\u51b3\u8ba4\u8bc1\u95ee\u9898\u3002<br \/>\n\u9a8c\u8bc1\uff1a<br \/>\n\u6dfb\u52a0\u4ee3\u7801\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># Opener test\nfrom urllib.request import HTTPPasswordMgrWithPriorAuth, HTTPBasicAuthHandler, build_opener\nfrom urllib.error import URLError\nusername = \"admin\"\npassword = ''\nurl = 'http:\/\/192.168.0.1\/index.asp'\np = HTTPPasswordMgrWithPriorAuth()\np.add_password(None, url, username, password)\nauth_handler = HTTPBasicAuthHandler(p)\nopener = build_opener(auth_handler)\ntry:\n    result = opener.open(url)\n    html = result.read().decode('utf-8')\n    print(html)\nexcept URLError as e:\n    print(e.reason)\n<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff0c\u53ef\u4ee5\u770b\u5230\u4e86\u663e\u793a\u4e86\u901a\u8fc7\u9a8c\u8bc1\u7684\u6e90\u7801\uff1a<br \/>\n<img 
loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/QQ\u622a\u56fe20180722215221.png\" alt=\"\" class=\"alignnone size-full wp-image-949\" width=\"1090\" height=\"310\" \/><br \/>\n\u4ee3\u7406\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># Proxy test\nproxy_handler = ProxyHandler({\n    'http': 'http:\/\/127.0.0.1:9743',\n    'https': 'https:\/\/127.0.0.1:9743'\n})\nopener = build_opener(proxy_handler)\ntry:\n    # responce = opener.open('http:\/\/www.sniper97.cn')\n    responce = opener.open('https:\/\/python.org')\n    print(responce.read().decode('utf-8'))\nexcept URLError as e:\n    print(e.reason)<\/pre>\n<p>&nbsp;<br \/>\nCookies\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># Cookies test\ncookie = http.cookiejar.CookieJar()\nhandler = urllib.request.HTTPCookieProcessor(cookie)\nopener = build_opener(handler)\nresponce = opener.open('http:\/\/www.baidu.com')\nfor item in cookie:\n    print(item.name+\"=\"+item.value)<\/pre>\n<p><img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/2-2.png\" alt=\"\" class=\"alignnone size-full wp-image-950\" width=\"451\" height=\"161\" \/><br \/>\n\u6211\u4eec\u8fd8\u53ef\u4ee5\u5c06Cookies\u8f93\u51fa\u81f3\u6587\u4ef6\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># Cookies test : save in file\nfilename = 'cookies.txt'\ncookie = http.cookiejar.MozillaCookieJar(filename) # LWPCookieJar is always ok,they are two style\nhandler = urllib.request.HTTPCookieProcessor(cookie)\nopener = build_opener(handler)\nresponce = opener.open('https:\/\/www.baidu.com')\ncookie.save(ignore_discard=True, ignore_expires=True)<\/pre>\n<p>MozillaCookieJar\uff08\uff09\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/3-1.png\" alt=\"\" class=\"alignnone size-full wp-image-951\" width=\"813\" 
height=\"249\" \/><br \/>\nLWPCookieJar\uff08\uff09\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/4-1.png\" alt=\"\" class=\"alignnone size-full wp-image-952\" width=\"1046\" height=\"159\" \/><br \/>\n\u6211\u4eec\u4f7f\u7528\u8fd9\u4e2a\u6587\u4ef6\u91cc\u7684Cookies\u8bbf\u95ee\u767e\u5ea6\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># Cookies test : use Cookies\ncookie = http.cookiejar.LWPCookieJar()\ncookie.load('cookies.txt',ignore_discard=True,ignore_expires=True)\nhandler = urllib.request.HTTPCookieProcessor(cookie)\nopener = build_opener(handler)\nresponce = opener.open('http:\/\/www.baidu.com')\nprint(responce.read().decode('utf-8'))<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff0c\u767e\u5ea6\u6e90\u7801\u88ab\u8fd4\u56de\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/5-1.png\" alt=\"\" class=\"alignnone size-full wp-image-953\" width=\"706\" height=\"356\" \/><br \/>\n&nbsp;<\/p>\n<h4>2.\u5904\u7406\u5f02\u5e38<\/h4>\n<p>urllib\u7684error\u6a21\u5757\u5b9a\u4e49\u4e86\u7531request\u6a21\u5757\u4ea7\u751f\u7684\u5f02\u5e38\u3002<br \/>\n\uff081\uff09URLError<br \/>\n\u50cf\u8fd9\u6837\uff0c\u5982\u679cURL\u4e0d\u5b58\u5728\uff0c\u5c31\u4f1a\u629b\u51faNot Found\u5f02\u5e38\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># URLError test\ntry:\n    responce = urllib.request.urlopen('https:\/\/www.baidu.com')\nexcept URLError as e:\n    print(e.reason)\n<\/pre>\n<p>\uff082\uff09HTTPError<br \/>\n\u662fURLError\u7684\u5b50\u7c7b\uff0c\u4e13\u95e8\u7528\u6765\u5904\u7406HTTP\u8bf7\u6c42\u9519\u8bef\uff0c\u6bd4\u5982\u8ba4\u8bc1\u8bf7\u6c42\u7b49\u3002<br \/>\n\u50cf\u8fd9\u6837\uff0c\u5982\u679cURL\u4e0d\u5b58\u5728\uff0c\u5c31\u4f1a\u629b\u51faNot Found\u5f02\u5e38\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># HTTPError test\ntry:\n 
   responce = urllib.request.urlopen('http:\/\/www.baidu.com')\nexcept HTTPError as e:\n    print(e.reason, e.code, e.headers, sep='\\n')\n<\/pre>\n<p>&nbsp;<\/p>\n<h4>3.\u89e3\u6790\u94fe\u63a5<\/h4>\n<p>\uff081\uff09urlparse\uff08\uff09<br \/>\n\u8be5\u65b9\u6cd5\u53ef\u4ee5\u5b9e\u73b0URL\u7684\u8bc6\u522b\u548c\u5206\u6bb5\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlparse test\nresult = urlparse('http:\/\/www.baidu.com\/index.html;user?id=5#comment')\nprint(type(result), result)<\/pre>\n<p>\u7a0b\u5e8f\u8f93\u51fa\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">&lt;class 'urllib.parse.ParseResult'&gt;\nParseResult(scheme='http', netloc='www.baidu.com', path='\/index.html', params='user', query='id=5', fragment='comment')<\/pre>\n<p>\u5176\u4e2d\uff0c\u6700\u540e\u4e00\u4e2afragment\u662f\u951a\u70b9\uff0c\u7528\u4e8e\u76f4\u63a5\u786e\u5b9a\u754c\u9762\u7684\u4e0b\u62c9\u4f4d\u7f6e\u3002<br \/>\n\u5b9e\u9645\u4e0a\u8fd4\u56de\u7684result\u662f\u4e00\u4e2a\u5143\u7ec4\uff0c\u6211\u4eec\u53ef\u4ee5\u7528\u4e0b\u6807\u6216\u8005\u5c5e\u6027\u540d\u6765\u83b7\u53d6\u3002<br \/>\n\uff082\uff09urlunparse\uff08\uff09<br \/>\n\u8fd9\u76f8\u5f53\u4e8eurlparse\u7684\u9006\u5411\uff0c\u4f20\u5165\u53c2\u6570\u7684\u957f\u5ea6\u5fc5\u987b\u662f6<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlunparse test\ndata = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment']\nprint(urlunparse(data))<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/6-1.png\" alt=\"\" class=\"alignnone size-full wp-image-954\" width=\"486\" height=\"82\" \/><br \/>\n\uff083\uff09urlsplit\uff08\uff09<br \/>\n\u548curlparse\uff08\uff09\u7c7b\u4f3c\uff0c\u53ea\u4e0d\u8fc7\u4e0d\u518d\u5355\u72ec\u89e3\u6790params\u8fd9\u4e00\u9879\uff0c\u53ea\u8fd4\u56de5\u4e2a\u7ed3\u679c\u3002<\/p>\n<pre 
class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlsplit test\nresult = urlsplit('http:\/\/www.baidu.com\/index.html;user?id=5#comment')\nprint(type(result), result, sep='\\n')<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/7-1.png\" alt=\"\" class=\"alignnone size-full wp-image-955\" width=\"908\" height=\"75\" \/><br \/>\n\uff084\uff09urlunsplit\uff08\uff09<br \/>\n\u548curlunparse\uff08\uff09\u7c7b\u4f3c\uff0c\u540c\u6837\u53ea\u67095\u4e2a\u53c2\u6570\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlunsplit test\ndata = ['http', 'www.baidu.com', 'index.html', 'a=6', 'comment']\nprint(urlunsplit(data))<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/8-1.png\" alt=\"\" class=\"alignnone size-full wp-image-956\" width=\"385\" height=\"77\" \/><br \/>\n\uff085\uff09urljoin\uff08\uff09<br \/>\n\u4e00\u79cd\u751f\u6210\u94fe\u63a5\u7684\u65b9\u6cd5\uff0c\u4e00\u4e2a\u57fa\u7840\u94fe\u63a5\u4f5c\u4e3a\u7b2c\u4e00\u4e2a\u53c2\u6570\uff0c\u65b0\u7684\u94fe\u63a5\u4f5c\u4e3a\u7b2c\u4e8c\u4e2a\u53c2\u6570\uff0c\u8fd4\u56de\u4ee5\u65b0\u94fe\u63a5\u4e3a\u51c6\uff0c\u4f46\u662f\u5982\u679c\u65b0\u94fe\u63a5\u7684\u6210\u5206\u4e0d\u5b8c\u6574\uff0c\u5219\u548c\u57fa\u7840\u94fe\u63a5\u5408\u6210\u4e00\u4e2a\u5b8c\u6574\u94fe\u63a5\u8fd4\u56de\u3002<br \/>\n\uff086\uff09urlencode\uff08\uff09<br \/>\n\u81ea\u52a8\u751f\u6210GET\u8bf7\u6c42URL\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># urlencode test\nparams = {\n    'name': 'Sniper',\n    'age': '21'\n}\nbase_url = 'http:\/\/www.baidu.com?'\nurl = base_url+urlencode(params)\nprint(url)\n<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" 
src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/9-1.png\" alt=\"\" class=\"alignnone size-full wp-image-957\" width=\"371\" height=\"98\" \/><br \/>\n\uff087\uff09parse_qs( )<br \/>\nurlencode( \uff09\u7684\u9006\u8fc7\u7a0b\uff0c\u8fd4\u56de\u503c\u4e3a\u5b57\u5178\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># parse_qs test\nquery = '?name=Sniper&amp;age=21'\nprint(parse_qs(query))<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/10-2.png\" alt=\"\" class=\"alignnone size-full wp-image-958\" width=\"334\" height=\"84\" \/><br \/>\n\uff088\uff09parse_qsl\uff08\uff09<br \/>\nurlencode( \uff09\u7684\u9006\u8fc7\u7a0b\uff0c\u8fd4\u56de\u503c\u4e3a\u5143\u7ec4\u7ec4\u6210\u7684\u5217\u8868\u3002<br \/>\n\uff089\uff09quote\uff08\uff09<br \/>\n\u5c06\u5185\u5bb9\u8f6c\u4e3aURL\u7f16\u7801\u683c\u5f0f\u3002\uff08\u7528\u5728URL\u4e2d\u6709\u4e2d\u6587\u65f6\uff09<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># quote test\nkeyword = '\u4e00\u4e2a\u4e0d\u4f1a\u73a9\u72d9\u7684\u5929\u624d\u72d9\u51fb\u624b'\nurl = 'https:\/\/www.baidu.com\/s?wd='+quote(keyword)\nprint(url)\n<\/pre>\n<p>\u7a0b\u5e8f\u8f93\u51fa\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/11-2.png\" alt=\"\" class=\"alignnone size-full wp-image-959\" width=\"1103\" height=\"100\" \/><br \/>\n\u6211\u4eec\u70b9\u51fb\u8fd9\u4e2a\u94fe\u63a5\uff1a<a href=\"https:\/\/www.baidu.com\/s?wd=%E4%B8%80%E4%B8%AA%E4%B8%8D%E4%BC%9A%E7%8E%A9%E7%8B%99%E7%9A%84%E5%A4%A9%E6%89%8D%E7%8B%99%E5%87%BB%E6%89%8B\" target=\"_blank\" rel=\"noopener\">https:\/\/www.baidu.com\/s?wd=%E4%B8%80%E4%B8%AA%E4%B8%8D%E4%BC%9A%E7%8E%A9%E7%8B%99%E7%9A%84%E5%A4%A9%E6%89%8D%E7%8B%99%E5%87%BB%E6%89%8B<\/a><br \/>\n\u4f1a\u53d1\u73b0\u767e\u5ea6\u641c\u7d22\u4e86\u6211\u4eec\u7684keyword\u3002<br 
\/>\n\uff0810\uff09unquote\uff08\uff09<br \/>\nquote\uff08\uff09\u7684\u9006\u8fc7\u7a0b\u3002<br \/>\n4.Robots\u534f\u8bae<br \/>\n\u5229\u7528urllib\u4e2d\u7684robotparser\u6a21\u5757\uff0c\u6211\u4eec\u53ef\u4ee5\u5b9e\u73b0\u7f51\u7ad9\u7684Robots\u534f\u8bae\u7684\u5206\u6790\u3002<br \/>\n\uff081\uff09robots\u534f\u8bae<br \/>\n\u662f\u7528\u6765\u89c4\u5b9a\u54ea\u4e9b\u9875\u9762\u53ef\u4ee5\u6293\u53d6\uff0c\u54ea\u4e9b\u4e0d\u53ef\u4ee5\uff0c\u901a\u5e38\u5b58\u5728\u4e00\u4e2a\u53eb\u505arobots.txt\u7684\u6587\u4ef6\u4e2d\uff0c\u5b58\u653e\u5728\u670d\u52a1\u5668\u7aef\u3002<br \/>\nrobots\u6709\u4ee5\u4e0b\u51e0\u4e2a\u5b57\u6bb5\uff1a<br \/>\nUser-agent:\u00a0 # \u7528\u6765\u6307\u5b9a\u5bf9\u54ea\u4e9b\u722c\u866b\u6709\u6548\uff0c\u6bd4\u5982Baiduspider\uff08\u767e\u5ea6\uff09\u3001*\uff08\u6240\u6709\uff09\u3002<br \/>\nDisallow\uff1a # \u4e0d\u5141\u8bb8\u54ea\u4e9b\u9875\u9762\u88ab\u6293\u53d6\u3002<br \/>\nAllow\uff1a # \u5141\u8bb8\u54ea\u4e9b\u9875\u9762\u88ab\u6293\u53d6\u3002<br \/>\n\uff082\uff09\u722c\u866b\u540d\u79f0<br \/>\n\u56fa\u5b9a\u7684\u540d\u5b57\uff0c\u6bd4\u5982\uff1a<br \/>\nBaiduspider\u00a0\u00a0\u00a0\u00a0\u00a0 \u767e\u5ea6<br \/>\nGooglebot\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 \u8c37\u6b4c<br \/>\n360Spider\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 360\u641c\u7d22<br \/>\nYodaoBot\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 \u6709\u9053<br \/>\nia_archiver\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 Alexa<br \/>\nScooter\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 altavista<br \/>\n\uff083\uff09robotparser\uff08\uff09<br \/>\n\u53ef\u4ee5\u7528\u8fd9\u4e2a\u65b9\u6cd5\u6765\u89e3\u6790robots.txt<br \/>\nset_url\uff08\uff09\uff1a\u7528\u6765\u8bbe\u7f6erobots.txt\u7684\u94fe\u63a5\uff0c\u5982\u679c\u5728\u521b\u5efaRobotFileParser\uff08\uff09\u7684\u65f6\u5019\u4f20\u5165\u4e86URL\u90a3\u4e48\u5c31\u4e0d\u9700\u8981\u8c03\u7528\u8be5\u65b9\u6cd5\u3002<br 
\/>\nread\uff08\uff09\uff1a\u8bfb\u53d6robots.txt\u6587\u4ef6\uff0c\u8fd9\u4e2a\u65b9\u6cd5\u6267\u884c\u4e00\u4e2a\u8bfb\u53d6\u548c\u5206\u6790\u7684\u64cd\u4f5c\uff0c\u867d\u7136\u6ca1\u6709\u8fd4\u56de\u7ed3\u679c\u4f46\u662f\u5fc5\u987b\u8c03\u7528\u3002<br \/>\nparse\uff08\uff09\uff1a\u89e3\u6790robots.txt\u3002<br \/>\ncan_fetch\uff08\uff09\uff1a\u4f20\u53c2\u6709\u4e24\u4e2a\uff0c\u7b2c\u4e00\u4e2a\u662fUser-agent\uff0c\u7b2c\u4e8c\u4e2a\u662fURL\uff0c\u8fd4\u56de\u7684\u662f\u8be5\u641c\u7d22\u5f15\u64ce\u662f\u5426\u53ef\u4ee5\u6293\u53d6\u8fd9\u4e2aURL\uff0c\u503c\u4e3aTrue\u6216False\u3002<br \/>\nmtime\uff08\uff09\uff1a\u8fd4\u56de\u4e0a\u6b21\u6293\u53d6\u548c\u5206\u6790robots.txt\u7684\u65f6\u95f4\u3002<br \/>\nmodified\uff08\uff09\uff1a\u5c06\u5f53\u524d\u65f6\u95f4\u8bbe\u7f6e\u4e3a\u4e0a\u6b21\u6293\u53d6\u548c\u5206\u6790robots.txt\u7684\u65f6\u95f4\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># robotparser test\nrp = RobotFileParser()\nrp.set_url('http:\/\/www.jianshu.com\/robots.txt')\nrp.read()\nprint(rp.can_fetch('*', 'http:\/\/www.jianshu.com\/p\/b67554025d7d'))\nprint(rp.can_fetch('*', 'http:\/\/www.jianshu.com\/search?q=python&amp;page=1&amp;type=collections'))<\/pre>\n<p>&nbsp;<\/p>\n<h3>2.requests<\/h3>\n<h4>1.\u57fa\u672c\u7528\u6cd5<\/h4>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test\nr = requests.get('http:\/\/www.baidu.com')\nprint(type(r))\nprint(r.status_code)\nprint(type(r.text))\nprint(r.text)\nprint(r.cookies)<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/12-2.png\" alt=\"\" class=\"alignnone size-full wp-image-963\" width=\"720\" height=\"187\" \/><br 
\/>\n\u6211\u4eec\u53ef\u4ee5\u770b\u5230\uff0c\u5b83\u7684\u54cd\u5e94\u7c7b\u578b\u662frequests.models.Response\uff0c\u54cd\u5e94\u4f53\u7684\u7c7b\u578b\u662fstr\uff0cCookies\u7684\u7c7b\u578b\u662fRequestsCookieJar\u3002<br \/>\n&nbsp;<br \/>\nGET\u8bf7\u6c42\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; get\ndata = {\n'name': 'Spider',\n'age': '21'\n}\nr = requests.get('http:\/\/httpbin.org\/get',data)\nprint(r.text)<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">{\n\"args\":{\n\"age\":\"21\",\n\"name\":\"Spider\"\n},\n\"headers\":{\n\"Accept\":\"*\/*\",\n\"Accept-Encoding\":\"gzip, deflate\",\n\"Connection\":\"close\",\n\"Host\":\"httpbin.org\",\n\"User-Agent\":\"python-requests\/2.19.1\"\n},\n\"origin\":\"124.93.197.137\",\n\"url\":\"http:\/\/httpbin.org\/get?name=Spider&amp;age=21\"\n}\n<\/pre>\n<p>\u6211\u4eec\u53ef\u4ee5\u770b\u5230url\u90e8\u5206\uff0c\u662f\u4f7f\u7528\u7684get\u65b9\u6cd5\u8bbf\u95ee\u7684\u3002<br \/>\n\u6211\u4eec\u4f7f\u7528URL+\u6b63\u5219\u722c\u53d6\u77e5\u4e4e\u3002<br \/>\n\u8fd9\u91cc\u5fc5\u987b\u52a0\u4e0aheaders\uff0c\u5426\u5219\u4f1a\u88ab\u77e5\u4e4e\u62d2\u7edd\u8bbf\u95ee\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; zhihu\nheaders = {\n    'User-agent': 'Mozilla\/5.0(Macintosh;Intel Mac OS X 10_11_4)AppleWebKit\/537.36(KHTML, like Gecko)Chrome\/52.0.2743.116 Safari\/537.36'\n}\nr = requests.get('https:\/\/www.zhihu.com\/explore',headers=headers)\npattern = re.compile('explore-feed.*?question_link.*?&gt;(.*?)&lt;\/a&gt;',re.S)\ntitles = re.findall(pattern, r.text)\nprint(titles)<\/pre>\n<p>\u5176\u4e2d\u6b63\u5219\u8868\u8fbe\u5f0f\u6839\u636e\u5206\u6790\u9875\u9762\u6e90\u7801\u5f97\u51fa<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/15-2.png\" alt=\"\" class=\"alignnone size-full 
wp-image-966\" width=\"1307\" height=\"487\" \/><br \/>\n\u6211\u4eec\u8fd0\u884c\u7ed3\u679c\u548c\u5f53\u524d\u77e5\u4e4e\u7684\u9875\u9762\uff1a<br \/>\n&nbsp;<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/13-2.png\" alt=\"\" class=\"alignnone size-full wp-image-964\" width=\"707\" height=\"947\" \/><br \/>\n\u6293\u53d6\u7ed3\u679c\u548c\u9875\u9762\u95ee\u9898\u4e00\u81f4\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/14-2.png\" alt=\"\" class=\"alignnone size-full wp-image-965\" width=\"1715\" height=\"109\" \/><br \/>\n&nbsp;<br \/>\n\u6293\u53d6\u4e8c\u8fdb\u5236\u6587\u4ef6<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; binary file\nr= requests.get('https:\/\/github.com\/favicon.ico')\nprint(r.text)\nprint(r.content)\nwith open('favicon.ico', 'wb')as f:\n    f.write(r.content)<\/pre>\n<p>\u6211\u4eec\u770b\u5230\u4e24\u4e2a\u8f93\u51fa\u8f93\u51fa\u7684\u662f\u4e71\u7801\u548c\u5341\u516d\u8fdb\u5236\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/16-2.png\" alt=\"\" class=\"alignnone size-full wp-image-967\" width=\"1033\" height=\"356\" \/><br \/>\n\u8fd9\u65f6\u5019\u6211\u4eec\u4ee5\u4e8c\u8fdb\u5236\u5199\u683c\u5f0f\u6253\u5f00\u4e00\u4e2a\u6587\u4ef6\uff0c\u5c06\u6570\u636e\u5199\u5165\uff0c\u6211\u4eec\u53d1\u73b0\u56fe\u7247\u722c\u53d6\u6210\u529f\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/17-2.png\" alt=\"\" class=\"alignnone size-full wp-image-968\" width=\"376\" height=\"287\" \/><br \/>\n\u6dfb\u52a0headers\uff1a<br \/>\n\u4e0a\u6817\u5bf9\u77e5\u4e4e\u7684\u722c\u53d6\u4e2d\u4f7f\u7528\u8fc7\u3002<br \/>\n&nbsp;<br \/>\nPOST\u8bf7\u6c42\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># 
requests test -&gt; post\ndata = {\n    'name': 'Spider',\n    'age': '21'\n}\nr= requests.post('http:\/\/httpbin.org\/post', data=data)\nprint(r.text)<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/18-2.png\" alt=\"\" class=\"alignnone size-full wp-image-969\" width=\"1792\" height=\"93\" \/><br \/>\n&nbsp;<br \/>\n\u54cd\u5e94\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; responce\nheaders = {\n    'User-agent': 'Mozilla\/5.0(Macintosh;Intel Mac OS X 10_11_4)AppleWebKit\/537.36(KHTML, like Gecko)Chrome\/52.0.2743.116 Safari\/537.36'\n}\nr = requests.get('http:\/\/www.jianshu.com',headers=headers)\nprint(type(r.status_code), r.status_code)\nprint(type(r.headers), r.headers)\nprint(type(r.cookies), r.cookies)\nprint(type(r.url), r.url)\nprint(type(r.history), r.history)<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/19-2.png\" alt=\"\" class=\"alignnone size-full wp-image-970\" width=\"1795\" height=\"214\" \/><br \/>\n&nbsp;<br \/>\n\u6587\u4ef6\u4e0a\u4f20\uff1a<br \/>\n\u4f7f\u7528POST\u4e0a\u4f20\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; post\nfiles = {\n    'file':open('favicon.ico', 'rb')\n}\nr = requests.post('http:\/\/httpbin.org\/post',files=files)\nprint(r.text)<\/pre>\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/20-2.png\" alt=\"\" class=\"alignnone size-full wp-image-971\" width=\"924\" height=\"933\" \/><br \/>\n&nbsp;<br \/>\nCookies\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; cookies\nr= requests.get('http:\/\/www.baidu.com')\nprint(r.cookies)\nfor key, value in 
r.cookies.items():\n    print(key+'='+value)\n<\/pre>\n<p>\u7528Cookies\u7ef4\u6301\u767b\u5f55\uff1a<br \/>\n\u6211\u4eec\u5148\u628a\u77e5\u4e4e\u8bf7\u6c42\u7684cookies\u590d\u5236\u4e0b\u6765\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/23-1.png\" alt=\"\" class=\"alignnone size-full wp-image-974\" width=\"666\" height=\"374\" \/><br \/>\n\u7f16\u5199\u4ee3\u7801\u722c\u53d6\u4e2a\u4eba\u4e3b\u9875\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; cookies -&gt; zhihu\nheaders = {\n    'Cookies': '_xsrf=Buel2ZkWUtC7SgTSB8EJPCF4Br5Rcimr;'\n               ' _zap=1f70cf3c-9b1a-4513-a8f3-d6abb4270880;'\n               ' d_c0=\"AMCkg9ZJ7w2PTnLDQAncDVfCUc59_P4qggs=|1532176674\";'\n               ' q_c1=6e44fd4dd5064217a670c1c5aaf31711|1532176674000|1532176674000;'\n               ' capsion_ticket=\"2|1:0|10:1532176678|14:capsion_ticket|44:NWQ1YWNkNzU5ZjhiNGJmYmI0NTkyNmJiYzg4OWM5YzM=|67f43de453cf2ec1315e2bc98b734af6bef647447e40c73341943d6a8a2b4724\";'\n               ' z_c0=\"2|1:0|10:1532176688|4:z_c0|92:Mi4xemN3Z0FnQUFBQUFBd0tTRDFrbnZEU1lBQUFCZ0FsVk5NSGRBWEFEVnFndHJQYUZ1OWNXdHJidTJ4eldhX0NkRUFn|8155841af129425202e2ef982f977517d7940a6257866e3df4221696ff4710ef\"',\n    'Host': 'www.zhihu.com',\n    'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/67.0.3396.99 Safari\/537.36'\n}\nr= requests.get('https:\/\/www.zhihu.com\/people\/zhao-yu-12-34\/activities', headers=headers)\nprint(r.text)<\/pre>\n<p>\u6211\u4eec\u5148\u770b\u4e3b\u9875\u90e8\u5206\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/21-3.png\" alt=\"\" class=\"alignnone size-full wp-image-975\" width=\"951\" height=\"326\" \/><br 
\/>\n\u722c\u53d6\u7684\u4ee3\u7801\u4e2d\u53ef\u4ee5\u770b\u5230\u5bf9\u5e94\u7684\u6587\u5b57\uff0c\u8bf4\u660e\u722c\u53d6\u7684\u65f6\u5019\u662f\u7ef4\u6301\u767b\u5f55\u72b6\u6001\u7684\u3002<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/22-1.png\" alt=\"\" class=\"alignnone size-full wp-image-973\" width=\"1449\" height=\"217\" \/><br \/>\n&nbsp;<br \/>\n\u4f1a\u8bdd\u7ef4\u6301\uff1a<br \/>\n\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528Cookies\u8fdb\u884c\u4f1a\u8bdd\u7ef4\u6301\uff0c\u4f46\u662f\u90a3\u592a\u9ebb\u70e6\u4e86\uff0c\u6211\u4eec\u8fd8\u53ef\u4ee5\u4f7f\u7528Session\u8fdb\u884c\u4f1a\u8bdd\u7ef4\u6301\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; session\nrequests.get('http:\/\/httpbin.org\/cookies\/set\/number\/123456789')\nr= requests.get('http:\/\/httpbin.org\/cookies')\nprint(r.text)\ns = requests.session()\ns.get('http:\/\/httpbin.org\/cookies\/set\/number\/123456789')\nr = s.get('http:\/\/httpbin.org\/cookies')\nprint(r.text)<\/pre>\n<p>\u8f93\u51fa\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/24-1.png\" alt=\"\" class=\"alignnone size-full wp-image-976\" width=\"398\" height=\"163\" \/><br \/>\n\u53d1\u73b0\u7b2c\u4e00\u6b21\u5982\u679c\u76f4\u63a5\u8bbf\u95ee\u4e24\u6b21\uff0ccookies\u4e3a\u7a7a\uff0c\u7b2c\u4e8c\u6b21\u83b7\u53d6\u5230session\u518d\u8bbf\u95ee\uff0ccookies\u4e3a\u7b2c\u4e00\u6b21\u8bbf\u95ee\u65f6\u8bbe\u7f6e\u7684cookies\u3002<br \/>\n&nbsp;<br \/>\nSSL\u8bc1\u4e66\u9a8c\u8bc1\uff1a<br \/>\nrequests\u8fd8\u63d0\u4f9b\u4e86\u8bc1\u4e66\u9a8c\u8bc1\u7684\u529f\u80fd\u3002\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528verify\u53c2\u6570\u63a7\u5236\u662f\u5426\u68c0\u67e5\u8bc1\u4e66\uff0c\u9ed8\u8ba4\u503c\u4e3aTrue\uff0c\u81ea\u52a8\u9a8c\u8bc1\u3002<br 
\/>\nhttps\u9700\u8981\u4e00\u4e2aCA\u8bc1\u4e66\u8ba4\u8bc1\uff0c\u4f46\u662f12306\u7684\u8ba4\u8bc1\u662f\u94c1\u9053\u90e8\u81ea\u884c\u7b7e\u53d1\u7684\uff0c\u4e0d\u88abCA\u8ba4\u8bc1\u673a\u6784\u6240\u4fe1\u4efb\uff0c\u6240\u4ee5\u5982\u679c\u722c\u53d612306\u5c31\u9700\u8981\u8bbe\u7f6e\u4e0d\u68c0\u67e5\u3002<br \/>\n\u4e0d\u52a0verify\u53c2\u6570\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/26-1.png\" alt=\"\" class=\"alignnone size-full wp-image-977\" width=\"977\" height=\"458\" \/><br \/>\n\u589e\u52a0verify\u53c2\u6570\uff0c\u867d\u7136\u4f1a\u63d0\u51fa\u8b66\u544a\uff0c\u4f46\u662f\u72b6\u6001\u7801\u6b63\u5e38\u8fd4\u56de\uff0c\u5e76\u4e14\u9000\u51fa\u7801\u4e5f\u662f0\uff1a<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/27-2.png\" alt=\"\" class=\"alignnone size-full wp-image-979\" width=\"884\" height=\"368\" \/><br \/>\n&nbsp;<br \/>\n\u4ee3\u7406\uff1a<br \/>\n\u6211\u4eec\u9700\u8981\u8bbe\u7f6e\u4ee3\u7406\u6765\u89e3\u51b3\u56e0\u4e3a\u9ad8\u9891\u8bbf\u95ee\u5bfc\u81f4\u7684\u62d2\u7edd\u8bbf\u95ee\u6216\u8005\u9a8c\u8bc1\u7801\u8fd9\u7c7b\u95ee\u9898\uff0c\u9700\u8981\u7528\u5230proxies\u53c2\u6570\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># request test -&gt; proxies\nproxies = {\n    'http': 'http:\/\/xxx.xxx.xxx.xxx:xxxx',\n    'https': 'https:\/\/xxx.xxx.xxx.xxx:xxxx'\n}\nr = requests.get('https:\/\/www.taobao.com', proxies=proxies)\nprint(r.text)\n<\/pre>\n<p>&nbsp;<br \/>\n\u8d85\u65f6\u8bbe\u7f6e\uff1a<br \/>\n\u9632\u6b62\u65f6\u95f4\u592a\u4e45\u6ca1\u6709\u54cd\u5e94\uff0c\u4f7f\u7528timeout\u53c2\u6570\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; timeout\nr = requests.get('https:\/\/www.taobao.com', timeout=1)\nprint(r.text)<\/pre>\n<p>&nbsp;<br \/>\n\u8eab\u4efd\u9a8c\u8bc1\uff1a<\/p>\n<pre 
class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; auth\nr = requests.get(\"http:\/\/192.168.0.1\/index.asp\", auth=('admin', ''))\nprint(r.status_code)\nprint(r.text)<\/pre>\n<p>&nbsp;<br \/>\nPrepared Request\uff1a<br \/>\n\u7528\u6765\u5c06\u8bf7\u6c42\u8868\u793a\u4e3a\u6570\u636e\u7ed3\u6784\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># requests test -&gt; prepare_request\nurl = 'http:\/\/httpbin.org\/post'\ndata = {\n    'name': 'Sniper'\n}\nheaders = {\n    'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/67.0.3396.99 Safari\/537.36'\n}\ns = Session()\nreq = Request('POST', url, data=data, headers=headers)\nprepped = s.prepare_request(req)\nr = s.send(prepped)\nprint(r.text)<\/pre>\n<p>\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/28-1.png\" alt=\"\" class=\"alignnone size-full wp-image-981\" width=\"1820\" height=\"169\" \/><br \/>\n\u8fd9\u91cc\u6211\u4eec\u5f15\u5165Request\uff0c\u7136\u540eurl\u3001data\u548cheaders\u53c2\u6570\u6784\u9020\u4e86\u4e00\u4e2aRequest\u5bf9\u8c61\uff0c\u8fd9\u65f6\u518d\u8c03\u7528Session\u7684prepare_request\uff08\uff09\u65b9\u6cd5\u5c06\u5176\u8f6c\u6362\u6210\u4e00\u4e2aPrepared Request\u5bf9\u8c61\uff0c\u7136\u540e\u8c03\u7528send\uff08\uff09\u65b9\u6cd5\u53d1\u9001\u5373\u53ef\u3002<br \/>\n&nbsp;<\/p>\n<h3>3.\u6b63\u5219\u8868\u8fbe\u5f0f<\/h3>\n<p><strong>\\w\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u5b57\u6bcd\u3001\u6570\u5b57\u53ca\u4e0b\u5212\u7ebf<br \/>\n\\W<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4e0d\u662f\u5b57\u6bcd\u3001\u6570\u5b57\u53ca\u4e0b\u5212\u7ebf\u7684\u5b57\u7b26<br \/>\n\\s<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4efb\u610f\u7a7a\u767d\u5b57\u7b26\uff0c\u7b49\u4ef7\u4e8e[\\t\\n\\r\\f]<br \/>\n\\S<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 
<\/strong>\u5339\u914d\u4efb\u610f\u975e\u7a7a\u5b57\u7b26<br \/>\n\\d<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4efb\u610f\u6570\u5b57\uff0c\u7b49\u4ef7\u4e8e[0-9]<br \/>\n\\D<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4efb\u610f\u975e\u6570\u5b57\u7684\u5b57\u7b26<br \/>\n\\A<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u5b57\u7b26\u4e32\u5f00\u5934<br \/>\n\\Z<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u5b57\u7b26\u4e32\u7ed3\u5c3e\uff0c\u5982\u679c\u5b58\u5728\u6362\u884c\uff0c\u53ea\u5339\u914d\u5230\u6362\u884c\u524d\u7684\u7ed3\u675f\u5b57\u7b26\u4e32<br \/>\n\\z<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u5b57\u7b26\u4e32\u7ed3\u5c3e\uff0c\u5982\u679c\u5b58\u5728\u6362\u884c\uff0c\u540c\u65f6\u8fd8\u4f1a\u5339\u914d\u6362\u884c\u7b26<br \/>\n\\G<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u6700\u540e\u5339\u914d\u5b8c\u6210\u7684\u4f4d\u7f6e<br \/>\n\\n<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4e00\u4e2a\u6362\u884c\u7b26<br \/>\n\\t<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4e00\u4e2a\u5236\u8868\u7b26<br \/>\n^<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4e00\u884c\u5b57\u7b26\u4e32\u7684\u5f00\u5934<br \/>\n$<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u4e00\u884c\u5b57\u7b26\u4e32\u7684\u7ed3\u5c3e<br \/>\n.<strong>\u00a0\u00a0 <\/strong>\u5339\u914d\u4efb\u610f\u5b57\u7b26\uff0c\u9664\u4e86\u6362\u884c\u7b26\uff0c\u5f53re.DOTALL\u6807\u8bb0\u88ab\u6307\u5b9a\u65f6\uff0c\u5219\u53ef\u4ee5\u5339\u914d\u5305\u62ec\u6362\u884c\u7b26\u7684\u4efb\u610f\u5b57\u7b26<br \/>\n[&#8230;]<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u7528\u6765\u8868\u793a\u4e00\u7ec4\u5b57\u7b26\uff0c\u5355\u72ec\u5217\u51fa\u3002<br \/>\n[^&#8230;]<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u4e0d\u5728<code>[]<\/code>\u4e2d\u7684\u5b57\u7b26\u3002<br 
\/>\n*<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d0\u4e2a\u6216\u591a\u4e2a\u8868\u8fbe\u5f0f<br \/>\n+<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d1\u4e2a\u6216\u591a\u4e2a\u8868\u8fbe\u5f0f<br \/>\n?<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d0\u4e2a\u62161\u4e2a\u524d\u9762\u7684\u6b63\u5219\u8868\u8fbe\u5f0f\u5b9a\u4e49\u7684\u7247\u6bb5\uff0c\u975e\u8d2a\u5a6a\u65b9\u5f0f<br \/>\n{n}<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u7cbe\u786e\u5339\u914dn\u4e2a\u524d\u9762\u7684\u8868\u8fbe\u5f0f<br \/>\n{n, m}<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914dn\u5230m\u6b21\u7531\u524d\u9762\u6b63\u5219\u8868\u8fbe\u5f0f\u5b9a\u4e49\u7684\u7247\u6bb5\uff0c\u8d2a\u5a6a\u65b9\u5f0f<br \/>\na|b<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914da\u6216b<br \/>\n( )<strong>\u00a0\u00a0\u00a0\u00a0\u00a0 <\/strong>\u5339\u914d\u62ec\u53f7\u5185\u7684\u8868\u8fbe\u5f0f\uff0c\u4e5f\u8868\u793a\u4e00\u4e2a\u7ec4<br \/>\n&nbsp;<br \/>\nmatch\uff08\uff09\uff1a<br \/>\n\u7528\u6765\u68c0\u67e5\u6b63\u5219\u8868\u8fbe\u5f0f\u662f\u5426\u5339\u914d\u5b57\u7b26\u4e32\u3002<br \/>\n.group\uff08\uff09\uff1a\u6253\u51fa\u6b63\u5219\u8868\u8fbe\u5f0f\u5339\u914d\u7684\u5185\u5bb9\u3002<br \/>\n.span\uff08\uff09\uff1a\u8f93\u51fa\u5339\u914d\u7684\u5b57\u7b26\u4e32\u5728\u539f\u5b57\u7b26\u4e32\u4e2d\u7684\u76f8\u5bf9\u4f4d\u7f6e\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># re test -&gt; match\nstr = 'Hello 123456 123456 World Test'\nresult = re.match('^Hello.*Test$', str)\nprint(result)\nprint(result.group())\nprint(result.span())<\/pre>\n<p>\u8f93\u51fa\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/29-1.png\" alt=\"\" class=\"alignnone size-full wp-image-984\" width=\"663\" height=\"91\" \/><br \/>\n\u8d2a\u5a6a&amp;\u975e\u8d2a\u5a6a\uff1a<br \/>\n.* 
\u9ed8\u8ba4\u662f\u8d2a\u5a6a\u7684\uff0c\u6240\u4ee5\u9664\u975e\u662f\u5b57\u7b26\u4e32\u6700\u540e\uff0c\u4e00\u822c\u52a0\u4e0a\uff1f \u7528\u6765\u5b9a\u4e49\u975e\u8d2a\u5a6a\uff0c\u4f7f\u76ee\u6807\u53ef\u4ee5\u5339\u914d\u5230\u66f4\u591a\u5b57\u7b26\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># re test -&gt; greedy &amp; non-greedy\nstr = 'Hello 123456 123456 World Test'\nresult = re.match('^He.*(\\d+).*st$', str)\nprint(result)\nprint(result.group(0))\nprint(result.group(1))\nprint(result.span())\nresult = re.match('^He.*?(\\d+).*?st$', str)\nprint(result)\nprint(result.group(0))\nprint(result.group(1))\nprint(result.span())<\/pre>\n<p>\u7ed3\u679c\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/30.png\" alt=\"\" class=\"alignnone size-full wp-image-985\" width=\"667\" height=\"193\" \/><br \/>\n&nbsp;<br \/>\n\u8f6c\u4e49\u5b57\u7b26\uff1a<br \/>\n\u5982\u679c\u539f\u6587\u7b26\u53f7\u51fa\u73b0\u5728\u6b63\u5219\u8868\u8fbe\u5f0f\u4e2d\uff0c\u6211\u4eec\u9700\u8981\u8fdb\u884c\u8f6c\u4e49\uff0c\u5728\u7b26\u53f7\u524d\u9762\u52a0\u4e0a\\\u5373\u53ef \u3002<br \/>\n&nbsp;<br \/>\nsearch\uff08\uff09\uff1a<br \/>\n\u7528\u6765\u68c0\u67e5\u6b63\u5219\u8868\u8fbe\u5f0f\u662f\u5426\u5339\u914d\u5b57\u7b26\u4e32\u3002\u76f8\u6bd4match\uff08\uff09\uff0c\u5b83\u53ef\u4ee5\u641c\u7d22\u51fa\u5b57\u7b26\u4e32\u4e2d\u7684\u5339\u914d\u5b50\u5b57\u7b26\u4e32\uff0c\u800cmatch\uff08\uff09\u5219\u8981\u6c42\u5b57\u7b26\u4e32\u5f00\u5934\u5fc5\u987b\u7b26\u5408\u6b63\u5219\u8868\u8fbe\u5f0f\u3002<br \/>\nsearch\uff08\u2018regex\u2019\uff0c \u2018str\u2019\uff0c re.S\uff09<br 
\/>\n\u5176\u4e2d\u8fd9\u4e2are.S\uff08\u5373re.DOTALL\uff09\u4f7f<code>.<\/code>\u53ef\u4ee5\u5339\u914d\u5305\u62ec\u6362\u884c\u7b26\u7684\u4efb\u610f\u5b57\u7b26\uff0c\u5efa\u8bae\u52a0\u4e0a\uff0c\u56e0\u4e3a\u5f88\u591a\u7f51\u9875\u6e90\u7801\u4e2d\u90fd\u662f\u6709\u6362\u884c\u7684\u3002<br \/>\nfindall\uff08\uff09\uff1a<br \/>\n\u53ef\u4ee5\u8fd4\u56de\u6240\u6709\u5339\u914d\uff0c\u800c\u4e0d\u662f\u7b2c\u4e00\u4e2a\u3002<br \/>\nsub\uff08\uff09\uff1a<br \/>\n\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u4fee\u6539\u6587\u672c\u3002<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># re test -&gt; sub\nstr = 'weqr65s4agf6354agfa'\nresult = re.sub('\\d+', '', str)\nprint(result)<\/pre>\n<p>\u7a0b\u5e8f\u8f93\u51fa\uff1a<br \/>\nweqrsagfagfa<br \/>\n\u5c06\u6240\u6709\u6570\u5b57\u90fd\u66ff\u6362\u6210\u4e86\u7a7a\u3002<br \/>\ncompile\uff08\uff09\uff1a<br \/>\n\u8be5\u65b9\u6cd5\u53ef\u4ee5\u5c06\u6b63\u5219\u8868\u8fbe\u5f0f\u7f16\u8bd1\u6210\u5bf9\u8c61\uff0c\u4ee5\u4fbf\u540e\u9762\u591a\u6b21\u4f7f\u7528\u8be5\u6b63\u5219\u8868\u8fbe\u5f0f\u3002<br \/>\n&nbsp;<br \/>\n&nbsp;<br \/>\n.3<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u9875\u6e90\u7801\u53ef\u4ee5\u5728\u8fd9\u91cc\u67e5\u770b\u6216\u4e0b\u8f7d\u3002 1.urllib\u5e93 1.\u53d1\u9001\u8bf7\u6c42 \uff081\uff09request\uff1a\u53ef\u4ee5\u65b9\u4fbf\u5730\u5b9e 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_mi_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[12],"tags":[],"views":3341,"_links":{"self":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/934"}],"collection":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/comments?post=934"}],"version-history":[{"count":0,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/934\/revisions"}],"wp:attachment":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/media?parent=934"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/categories?post=934"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/tags?post=934"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}