{"id":1049,"date":"2018-07-26T15:32:32","date_gmt":"2018-07-26T07:32:32","guid":{"rendered":"http:\/\/www.sniper97.cn\/?p=1049"},"modified":"2018-07-26T15:32:32","modified_gmt":"2018-07-26T07:32:32","slug":"%e7%ac%ac%e5%85%ab%e8%8a%82%ef%bc%9a%e7%88%ac%e5%8f%96%e6%96%b0%e6%b5%aa%e5%be%ae%e5%8d%9a","status":"publish","type":"post","link":"http:\/\/www.sniper97.cn\/index.php\/note\/carwler\/1049\/","title":{"rendered":"\u7b2c\u516b\u8282\uff1a\u722c\u53d6\u65b0\u6d6a\u5fae\u535a"},"content":{"rendered":"<p>\u672c\u9875\u4ee3\u7801\u53ef\u4ee5\u5728<a href=\"https:\/\/github.com\/Sniper970119\/Spider\/tree\/master\/20180721\" target=\"_blank\" rel=\"noopener\" data-slimstat=\"5\">\u8fd9\u91cc<\/a>\u4e0b\u8f7d\u3002<\/p>\n<h5>1.\u5206\u6790\u8bf7\u6c42<\/h5>\n<p>\u6253\u5f00\u65b0\u6d6a\u5fae\u535a\uff0c\u8fc7\u6ee4Ajax\u8bf7\u6c42\u5e76\u67e5\u770b\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/62.png\" alt=\"\" class=\"alignnone size-full wp-image-1046\" width=\"1276\" height=\"615\" \/><br \/>\n\u6211\u4eec\u53ef\u4ee5\u770b\u5230URL\u4e2d\u67094\u4e2a\u5c5e\u6027\u503c\uff0c\u5206\u522b\u662ftype\u3001value\u3001containerid\u548cpage\uff0c\u5176\u4e2dcontainerid\u4e0d\u8fc7\u662f107603\u52a0\u4e0auid\uff0cpage\u662f\u6539\u53d8\u7684\u503c\u3002<\/p>\n<h5>2.\u5206\u6790\u54cd\u5e94<\/h5>\n<p>\u6211\u4eec\u53d1\u73b0\u54cd\u5e94\u6570\u636e\u90fd\u5728data\u4e0b\uff0c\u5176\u4e2dcardlistInfo\u662f\u5b58\u50a8\u5fae\u535a\u603b\u6570\u7b49\u5c5e\u6027\uff0ccards\u5305\u542b\u4e8610\u6761\u5fae\u535a\u7684\u4fe1\u606f\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/63.png\" alt=\"\" class=\"alignnone size-full wp-image-1047\" width=\"1012\" height=\"622\" \/><br \/>\n\u7136\u540e\u6211\u4eec\u89c2\u5bdf\u8bf7\u6c42\u5934\u90e8\u5206\uff0c\u5e76\u627e\u51fa\u6700\u5c0f\u6709\u6548\u5b50\u96c6\uff08\u4e00\u822c\u662fcookies\u3001ua\u3001host\u3001referer\uff09<br \/>\n\u7136\u540e\u81ea\u5b9a\u4e49\u5934\u3002<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/67.png\" alt=\"\" class=\"alignnone size-full wp-image-1050\" width=\"750\" height=\"284\" \/><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">headers = {\n    'Host': 'm.weibo.cn',\n    'Referer': 'https:\/\/m.weibo.cn\/u\/2656260571',\n    'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/'\n                  '67.0.3396.99 Safari\/537.36',\n    'X-Requested-With': 'XMLHttpRequest'\n}\n<\/pre>\n<p>\u9996\u5148\u83b7\u53d6\u9875\u9762json\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># get page code\ndef get_page(page):\n    params = {\n        'type': 'uid',\n        'value': '2656260571',\n        'containerid': '1076032656260571',\n        'page': page\n    }\n    url = base_url + urlencode(params)\n    try:\n        response = requests.get(url=url, headers=headers)\n        if response.status_code == 200:\n            return response.json()\n    except requests.ConnectionError as e:\n        print('Error', e.args)\n<\/pre>\n<p>\u968f\u540e\u5904\u7406\u8fd9\u4e9bjson\uff0c\u63d0\u53d6\u51fa\u9700\u8981\u7684\u4fe1\u606f\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># parse the code\ndef parse_page(json):\n    if json:\n        items = json.get('data').get('cards')\n        for item in items:\n            item = item.get('mblog')\n            weibo = {}\n            weibo['id'] = item['id']\n            weibo['text'] = item['text']\n            weibo['attitudes'] = item['attitudes_count']\n            weibo['comment'] = item['comments_count']\n            weibo['reposts'] = item['reposts_count']\n            yield weibo\n<\/pre>\n<p>\u5b58\u5165\u6570\u636e\u5e93\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># save to database\ndef save_to_mongo(result):\n    client = pymongo.MongoClient(host='localhost', port=27017)\n    db = client.WeiBoTest\n    collection = db.WeiBo\n    collection.insert(result)\n<\/pre>\n<p>\u4e3b\u51fd\u6570\u8c03\u7528\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># main\nif __name__ == '__main__':\n    for page in range(1, 15):\n        json = get_page(page)\n        results = parse_page(json)\n        for result in results:\n            print(result)\n            print()\n            save_to_mongo(result)<\/pre>\n<p>&nbsp;<br \/>\n\u8fd0\u884c\u7ed3\u679c\uff1a<br \/>\nconsole\u4e2d\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/65.png\" alt=\"\" class=\"alignnone size-full wp-image-1051\" width=\"1883\" height=\"443\" \/><br \/>\nRobo 3T\u4e2d\uff1a<br \/>\n<img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2018\/07\/66.png\" alt=\"\" class=\"alignnone size-full wp-image-1052\" width=\"867\" height=\"937\" \/><br \/>\n&nbsp;<br \/>\n\u5b8c\u6574\u4ee3\u7801\uff1a<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\"># -*- coding:utf-8 -*-\nfrom urllib.parse import urlencode\nimport pymongo\nimport requests\nbase_url = 'https:\/\/m.weibo.cn\/api\/container\/getIndex?'\nheaders = {\n    'Host': 'm.weibo.cn',\n    'Referer': 'https:\/\/m.weibo.cn\/u\/2656260571',\n    'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/'\n                  '67.0.3396.99 Safari\/537.36',\n    'X-Requested-With': 'XMLHttpRequest'\n}\n# get page code\ndef get_page(page):\n    params = {\n        'type': 'uid',\n        'value': '2656260571',\n        'containerid': '1076032656260571',\n        'page': page\n    }\n    url = base_url + urlencode(params)\n    try:\n        response = requests.get(url=url, headers=headers)\n        if response.status_code == 200:\n            return response.json()\n    except requests.ConnectionError as e:\n        print('Error', e.args)\n# parse the code\ndef parse_page(json):\n    if json:\n        items = json.get('data').get('cards')\n        for item in items:\n            item = item.get('mblog')\n            weibo = {}\n            weibo['id'] = item['id']\n            weibo['text'] = item['text']\n            weibo['attitudes'] = item['attitudes_count']\n            weibo['comment'] = item['comments_count']\n            weibo['reposts'] = item['reposts_count']\n            yield weibo\n# save to database\ndef save_to_mongo(result):\n    client = pymongo.MongoClient(host='localhost', port=27017)\n    db = client.WeiBoTest\n    collection = db.WeiBo\n    collection.insert(result)\n# main\nif __name__ == '__main__':\n    for page in range(1, 15):\n        json = get_page(page)\n        results = parse_page(json)\n        for result in results:\n            print(result)\n            print()\n            save_to_mongo(result)\n<\/pre>\n<p>&nbsp;<br \/>\n.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u9875\u4ee3\u7801\u53ef\u4ee5\u5728\u8fd9\u91cc\u4e0b\u8f7d\u3002 1.\u5206\u6790\u8bf7\u6c42 \u6253\u5f00\u65b0\u6d6a\u5fae\u535a\uff0c\u8fc7\u6ee4Ajax\u8bf7\u6c42\u5e76\u67e5\u770b\u3002 \u6211\u4eec\u53ef\u4ee5\u770b\u5230URL\u4e2d [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_mi_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[12],"tags":[],"views":2144,"_links":{"self":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/1049"}],"collection":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1049"}],"version-history":[{"count":0,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/1049\/revisions"}],"wp:attachment":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1049"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/categories?post=1049"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1049"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}