豆瓣爬蟲(CookieJar 練習:爬取用戶登入後的響應頁面)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import ssl
import urllib
import urllib.parse
import urllib.request
from http.cookiejar import CookieJar

# Disable HTTPS certificate verification globally for this process.
# The attribute must be bound to the factory function itself, not to the
# result of calling it: the original author notes that adding parentheses
# after `_create_unverified_context` raises an error.
# NOTE(review): this switches off TLS certificate checks for HTTPS requests
# that rely on the default context -- tolerable for a practice script, but
# confirm before reusing this anywhere else.
ssl._create_default_https_context = ssl._create_unverified_context

# Simulated login to Douban: the login endpoint plus the POST form fields.
post_url = 'https://www.douban.com/accounts/login'

# The account e-mail and password can be supplied through the DOUBAN_EMAIL
# and DOUBAN_PASSWORD environment variables so that real credentials do not
# have to be committed with the script.
# NOTE(review): the literal fallbacks are kept only so the script behaves
# exactly as before when the variables are unset; they should be removed
# (and the password rotated) once callers set the environment variables.
form_data = {
    'source': 'index_nav',
    'form_email': os.environ.get('DOUBAN_EMAIL', '[email protected]'),
    'form_password': os.environ.get('DOUBAN_PASSWORD', '794662577.a'),
}

# urllib expects a POST body as bytes: URL-encode the fields into a query
# string, then encode that string. `form_data` is deliberately rebound from
# the dict to the encoded bytes because later code passes it as `data=`.
form_data = urllib.parse.urlencode(form_data).encode('utf-8')

# Request headers copied from a browser's "POST /accounts/login HTTP/1.1"
# request. Content-Length, Cache-Control and Accept-Encoding from that
# capture are left out, and so is the Cookie header: cookies are meant to be
# obtained through the CookieJar rather than hard-coded here.
headers = {
    'Host': 'www.douban.com',
    'Connection': 'keep-alive',
    'Origin': 'https://www.douban.com',
    'Upgrade-Insecure-Requests': '1',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.77 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Referer': 'https://www.douban.com/',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}

# Build an opener whose cookie processor stores cookies set by the server in
# `cookiejar`, instead of sending a hand-written Cookie header.
cookiejar = CookieJar()
cookiejar_handler = urllib.request.HTTPCookieProcessor(cookiejar)
opener = urllib.request.build_opener(cookiejar_handler)

# Passing `data` makes urllib issue this request as a POST.
request = urllib.request.Request(url=post_url, data=form_data, headers=headers)

# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if reading raises.
with opener.open(request) as response:
    raw_body = response.read()

# Decoded page text. Undecodable bytes are replaced instead of raising, so a
# response that is not clean UTF-8 no longer aborts the script before the
# page has been saved.
content = raw_body.decode('utf-8', errors='replace')

# Save the body exactly as received. For valid UTF-8 this is byte-identical
# to re-encoding the decoded text, without the redundant round trip.
with open('./douban.html', mode='wb') as fp:
    fp.write(raw_body)



 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章