有時候網頁需要HTTP認證才能登錄,那麼,怎麼辦呢?
這裏需要注意的是,其與cookie和form的認證是不同的,HTTP認證主要與加密通信結合使用;
因涉及的類比較多,這裏直接上代碼,不過有註釋,應該可以看懂:
import sys, urllib.request
from urllib.request import HTTPBasicAuthHandler, HTTPPasswordMgr, build_opener
import getpass
# A password manager that falls back to asking on the terminal.
class TerminalPassword(HTTPPasswordMgr):
    """Password manager that prompts the user when no credentials are stored."""

    def find_user_password(self, realm, authuri):
        """Return a ``(username, password)`` pair for *realm* at *authuri*.

        Credentials previously registered with ``add_password`` are returned
        unchanged. When the base class finds nothing it returns
        ``(None, None)``; in that case the user is asked for a username
        (read from stdin) and a password (read without echo via ``getpass``).
        """
        stored = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        # The base class signals "no match" with (None, None), never with 0,
        # so the check has to be against None; anything else is a real hit.
        if stored[0] is not None or stored[1] is not None:
            return stored
        sys.stdout.write("Login required for %s at %s\n "%(realm,authuri))
        sys.stdout.write("Username: ")
        # The prompt has no trailing newline, so flush it explicitly before
        # blocking on stdin; otherwise it may stay in the output buffer.
        sys.stdout.flush()
        username = sys.stdin.readline().rstrip()
        password = getpass.getpass().rstrip()
        return username, password
# Handler for HTTP Basic authentication; it obtains credentials from the
# password manager it is given (here: one that prompts on the terminal).
handler = HTTPBasicAuthHandler(TerminalPassword())
# Opener that routes requests through the authentication handler.
opener = build_opener(handler)
# Install the opener globally so that plain urllib.request.urlopen() uses it.
urllib.request.install_opener(opener)
url = 'http://jwc.scnu.edu.cn/default2.aspx'
# Pretend to be a browser. The value is built from adjacent string literals
# instead of a backslash continuation inside the literal, which would glue
# the two pieces together (or embed the next line's indentation in the value).
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) '
                   'Gecko/20100101 Firefox/23.0'),
}
url2 = urllib.request.Request(url, headers=headers)
# Finally, open the URL; the context manager closes the response afterwards.
with urllib.request.urlopen(url2) as res:
    # Print the final URL and every response header.
    print('from: ', res.geturl())
    info = res.info()
    for key, value in info.items():
        print("%s=%s" % (key, value))
    # To dump the page body as well (assumes the page is GBK-encoded):
    # print(res.read().decode('GBK'))
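注意:1. 下面這一行看起來有點奇怪:這裏只是把一個TerminalPassword實例傳給HTTPBasicAuthHandler,並沒有顯式調用它的任何方法。這是因爲HTTPBasicAuthHandler類在服務器要求認證(返回401)時,會自動調用TerminalPassword的合適的函數(即find_user_password)來取得用戶名和密碼: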
handler = HTTPBasicAuthHandler(TerminalPassword())