因爲前幾天寫了個用Python登錄教務網的爬蟲,這幾天又突然想到用Python登錄淘寶試試,於是二話不說,代碼走起。本來以爲很簡單,但一寫我就傻眼了:登錄淘寶涉及到很多東西,比如驗證碼、加密算法,用httpfox查看post數據又有很多看不懂的內容,所以我百度了大神們的類似代碼,經過幾天痛苦的嘗試,終於搞定了。登錄淘寶最重要的幾點就是:一,驗證碼,我是用正則提取驗證碼地址,然後用webbrowser把驗證碼直接在瀏覽器中顯示,讓用戶輸入;二,獲取token,當你處理好了驗證碼,向login_url提交了登錄請求,你就自然會在返回內容中發現token,然後重定向到另一地址;三,用token拼出token_url並訪問,獲取st,再利用st重定向到淘寶用戶的主頁地址。當你實現了這些就成功了!總的來說就像《奔跑吧,兄弟》裏的遊戲那樣,每獲得一個值並登錄,就會獲得下一個值的線索,依次下去,就會成功。這是我的一孔之見,如果我說錯了什麼,請大神指點!歡迎一起討論!
參考地址:
http://my.oschina.net/u/811744/blog/191165
http://www.sufeinet.com/thread-4585-1-1.html
代碼如下:
<pre name="code" class="python">#-*-coding:gbk-*-
import urllib
import urllib2
import cookielib
import re
import webbrowser
# Login endpoint: the form is POSTed to this address.
login_url = 'https://login.taobao.com/member/login.jhtml'
# Address of the HTTP proxy that is handed to urllib2.ProxyHandler.
proxy_url = "http://120.193.146.95:843"

# Request headers for the login POST. The User-Agent string identifies the
# client as Firefox 35 on Windows NT 6.1, and the Referer is the login page
# itself (the same string as login_url).
headers = dict([
    ("Host", "login.taobao.com"),
    ("User-Agent",
     "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0"),
    ("Referer", login_url),
    ("Content-Type", "application/x-www-form-urlencoded"),
    ("Connection", "Keep-Alive"),
])
# Pre-captured login payload values. The three strings below are shortened
# placeholders (the real values were too long to print in the article) and
# must be replaced with values captured from a real browser login.
# According to the original author they are generated dynamically by the
# login page, differ per user, and any one captured set keeps working as
# long as the account name and password do not change.
# NOTE(review): the author describes this as "safe" because the password is
# already encrypted inside these values - that claim is unverified, and the
# captured strings should presumably be treated as credentials.
ua = "074UW5Tcy...uUm07"
gr = "687474703A.....343433"
password2 = "100eba2....e674077e"

# Only the account name is asked for interactively; no password prompt is
# issued because the password travels inside the captured values above.
username = raw_input("請輸入賬號: ")
# Form fields sent in the login POST body. Keys and values mirror what was
# captured from a browser login; most are sent empty.
# 'CtrlVersion' used to be listed twice with the same value - a repeated key
# in a dict literal silently discards the earlier entry, so it now appears
# once. TPL_checkcode starts empty and is filled in once the user has read
# the captcha.
post = {
    # Captured, user-specific values (see ua / gr / password2).
    'ua': ua,
    'gvfdcre': gr,
    'TPL_password_2': password2,

    # Account name; the plain-text password field is deliberately left empty.
    'TPL_username': username,
    'TPL_password': '',
    'TPL_checkcode': '',
    'TPL_redirect_url': 'http://i.taobao.com/my_taobao.htm?nekot=udm8087E1424147022443',

    # Fixed form fields copied from the captured request.
    'CtrlVersion': '1,0,0,7',
    'loginsite': '0',
    'newlogin': '0',
    'from': 'tb',
    'fc': 'default',
    'style': 'default',
    'css_style': '',
    'tid': 'XOR_1_000000000000000000000000000000_625C4720470A0A050976770A',
    'support': '000001',
    'loginType': '4',
    'minititle': '',
    'minipara': '',
    'umto': 'NaN',
    'pstrong': '3',
    'llnick': '',
    'sign': '',
    'need_sign': '',
    'isIgnore': '',
    'full_redirect': '',
    'popid': '',
    'callback': '',
    'guf': '',
    'not_duplite_str': '',
    'need_user_id': '',
    'poy': '',
    'gvfdcname': '10',
    # NOTE(review): this key ends with a space, which looks like a typo for
    # 'from_encoding'. Kept as-is because it changes the bytes sent to the
    # server - confirm against a fresh capture before correcting it.
    'from_encoding ': '',
    'sub': '',
    'loginASR': '1',
    'loginASRSuc': '1',
    'allp': '',

    # Client environment reported to the server.
    'oslanguage': 'zh-CN',
    'sr': '1366*768',
    'osVer': 'windows|6.1',
    'naviVer': 'firefox|35',
}
# URL-encode the form fields into the body of the first login POST.
postData = urllib.urlencode(post)

# Cookie support: every response's cookies are stored in cookieJar and sent
# back on later requests made through the opener.
cookieJar = cookielib.LWPCookieJar()
cookie = urllib2.HTTPCookieProcessor(cookieJar)

# Proxy support, intended by the author to keep the local IP from being
# banned.
# NOTE(review): only the 'http' scheme is mapped here, while the login, token
# and st URLs used by this script are all https:// - so those requests
# presumably bypass the proxy. Confirm whether an 'https' entry was intended.
proxy = urllib2.ProxyHandler(dict(http=proxy_url))

# Build the opener from the handlers above and make it the process-wide
# default, so plain urllib2.urlopen() calls go through it.
opener = urllib2.build_opener(cookie, proxy, urllib2.HTTPHandler)
urllib2.install_opener(opener)
# First POST to the login endpoint, using the form data encoded so far.
# The returned page is what gets searched for the captcha image.
req = urllib2.Request(login_url, postData, headers)
taobao = urllib2.urlopen(req)
read = taobao.read()
staus = taobao.getcode()
if staus == 200:
    # Single-argument parenthesised print: same output under the Python 2
    # print statement, and also valid syntax on Python 3.
    print('獲取服務器請求成功!')

# Captcha step: pull the real image address out of the data-src attribute of
# the J_StandardCode_m <img> tag in the returned page.
pattern = re.compile(r'(?<=<img id="J_StandardCode_m" src="https://s.tbcdn.cn/apps/login/static/img/blank.gif" data-src=").[^<]*?(?=")')
checkCodeUrlList = re.findall(pattern, read)
if checkCodeUrlList:
    # Show the captcha in the user's browser and ask them to type it in.
    webbrowser.open_new_tab(checkCodeUrlList[0])
    print('到瀏覽器看驗證碼圖片')
    checkcode = raw_input('請輸入驗證碼:')
    # Re-submit the same form, this time with the captcha text filled in.
    post['TPL_checkcode'] = checkcode
    postData = urllib.urlencode(post)
    req = urllib2.Request(login_url, postData, headers)
    taobao = urllib2.urlopen(req)
    read_token = taobao.read()
else:
    # The page carries no captcha image (previously an IndexError on [0]).
    # There is nothing to ask the user, so the token lookup that follows
    # works on the first response instead.
    # NOTE(review): assumes a captcha-free response already contains the
    # login token - confirm against a real captcha-free login.
    read_token = read
def _first_or_fail(matches, what):
    """Return the first element of *matches*.

    *matches* is a list produced by ``re.findall``. An empty list means the
    server response did not contain the expected value, so a RuntimeError
    naming *what* is raised instead of a bare IndexError.
    """
    if not matches:
        raise RuntimeError('login flow stopped: %s not found in the response' % what)
    return matches[0]


# Step 1 - token -> st: read the hidden J_HToken field from the login
# response and request the st-issuing script with it.
pattern_token = re.compile(r'(?<=<input type="hidden" id="J_HToken" value=").[^<]*?(?=")')
token = re.findall(pattern_token, read_token)
token_url = 'https://passport.alipay.com/mini_apply_st.js?site=0&token=%s&callback=stCallback6' % _first_or_fail(token, 'J_HToken value')
req_token = urllib2.Request(token_url)
response_token = urllib2.urlopen(req_token).read()

# Step 2 - st -> redirect page: pull the "st" value out of the script
# response and pass it to vst.htm together with the account name.
pattern_st = re.compile(r'(?<="st":").[^<]*?(?=")')
st = re.findall(pattern_st, response_token)
# The account name is typed by the user and may contain non-ASCII or reserved
# characters, so it is percent-encoded before being placed in the query.
st_url = 'https://login.taobao.com/member/vst.htm?st=%s&TPL_username=%s' % (
    _first_or_fail(st, '"st" value'), urllib.quote(username))
req_st = urllib2.Request(st_url)
response_st = urllib2.urlopen(req_st).read()
print(response_st)

# Step 3 - follow the redirect: the vst.htm response assigns the target
# address to top.location; fetch that address and print the page.
pattern_end = re.compile(r'(?<=top.location = ").[^<]*?(?=";)')
end = re.findall(pattern_end, response_st)
end_url = _first_or_fail(end, 'top.location redirect address')
req_end = urllib2.Request(end_url)
response_end = urllib2.urlopen(req_end).read()
print(response_end)