Python之爬蟲-- js加密(破解有道詞典加密的算法)

js加密

  • 有的反爬蟲策略採用js對需要傳輸的數據進行加密處理(通常是取md5值)
  • 經過加密,傳輸的就是密文,但是
  • 加密函數或者過程一定是在瀏覽器完成,也就是一定會把代碼(js代碼)暴露給使用者
  • 通過閱讀加密算法,就可以模擬出加密過程,從而達到破解
  • 過程參看案例1, 案例2
  • 過程比較囉嗦,筆記比較少,仔細觀察

案例1: 

'''
破解有道詞典
V1
'''

from urllib import request, parse


def youdao(key):
    """Replay one captured Youdao translate request and print the raw reply.

    This is the "V1" demonstration: ``salt`` and ``sign`` are fixed values
    that belong together with the word "boy", so the form always submits
    "boy" and ``key`` is not used. Computing a fresh salt/sign per word is
    what the V2 version of this script is for.

    :param key: accepted for interface compatibility; currently ignored.
    :return: None. The decoded response body is printed to stdout.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # salt and sign are hard-coded, so "i" has to stay the word they were
    # generated for.
    form = {
        "i": "boy",
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1523100789519",
        "sign": "b8a55a436686cd89873fa46514ccedbe",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # The request body must be bytes.
    data = parse.urlencode(form).encode()

    # No explicit Content-Length: urllib derives it from ``data``. The old
    # fixed value "200" was not tied to the real body length.
    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        # "Accept-Encoding" is deliberately not sent, so the body is not
        # compressed and can be decoded directly.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
        # placeholder left by the page this was copied from; substitute the
        # cookie from your own browser session if the request is rejected.
        "Cookie": "[email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # Previously fused into the User-Agent value; it is its own header.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)

# Script entry point: run the demo lookup only when executed directly.
if __name__ == "__main__":
    youdao("boy")

案例2: 

'''
V2
處理js加密代碼
'''

'''
通過查找,能找到js代碼中操作代碼
1. 這個是計算salt的公式 r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
2. sign: n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
md5的輸入是由四個部分依次拼接而成的一個字符串:第一個和第四個都是固定值的字符串,第三個(r)是所謂的salt,
第二個(t)就是輸入的要查找的單詞

'''

#得到鹽
def getSalt():
    """Build the ``salt`` value the same way the site's JavaScript does.

    JavaScript original:
        "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))

    :return: int, the current Unix time in milliseconds plus a random
        integer in 0..9. Callers convert it with ``str()`` where the
        string form is needed.
    """
    import random
    import time

    # (new Date).getTime() -> milliseconds since the epoch.
    now_ms = int(time.time() * 1000)

    # Math.random() is in [0, 1), so parseInt(10 * Math.random(), 10) is
    # 0..9. randrange(10) matches that; randint(0, 10) could also yield 10.
    jitter = random.randrange(10)

    return now_ms + jitter
#得到md5值
def getMD5(v):
    """Return the hexadecimal MD5 digest of the text ``v``.

    :param v: string to hash; it is encoded as UTF-8 first because the
        hash object only accepts bytes.
    :return: the digest as a hexadecimal string.
    """
    import hashlib

    payload = v.encode("utf-8")
    return hashlib.md5(payload).hexdigest()


def getSign(key, salt):
    """Compute the ``sign`` form value for a lookup.

    Mirrors the site's JavaScript:
        n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!")
    where ``t`` is the word being looked up and ``r`` is the salt.

    :param key: the word to translate (str).
    :param salt: the salt sent in the same request; converted with str().
    :return: MD5 hex digest of the concatenated string, via getMD5.
    """
    pieces = ("fanyideskweb", key, str(salt), "ebSeFb%=XZ%T[KZ)c(sy!")
    return getMD5("".join(pieces))

from urllib import request, parse


def youdao(key):
    """Look up ``key`` on Youdao translate and print the raw reply.

    "V2": unlike a replayed request, the ``salt`` and ``sign`` form fields
    are computed for each call with getSalt() and getSign().

    :param key: the word or text to translate (str).
    :return: None. The form dict and the decoded response body are printed
        to stdout.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # Generate the salt once: the "salt" field and the salt hashed into
    # "sign" must be the same value.
    salt = getSalt()

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    print(form)

    # The request body must be bytes.
    data = parse.urlencode(form).encode()

    # No explicit Content-Length: urllib derives it from ``data``.
    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        # "Accept-Encoding" is deliberately not sent, so the body is not
        # compressed and can be decoded directly.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
        # placeholder left by the page this was copied from; substitute the
        # cookie from your own browser session if the request is rejected.
        "Cookie": "[email protected];JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # Previously fused into the User-Agent value; it is its own header.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)

# Script entry point: run the demo lookup only when executed directly.
if __name__ == "__main__":
    youdao("boy")

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章