百度翻譯爬蟲(案例練習:POST 請求)

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import urllib.request
import urllib.parse
import json
import ssl

# Endpoint discovered with a packet capture tool while using the site in a browser.
post_url = "https://fanyi.baidu.com/v2transapi"

# Request headers copied from the captured browser request so that the call
# looks like it comes from the web page itself.
# The captured Accept-Encoding ('gzip, deflate, br'), Content-Length (103) and
# Content-Type (application/x-www-form-urlencoded; charset=UTF-8) headers are
# intentionally not set here.
# NOTE(review): Accept-Encoding is presumably omitted so the body arrives
# uncompressed, since the script decodes the raw bytes directly - confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    # Session cookie taken verbatim from the captured request.
    "Cookie": "BAIDUID=5825D3624FFD2FF79AD102CCE35CF40D:FG=1; PSTM=1532620217; BIDUPSID=4CA0F78CD45B4F46C5E80CFE8C9EB708; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=26524_1434_21122_26350; BDSFRCVID=TWtsJeCCxG3Z5YJ7d-o458x4OlRaCS_xAsmV3J; H_BDCLCKID_SF=tRk8oI-XJCvbfP0k247Hhn8thmT22-uS-K5bSCD-tCDahIPxDIj5MJ08Hq3EhtvtKD5KW-bVfMFbV-O_bfbT2MbyjN7wbjbbHm_JhnQPbb5DMU-wLPnMhU3BeabZqjDetnFJoK85f-3bfTruM-r2Mt6H-UnLq5vW057Z0lOnMp05fI_mDTJfqfu-btPj5qjH0HnlV4QKLtDVJKO_e6t5D5J0jN-s-bbfHDJK0b7aHJOoDDv3DIc5y4LdLp7xJhItymLf0T67MxnUbDPRhh5HLptm2-Qe2lKeWJLfoKtbJC05bP365ITS-t-e5eT22-usQNvJQhcH0hOWsIOLjqrkjP3DhN8tqnkJBKJN3lORX-nNqn6KDUC0-nDSHHAet63P; delPer=0; PSINO=1; locale=zh; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1540903147,1540903157,1540903171,1540903195; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1540903244",
    "Host": "fanyi.baidu.com",
    "Origin": "https://fanyi.baidu.com",
    "Referer": "https://fanyi.baidu.com/",
    "X-Requested-With": "XMLHttpRequest",
}

# Ask for the text to translate, then for the "sign" value that the packet
# capture tool shows in the Form Data of the matching browser request.
key = input('請輸入翻譯的漢字:')
num = input('請輸入抓包工具中FormData裏的"sign"對應的value:')

# Baidu Translate is strict about the submitted form, so the payload spells out
# every field seen in the captured Form Data.
form_data = dict([
    ('from', 'en'),
    ('to', 'zh'),
    ('query', key),
    ('simple_means_flag', '3'),
    # ``sign`` changes whenever ``key`` changes, so it has to be looked up in
    # the packet capture tool for each new query.
    ('sign', num),
    ('token', 'a4c67ddbb80e05a08756492fe9f227a9'),
])

# The form payload must be URL-encoded and sent as bytes for a POST request.
form_data = urllib.parse.urlencode(form_data).encode('utf-8')
# Build the request object; passing ``data`` makes urllib send it as a POST.
request_headers = urllib.request.Request(url=post_url, data=form_data, headers=headers)
# NOTE: certificate verification stays disabled, as in the original script, but
# only for this single call rather than by overwriting the private,
# process-wide ``ssl._create_default_https_context`` hook.
unverified_context = ssl._create_unverified_context()
# Send the disguised request; the ``with`` block closes the connection as soon
# as the body has been read.
with urllib.request.urlopen(request_headers, context=unverified_context) as response:
    # Decode the raw body as UTF-8 text. Non-ASCII characters may still appear
    # as \uXXXX escapes at this point, which is why it is re-serialised below.
    content = response.read().decode('utf-8')
print(type(content))

# Parse the JSON text into Python objects. ``json.loads`` no longer accepts an
# ``encoding`` keyword (removed in Python 3.9); ``content`` is already a str.
py_obj = json.loads(content)
# Serialise again without ASCII escaping so the translated text is readable.
string = json.dumps(py_obj, ensure_ascii=False)
print(string)

# Save the readable JSON next to the script.
with open('./baudufanyi.json', mode='w', encoding='utf-8') as fp:
    fp.write(string)


'''
如果你代碼沒有運行成功,可以嘗試以下變量:
:key:apple
:num:704513.926512
'''

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章