百度翻译爬虫(案例练习:POST 请求)

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import urllib.request
import urllib.parse
import json
import ssl

# Translation endpoint, found by inspecting the site's traffic with a
# packet-capture tool.
post_url = 'https://fanyi.baidu.com/v2transapi'
# Request headers sent with the POST so that it resembles the request made by
# a real browser session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    'Accept': '*/*',
    # 'Accept-Encoding' is deliberately not sent -- presumably so the response
    # body arrives uncompressed, since this script decodes it without any
    # decompression step (TODO confirm).
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    # Content-Length and Content-Type from the captured request are left out
    # of this dict.
    # Content-Length: 103,
    # Content-Type: application/x-www-form-urlencoded; charset=UTF-8
    # NOTE(review): the cookie looks like it was copied from one captured
    # browser session; it presumably has to be refreshed once it expires --
    # verify before relying on it.
    'Cookie': 'BAIDUID=5825D3624FFD2FF79AD102CCE35CF40D:FG=1; PSTM=1532620217; BIDUPSID=4CA0F78CD45B4F46C5E80CFE8C9EB708; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=26524_1434_21122_26350; BDSFRCVID=TWtsJeCCxG3Z5YJ7d-o458x4OlRaCS_xAsmV3J; H_BDCLCKID_SF=tRk8oI-XJCvbfP0k247Hhn8thmT22-uS-K5bSCD-tCDahIPxDIj5MJ08Hq3EhtvtKD5KW-bVfMFbV-O_bfbT2MbyjN7wbjbbHm_JhnQPbb5DMU-wLPnMhU3BeabZqjDetnFJoK85f-3bfTruM-r2Mt6H-UnLq5vW057Z0lOnMp05fI_mDTJfqfu-btPj5qjH0HnlV4QKLtDVJKO_e6t5D5J0jN-s-bbfHDJK0b7aHJOoDDv3DIc5y4LdLp7xJhItymLf0T67MxnUbDPRhh5HLptm2-Qe2lKeWJLfoKtbJC05bP365ITS-t-e5eT22-usQNvJQhcH0hOWsIOLjqrkjP3DhN8tqnkJBKJN3lORX-nNqn6KDUC0-nDSHHAet63P; delPer=0; PSINO=1; locale=zh; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1540903147,1540903157,1540903171,1540903195; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1540903244',
    'Host': 'fanyi.baidu.com',
    'Origin': 'https://fanyi.baidu.com',
    'Referer': 'https://fanyi.baidu.com/',
    'X-Requested-With': 'XMLHttpRequest'
}

# Ask the user for the text to translate and for the "sign" value that goes
# with it. Per the original author's note, "sign" changes whenever the query
# text changes, so it has to be copied from the Form Data panel of a
# packet-capture tool for each new query.
key = input('请输入翻译的汉字:')
num = input('请输入抓包工具中FormData里的"sign"对应的value:')

# Per the original author's note, Baidu Translate is strict about the form, so
# every field seen in the captured request is supplied. The dict is built from
# ordered pairs so the fields keep the order used in the captured request.
form_data = dict([
    ('from', 'en'),
    ('to', 'zh'),
    ('query', key),
    ('simple_means_flag', '3'),
    ('sign', num),
    ('token', 'a4c67ddbb80e05a08756492fe9f227a9'),
])

# The form fields must be URL-encoded and turned into bytes before they can be
# sent as the body of the POST request.
form_data = urllib.parse.urlencode(form_data).encode('utf-8')
# Build the request object carrying the URL, the encoded body and the
# browser-like headers.
request_headers = urllib.request.Request(url=post_url, data=form_data, headers=headers)
# Disable HTTPS certificate verification for every urllib request made by this
# process.
# WARNING(security): this removes protection against man-in-the-middle
# attacks. It is kept because the original script relies on it; do not copy
# this line into production code.
ssl._create_default_https_context = ssl._create_unverified_context
# Send the request and read the whole body. The `with` block makes sure the
# underlying connection is closed even if reading or decoding fails.
with urllib.request.urlopen(request_headers) as response:
    # The decoded text is JSON in which non-ASCII characters are still written
    # as \uXXXX escapes, so it is re-serialised below to make it readable.
    content = response.read().decode('utf-8')
print(type(content))

# NOTE: the original author mentions response.read().decode('unicode-escape')
# as an alternative to the loads/dumps round trip below; the JSON round trip
# is used here.

# Parse the JSON text into Python objects. json.loads() must be called without
# an `encoding` argument: that keyword was removed in Python 3.9 and passing
# it raises TypeError.
py_obj = json.loads(content)
# Serialise back to text with ensure_ascii=False so non-ASCII characters are
# written as themselves instead of \uXXXX escapes.
string = json.dumps(py_obj, ensure_ascii=False)
print(string)

# Save the readable JSON next to the script.
with open('./baudufanyi.json', mode='w', encoding='utf-8') as fp:
    fp.write(string)


'''
如果你的代码没有运行成功,可以尝试以下输入值:
:key:appel
:num:704513.926512
'''

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章