import requests
import json
import time
import openpyxl
from pyquery import PyQuery
def get_headers() -> dict:
    """Return the HTTP request headers used for every scrape request.

    Returns:
        A new dict holding a single desktop-Chrome ``User-Agent`` entry.
    """
    return {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
def _strip_jsonp_wrapper(payload):
    """Return the JSON document found inside a ``callback(...)`` JSONP reply."""
    text = payload.strip()
    if text.startswith(('{', '[')):
        # Already bare JSON; nothing to unwrap.
        return text
    opening = text.find('(')
    closing = text.rfind(')')
    if opening == -1 or closing < opening:
        # Not JSONP-shaped: hand it back unchanged so json.loads reports it.
        return text
    return text[opening + 1:closing]


def _rich_text_to_plain(fragment):
    """Return the text content of an HTML fragment, or '' when it is empty."""
    if not fragment:
        return ''
    return PyQuery(fragment).text()


def _format_addresses(addresses):
    """Return str() of the per-address field lists, or '' when there are none."""
    if not addresses:
        return ''
    rows = [
        [
            entry['countryName'],
            entry['city'],
            entry['latitude'],
            entry['longitude'],
            entry['address1'],
            entry['address2'],
        ]
        for entry in addresses
    ]
    return str(rows)


def _format_card_types(card_types):
    """Return str() of the card type values, or '' when absent or blank."""
    # The first entry's value decides whether the list is reported at all.
    if not card_types or not card_types[0]['value']:
        return ''
    return str([card['value'] for card in card_types])


def _parse_offer(offer):
    """Flatten one offer dict into the 14-column row written to the sheet."""
    merchant = offer['merchantList'][0]
    logos = merchant['merchantImages']
    images = offer['imageList']
    intro = offer['offerCopy']
    merchant_terms = offer['merchantTerms']
    visa_terms = offer['visaTerms']
    return [
        merchant['merchant'],                                       # merchant name
        offer['offerTitle'],                                        # offer title
        logos[0]['fileLocation'] if logos else '',                  # merchant logo
        offer['offerShortDescription']['text'] or '',               # slogan
        offer['redemptionUrl'] or '',                               # redemption URL
        images[0]['fileLocation'] if images and images[0] else '',  # promo image
        _format_addresses(merchant['merchantAddress']),             # addresses
        intro['text'],                                              # introduction
        _rich_text_to_plain(intro['richText']),
        merchant_terms['text'] or '',                               # merchant terms
        _rich_text_to_plain(merchant_terms['richText']),
        visa_terms['text'] or '',                                   # Visa terms
        _rich_text_to_plain(visa_terms['richText']),
        _format_card_types(offer['cardTypeList']),                  # card types
    ]


def Visa_unionpay_equity_grab(start_url, headers):
    """Fetch one offers-listing page and flatten each offer into a row.

    Every row is built from scratch, so a field missing from one offer is
    reported as '' instead of repeating the previous offer's value.

    Args:
        start_url: URL of the listing page; the reply is expected to be
            JSONP (``callback({...})``) or plain JSON with an
            ``offersList`` array.
        headers: HTTP headers sent with the request.

    Returns:
        A list with one 14-item list per offer, in this order: merchant
        name, offer title, merchant logo URL, slogan, redemption URL,
        promo image URL, addresses (str of a list), introduction text,
        introduction rich text as plain text, merchant terms, merchant
        terms rich text as plain text, Visa terms, Visa terms rich text as
        plain text, card types (str of a list).

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        ValueError: if the reply body is not valid JSON/JSONP.
        KeyError, IndexError: if an offer lacks a mandatory field.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url=start_url, headers=headers, timeout=30)
    response.raise_for_status()
    offers = json.loads(_strip_jsonp_wrapper(response.text))['offersList']
    return [_parse_offer(offer) for offer in offers]
def create_table_data(all_data, output_path='vias数据.xlsx'):
    """Write every scraped row to the 'info' sheet of a new Excel workbook.

    Args:
        all_data: iterable of pages, each page being an iterable of rows
            (sequences of cell values).
        output_path: where the workbook is saved; defaults to the
            historical file name so existing callers are unaffected.
    """
    wb = openpyxl.Workbook()
    # NOTE: Workbook() also creates a default sheet, which stays empty; it
    # is kept so the layout of the generated file does not change.
    sheet = wb.create_sheet('info')
    for page_rows in all_data:
        for row in page_rows:
            sheet.append(row)
    wb.save(output_path)
def main(page_count=108, delay_seconds=5):
    """Scrape the offer listing page by page and save the rows to Excel.

    Args:
        page_count: number of listing pages to fetch, starting at page 1.
        delay_seconds: pause between consecutive page requests.
    """
    headers = get_headers()
    all_data = []
    for page in range(1, page_count + 1):
        start_url = (
            'https://offerswidget.visa.com/api/v1.1/jsonp/offersListing?'
            'userKey=85B1DA5A-B881-4044-A4CD-6F4E8C73D991&pageSize=6&pageNumber={i}'
            '&language=zh_cn&sortingOrder=desc&sortingField=programName,featured,lastModifiedDateTime&'
            'businessSegment=consumer&cardType=signature&sortProgramName=CN%20Premium,AP%20Premium,CN%20,HK%20,KR%20,TW%20,AP%20&'
            'jsonpCallback=jQuery34102814493958122888_1567388344910&format=json'
        ).format(i=page)
        print(f'抓取第{page}条')
        print(start_url)
        page_rows = Visa_unionpay_equity_grab(start_url=start_url, headers=headers)
        all_data.append(page_rows)
        # Pause between requests only; nothing follows the last page.
        if page < page_count:
            time.sleep(delay_seconds)
    create_table_data(all_data=all_data)
if __name__ == '__main__':
    # Start the scrape only when run as a script, not when imported.
    main()
# 爬虫-抓取visa权益-保存到表格内 (Crawler: scrape Visa card benefits and save them to a spreadsheet)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.