# 爬蟲-抓取Visa權益-保存到表格內 (Crawler: scrape Visa card offers and save them into a spreadsheet)


import requests
import json
import time
import openpyxl
from pyquery import PyQuery

def get_headers():
    """Build the HTTP headers sent with each request.

    Returns:
        dict: A newly created dict holding a single ``User-Agent`` entry
        (a desktop Chrome 69 on macOS identification string).
    """
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    return {"User-Agent": user_agent}

def _strip_jsonp(body):
    """Return the JSON text wrapped inside a ``callback(...)`` JSONP response.

    The payload is located by the first ``(`` and the last ``)`` instead of
    fixed character offsets, so it keeps working if the callback name or the
    trailing characters of the response change.

    Raises:
        ValueError: If *body* contains no ``(...)`` wrapper.
    """
    start = body.find('(')
    end = body.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('response is not a JSONP payload')
    return body[start + 1:end]


def _rich_text_to_plain(rich_text):
    """Return the visible text of an HTML fragment, or '' when it is empty."""
    return PyQuery(rich_text).text() if rich_text else ''


def _offer_to_row(offer):
    """Convert one offer dict from the listing response into a 14-cell row.

    Every cell is computed from *offer* alone, so a field missing on this
    offer becomes '' rather than a value left over from another offer.
    """
    merchant = offer['merchantList'][0]

    merchant_images = merchant['merchantImages']
    merchant_logo = merchant_images[0]['fileLocation'] if merchant_images else ''

    # Guard the list itself before indexing: an empty imageList must yield ''.
    image_list = offer['imageList']
    publicity_figure = (
        image_list[0]['fileLocation'] if image_list and image_list[0] else ''
    )

    # Each address becomes [country, city, latitude, longitude, line1, line2].
    addresses = [
        [
            address['countryName'],
            address['city'],
            address['latitude'],
            address['longitude'],
            address['address1'],
            address['address2'],
        ]
        for address in merchant['merchantAddress'] or []
    ]
    address_datas = str(addresses) if addresses else ''

    # Card types are exported only when the first entry has a non-empty value.
    card_types = offer['cardTypeList']
    if card_types and card_types[0]['value']:
        card_data = str([card_type['value'] for card_type in card_types])
    else:
        card_data = ''

    offer_copy = offer['offerCopy']
    merchant_terms = offer['merchantTerms']
    visa_terms = offer['visaTerms']

    return [
        merchant['merchant'],                          # merchant name
        offer['offerTitle'],                           # offer title
        merchant_logo,                                 # merchant logo URL
        offer['offerShortDescription']['text'] or '',  # slogan
        offer['redemptionUrl'] or '',                  # redemption URL
        publicity_figure,                              # promotional image URL
        address_datas,                                 # stringified address list
        offer_copy['text'],                            # merchant introduction
        _rich_text_to_plain(offer_copy['richText']),
        merchant_terms['text'] or '',                  # merchant terms
        _rich_text_to_plain(merchant_terms['richText']),
        visa_terms['text'] or '',                      # Visa terms
        _rich_text_to_plain(visa_terms['richText']),
        card_data,                                     # stringified card types
    ]


def Visa_unionpay_equity_grab(start_url, headers):
    """Download one JSONP offers-listing page and flatten it into table rows.

    Args:
        start_url: URL of the listing page to fetch.
        headers: HTTP headers to send with the request.

    Returns:
        list: One 14-element list per entry of the response's ``offersList``,
        in this order: merchant name, offer title, merchant logo URL, slogan,
        redemption URL, promotional image URL, stringified addresses,
        introduction text, introduction rich text (as plain text), merchant
        terms, merchant terms rich text, Visa terms, Visa terms rich text,
        stringified card types.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        ValueError: If the body is not a JSONP wrapper around valid JSON.
        KeyError: If an expected field is missing from the response.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url=start_url, headers=headers, timeout=30)
    response.raise_for_status()
    offers = json.loads(_strip_jsonp(response.text))['offersList']
    return [_offer_to_row(offer) for offer in offers]

def create_table_data(all_data):
    """Write all scraped rows into an 'info' worksheet and save the workbook.

    Args:
        all_data: Iterable of pages, where each page is an iterable of rows
            and each row is a sequence of cell values.

    The workbook is saved as ``vias數據.xlsx`` (a relative path).
    """
    workbook = openpyxl.Workbook()
    info_sheet = workbook.create_sheet('info')

    # Flatten pages -> rows lazily, preserving the original page/row order.
    rows = (row for page in all_data for row in page)
    for row in rows:
        info_sheet.append(row)

    workbook.save('vias數據.xlsx')
def main():
    """Scrape listing pages 1 through 108 and export every row to a spreadsheet.

    For each page the URL is printed, the page is fetched and parsed, and the
    loop then pauses five seconds before the next request. The collected
    per-page results are written out once, after the last page.
    """
    url_template = (
        'https://offerswidget.visa.com/api/v1.1/jsonp/offersListing?'
        'userKey=85B1DA5A-B881-4044-A4CD-6F4E8C73D991&pageSize=6&pageNumber={i}'
        '&language=zh_cn&sortingOrder=desc&sortingField=programName,featured,lastModifiedDateTime&'
        'businessSegment=consumer&cardType=signature&sortProgramName=CN%20Premium,AP%20Premium,CN%20,HK%20,KR%20,TW%20,AP%20&'
        'jsonpCallback=jQuery34102814493958122888_1567388344910&format=json'
    )
    request_headers = get_headers()
    pages = []
    for page_number in range(1, 109):
        start_url = url_template.format(i=page_number)
        print(f'抓取第{page_number}條')
        print(start_url)
        page_rows = Visa_unionpay_equity_grab(start_url=start_url, headers=request_headers)
        pages.append(page_rows)
        # Pause between requests.
        time.sleep(5)
    create_table_data(all_data=pages)
# Start the crawl only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()

# (Web-page residue from the original blog post; not part of the script.)
# 發表評論
# 所有評論
# 還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
# 相關文章