爬蟲爬取國內肯德基門店信息

預期效果

爬取每個城市(300+城市)所有的肯德基門店信息,保存到 CSV 文件 kfc_store.csv 中(可用 Excel 打開),效果如下:
在這裏插入圖片描述
在這裏插入圖片描述

代碼實現

import requests
import csv
import json

def getData(page,key):
    """Fetch one page of KFC store search results and append them to kfc_store.csv.

    Posts a keyword search to the KFC store-list endpoint, prints the raw JSON
    response and every store row, and appends one CSV row per store in the
    form ``[key, storeName, cityName, addressDetail, pro]``.

    Args:
        page: Page index sent as ``pageIndex`` (the caller starts at 1).
        key: Search keyword (a city name); also written as the first CSV column.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        KeyError: If a store entry lacks one of the expected fields.
    """
    url='http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
    data={
        'cname':'',
        'pid':'',
        'keyword': key,
        'pageIndex': page,
        'pageSize': '10',
    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    res=requests.post(url=url,data=data,headers=headers,timeout=10)
    res.raise_for_status()
    json_data=res.json()
    print(json_data)

    # 'Table1' holds the store entries; treat a missing/empty list as "no rows".
    stores=json_data.get('Table1') or []
    if not stores:
        return

    # Open the file once per page instead of once per row. The explicit
    # encoding avoids UnicodeEncodeError on platforms whose default encoding
    # cannot represent Chinese text; the BOM variant lets Excel detect UTF-8.
    with open('kfc_store.csv', mode='a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f, delimiter=',')
        for store in stores:
            row=[store['storeName'], store['cityName'], store['addressDetail'], store['pro']]
            print(row)
            writer.writerow([key]+row)


def page_count(total, page_size=10):
    """Return how many pages are needed to list ``total`` rows.

    Uses ceiling division so that an exact multiple of ``page_size`` does not
    produce a trailing empty page, and a non-positive total yields zero pages.
    """
    if total <= 0:
        return 0
    return (total + page_size - 1) // page_size


if __name__=='__main__':
    # The nationwide city list was taken from lagou.com: the response of the
    # endpoint below was copied from the browser and pasted into ci.txt.
    # URL='https://www.lagou.com/lbs/getAllCitySearchLabels.json'
    with open('ci.txt', encoding='utf-8') as f2:
        # Only the first line is parsed, so the JSON must be on a single line.
        ts = f2.readline()
    print(ts)
    json_ts = json.loads(ts)
    cs = json_ts['content']['data']['allCitySearchLabels']
    # cs maps a group label to a list of city dicts; flatten to city names.
    place = [city['name'] for group in cs.values() for city in group]
    print(f'一共有{len(place)}個城市')
    print(place)

    # For a quick trial run, override the list, e.g.:
    # place=['北京','上海','廣州','深圳']
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
    for key in place:
        # Probe page 1 only to learn the total row count for this keyword.
        data = {
            'cname': '',
            'pid': '',
            'keyword': key,
            'pageIndex': 1,
            'pageSize': '10'
        }
        res = requests.post(url=url, data=data, headers=headers, timeout=10)
        res.raise_for_status()
        json_data = res.json()
        count = json_data['Table'][0]['rowcount']
        # Pages are 1-based; page_count avoids requesting an extra empty page.
        for n in range(1, page_count(count) + 1):
            print(f'===========正在獲取{key}數據===========')
            print('===========正在獲取第{}頁數據==========='.format(n))
            getData(n,key)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章