Python 爬金十數據

話不多說,直接上能用的代碼(運行前請先在 conn() 中填入自己的 MySQL 連接參數 host、user、password、database):

import requests
import  datetime
import pymssql
import pymysql
from requests.adapters import HTTPAdapter


def conn():
    """Open a MySQL connection through pymysql and return it.

    The connection parameters (host, user, password, database) are empty
    placeholders and have to be filled in before the script can run.
    A confirmation message is printed when the connection object is truthy.
    """
    connection = pymysql.connect(
        host='',
        user='',
        password='',
        database='',
        charset='utf8',
    )
    if connection:
        print("連接成功!")
    return connection


# NOTE: this rebinds the name `conn` from the factory function to the opened
# connection; the rest of the script uses `conn` as the connection object.
conn = conn()




## Scrape the flash-news list page by page and store type-0 items in MySQL.
url = "https://flash-api.jin10.com/get_flash_list"
header = {
    "x-app-id": "SO1EJGmNgCtmpcPF",
    "x-version": "1.0.0",
}
queryParam = {
    "max_time": "2020-05-07 14:17:02",
    "channel": "-8200",
}

# A single shared session, so the retry adapters mounted here really apply to
# every request (the adapters have no effect on the module-level requests.get).
session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=3))
session.mount('https://', HTTPAdapter(max_retries=3))


def _fetch_page(params):
    """Request one page and return the list stored under the 'data' key.

    A null 'data' value is normalised to an empty list so callers can always
    take len() of the result. Network, JSON and missing-key errors propagate.
    """
    response = session.get(url, params=params, headers=header, timeout=5)
    return response.json()['data'] or []


def _store_item(item):
    """Insert one flash item into jin10_data; items whose type is not 0 are skipped.

    'content' is required; 'pic' and 'title' are optional and stored as NULL
    when absent (assumes item['data'] is a dict -- TODO confirm against the API).
    Any lookup, parsing or database error propagates to the caller.
    """
    item_id = item['id']
    time_text = item['time']
    create_time = datetime.datetime.strptime(time_text, "%Y-%m-%d %H:%M:%S")
    item_type = item['type']
    if item_type != 0:
        return

    payload = item['data']
    content = payload['content']
    # Look the optional keys up explicitly instead of guessing which keys
    # exist from the number of entries in the payload.
    pic = payload.get('pic')
    title = payload.get('title')
    print(item_id, time_text, item_type, pic, content, title)

    sql = "insert into  jin10_data(id,create_time,type,pic,content,title) values(%s,%s,%s,%s,%s,%s)"
    # The context manager closes the cursor even when execute() raises.
    with conn.cursor() as cursor:
        cursor.execute(sql, (item_id, create_time, item_type, pic, content, title))
    conn.commit()


# Fetch and insert in a loop; the loop ends when a page comes back empty,
# when a request fails, or when the pagination cursor stops advancing.
totalCount = 0
Data = _fetch_page(queryParam)
length = len(Data)
while length > 0:
    for item in Data:
        try:
            _store_item(item)
        except Exception as e:
            # Best effort: report the failing item and carry on with the next.
            print(e)

    totalCount += length

    # The time of the last item on this page is the cursor for the next page.
    next_max_time = Data[-1]['time']
    if next_max_time == queryParam['max_time']:
        # Requesting the same cursor again could return the same page forever.
        print('max_time did not advance, stopping at', next_max_time)
        break
    queryParam['max_time'] = next_max_time
    print('next queryParam is', queryParam['max_time'])

    # Request the next page. On failure stop instead of looping again over
    # the page that was already processed.
    try:
        Data = _fetch_page(queryParam)
    except Exception as e:
        print(e)
        break
    length = len(Data)


print('all ok,totalCount is:', totalCount)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章