爬蟲——簡單爬取雪球網並存入 MySQL 中

import json
import requests
import pymysql


class mysql_connect(object):
    """Thin wrapper around one pymysql connection and one cursor.

    The connection is opened in ``__init__`` with hard-coded local settings
    and both cursor and connection are closed in ``__del__``.
    """

    def __init__(self):
        """Open the database connection and create a cursor on it."""
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='123456', port=3306, database='py101')
        self.cursor = self.db.cursor()

    def mysql_do(self, sql, params=None):
        """Execute one modifying statement and commit it.

        Args:
            sql: SQL text. May contain ``%s`` placeholders when ``params``
                is supplied.
            params: Optional values for the placeholders. They are handed to
                the cursor, which does the quoting, so callers do not have
                to build SQL by string formatting. ``None`` (the default)
                executes ``sql`` as given.

        Raises:
            Exception: whatever ``execute``/``commit`` raised, re-raised
                after the pending transaction has been rolled back.
        """
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def __del__(self):
        """Close the cursor and the connection if they were ever created."""
        # __init__ may have failed before either attribute was assigned, so
        # look them up defensively instead of raising AttributeError here.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            cursor.close()
        db = getattr(self, 'db', None)
        if db is not None:
            db.close()

# URL template for the xueqiu.com public-timeline JSON endpoint.
# It contains two positional ``{}`` fields, filled in order: max_id, then
# count. Because the fields are positional, ``url.format`` must be called
# with positional arguments (keyword arguments raise IndexError).
url ='https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count={}&category=111'

def lei(num, max_id=None, count=None):
    """Fetch one page of the xueqiu.com public timeline and store it in MySQL.

    Each entry of the response's ``list`` array carries a JSON-encoded
    ``data`` string; its ``id``, ``title``, ``description`` and ``target``
    fields are inserted into the ``pyrhon10`` table.

    Args:
        num: Page counter. Nothing is fetched once it is greater than 3.
        max_id: Value for the ``max_id`` query parameter; ``None`` means -1.
        count: Value for the ``count`` query parameter; ``None`` means 15.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        ValueError, KeyError: if the response is not the expected JSON shape.
    """
    # Page limit: only the first three pages are ever requested.
    if num > 3:
        return

    if max_id is None:
        max_id = -1
    if count is None:
        count = 15
    # The template's placeholders are positional, so fill them positionally.
    page_url = url.format(max_id, count)

    # The original author hit an error when crawling without headers, so a
    # Cookie and a User-Agent are sent.
    # NOTE(review): the cookie is a hard-coded session value; presumably it
    # expires and should come from configuration - confirm.
    headers = {
        'Cookie' : '_ga=GA1.2.196337452.1534315041; _gid=GA1.2.1615699811.1534315041; device_id=16c92affeb2505503c12d5d10895cdbf; aliyungf_tc=AQAAAAe7aEnlqgoAUhVFeemx5vOqGZRe; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=691534336127228; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534315043,1534336127; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534336127; _gat_gtag_UA_16079156_4=1',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(page_url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    payload = json.loads(response.text)

    rows = []
    for item in payload['list']:
        # The 'data' field is itself a JSON document encoded as a string.
        post = json.loads(item['data'])
        rows.append((post['id'], post['title'], post['description'], post['target']))
    if not rows:
        return

    # Let the driver bind the values: scraped text may contain quotes, which
    # would break (or inject into) a statement built with str.format.
    sql = "insert into pyrhon10(ids,title,description,target) values(%s,%s,%s,%s);"
    w = mysql_connect()
    w.cursor.executemany(sql, rows)
    w.db.commit()
#
# Script entry point: when run directly, call lei() once with num=1,
# max_id=-1 and count=10.
if __name__ =='__main__':
    lei(num=1,max_id=-1,count=10)
    # mc = mysql_connect()
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章