import json
import requests
import pymysql
class mysql_connect(object):
    """Thin wrapper around a single PyMySQL connection and cursor.

    The connection is opened on construction and released when the
    instance is garbage-collected (see ``__del__``).
    """

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py101'):
        """Open the connection and create a cursor.

        All parameters are forwarded to ``pymysql.connect``; the defaults
        reproduce the previously hard-coded local settings, so
        ``mysql_connect()`` behaves as before.
        """
        # Pre-assign so __del__ stays safe if connect() raises below.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database)
        self.cursor = self.db.cursor()

    def mysql_do(self, sql, params=None):
        """Execute one modifying statement and commit it.

        Args:
            sql: SQL text. May contain ``%s`` placeholders when ``params``
                is given.
            params: Optional sequence/mapping of values bound to the
                placeholders by the driver. Prefer this over formatting
                values into ``sql`` by hand.

        Raises:
            Whatever the driver raises; the transaction is rolled back
            first so a failed statement does not leave it half-open.
        """
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def __del__(self):
        """Close the cursor and the connection if they were ever opened."""
        # getattr guards cover instances whose __init__ never completed.
        cursor = getattr(self, 'cursor', None)
        if cursor is not None:
            cursor.close()
            self.cursor = None
        db = getattr(self, 'db', None)
        if db is not None:
            db.close()
            self.db = None
# Request URL template for the Xueqiu public-timeline endpoint. The two
# positional `{}` slots are filled, in order, with max_id and count.
url = (
    'https://xueqiu.com/v4/statuses/public_timeline_by_category.json'
    '?since_id=-1&max_id={}&count={}&category=111'
)
def _timeline_rows(items):
    """Turn timeline list entries into (id, title, description, target) tuples.

    Each entry carries its payload as a JSON string under the 'data' key.
    """
    rows = []
    for item in items:
        payload = json.loads(item['data'])
        rows.append((payload['id'], payload['title'],
                     payload['description'], payload['target']))
    return rows


def lei(num, max_id=None, count=None):
    """Fetch one page of the Xueqiu public timeline and store it in MySQL.

    Args:
        num: Page counter. Nothing is fetched once it exceeds 3.
        max_id: Value for the URL's ``max_id`` slot; ``None`` means -1.
        count: Value for the URL's ``count`` slot; ``None`` means 15.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx response.
        KeyError: If the response has no 'list' key or an entry lacks one
            of the expected fields.
        Database errors from the driver are re-raised after a rollback.
    """
    # Page limit: only pages 1-3 are ever requested.
    if num > 3:
        return

    if max_id is None:
        max_id = -1
    if count is None:
        count = 15
    # The template uses positional `{}` fields, so the values must be
    # passed positionally; keyword arguments would raise IndexError.
    request_url = url.format(max_id, count)

    # The site rejected bare requests, so a browser-like User-Agent and a
    # session cookie are sent.
    # NOTE(review): the cookie is a captured session value and has
    # presumably expired; it should be refreshed or loaded from config.
    headers = {
        'Cookie' : '_ga=GA1.2.196337452.1534315041; _gid=GA1.2.1615699811.1534315041; device_id=16c92affeb2505503c12d5d10895cdbf; aliyungf_tc=AQAAAAe7aEnlqgoAUhVFeemx5vOqGZRe; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=691534336127228; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534315043,1534336127; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534336127; _gat_gtag_UA_16079156_4=1',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(request_url, headers=headers, timeout=10)
    response.raise_for_status()
    rows = _timeline_rows(response.json()['list'])
    if not rows:
        return

    # Placeholders let the driver escape the scraped text; formatting the
    # values into the statement broke on any quote character and allowed
    # SQL injection.
    insert_sql = (
        "insert into pyrhon10(ids,title,description,target) "
        "values(%s,%s,%s,%s)"
    )
    w = mysql_connect()
    try:
        w.cursor.executemany(insert_sql, rows)
        w.db.commit()
    except Exception:
        w.db.rollback()
        raise
    # NOTE(review): fetching the following page (using the last entry's id
    # as the next max_id) was disabled in the original and is still not
    # performed here.
if __name__ == '__main__':
    # Script entry point: run a single fetch-and-store pass.
    lei(num=1, max_id=-1, count=10)
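# Crawler: a simple scrape of Xueqiu (xueqiu.com) with the results stored in MySQL.
# Post a comment
# All comments
# No one has commented yet. Want to be the first? Enter a comment in the box above and click publish.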