以前我總是想着自己寫代碼,對那些只會複製、套用別人代碼的人嗤之以鼻。後來有幾次查到現成的代碼,直接複製粘貼拿來用,握草,那感覺:
import requests
import json
import time
import re
import pymssql
import pymysql
import datetime
import random
from requests.adapters import HTTPAdapter
def conn():
    """Open a MySQL connection via pymysql and return it.

    The connection parameters (host, user, password, database) are
    hard-coded as empty strings here and must be filled in before use.
    A confirmation message is printed when the connection object is truthy.

    Returns:
        The object returned by ``pymysql.connect``.
    """
    settings = dict(host='', user='', password='', database='', charset='utf8')
    db = pymysql.connect(**settings)
    if db:
        print("連接成功!")
    return db
def get_json_data(base_url,headers):
    """Fetch one page of the feed and return its list of entries.

    The request is retried until it succeeds: on any error the error is
    printed, the function sleeps 5 seconds, and the request is attempted
    again. A timestamp is printed before every attempt.

    Args:
        base_url: Full URL of the feed endpoint to request.
        headers: HTTP headers to send with the request.

    Returns:
        The value found at ``result -> data -> feed -> list`` in the decoded
        response.
    """
    # Use the session for the request so the retry adapters mounted on it
    # actually take effect (a bare ``requests.get`` ignores them), and close
    # the session when done.
    with requests.Session() as s:
        s.mount('http://', HTTPAdapter(max_retries=3))
        s.mount('https://', HTTPAdapter(max_retries=3))
        # Retry in a loop rather than recursively: the result of a successful
        # retry is returned to the caller and the stack does not grow.
        while True:
            print(time.strftime('%Y-%m-%d %H:%M:%S'))
            try:
                response = s.get(base_url, timeout=5, headers=headers)
                # Drop the first 12 and last 14 characters around the payload
                # (presumably the JSONP wrapper produced by
                # ``callback=jQuery0`` -- TODO confirm against a live response).
                html_cl = response.text[12:-14]
                # NOTE: the payload comes from a remote server, so it is
                # decoded with json.loads instead of being passed to eval(),
                # which would execute arbitrary expressions from the response.
                results = json.loads(html_cl)
                data = results['result']['data']['feed']['list']
            except Exception as e:
                print('get_json_str未收錄錯誤類型,請檢查網絡通斷,錯誤位置:',e)
                time.sleep(5)
            else:
                return data
# Open the database connection used for the whole crawl.
# NOTE(review): this rebinds the module-level name ``conn`` from the factory
# function to the connection object, so ``conn()`` cannot be called again
# afterwards. The name is kept for backward compatibility.
conn = conn()

# The request headers are the same for every page, so build them once.
referer_url = "http://finance.sina.com.cn/7x24/?tag=0"
cookie = "UOR=www.baidu.com,tech.sina.com.cn,; SINAGLOBAL=114.84.181.236_1579684610.152568; UM_distinctid=16fcc8a8b704c8-0a1d2def9ca4c6-33365a06-15f900-16fcc8a8b718f1; lxlrttp=1578733570; gr_user_id=2736e487-ee25-4d52-a1eb-c232ac3d58d6; grwng_uid=d762fe92-912b-4ea8-9a24-127a43143ebf; __gads=ID=d79f786106eb99a1:T=1582016329:S=ALNI_MZoErH_0nNZiM3D4E36pqMrbHHOZA; Apache=114.84.181.236_1582267433.457262; ULV=1582626620968:6:4:1:114.84.181.236_1582267433.457262:1582164462661; ZHIBO-SINA-COM-CN=; SUB=_2AkMpBPEzf8NxqwJRmfoWz2_ga4R2zQzEieKfWADoJRMyHRl-yD92qm05tRB6AoTf3EaJ7Bg2UU4l1CDZXUBCzEuJv3mP; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WhqhhGsPWdPjar0R99pFT8s"
headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Cookie": cookie,
    "Host": "zhibo.sina.com.cn",
    "Referer": referer_url,
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
}

page = 0
# NOTE(review): the loop has no exit condition; it keeps requesting
# ever-higher page numbers until the process is interrupted.
while True:
    try:
        page += 1
        print(page)
        base_url = "http://zhibo.sina.com.cn/api/zhibo/feed?callback=jQuery0&page=%s" % page + "&page_size=20&zhibo_id=152&tag_id=0&dire=f&dpc=1&pagesize=20&_=0%20Request%20Method:GET%27"
        data = get_json_data(base_url, headers)
        for item in data:
            item_id = item['id']
            create_time = item['create_time']
            rich_text = item['rich_text']
            new_time = datetime.datetime.strptime(create_time, "%Y-%m-%d %H:%M:%S")
            # The context manager closes the cursor on every path, including
            # "row already stored" and failed inserts.
            with conn.cursor() as cursor:
                # Ask the database whether this id is already stored instead
                # of fetching the whole id column for every item.
                cursor.execute("SELECT 1 FROM sina_data WHERE id = %s LIMIT 1", (item_id,))
                if cursor.fetchone() is not None:
                    continue
                print(item_id, create_time, rich_text)
                try:
                    sql = "insert into sina_data(id,create_time,rich_text) values(%s,%s,%s)"
                    cursor.execute(sql, (item_id, new_time, rich_text))
                    conn.commit()
                except Exception as e:
                    # A failed insert is reported and the crawl moves on to
                    # the next item.
                    print(e)
        # Pause 1-3 seconds between pages.
        time.sleep(random.randint(1, 3))
    except Exception as e:
        # Any other failure (bad timestamp, database error, unusable page
        # data) is reported and the crawl moves on to the next page.
        print(e)
        continue