1. 網址
http://fund.jd.com/
2. 源碼參考
# @Time : 2020/3/18 19:17
# @Author : GKL
# FileName : spider.py
# Software : PyCharm
import re
import time
import json
import requests
import threading
# Shared page counter.  get_data() increments it and then requests the page
# it now names, so the first page handed out is ``offset + 1``.  Worker
# threads only touch it while holding _offset_lock.
offset = 1

# get_data() stops as soon as ``offset`` reaches this value.
_PAGE_LIMIT = 618
_PAGE_URL = 'http://fund.jd.com/getLeftTab?callback=jQuery18301965720891993532_1584529336998&sortType=1&page={}'
# Captures the JSON payload inside the JSONP wrapper ``jQuery...(<payload>)``.
_JSONP_PAYLOAD = re.compile(r'jQuery.*?\((.*)\)')
# Seconds to wait on the server before requests gives up on a page.
_REQUEST_TIMEOUT = 10
_offset_lock = threading.Lock()
_output_lock = threading.Lock()


def get_data(url):
    """Download ``url``, store its items, then keep walking later pages.

    Each response is expected to be a JSONP document whose payload is a JSON
    object with an ``items`` list.  Every item's ``itemName`` is printed and
    the item is appended to ``data.json`` as one JSON object per line.

    After a page is handled, the module-level ``offset`` counter is
    incremented and the page it names is fetched next.  The function returns
    once ``offset`` has reached ``_PAGE_LIMIT``; if the counter is already
    there on entry, nothing is fetched.

    The function may run in several threads at once: the counter and the
    output file are each protected by a lock.

    Args:
        url: Address of the first page this call should download.

    Raises:
        requests.RequestException: If a request fails or times out.
        ValueError, KeyError: If a payload is not valid JSON or has no
            ``items`` key.
    """
    global offset

    # Iterate instead of recursing once per page: the original recursion
    # added a stack frame for every page downloaded.
    while True:
        with _offset_lock:
            if offset >= _PAGE_LIMIT:
                return

        response = requests.get(url, timeout=_REQUEST_TIMEOUT).text
        match = _JSONP_PAYLOAD.search(response)
        if match is None:
            # e.g. an error page: nothing to parse, move on to the next page.
            print('skipped, no JSONP payload: {}'.format(url))
        else:
            items = json.loads(match.group(1))['items']
            # One open per page; the lock keeps lines from different threads
            # from interleaving in the file.
            with _output_lock, open('data.json', 'a', encoding='utf-8') as f:
                for data in items:
                    print(data['itemName'])
                    f.write(json.dumps(data, ensure_ascii=False) + '\n')

        # Increment and read in one critical section so no two threads are
        # handed the same page number.
        with _offset_lock:
            offset += 1
            next_page = offset
        url = _PAGE_URL.format(next_page)
if __name__ == '__main__':
    # One worker is started on each of these pages.
    seed_pages = range(1, 6)

    # get_data() increments the shared ``offset`` counter before building the
    # next URL.  Parking the counter on the last seeded page makes the first
    # page it hands out page 6; leaving it at 1 would hand out pages 2..5
    # again and write their items twice.
    offset = seed_pages[-1]

    workers = []
    for page in seed_pages:
        worker = threading.Thread(target=get_data, args=('http://fund.jd.com/getLeftTab?callback=jQuery18301965720891993532_1584529336998&sortType=1&page={}'.format(page), ))
        worker.start()
        workers.append(worker)

    # Wait explicitly so the script ends only after every worker is done.
    for worker in workers:
        worker.join()