import json
import re

import openpyxl
import requests
def open_url(keyword):
    """Send the search request and return the HTTP response.

    The query parameters, target URL and User-Agent below are template
    placeholders that must be replaced with real values before use.

    Args:
        keyword: Search keyword supplied by the caller.
            NOTE(review): currently unused -- the ``q`` query parameter is a
            hard-coded placeholder; presumably it should carry this value.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (including when the timeout expires).
    """
    payload = {'q': "關鍵字參數", "sort": "關鍵字參數"}
    url = "網址url"
    headers = {
        "user-agent": "F12可獲得"
    }
    # Without an explicit timeout, requests waits indefinitely on a server
    # that never answers.
    res = requests.get(url, params=payload, headers=headers, timeout=10)
    return res
def get_space_end(level):
    """Return the prefix for a leaf key: ``level`` spaces followed by '-'."""
    indent = ' ' * level
    return f"{indent}-"
def get_space_expand(level):
    """Return the prefix for a nested key: ``level`` spaces followed by '+'."""
    padding = ' ' * level
    return "".join([padding, '+'])
def find_keys(targets, level):
    """Recursively print the key hierarchy of a nested dict.

    A key whose value is itself a dict is printed with the "expand" prefix
    and then descended into one level deeper; any other key is printed with
    the "end" prefix.

    Args:
        targets: The dict to walk. Keys are concatenated to the prefix
            string, so they are expected to be strings.
        level: Current nesting depth, forwarded to the prefix helpers.
    """
    for key, value in targets.items():
        if isinstance(value, dict):
            print(get_space_expand(level) + key)
            find_keys(value, level + 1)
        else:
            print(get_space_end(level) + key)
def get_items(res):
    """Extract all product records embedded in a search-result page.

    The page text is searched for an assignment of the form
    ``g_page_config = ...;`` followed by a newline. The right-hand side is
    parsed as JSON, and every entry under
    ``mods -> itemlist -> data -> auction`` is reduced to the fields of
    interest (ID, title, link, price, sales, seller).

    Args:
        res: Response-like object whose ``text`` attribute holds the page
            source.

    Returns:
        A list with one dict per item, with keys in the order ``nid``,
        ``title``, ``detail_url``, ``view_price``, ``view_sales``, ``nick``.

    Raises:
        ValueError: If no ``g_page_config`` assignment is found, or its
            value is not valid JSON (``json.JSONDecodeError``).
        KeyError: If the expected JSON path or a required item field is
            missing.
    """
    match = re.search(r"g_page_config = (.*?);\n", res.text)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError("g_page_config not found in response text")
    page_config_json = json.loads(match.group(1))
    # Debugging aid: find_keys(page_config_json, 1) prints the key hierarchy
    # of the parsed configuration.
    page_items = page_config_json['mods']['itemlist']['data']['auction']
    return [
        {
            'nid': item['nid'],
            'title': item['title'],
            # Previously this slot was declared but never filled (always
            # None); .get() keeps None for items that carry no link.
            'detail_url': item.get('detail_url'),
            'view_price': item['view_price'],
            'view_sales': item['view_sales'],
            'nick': item['nick'],
        }
        for item in page_items
    ]
def results_toexcel(results, filename="results.xlsx"):
    """Write the item records to an Excel workbook and save it.

    The keys of the first record become the header row; every record is then
    written as one row with its values in header order.

    Args:
        results: List of dicts sharing the same keys. May be empty, in which
            case an empty workbook is saved.
        filename: Path the workbook is saved to.

    Returns:
        The ``openpyxl.Workbook`` that was saved.
    """
    wb = openpyxl.Workbook()
    # NOTE(review): guess_types presumably only has an effect on older
    # openpyxl releases -- confirm; elsewhere this is a plain attribute set.
    wb.guess_types = True
    ws = wb.active
    if results:
        # Worksheet.append takes a list/tuple/range/generator for positional
        # cells; a dict is read as a column-letter mapping and a dict_keys
        # view is rejected, so both header and rows are passed as lists.
        header = list(results[0])
        ws.append(header)
        for record in results:
            ws.append([record.get(column) for column in header])
    # The workbook was previously built and then discarded without saving.
    wb.save(filename)
    return wb
def main():
    """Run the pipeline: fetch the page, extract the items, export them."""
    # Template placeholder -- replace before running.
    # NOTE(review): this value is handed to open_url's ``keyword`` parameter
    # although it reads like a URL; confirm which is intended.
    response = open_url("網站網址")
    results_toexcel(get_items(response))
# Run the pipeline only when executed as a script, so importing this module
# does not trigger a network request.
if __name__ == "__main__":
    main()
# 模板4之統計淘寶商品銷量(正則表達式、JSON、遞歸)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.