模板4之統計淘寶商品銷量(正則表達式、JSON、遞歸)

import json
import re

import openpyxl
import requests

def open_url(keyword):
	"""Issue the search GET request and return the `requests` response.

	The URL, query parameters and user-agent below are template
	placeholders that must be filled in before the request can succeed.

	NOTE(review): `keyword` is accepted but not used anywhere in the
	request; presumably it is meant to become the 'q' parameter — confirm.
	"""
	return requests.get(
		"網址url",
		params={'q': "關鍵字參數", "sort": "關鍵字參數"},
		headers={"user-agent": "F12可獲得"},
	)

def get_space_end(level):
	"""Return the line prefix for a leaf key: `level` spaces, then '-'."""
	indent = ' ' * level
	return f"{indent}-"
	
def get_space_expand(level):
	"""Return the line prefix for a nested (dict) key: `level` spaces, then '+'."""
	indent = ' ' * level
	return f"{indent}+"
	
def find_keys(targets,level):
	"""Recursively print the key hierarchy of a nested dict.

	Each key is printed on its own line, prefixed by `level` spaces.
	Keys whose value is a dict are marked with '+' and their children are
	printed one level deeper; all other keys are marked with '-'.

	Args:
		targets: the dict whose keys are printed (keys must be strings,
			since they are concatenated onto the prefix).
		level: current nesting depth, used as the indentation width.
	"""
	for key, value in targets.items():
		if isinstance(value, dict):
			print(get_space_expand(level) + key)
			# Descend into the nested dict one indentation level deeper.
			find_keys(value, level + 1)
		else:
			print(get_space_end(level) + key)
			
# Extract all items from the search result page
def get_items(res):
	"""Parse the embedded `g_page_config` JSON and return the item records.

	Args:
		res: a response-like object whose `.text` contains a line of the
			form ``g_page_config = {...};``.

	Returns:
		A list of dicts, one per item, each holding the fields of interest
		(ID, title, link, price, sales, seller) in that order.

	Raises:
		ValueError: if no `g_page_config` assignment is found in the text.
		KeyError: if the JSON lacks the expected path or an item lacks one
			of the fields.
	"""
	fields = ('nid','title','detail_url','view_price','view_sales','nick')

	g_page_config = re.search(r"g_page_config = (.*?);\n",res.text)
	if g_page_config is None:
		# Fail with a clear message instead of an AttributeError on None.
		raise ValueError("g_page_config not found in response text")
	page_config_json = json.loads(g_page_config.group(1))
	# Debugging aid: find_keys(page_config_json, 1) prints the key hierarchy.
	page_items = page_config_json['mods']['itemlist']['data']['auction']

	# Keep only the fields we care about, in a fixed column order.
	return [
		{field: each_item[field] for field in fields}
		for each_item in page_items
	]

def results_toexcel(results, filename="results.xlsx"):
	"""Write the item records to an Excel workbook and save it.

	The first row is a header taken from the keys of the first record;
	every following row holds one record's values in that same column order.

	Args:
		results: list of dicts that all share the same keys.
		filename: path of the .xlsx file to write.

	Nothing is written when `results` is empty.
	"""
	if not results:
		# No header can be derived from an empty list; skip writing.
		return

	wb = openpyxl.Workbook()
	# NOTE(review): presumably only honoured by older openpyxl releases,
	# where it converts numeric-looking strings to numbers — confirm.
	wb.guess_types = True
	ws = wb.active

	# Worksheet.append needs a list/tuple; a dict is treated as a
	# column-keyed mapping, so pass explicit row lists instead.
	header = list(results[0].keys())
	ws.append(header)
	for each in results:
		ws.append([each[column] for column in header])

	# Persist the workbook; without this the data is discarded.
	wb.save(filename)

def main():
	"""Run the pipeline: fetch the page, extract the items, export to Excel."""
	# Template placeholder; it is handed to open_url as its `keyword` argument.
	url = "網站網址"
	res = open_url(url)
	results = get_items(res)
	results_toexcel(results)


# Run only when executed as a script, so importing this module does not
# trigger a network request.
if __name__ == "__main__":
	main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章