# -*- coding: utf-8 -*-
"""
Created on Fri Dec 6 09:33:51 2019
@author: 18352
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
import time
import json
import os
import csv
# Start timestamp; the elapsed run time is computed from it and printed at the
# end of the script.
s = time.time()
#class SaveCSV(object):
# def save(self, keyword_list,path, item):
# """
# 保存csv方法
# :param keyword_list: 保存文件的字段或者說是表頭
# :param path: 保存文件路徑和名字
# :param item: 要保存的字典對象
# :return:
# """
# try:
# # 第一次打開文件時,第一行寫入表頭
# if not os.path.exists(path):
# with open(path, "w", newline='', encoding='gbk') as csvfile: # newline='' 去除空白行
# writer = csv.DictWriter(csvfile, fieldnames=keyword_list) # 寫字典的方法
# writer.writeheader() # 寫表頭的方法
#
# # 接下來追加寫入內容
# with open(path, "a", newline='', encoding='gbk') as csvfile: # newline='' 一定要寫,否則寫入數據有空白行
# writer = csv.DictWriter(csvfile, fieldnames=keyword_list)
# writer.writerow(item) # 按行寫入數據
# print("^_^ write success")
#
# except Exception as e:
# print("write error==>", e)
# # 記錄錯誤數據
# with open("error.txt", "w") as f:
# f.write(json.dumps(item) + ",\n")
# pass
#
#item_list = [
# "title",
# "prcie",
# "sales",
# "評價"
# ]
#t0 = time.strftime("%Y%m%d%H%M")
#path = ".\爬蟲結果\銷量排行&{}.csv".format(t0)
# Launch Chrome and open the Tmall supermarket landing page.
browser = webdriver.Chrome()
#browser = webdriver.PhantomJS()
# Shared explicit wait: every wait.until(...) call gives up after 10 seconds.
wait = WebDriverWait(browser, 10)
browser.get('https://chaoshi.tmall.com')
# Obtain the cookies first. The fixed 30-second pause presumably leaves time
# to log in by hand in the opened window -- TODO confirm.
time.sleep(30)
#print(browser.get_cookies())
# NOTE(review): the assignment on this line had been reduced to a bare "="
# (a syntax error that also left `cookies` undefined). Reading the cookies
# back from the live session matches the commented-out call above; substitute
# the intended cookie list here if it was something else.
cookies = browser.get_cookies()
for cookie in cookies:
    # Drop 'expiry' before re-adding the cookie; pop() with a default is a
    # no-op for cookies that do not carry the key.
    cookie.pop('expiry', None)
    ## print(cookie)
    browser.add_cookie(cookie)
#
# Disabled keyword search, kept for reference:
#input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#mq')))
#input.send_keys('大米')
##
#submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#mallSearch > form > fieldset > div > button')))
#submit.click()
# Handle of the listing window, used to tell it apart from detail windows.
tmall = browser.current_window_handle
# CSS selectors of the detail-page fields, in the order they are printed.
_DETAIL_FIELD_SELECTORS = (
    # product title
    '#J_DetailMeta > div.tm-clear > div.tb-property > div > div.tb-detail-hd > h1',
    # promotional price
    '#J_PromoPrice > dd > div > span',
    # count shown in the "tm-ind-sellCount" item
    '#J_DetailMeta > div.tm-clear > div.tb-property > div > ul > li.tm-ind-item.tm-ind-sellCount > div > span.tm-count',
    # count shown under "J_ItemRates"
    '#J_ItemRates > div > span.tm-count',
)


def _print_detail_fields():
    """Print the text of each detail-page field in the current window.

    Each field is waited for and printed before the next one is looked up.

    Raises:
        TimeoutException: if a field does not become clickable within the
            timeout of the shared ``wait``.
    """
    for selector in _DETAIL_FIELD_SELECTORS:
        field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        print(field.text)


def list_click(i):
    """Open the detail page of one product in the list and print its fields.

    The product is the ``(i + 1)``-th ``li`` of ``#J_ProductList`` in the
    listing window ``tmall``. Its image link is clicked, the window that is
    not the listing window is selected, the detail fields are printed, and
    that window is closed again. The browser is always switched back to the
    listing window before returning.

    Timeouts are treated as "skip this product": nothing is raised for them.
    Fields are read once; there is no retry. The earlier retry handler
    guarded only the definition of the helper, so it could never run, and a
    retry on every timeout would have recursed without bound.

    Args:
        i: Zero-based position of the product in the list.
    """
    browser.switch_to.window(tmall)
    try:
        #J_ProductList > li:nth-child(20) > div > h3 > a is an alternative
        # link to the same product; the image link is the one clicked here.
        link = wait.until(EC.element_to_be_clickable((
            By.CSS_SELECTOR,
            '#J_ProductList > li:nth-child({}) > div > div.product-img > a'.format(i + 1),
        )))
        link.click()
    except TimeoutException:
        # The product link never became clickable; no window was opened and
        # the listing window is still the current one.
        return

    other_windows = [handle for handle in browser.window_handles if handle != tmall]
    if not other_windows:
        # The click opened no separate window, so there is nothing to switch
        # to or close (previously this called switch_to.window(None)).
        return

    # Same choice as before: the last handle that is not the listing window.
    browser.switch_to.window(other_windows[-1])
    try:
        _print_detail_fields()
    except TimeoutException:
        # Best effort: a field that never shows up just ends this product.
        pass
    finally:
        # Close the detail window on failure as well, so timed-out products
        # do not leave stray windows behind for later calls to pick up.
        browser.close()
        browser.switch_to.window(tmall)
# Visit list positions 18-21 (list_click maps i to nth-child(i + 1)), then
# report the wall-clock time elapsed since `s` was taken.
for i in range(18, 22):
    list_click(i)
e = time.time()
print("用時{}".format(e - s))
爬蟲天貓超市失敗
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.