# 爬蟲天貓超市失敗 (Tmall supermarket crawler: failed attempt)

# -*- coding: utf-8 -*-
"""
Created on Fri Dec  6 09:33:51 2019

@author: 18352
"""

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
import time
import json
import os
import csv
# Wall-clock start time; the end of the script subtracts it from a second
# time.time() reading to print the total run time.
s = time.time()

#class SaveCSV(object):
#    def save(self, keyword_list,path, item):
#        """
#        保存csv方法
#        :param keyword_list: 保存文件的字段或者說是表頭
#        :param path: 保存文件路徑和名字
#        :param item: 要保存的字典對象
#        :return:
#        """
#        try:
#            # 第一次打開文件時,第一行寫入表頭
#            if not os.path.exists(path):
#                with open(path, "w", newline='', encoding='gbk') as csvfile:  # newline='' 去除空白行
#                    writer = csv.DictWriter(csvfile, fieldnames=keyword_list)  # 寫字典的方法
#                    writer.writeheader()  # 寫表頭的方法
#
#            # 接下來追加寫入內容
#            with open(path, "a", newline='', encoding='gbk') as csvfile:  # newline='' 一定要寫,否則寫入數據有空白行
#                writer = csv.DictWriter(csvfile, fieldnames=keyword_list)
#                writer.writerow(item)  # 按行寫入數據
#                print("^_^ write success")
#
#        except Exception as e:
#            print("write error==>", e)
#            # 記錄錯誤數據
#            with open("error.txt", "w") as f:
#                f.write(json.dumps(item) + ",\n")
#            pass
#
#item_list = [
#            "title",
#            "prcie",
#            "sales",
#            "評價"
#            ]
#t0 = time.strftime("%Y%m%d%H%M")
#path = ".\爬蟲結果\銷量排行&{}.csv".format(t0)

# Start Chrome and create the explicit-wait helper (10-second timeout) that
# the rest of the script shares.
browser = webdriver.Chrome()
#browser = webdriver.PhantomJS()
wait = WebDriverWait(browser, 10)

browser.get('https://chaoshi.tmall.com')
# Obtain the cookies first. The 30-second pause presumably gives the user
# time to log in (and reach a product list) by hand -- TODO confirm.
time.sleep(30)
#print(browser.get_cookies())

# The original assignment was lost (only a bare "=" remained, which is a
# syntax error and left `cookies` undefined). Read the cookies from the live
# session instead of relying on a pasted list.
cookies = browser.get_cookies()

for cookie in cookies:
    # Drop 'expiry' before re-adding, as the original loop did.
    cookie.pop('expiry', None)
    browser.add_cookie(cookie)

# Disabled: automated search step (type a keyword into the search box and
# submit the form).
#input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#mq')))
#input.send_keys('大米')
#submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#mallSearch > form > fieldset > div > button')))
#submit.click()

# Handle of the main window; list_click() returns here after each product.
tmall = browser.current_window_handle


def _scrape_detail():
    """Print four text fields from the product page in the current window.

    The selectors target, in order, the page heading, the promo price, the
    sell-count figure and the rates count (presumably title, price, sales
    and number of reviews -- verify against the live page).

    All four elements are located before anything is printed, so a retry
    after a timeout never prints a partial record twice.

    Raises:
        TimeoutException: if any element is not clickable within the shared
            ``wait`` timeout.
    """
    selectors = (
        '#J_DetailMeta > div.tm-clear > div.tb-property > div > div.tb-detail-hd > h1',
        '#J_PromoPrice > dd > div > span',
        '#J_DetailMeta > div.tm-clear > div.tb-property > div > ul > li.tm-ind-item.tm-ind-sellCount > div > span.tm-count',
        '#J_ItemRates > div > span.tm-count',
    )
    texts = [
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector))).text
        for selector in selectors
    ]
    for text in texts:
        print(text)


def list_click(i, retries=1):
    """Open the product at zero-based list position ``i`` and print its details.

    Switches to the main window, clicks the product image link of the
    ``i + 1``-th ``<li>`` in ``#J_ProductList``, waits for the detail page to
    open in a new window, prints its fields, then closes that window and
    returns to the main one.

    Args:
        i: Zero-based index of the product in the list.
        retries: Extra scrape attempts after a timeout on the detail page
            (must be >= 0). The original code intended a retry, but its
            ``try`` wrapped only a function definition and never fired.

    Returns:
        None. A product that cannot be opened or scraped in time is reported
        on stdout and skipped.
    """
    browser.switch_to.window(tmall)
    # Snapshot the handles so the window opened by the click can be
    # identified exactly, even if other windows are still open.
    handles_before = browser.window_handles

    try:
        link = wait.until(EC.element_to_be_clickable((
            By.CSS_SELECTOR,
            '#J_ProductList > li:nth-child({}) > div > div.product-img > a'.format(i + 1),
        )))
        link.click()
        # The new window may appear slightly after click() returns.
        wait.until(EC.new_window_is_opened(handles_before))
    except TimeoutException:
        print("list item {}: could not open detail page, skipped".format(i + 1))
        browser.switch_to.window(tmall)
        return

    opened = [h for h in browser.window_handles if h not in handles_before]
    browser.switch_to.window(opened[-1])
    try:
        for _ in range(retries + 1):
            try:
                _scrape_detail()
            except TimeoutException:
                continue
            break
        else:
            print("list item {}: detail page timed out, skipped".format(i + 1))
    finally:
        # Always close the detail window, including on failure, so windows
        # do not pile up across calls.
        browser.close()
        browser.switch_to.window(tmall)
# Scrape list positions 19 through 22 (list_click receives the zero-based
# index), then report the elapsed wall-clock time.
try:
    for i in range(18, 22):
        list_click(i)
finally:
    # End the WebDriver session even if a product fails unexpectedly, so no
    # Chrome / chromedriver process is left running.
    browser.quit()

e = time.time()
print("用時{}".format(e-s))
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章