import random
import time

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Search term typed into the site's search box; any keyword may be used.
KEYWORD = "手機"

# Path to the chromedriver executable (must be given explicitly on Windows).
CHROMEDRIVER_PATH = r"C:\Users\dell\AppData\Local\Google\Chrome\Application\chromedriver.exe"
browser = webdriver.Chrome(CHROMEDRIVER_PATH)
# Run the crawl in a maximized window.
browser.maximize_window()
class VIP:
    """Crawl vip.com search results for ``KEYWORD`` with the shared ``browser``.

    Every method drives the module-level ``browser`` instance; the class
    itself holds no per-instance state.
    """

    # Upper bound, in seconds, on how long one scroll-to-bottom pass may take.
    SCROLL_TIMEOUT_SECONDS = 60

    def search(self):
        """Open the home page, submit ``KEYWORD`` and scroll the result page.

        The random pauses between the individual steps imitate a human user.

        :return: None
        """
        browser.get("https://www.vip.com/")
        wait = WebDriverWait(browser, 5)
        # Wait for the search box to be present in the DOM.
        search_box = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".c-search-input"))
        )
        # Wait for the search button to become clickable
        # (sending the ENTER key would be an alternative).
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".J-search-button"))
        )
        time.sleep(random.randint(1, 4))
        # Drop whatever text the box already contains, then type the keyword.
        search_box.clear()
        search_box.send_keys(KEYWORD)
        time.sleep(random.randint(1, 4))
        submit.click()
        time.sleep(random.randint(1, 4))
        # Scroll step by step to the bottom of the result page. A single
        # window.scrollTo(0, document.body.scrollHeight) may not reach the
        # bottom when a frame covers the page, which later makes the
        # next-page click time out.
        self.scroll()
        time.sleep(random.randint(2, 4))

    def scroll(self):
        """Scroll the current page to the bottom in 100px steps and wait for it.

        The script runs asynchronously inside the page, so it is started with
        ``execute_async_script`` and this method blocks until the script
        reports completion. If the bottom is not reached within
        ``SCROLL_TIMEOUT_SECONDS`` the timeout is swallowed and the crawl
        continues with whatever part of the page has been scrolled so far.

        :return: None
        """
        browser.set_script_timeout(self.SCROLL_TIMEOUT_SECONDS)
        try:
            browser.execute_async_script("""
                var done = arguments[arguments.length - 1];
                var y = document.body.scrollTop;
                var step = 100;
                window.scroll(0, y);
                function f() {
                    if (y < document.body.scrollHeight) {
                        y += step;
                        window.scroll(0, y);
                        setTimeout(f, 50);
                    }
                    else {
                        window.scroll(0, y);
                        document.title += "scroll-done";
                        // Tell Selenium that the bottom has been reached.
                        done();
                    }
                }
                setTimeout(f, 1000);
            """)
        except TimeoutException:
            # Scrolling is best effort: a page that keeps growing must not
            # abort the whole crawl.
            pass

    @staticmethod
    def _absolute_url(href):
        """Turn an item's ``href`` attribute into a URL string.

        :param href: value of the link's ``href`` attribute, or ``None``
        :return: ``""`` when the link is missing, the value prefixed with
            ``http:`` when it is protocol-relative, otherwise the value as is
        """
        if not href:
            return ""
        if href.startswith("//"):
            return "http:" + href
        return href

    def getData(self):
        """Parse the current page and store one record per listed product.

        Each record is written through :meth:`write` and echoed to stdout
        together with its index on the page.

        :return: None
        """
        doc = pq(browser.page_source)
        for index, item in enumerate(doc(".goods-list-item").items()):
            product = {
                "URL:": self._absolute_url(item(".goods-image a").attr("href")),
                "DISCOUNT_PRICE:": item(".inner-exclusive").text(),
                "VIP_PRICE:": item(".goods-vipshop-wrap").text(),
                "DISCOUNT:": item(".goods-discount-wrap").text().replace("\n", " "),
                "TITLE:": item(".goods-title-info").text(),
            }
            self.write(product)
            print(index, product)
        # Pause once per page: the items above come from HTML that is already
        # downloaded, so sleeping per item would not throttle any request.
        time.sleep(random.randint(1, 4))

    def write(self, content):
        """Append one record to ``vip_iPhone.csv`` in the working directory.

        The record is stored as its ``str()`` form followed by a newline.

        :param content: the record to persist
        :return: None
        """
        with open(r"vip_iPhone.csv", "a", encoding="utf-8") as file:
            file.write(str(content) + "\n")

    def nextPage(self):
        """Click the next-page control and scroll the page that follows.

        :return: ``True`` when the control was clicked, ``False`` when it did
            not become clickable within 5 seconds (e.g. on the last page)
        """
        wait = WebDriverWait(browser, 5)
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".cat-paging-next"))
            )
        except TimeoutException:
            return False
        time.sleep(4)
        next_button.click()
        # Give the navigation time to start, then scroll the new page the
        # same way search() does for the first one.
        time.sleep(random.randint(1, 4))
        self.scroll()
        # URL of the page reached by the click.
        print(browser.current_url)
        return True

    def execute(self, max_pages=19):
        """Run the whole crawl: search, then scrape page after page.

        :param max_pages: maximum number of result pages to scrape, the first
            page included; the default matches the page count the site
            returned when the script was written
        :return: None
        """
        self.search()
        self.getData()
        for _ in range(1, max_pages):
            # Stop early when there is no further page to go to.
            if not self.nextPage():
                break
            self.getData()
if __name__ == "__main__":
    vip = VIP()
    try:
        vip.execute()
    finally:
        # Always shut down Chrome and chromedriver, even when the crawl
        # fails midway; otherwise both processes outlive the script.
        browser.quit()