import re
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
import csv
import time
# Complete code
# Module-level Chrome WebDriver shared by every function in this script.
# NOTE: creating it here means a browser is launched as soon as the module is
# imported, not only when main() runs.
browser = webdriver.Chrome()
# Explicit-wait helper bound to `browser`; each wait.until(...) call gives up
# with TimeoutException after the 10-second timeout passed here.
wait = WebDriverWait(browser, 10)
def search(keyword=None, max_attempts=5):
    """Open the JD home page, submit a keyword search and return the pager text.

    Args:
        keyword: Text typed into the search box. When omitted, the
            module-level ``keywords`` variable is used, so the original
            zero-argument call ``search()`` keeps working.
        max_attempts: Number of times the whole sequence is tried before the
            last ``TimeoutException`` is re-raised. Values below 1 are
            treated as 1.

    Returns:
        The text of the element matched by
        ``#J_bottomPage > span.p-skip > em:nth-child(1)`` on the result page.

    Raises:
        TimeoutException: If every attempt timed out.
        NameError: If ``keyword`` is omitted and no module-level ``keywords``
            has been defined.
    """
    if keyword is None:
        # Backward-compatible fallback to the global the original code read.
        keyword = keywords

    attempts = max(1, max_attempts)
    # A bounded loop replaces the original unbounded recursion, which would
    # eventually die with RecursionError when the page never loads.
    for attempt in range(1, attempts + 1):
        print('正在搜索')
        try:
            browser.get("https://www.jd.com/")
            # Named search_box rather than `input` to avoid shadowing the builtin.
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#key'))
            )
            search_box.send_keys(keyword)
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#search > div > div.form > button')
                )
            )
            # Fixed pause kept from the original before clicking the button.
            time.sleep(3)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > em:nth-child(1)')
                )
            )
            return total.text
        except TimeoutException:
            if attempt == attempts:
                raise
def next_page(page_number, max_attempts=5):
    """Jump to result page ``page_number`` through the pager and scrape it.

    The page number is typed into the pager's skip box, the jump link is
    clicked, and once the highlighted pager entry shows ``page_number`` the
    products on the page are written out via ``get_products()``.

    Args:
        page_number: Page to jump to; it is typed into the skip box and
            compared (as a string) against the highlighted pager entry.
        max_attempts: Number of times the whole sequence is tried before the
            last ``TimeoutException`` is re-raised. Values below 1 are
            treated as 1.

    Raises:
        TimeoutException: If every attempt timed out.
    """
    attempts = max(1, max_attempts)
    # A bounded loop replaces the original unbounded recursion, which would
    # eventually die with RecursionError when the pager never updates.
    for attempt in range(1, attempts + 1):
        print(f'正在翻第{page_number}頁')
        try:
            print('定位到跳轉頁數')
            # Named page_box rather than `input` to avoid shadowing the builtin.
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > input')
                )
            )
            print('定位到跳轉按鈕,確保可點擊')
            jump_link = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > a')
                )
            )
            page_box.clear()
            page_box.send_keys(page_number)
            jump_link.click()
            # Only scrape after the pager confirms the requested page is current.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-num > a.curr'),
                    str(page_number),
                )
            )
            # Kept inside the try so a timeout while waiting for the product
            # list also triggers a retry of the whole jump, as before.
            get_products()
            return
        except TimeoutException:
            if attempt == attempts:
                raise
def _image_url(img):
    """Return an absolute image URL for a pyquery <img> selection, or ''."""
    # NOTE(review): lazily loaded images appear to keep their URL in
    # `data-lazy-img` instead of `src` - confirm against the live markup.
    src = img.attr('src') or img.attr('data-lazy-img') or ''
    if src.startswith('//'):
        # Protocol-relative URL: add the scheme the original code prepended.
        return 'http:' + src
    # Empty values stay empty (the original produced the literal "http:None")
    # and values that already carry a scheme are left untouched.
    return src


def get_products():
    """Parse the currently loaded result page and write one CSV row per product.

    Waits for ``#J_goodsList > ul`` to be present, parses the browser's page
    source with pyquery, and for every ``#J_goodsList .gl-item`` element
    prints and writes ``[image, price, title, shop, comment]`` through the
    module-level ``writer``.

    Raises:
        TimeoutException: If the product list does not appear in time.
        NameError: If the module-level ``writer`` has not been created yet.
    """
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#J_goodsList > ul')))
    doc = pq(browser.page_source, parser="html")
    for item in doc('#J_goodsList .gl-item').items():
        image = _image_url(item('.gl-i-wrap .p-img a img'))
        price = item.find('.p-price').text()
        title = item.find('.p-name').text().strip('\n')
        shop = item.find('div span a').text()
        comment = item.find('.p-commit a').text()
        product = [image, price, title, shop, comment]
        print(product)
        writer.writerow(product)
def main():
    """Prompt for a keyword, run the search and dump all result pages to CSV.

    The keyword is stored in the module-level ``keywords`` variable because
    ``search()`` reads it from there. The number of pages is parsed from the
    text returned by ``search()``; page 1 is scraped directly and pages
    2..total are reached through ``next_page()``. Rows are written to
    ``result.csv`` (GBK-encoded) under a fixed header row.

    Raises:
        ValueError: If the text returned by ``search()`` contains no digits.
        TimeoutException: Propagated from the scraping helpers.
    """
    # `keywords` must be global: search() looks it up at module level and the
    # original local binding made that lookup fail with NameError.
    # `f` and `writer` stay global because get_products() uses `writer`.
    global keywords, f, writer
    keywords = input('請輸入關鍵字:')
    try:
        total_text = search()
        match = re.search(r'(\d+)', total_text)
        if match is None:
            raise ValueError(f'no page count found in {total_text!r}')
        total = int(match.group(1))

        # The context manager closes the file even when scraping fails midway.
        # NOTE(review): GBK cannot encode every Unicode character, so an
        # unusual product title may raise UnicodeEncodeError - confirm the
        # encoding requirement before changing it.
        with open('result.csv', mode='w', encoding='gbk', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['image', 'price', 'title', 'shop', 'comment'])
            # search() leaves the browser on page 1; scrape it here, otherwise
            # the first page of results never reaches the CSV.
            get_products()
            for page in range(2, total + 1):
                next_page(page)
    finally:
        # Release the browser and its driver process whatever happened above.
        browser.quit()
# Run the scraper only when this file is executed directly as a script.
if __name__ == "__main__":
    main()