from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from random import randint
from pyquery import PyQuery as pq
import pymongo
class JD(object):
    """Scrape JD.com search results for a keyword with headless Chrome.

    Every product found is printed, appended to a text file (``CSV_PATH``)
    and inserted into MongoDB (``MONGO_DB`` / ``MONGO_COLLECTION``).
    """

    # Output file; one ``str(dict)`` line is appended per product.
    CSV_PATH = r"G:\個人總結\csv\jd_iphone.csv"
    # MongoDB connection settings used by ``save_to_mongo``.
    MONGO_URL = "localhost"
    MONGO_DB = "JingDong"
    MONGO_COLLECTION = "products"

    # Created on first use by ``save_to_mongo`` and released by ``close``.
    _mongo_client = None

    def __init__(self):
        """Start a headless Chrome session and set the search parameters."""
        self.options = webdriver.ChromeOptions()
        # Run Chrome without a visible window.
        self.options.add_argument("headless")
        # NOTE(review): ``executable_path`` / ``chrome_options`` are the older
        # Selenium keyword names; newer releases appear to expect
        # ``service=`` / ``options=`` instead - confirm against the installed
        # Selenium version before changing.
        self.browser = webdriver.Chrome(
            executable_path=r"C:\Users\dell\AppData\Local\Google\Chrome\Application\chromedriver.exe",
            chrome_options=self.options
        )
        # Maximize the browser window.
        self.browser.maximize_window()
        self.keyword = "iphone"
        self.url = "https://www.jd.com/?cu=true&utm_source=baidu-search&utm_medium=cpc&utm_campaign=t_262767352_baidusearch&utm_term=106807362512_0_1ea216375c8242409e3b4487043f782b"

    @staticmethod
    def _absolute_url(href):
        """Return *href* as an absolute URL, or ``""`` when it is missing.

        Protocol-relative links (``//host/path``) get an ``https:`` prefix;
        anything else is returned unchanged.
        """
        if not href:
            return ""
        if href.startswith("//"):
            return "https:" + href
        return href

    @staticmethod
    def _parse_page_count(text):
        """Convert the pager text to an int, falling back to 1 page."""
        try:
            return int(text)
        except (TypeError, ValueError):
            # Pager not found or not numeric: scrape only the current page.
            return 1

    def scroll(self):
        """Start scrolling the current page to the bottom step by step.

        The injected script scrolls 100px every 50ms after a 1s delay and
        appends ``scroll-done`` to the document title when it reaches the
        bottom. The script runs asynchronously in the page; this method
        returns as soon as it has been injected.
        """
        self.browser.execute_script("""
            (function () {
                var y = document.body.scrollTop;
                var step = 100;
                window.scroll(0, y);
                function f() {
                    if (y < document.body.scrollHeight) {
                        y += step;
                        window.scroll(0, y);
                        setTimeout(f, 50);
                    }
                    else {
                        window.scroll(0, y);
                        document.title += "scroll-done";
                    }
                }
                setTimeout(f, 1000);
            })();
        """)

    def skip(self):
        """Open the home page, search for ``self.keyword`` and count pages.

        :return: total number of result pages read from ``.p-skip b``
            (1 when that text is missing or not a number)
        """
        self.browser.get(self.url)
        wait = WebDriverWait(self.browser, 5)
        # Wait for the search box to be present.
        search_box = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#key'))
        )
        # Wait for the search button to become clickable.
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".button"))
        )
        time.sleep(2)
        # Clear any pre-filled keyword, then type ours.
        search_box.clear()
        search_box.send_keys(self.keyword)
        time.sleep(2)
        # Submit the search; the browser moves to the result list.
        submit.click()
        time.sleep(randint(1, 3))
        # Scroll down, then give the asynchronous scroll time to run.
        self.scroll()
        time.sleep(5)
        doc = pq(self.browser.page_source)
        # Total number of result pages.
        return self._parse_page_count(doc(".p-skip b").text())

    def _parse_item(self, item):
        """Build the product dict for one ``.gl-item`` element."""
        img_link = item(".p-img a")
        shop_link = item(".p-shop a")
        # A missing shop link yields an empty URL instead of a TypeError.
        shop = "{} {}".format(
            shop_link.text(), self._absolute_url(shop_link.attr("href"))
        ).strip()
        return {
            "href": self._absolute_url(img_link.attr("href")),
            "title": (img_link.attr("title") or "").replace("\n", " "),
            "price": item(".p-price").text(),
            "name": str(item(".p-name em").text()).replace("\n", " "),
            "commit": item(".p-commit").text(),
            "shop": shop,
            "icons": item(".p-icons").text(),
            "stock": item(".p-stock").text()
        }

    def getData(self, url=None):
        """Extract every product on the current result page and store it.

        :param url: optional page to load first; when omitted the page the
            browser is currently showing is parsed, so pagination done by
            ``next`` is respected
        """
        if url is not None:
            self.browser.get(url)
        doc = pq(self.browser.page_source)
        for index, item in enumerate(doc(".gl-item").items()):
            product = self._parse_item(item)
            # Pause between items (this also slows the overall page rate).
            time.sleep(randint(1, 2))
            print(index, product)
            self.write(product)
            self.save_to_mongo(product)

    def next(self):
        """Click the next-page button and scroll the new page."""
        wait = WebDriverWait(self.browser, 3)
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".pn-next"))
        )
        time.sleep(3)
        submit.click()
        print(self.browser.current_url)
        time.sleep(2)
        self.scroll()
        # Same pause as in skip(): scroll() is asynchronous, so wait before
        # the caller reads the page source.
        time.sleep(5)

    def write(self, content):
        """Append ``str(content)`` as one line to ``CSV_PATH``."""
        with open(self.CSV_PATH, 'a+', encoding="utf-8") as file:
            file.write(str(content) + "\n")

    def save_to_mongo(self, result):
        """Insert *result* into MongoDB; failures are printed, not raised.

        :param result: product dict to insert
        """
        # Reuse one client instead of opening a new one per product.
        if self._mongo_client is None:
            self._mongo_client = pymongo.MongoClient(self.MONGO_URL)
        db = self._mongo_client[self.MONGO_DB]
        try:
            db[self.MONGO_COLLECTION].insert_one(result)
        except Exception as exc:
            # Best effort: keep scraping, but show what went wrong.
            print("存儲到MongoDB失敗", exc)

    def close(self):
        """Release the MongoDB client (if any) and quit the browser."""
        try:
            if self._mongo_client is not None:
                self._mongo_client.close()
                self._mongo_client = None
        finally:
            browser = getattr(self, "browser", None)
            if browser is not None:
                browser.quit()

    def execute(self):
        """Run the whole crawl: search, then scrape every result page.

        The browser and database client are always released at the end,
        also when an error interrupts the crawl.
        """
        try:
            count = self.skip()
            for i in range(count):
                print("第{}頁".format(i + 1))
                self.getData()
                # There is no further page to open after the last one.
                if i + 1 < count:
                    self.next()
        finally:
            self.close()
# Run the crawl only when this file is executed as a script.
if __name__ == "__main__":
    JD().execute()
# python 爬京東商品信息
# 發表評論
# 所有評論
# 還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。