import json
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
class Test:
    """Scrape exhibition listings with Selenium and dump them to a JSON file.

    Every statement lives directly in the class body, so the whole scrape
    runs once, at class-definition time: Chrome is started, ``url`` is
    loaded, each ``div.ex-item`` element is read into a dict with the keys
    ``name``, ``address``, ``exhi_time`` and ``image``, and the list of
    dicts is written out as UTF-8 JSON.  The intermediate values
    (``options``, ``driver``, ``wait``, ``result``, ...) stay available as
    class attributes afterwards.

    Element lookups use ``find_element(s)(By.CSS_SELECTOR, ...)`` because
    the ``find_element(s)_by_css_selector`` helpers no longer exist in
    current Selenium 4 releases.
    """

    url = 'http://www.test.com/hello'

    options = webdriver.ChromeOptions()
    # Do not load images, to speed up page loads.
    options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
    # Important: exclude the "enable-automation" switch.  Per the original
    # author's note, this is meant to keep sites from detecting Selenium.
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Optional local proxy:
    # options.add_argument("--proxy-server=127.0.0.1:8080")
    # User agent to send; alternative values are kept below for reference.
    ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
    # ua = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)'
    # ua = 'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)'
    options.add_argument('user-agent=' + ua)

    driver = webdriver.Chrome(options=options)
    driver.maximize_window()
    wait = WebDriverWait(driver, 10)  # created here but not used below
    driver.get(url)
    time.sleep(3)  # fixed pause; presumably lets the page finish rendering
    # driver.refresh()

    result = []
    items = driver.find_elements(By.CSS_SELECTOR, 'div.ex-item')
    for item in items:
        name = item.find_element(By.CSS_SELECTOR, 'div.ex-item-bottom div.title').text
        print(name)
        address = item.find_element(By.CSS_SELECTOR, 'div.ex-item-bottom p.f-toe').text
        print(address)
        exhi_time = item.find_element(By.CSS_SELECTOR, 'div.ex-item-bottom p.item-line').text
        print(exhi_time)
        image = item.find_element(By.CSS_SELECTOR, 'div.ex-item-top img').get_attribute('src')
        print(image)
        one = {
            'name': name,
            'address': address,
            'exhi_time': exhi_time,
            'image': image,
        }
        result.append(one)

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open('d:\\data.json', 'w', encoding='utf-8') as file:
        json.dump(result, file, indent=2, ensure_ascii=False)

    time.sleep(10)
    # driver.close()  # close the browser
輸出的 JSON 格式文件內容示例如下:
[
{
"name": "2020年上海國際展",
"address": "上海世博展覽館",
"exhi_time": "2020/03/24~03/26",
"image": "https://show.test.com/show/imgs/202003/61a840a1373f45122d4e.jpg"
},
{
"name": "中國國際產業展覽會",
"address": "上海市徐匯區漕寶路88號",
"exhi_time": "2020/04/10~04/12",
"image": "https://show.test.com/show/imgs/202003/289f27cb5513fad11.jpg"
}
]
本文內容到此結束。