在使用 selenium 之前,需要先下载浏览器驱动
下载 chrome 驱动
python 解压缩 zip
def un_zip(file_name, to_dir='./'):
    """Extract every member of a zip archive into a directory.

    Args:
        file_name: Path of the zip archive to read.
        to_dir: Destination directory. It is created, together with any
            missing parent directories, when it does not exist yet.

    Raises:
        zipfile.BadZipFile: If ``file_name`` is not a valid zip archive.
        OSError: If the archive cannot be opened or the destination
            cannot be created or written.
    """
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(file_name) as archive:
        # makedirs(exist_ok=True) copes with nested targets and with a
        # directory that already exists; a bare os.mkdir() handles neither.
        os.makedirs(to_dir, exist_ok=True)
        archive.extractall(to_dir)
requests 下载网络文件
def download_driver(to_dir='./'):
    """Download the latest Windows chromedriver and unpack it into ``to_dir``.

    Reads the version string from the mirror's ``LATEST_RELEASE`` file,
    downloads the matching ``chromedriver_win32.zip`` into the current
    working directory, extracts it with ``un_zip`` and then deletes the
    downloaded archive.

    Args:
        to_dir: Directory that receives the extracted driver files.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    print('install chrome-driver first')
    # NOTE(review): this mirror host may no longer be served - confirm the
    # URLs are still valid before relying on this fallback.
    url = 'http://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE'
    version_response = requests.get(url, timeout=30)
    # Fail loudly instead of treating an error page as a version string.
    version_response.raise_for_status()
    # Strip surrounding whitespace so a trailing newline cannot corrupt the URL.
    version = version_response.content.decode('utf8').strip()

    driver_file = 'http://npm.taobao.org/mirrors/chromedriver/' + version + '/chromedriver_win32.zip'
    archive_response = requests.get(driver_file, timeout=30)
    # Without this check an HTML error body would be saved as the zip file.
    archive_response.raise_for_status()

    download_zip = "chromedriver_win32.zip"
    with open(download_zip, "wb") as archive:
        archive.write(archive_response.content)
    try:
        un_zip(download_zip, to_dir)
    finally:
        # Remove the temporary archive even when extraction fails.
        os.remove(download_zip)
使用 selenium 访问百度图片
自动打开浏览器,模拟手工搜索行为,并每隔 4 秒通过 JavaScript 将页面滚动条拖到底部
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
import requests
import zipfile
import os
# Start a Chrome session (Chrome needs a matching chromedriver). If the
# first attempt fails, fetch the driver into the current directory and retry.
try:
    driver = webdriver.Chrome()
except Exception:
    download_driver(to_dir='./')
    driver = webdriver.Chrome()

try:
    # Open the Baidu image search page.
    driver.get("http://image.baidu.com/")
    # Locate the search input box (element id "kw"). find_element(By.ID, ...)
    # replaces find_element_by_id, which Selenium 4 removed.
    search_box = driver.find_element(By.ID, "kw")
    # Type the query, then press Enter to submit it.
    search_box.send_keys("街拍")
    search_box.send_keys(Keys.ENTER)
    # Explicit wait of at most 10 seconds: the element with id
    # "imgContainer" must be present by then, otherwise wait.until raises.
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'imgContainer')))
    # Debugging aids for inspecting the response:
    # print(driver.current_url)    # requested URL
    # print(driver.get_cookies())
    # print(driver.page_source)    # page HTML

    # Drag the page scrollbar to the bottom every 4 seconds. The loop has no
    # exit condition; it ends when execute_script raises or on Ctrl-C.
    js = "var q=document.documentElement.scrollTop=100000"
    while True:
        driver.execute_script(js)
        time.sleep(4)
except Exception as err:
    print(err)
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()