安裝selenium
pip3 install selenium
# 查看是否安裝成功
# 進入到python環境中
import selenium
print(selenium.__version__)
網頁元素定位
# 通過屬性id和name來實現定位
find_element_by_id()
find_element_by_name()
# 如果多個元素的id和name相同的話只會定位到第一個元素

# 通過HTML標籤類型,和屬性class來實現定位
find_element_by_class_name()
find_element_by_tag_name()
# 只能定位到符合條件的第一個元素

# 通過標籤的值實現定位,partial_link用於模糊匹配
find_element_by_link_text()
find_element_by_partial_link_text()
# 如果網頁中的文字不是唯一,那麼也只會定位到第一個元素

# 元素的路徑定位選擇器
find_element_by_xpath()
find_element_by_css_selector()
# 如果有多個相同元素,又想要同時獲取
find_elements_by_id()
find_elements_by_name()
find_elements_by_class_name()
find_elements_by_tag_name()
find_elements_by_link_text()
find_elements_by_partial_link_text()
find_elements_by_xpath()
find_elements_by_css_selector()
節點交互:
# Node interaction demo: type into the Taobao search box, clear it,
# type a new query, then click the search button.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
search_box = browser.find_element(By.ID, 'q')
# send_keys() types text into the element.
search_box.send_keys('ipone')
time.sleep(1)
# clear() empties the input before the next query is typed.
search_box.clear()
search_box.send_keys('ipad')

search_button = browser.find_element(By.CLASS_NAME, 'btn-search')
# click() presses the button.
search_button.click()
動作鏈
# Action chain demo: drag the #draggable element onto the #droppable element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# The elements are looked up after switching into the 'iframeResult' frame.
browser.switch_to.frame('iframeResult')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
# Element to be dragged.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
# Element that is the drop destination.
target = browser.find_element(By.CSS_SELECTOR, '#droppable')

actions = ActionChains(browser)
actions.drag_and_drop(source, target)
# Queued actions only run once perform() is called.
actions.perform()
執行js
例如下拉進度條,可以直接模擬運行JavaScript,使用execute_script()
即可實現
# Run JavaScript in the page: scroll to the bottom, then raise an alert.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# The scripts are executed in order, one execute_script() call each.
for script in (
    'window.scrollTo(0,document.body.scrollHeight)',
    'alert("To Bottom")',
):
    browser.execute_script(script)
獲取節點信息
# Node information demo: locate an element and print one of its attributes.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
logo = browser.find_element(By.ID, 'zh-top-link-logo')
print(logo)
# Read the element's class attribute.
print(logo.get_attribute('class'))
獲取文本值
# Text value demo: locate an element and print its text.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
# Named `element` rather than `input` so the builtin input() is not shadowed.
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element.text)
輸出id、位置、標籤名、大小
# Print an element's id, location, tag name and size.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
# Named `element` rather than `input` so the builtin input() is not shadowed.
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
# id
print(element.id)
# Location
print(element.location)
# Tag name
print(element.tag_name)
# Size
print(element.size)
界面切換
# Frame switching demo: look for an element inside a child frame, then
# switch back to the parent frame and look it up there.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

# Switch into the child frame.
browser.switch_to.frame('iframeResult')
try:
    # Look for the logo; find_element raises NoSuchElementException when
    # nothing matches. find_element(By.<strategy>, value) works on both
    # Selenium 3 and 4; find_element_by_* was removed in Selenium 4.3.
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')

# Switch back to the parent frame.
browser.switch_to.parent_frame()
# Look for the logo again from the parent frame.
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
延時等待(隱式等待)
# Implicit wait demo: set a 10-second implicit wait before locating an element.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
# The implicit wait is configured before the page is loaded.
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')

# find_element(By.<strategy>, value) works on both Selenium 3 and 4; the
# find_element_by_* helpers were removed in Selenium 4.3.
# Named `element` rather than `input` so the builtin input() is not shadowed.
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element)
顯式等待
# Explicit wait demo: wait up to 10 seconds for specific conditions.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)

# Wait until the search box is present.
# Named `search_box` rather than `input` so the builtin input() is not shadowed.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# Wait until the search button is clickable.
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_box, button)
Cookies
# Cookies demo: list, add, and delete cookies for the current session.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# Read the current cookies.
print(browser.get_cookies())

# Add a cookie; the host is given under the 'domain' key of the cookie dict.
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
print(browser.get_cookies())

# Delete all cookies.
browser.delete_all_cookies()
print(browser.get_cookies())