# yangTime#False

from bs4 import BeautifulSoup
from selenium import webdriver
import time


def parseXPath(soupElement, currentPath=''):
    """Build an absolute XPath-style location string for a parsed element.

    The element's ancestors are walked upwards via ``.parent``. Each step
    contributes a ``/name`` segment; a 1-based positional predicate
    (``/name[k]``) is added only when the node has at least one sibling
    with the same tag name, so unique children stay un-indexed.

    Args:
        soupElement: A BeautifulSoup-like node exposing ``name``,
            ``parent``, ``find_previous_siblings`` and
            ``find_next_siblings``.
        currentPath: Path suffix that is appended after the segments
            generated for ``soupElement`` and its ancestors.

    Returns:
        The path as a string. It starts with ``/html`` when an ``html``
        ancestor exists. For a fragment without one, the walk stops at the
        document root (or at a node with no parent) and the path built so
        far is returned instead of failing on a missing parent.
    """
    path = currentPath
    node = soupElement

    # Iterate rather than recurse so very deep trees cannot hit the
    # interpreter's recursion limit. '[document]' is the name BeautifulSoup
    # gives to the root object, which is not a real tag and must not
    # appear in the path.
    while node is not None and node.name != '[document]':
        if node.name == 'html':
            return f'/html{path}'

        tag = node.name
        before = len(node.find_previous_siblings(tag))
        after = len(node.find_next_siblings(tag))

        if before == 0 and after == 0:
            # Only child with this tag name: no index needed.
            path = f'/{tag}{path}'
        else:
            # XPath positions are 1-based.
            path = f'/{tag}[{before + 1}]{path}'

        node = node.parent

    return path



# Start a Chrome session using the chromedriver binary next to this script.
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object - confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path="./chromedriver")
try:
    driver.get("http://baidu.com")

    # Fixed pause before the DOM snapshot is taken.
    time.sleep(3)

    # Serialise the live DOM (not the raw response) so the parsed tree
    # reflects whatever the browser currently holds.
    html = driver.execute_script("return document.documentElement.outerHTML")
finally:
    # Always shut the browser down, even if navigation or the script call
    # fails; otherwise the Chrome/chromedriver processes are left running.
    driver.quit()

soup = BeautifulSoup(html, 'html.parser')

# Print the computed path of every <input> element found in the snapshot.
for element in soup.find_all('input'):
    print(parseXPath(element))

# 發表評論
# 所有評論
# 還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
# 相關文章