基于selenium的动态网页Xpath测试工具

原創

李奇峰1998

2020-06-21 03:36

最近在编写一些XPath网页规则，发现网上的XPath测试工具很多，但都只支持静态页面，暂时还没有发现支持动态页面的XPath测试工具。为了方便后续测试，于是自己动手写了一个。

from tkinter import *
import tkinter as tk
import re
from lxml import etree
from selenium import webdriver

# Create a headless browser: try Chrome first, fall back to Firefox.
driver = ""
try:
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Pass the options through `options=`; the older `chrome_options=` keyword
    # is rejected by current Selenium releases, which made this branch fail
    # and silently fall through to Firefox every time.
    driver = webdriver.Chrome(options=chrome_options)
except Exception:
    # Chrome could not be started; use Firefox instead. `Exception` (rather
    # than a bare except) lets Ctrl-C / SystemExit propagate.
    fire_profile = webdriver.FirefoxOptions()
    fire_profile.add_argument('--disable-gpu')
    fire_profile.add_argument('-headless')  # run Firefox in headless mode
    driver = webdriver.Firefox(options=fire_profile)
# Page source of the most recently loaded dynamic page
source = ""
# URL of the most recent load; jiazai() compares against it to skip reloads
old_url = ""


def result_to_string(result):
    """
    Convert an XPath evaluation result into two display strings.

    All space characters are removed from both strings, and leading/trailing
    newlines are stripped from list results.

    :param result: value produced by an XPath query: a list of elements
        and/or strings, a single string, or a number/boolean.
    :return: tuple ``(text, html)``; for list items that are not strings,
        ``text`` uses the item's ``.text`` and ``html`` uses its serialized
        markup. Unsupported result types give ``("", "")``.
    """
    if isinstance(result, list):
        text_parts = []
        html_parts = []
        for one in result:
            if isinstance(one, str):
                text_parts.append(one)
                html_parts.append(one)
            else:
                # An element with no direct text has ``text`` set to None;
                # treat it as empty instead of failing on str + None.
                text_parts.append(one.text or "")
                html_parts.append(
                    etree.tostring(one, pretty_print=True, encoding="utf-8").decode("utf-8")
                )
        out_str = "\n".join(text_parts)
        out_html = "\n".join(html_parts)
        return out_str.replace(" ", "").strip("\n"), out_html.replace(" ", "").strip("\n")
    elif isinstance(result, str):
        return result.replace(" ", ""), result.replace(" ", "")
    elif isinstance(result, (bool, int, float)):
        # XPath functions such as count() or boolean() yield scalars.
        scalar = str(result)
        return scalar, scalar
    else:
        return "", ""


def _show_source_message(message):
    """Replace the contents of the page-source text box with ``message``."""
    source_text.delete(1.0, 'end')
    source_text.insert("insert", message)


def jiazai():
    """
    Load the URL from the input box in the browser and show its page source.

    Validation errors and load failures are reported in the page-source text
    box. The rendered source is stored in the module-level ``source`` and the
    URL in ``old_url`` only after a successful load, so a failed URL can be
    retried and a later XPath test never runs against stale content.

    :return: None
    """
    global old_url, source
    url = url_text.get().strip()
    if not url.startswith(("http://", "https://")):
        _show_source_message("请检查是否添加http或https前缀！！！")
        return None
    if "." not in url:
        _show_source_message("请输入正确格式的网址！！！")
        return None
    # Same URL already loaded and something is displayed: nothing to do.
    if url == old_url and source_text.get(1.0, 1.1) != "":
        return None
    try:
        driver.get(url)
        page = driver.page_source
    except Exception:
        # Any driver failure is reported to the user; forget the previous
        # page so the same URL can be retried.
        source = ""
        old_url = ""
        _show_source_message("此网址无法解析，请输入其他的网址！！！")
        return None
    if page == '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>':
        # The browser returned an empty document.
        source = ""
        old_url = ""
        _show_source_message("请输入真实的网址！！！")
        return None
    source = page
    old_url = url
    _show_source_message(source)
    return None


def ceshi():
    """
    Evaluate the XPath rule from the input box against the loaded page source.

    Does nothing when no page source is loaded or the rule is empty.
    Otherwise both result boxes are cleared and filled with the matched text
    and matched markup. An invalid XPath expression is reported in the text
    result box instead of raising inside the Tk callback.

    :return: None
    """
    rule = xpath_text.get()
    if source == "" or rule == "":
        return
    result_text.delete(1.0, "end")
    result_source_text.delete(1.0, "end")
    try:
        result = etree.HTML(source).xpath(rule)
    except etree.XPathError as err:
        # Covers both syntax and evaluation errors of the expression.
        result_text.insert("insert", "XPath规则有误：" + str(err))
        return
    out_string, out_html = result_to_string(result)
    result_text.insert("insert", out_string)
    result_source_text.insert("insert", out_html)


# Main window
window = tk.Tk()
window.title('动态网页XPATH验证工具    Designed by Mr.Li')
window.geometry('750x560')

# One frame per row, stacked top to bottom
url_frame = tk.Frame(window)
url_frame.pack()
xpath_frame = tk.Frame(window)
xpath_frame.pack()
result_frame = tk.Frame(window)
result_frame.pack()
html_frame = tk.Frame(window)
html_frame.pack()
source_frame = tk.Frame(window)
source_frame.pack()

# URL input row: label, entry and the "load page" button
url_label = tk.Label(url_frame, text='请输入网址：')
url_label.pack(side=tk.LEFT)
url_text = tk.Entry(url_frame, width=45)
url_text.pack(side=tk.LEFT)
button1var = tk.StringVar()
button1var.set("加载网页")
url_button = tk.Button(url_frame, textvariable=button1var, width=10, command=jiazai)
url_button.pack(side=tk.RIGHT)

# XPath rule input row: label, entry and the "test" button
xpath_label = tk.Label(xpath_frame, text='请输入规则：')
xpath_label.pack(side=tk.LEFT)
xpath_text = tk.Entry(xpath_frame, width=45)
xpath_text.pack(side=tk.LEFT)
xpath_button = tk.Button(xpath_frame, text="测试", width=10, command=ceshi)
xpath_button.pack(side=tk.RIGHT)

# Result rows: matched text, then matched markup
result_label = tk.Label(result_frame, text='结果文字：')
result_label.pack(side=tk.LEFT)
result_text = tk.Text(result_frame, height=9, width=80)
result_text.pack(side=tk.LEFT)
result_source_label = tk.Label(html_frame, text='结果源码：')
result_source_label.pack(side=tk.LEFT)
result_source_text = tk.Text(html_frame, height=9, width=80)
result_source_text.pack(side=tk.LEFT)

# Page source row
source_label = tk.Label(source_frame, text='网页源码：')
source_label.pack(side=tk.LEFT)
source_text = tk.Text(source_frame, height=17, width=80)
source_text.pack(side=tk.LEFT)

try:
    window.mainloop()
finally:
    # Shut the headless browser down when the window closes; otherwise the
    # browser/driver process can be left running after the tool exits.
    driver.quit()

下图为启动界面：

下图为运行结果：

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

基于selenium的动态网页Xpath测试工具

Python实现大麦网抢票的四大关键技术点解析

基於selenium的動態網頁Xpath測試工具

網站反爬方案分析

Windows下網頁連接VNC操作手冊

Kafka中數據通過SpringBoot-WebSocket進行實時數據可視化

Python根據dict動態創建mysql表並寫入數據

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結