(五)Python3 pyqt5網頁截圖(長圖)的方法和兼容、未完善問題

廢話不說，直接上代碼:

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path

class ScreenShotMerge():
    """Collect viewport screenshots of one page and stitch them into one image.

    Screenshots are handed over one at a time through ``add_im`` and pasted
    top to bottom by ``image_merge``. All files are written to a ``pictures``
    directory located next to this source file.
    """

    # Maximum number of screenshots kept for stitching (guards against MemoryError).
    MAX_IMAGES = 15
    # Pixels trimmed from the right edge when several screenshots are stitched.
    # NOTE(review): presumably the width of the vertical scrollbar - confirm.
    RIGHT_TRIM = 15
    # Characters that are illegal in Windows file names are replaced by '_'.
    _FILENAME_FIXES = str.maketrans({char: '_' for char in '\\:*"<>|'})

    def __init__(self, page, over_flow_size,url):
        """Prepare an empty collection for the page at ``url``.

        Args:
            page: number of full-viewport screenshots that precede the final
                partial one.
            over_flow_size: height in pixels of the strip kept from the final
                partial screenshot.
            url: address of the page; only used to derive the output file name.
        """
        self.im_list = []
        self.page = page
        self.over_flow_size = over_flow_size
        self.get_path(url)

    def get_path(self,url):
        """Create the output directory and derive ``save_path`` from ``url``.

        The file name is the text after the last '/', '?' or '%' of the URL
        followed by ``save.png``.
        """
        self.root_path = Path(__file__).parent.joinpath(r'pictures')
        self.root_path.mkdir(parents=True, exist_ok=True)
        stem = url.replace("?",'/').replace("%","/").split('/')[-1]
        # Remaining characters that cannot appear in a file name would make
        # the final save fail, so neutralise them.
        stem = stem.translate(self._FILENAME_FIXES)
        self.save_path = self.root_path.joinpath(stem + "save.png")

    def add_im(self, path):
        """Load the screenshot stored at ``path`` and queue it for stitching.

        The screenshot whose index equals ``page`` is the final partial one and
        is cropped to its bottom strip. Screenshots beyond ``MAX_IMAGES`` are
        ignored.
        """
        if len(self.im_list) >= self.MAX_IMAGES:
            # Over the cap: do not even open the file.
            return
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            # Copy the pixels into memory and release the file: the caller
            # overwrites the same temporary file for every screenshot.
            with Image.open(path) as shot:
                im = shot.copy()
        im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(max_width, total_height)`` of the queued screenshots.

        Returns ``(0, 0)`` when nothing has been queued.
        """
        max_width = max((img.size[0] for img in self.im_list), default=0)
        total_height = sum(img.size[1] for img in self.im_list)
        return max_width, total_height

    def image_merge(self, ):
        """Paste the queued screenshots top to bottom and save the result.

        A single screenshot keeps its full width; several screenshots are
        trimmed by ``RIGHT_TRIM`` pixels on the right. Nothing is written when
        no screenshot was queued.
        """
        if not self.im_list:
            print("頁面截取失敗")
            return
        canvas_width, canvas_height = self.get_new_size()
        if len(self.im_list) > 1:
            canvas_width = max(1, canvas_width - self.RIGHT_TRIM)
        # An RGB fill must be a tuple; a bare int does not produce white.
        new_img = Image.new('RGB', (canvas_width, canvas_height), (255, 255, 255))
        offset = 0
        for img in self.im_list:
            new_img.paste(img, (0, offset))
            offset += img.size[1]
        new_img.save(self.save_path)
        print('截圖成功:', self.save_path)

    def reedit_image(self, path):
        """Return the bottom ``over_flow_size`` pixels of the image at ``path``."""
        with Image.open(path) as shot:
            width, height = shot.size
            region = shot.crop((0, height - self.over_flow_size, width, height))
            # Make sure the pixels are in memory before the file is closed.
            region.load()
        return region


class MainWindow(QMainWindow):
    """Frameless window that loads a page and screenshots it viewport by viewport.

    After the page has loaded, a timer alternates between grabbing the browser
    widget and scrolling down by one window height. Once all parts are
    captured the screenshots are merged and the window closes itself.
    """

    # Maximum number of screenshots taken for one page.
    MAX_SHOTS = 15

    def __init__(self, url,parent=None):
        """Configure the window; ``url`` is later used to name the output file."""
        super(MainWindow, self).__init__(parent)

        self.count=0  # number of screenshots taken so far
        self.timer = None  # created by check_page once the page has loaded

        self.setWindowTitle('頁面截圖')
        self.temp_height = 0
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)  # disable maximise / minimise
        # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep the window on top
        self.setWindowFlag(Qt.FramelessWindowHint, True)  # frameless window
        self.url=url

    def urlScreenShot(self, url):
        """Start loading ``url`` in an embedded browser that fills the window."""
        self.browser = QWebEngineView()
        self.browser.load(QUrl(url))
        geometry = self.chose_screen()
        self.setGeometry(geometry)
        self.browser.loadFinished.connect(self.check_page)
        self.setCentralWidget(self.browser)

    def get_page_size(self):
        """Return ``(width, height)`` of the page contents and cache both values."""
        size = self.browser.page().contentsSize()
        self.set_height = size.height()
        self.set_width = size.width()
        return size.width(), size.height()

    def chose_screen(self):
        """Return a 1920x1080 rectangle on the first screen larger than that.

        Falls back to a rectangle at the origin when no screen qualifies.
        """
        width, height = 1920, 1080
        desktop = QApplication.desktop()
        for index in range(desktop.screenCount()):
            rect = desktop.availableGeometry(index)
            if rect.width() > width and rect.height() > height:
                return QRect(rect.left(), rect.top(), width, height)
        return QRect(0, 0, width, height)

    def check_page(self):
        """Work out how many screenshots are needed and start the capture timer."""
        if self.timer is not None:
            # loadFinished fired again: a capture is already running.
            return
        _, p_height = self.get_page_size()
        self.page, self.over_flow_size = divmod(p_height, self.height())
        if self.page == 0:
            # The whole page fits into one viewport: one screenshot is enough,
            # so there is no leftover strip to append.
            self.page = 1
            self.over_flow_size = 0
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size,self.url)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

    def exe_command(self):
        """Timer tick: capture the next part of the page or finish.

        While full viewports remain, take a screenshot and scroll down. Then
        take one more screenshot for the leftover strip, if any. When nothing
        is left, or ``MAX_SHOTS`` screenshots were taken, merge and close.
        """
        can_shoot = self.count < self.MAX_SHOTS
        if can_shoot and self.page > 0:
            self.screen_shot()
            self.run_js()
        elif can_shoot and self.page == 0 and self.over_flow_size > 0:
            self.screen_shot()
        else:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
            return
        self.page -= 1

    def run_js(self):
        """Scroll the page down by one window height."""
        script = """
            var scroll = function (dHeight) {
            var t = document.documentElement.scrollTop
            var h = document.documentElement.scrollHeight
            dHeight = dHeight || 0
            var current = t + dHeight
            if (current > h) {
                window.scrollTo(0, document.documentElement.clientHeight)
              } else {
                window.scrollTo(0, current)
              }
            }
        """
        command = script + '\n scroll({})'.format(self.height())
        self.browser.page().runJavaScript(command)

    def screen_shot(self):
        """Grab the browser widget, store it as ``temp.png`` and queue it."""
        self.count += 1
        screen = QApplication.primaryScreen()
        pix = screen.grabWindow(int(self.browser.winId()))
        name = r'{}/temp.png'.format(self.ssm.root_path)
        if not pix.save(name):
            # Without a fresh file the previous screenshot would be queued again.
            print("頁面截取失敗")
            return
        self.ssm.add_im(name)


# One QApplication is shared by every get_picture() call. It lives at module
# level because creating it inside the function allowed only one screenshot.
# An already existing instance is reused instead of creating a second one.
app = QApplication.instance() or QApplication(sys.argv)


def get_picture(url):
    """Take a long screenshot of the page at ``url``.

    Opens a ``MainWindow`` for the page and runs the Qt event loop until it
    returns. Prints a failure message when an ``OSError`` is raised.
    """
    try:
        win = MainWindow(url)
        win.urlScreenShot(url)
        win.show()
        # Blocks while the window captures the page.
        # NOTE(review): relies on Qt leaving the loop when the window closes - confirm.
        app.exec_()
    except OSError:
        print("頁面截取失敗")

可能出現的問題

No module named PIL

執行pip install pillow 安裝即可(注意不是pip install PIL)，如果還有這個提示，嘗試重新啓動IDLE。

Memory Error

爲了避免出現這類問題，我將圖片拼接的數量限制在15個，若要修改或者對圖片進行進一步的處理，可修改此部分及截圖部分:

def add_im(self, path):
    """Load the screenshot stored at ``path`` and queue it for stitching.

    The screenshot whose index equals ``self.page`` is the final partial one
    and is cropped by ``reedit_image``. At most 15 screenshots are kept.
    """
    max_images = 15  # cap on stitched screenshots; guards against MemoryError
    if len(self.im_list) >= max_images:
        # Over the cap: do not even open the file.
        return
    if len(self.im_list) == self.page:
        im = self.reedit_image(path)
    else:
        # Copy the pixels into memory and release the file: the same
        # temporary file is overwritten for every screenshot.
        with Image.open(path) as shot:
            im = shot.copy()
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)

報錯Release of profile requested but WebEnginePage still not deleted. Expect troubles !

經過我的修改後，在我的函數中不會出現此問題。若修改代碼出現此問題，可以參考:https://stackoverflow.com/questions/57789171/i-get-release-of-profile-requested-but-webenginepage-still-not-deleted-expect（英語）
https://teratail.com/questions/193394 （日語）

截圖過程中出現 can not open file類錯誤

爲了區分各種網頁的圖片，我本想將圖片名保存成url+".png"的形式，奈何在圖片拼接的過程中，出現了這類錯誤，於是我將圖片名保存成網頁最後一個'/'後面的字符串+'save.png'的格式（經過測試'?'和'%'有時候也會出錯，於是也將其統一替換成'/'）。代碼如下所示：

def get_path(self,url):
    """Create the ``pictures`` directory and derive ``save_path`` from ``url``.

    The file name is the text after the last '/', '?' or '%' of the URL
    followed by ``save.png``.
    """
    self.root_path = Path(__file__).parent.joinpath(r'pictures')
    self.root_path.mkdir(parents=True, exist_ok=True)
    stem = url.replace("?",'/').replace("%","/").split('/')[-1]
    # Remaining characters that cannot appear in a file name would make the
    # final save fail, so replace them with '_'.
    stem = stem.translate(str.maketrans({char: '_' for char in '\\:*"<>|'}))
    self.save_path = self.root_path.joinpath(stem + "save.png")

如有更好的建議歡迎提出

截圖一次後退出

爲了實現多次截圖，需要將app定義爲全局（模塊層級）變量，不能定義在函數內部(定義在內部只能給一個網頁截圖)：

# One QApplication is shared by every get_picture() call. It lives at module
# level because creating it inside the function allowed only one screenshot.
# An already existing instance is reused instead of creating a second one.
app = QApplication.instance() or QApplication(sys.argv)


def get_picture(url):
    """Take a long screenshot of the page at ``url``.

    Opens a ``MainWindow`` for the page and runs the Qt event loop until it
    returns. Prints a failure message when an ``OSError`` is raised.
    """
    try:
        win = MainWindow(url)
        win.urlScreenShot(url)
        win.show()
        # Blocks while the window captures the page.
        # NOTE(review): relies on Qt leaving the loop when the window closes - confirm.
        app.exec_()
    except OSError:
        print("頁面截取失敗")

報錯類似於[10900:16264:0831/013951.389:ERROR:permission_manager_qt.cpp(82)] NOT IMPLEMENTEDUnsupported permission type: 13

簡單來說：就是某些功能Qt開發者並沒有實現

The problem has nothing to do with program execution permissions.

Qt WebEngine is written on the basis of chromium so the Qt developers do not currently implement all the functionalities but will add more functionalities little by little. In this case, the permissions available by chromium are:

// Permission types as listed in the quoted answer. Each enumerator carries an
// explicit numeric value; the commented-out entry leaves value 2 unused.
enum class PermissionType {
  MIDI_SYSEX = 1,
  // PUSH_MESSAGING = 2,
  NOTIFICATIONS = 3,
  GEOLOCATION = 4,
  PROTECTED_MEDIA_IDENTIFIER = 5,
  MIDI = 6,
  DURABLE_STORAGE = 7,
  AUDIO_CAPTURE = 8,
  VIDEO_CAPTURE = 9,
  BACKGROUND_SYNC = 10,
  FLASH = 11,
  SENSORS = 12,
  ACCESSIBILITY_EVENTS = 13,  // the value named in the "Unsupported permission type: 13" message
  CLIPBOARD_READ = 14,
  CLIPBOARD_WRITE = 15,
  PAYMENT_HANDLER = 16,
  BACKGROUND_FETCH = 17,
  IDLE_DETECTION = 18,
  PERIODIC_BACKGROUND_SYNC = 19,
  WAKE_LOCK_SCREEN = 20,
  WAKE_LOCK_SYSTEM = 21,

  // Always keep this at the end.
  NUM,
};

詳細可參考：https://stackoverflow.com/questions/57733039/errorpermission-manager-qt-cpp82-unsupported-permission-type-13
這裏講的很清楚

(五)Python3 pyqt5網頁截圖(長圖)的方法和兼容、未完善問題

可能出現的問題

No module named PIL

Memory Error

報錯Release of profile requested but WebEnginePage still not deleted. Expect troubles !

截圖過程中出現 can not open file類錯誤

截圖一次後退出

報錯類似於[10900:16264:0831/013951.389:ERROR:permission_manager_qt.cpp(82)] NOT IMPLEMENTEDUnsupported permission type: 13

「Pygors跨平臺GUI」1：Pygors跨平臺GUI應用研究

[轉帖]

python列出centos7內存使用前50的進程信息

「Pygors跨平臺GUI」2：安裝MinGW-w64、MSYS2還是WSL2

一鍵自動化博客發佈工具,用過的人都說好(掘金篇)

通義千問 2.5 “客串” ChatGPT4，你分的清嗎？

Garnet：微軟官方基於.NET開源的高性能分佈式緩存存儲數據庫

Flink執行圖

Java響應式編程

評估統計算法在銀行僞造鈔票檢測中的價值

國慶七天更

（三）Python3 NLTK（Natural Language Toolkit）安裝和下載的常見問題

C++ 大數相乘算法(類似於列豎式）

（二）Python3 網頁正文提取的各種方法和技巧

（一）Python3 爬蟲基本知識和爬蟲常用的庫和方法

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結