(五)Python3 pyqt5网页截图(长图)的方法和兼容、未完善问题

废话不说，直接上代码:

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path

class ScreenShotMerge():
    """Collect viewport screenshots of one page and stitch them into a tall PNG.

    Screenshots are added one at a time with :meth:`add_im`; once all of them
    are in, :meth:`image_merge` pastes them top to bottom and writes the result
    to ``save_path`` inside a ``pictures`` directory next to this file.
    """

    # Upper bound on the number of stitched tiles; guards against MemoryError.
    MAX_IMAGES = 15
    # Characters that can survive the URL split in get_path() but are rejected
    # in file names on Windows.
    _UNSAFE_NAME_CHARS = '\\:*"<>|'

    def __init__(self, page, over_flow_size, url):
        """Prepare an empty tile list and the output path.

        Args:
            page: Number of full-height screenshots expected before the final,
                partial one.
            over_flow_size: Height in pixels of the final partial screenshot;
                only that many rows are kept from its bottom edge.
            url: Address of the captured page, used to derive the file name.
        """
        self.im_list = []
        self.page = page
        self.over_flow_size = over_flow_size
        self.get_path(url)

    def get_path(self, url):
        """Create the output directory and derive ``save_path`` from ``url``.

        The file name is the text after the last ``/`` of the URL (``?`` and
        ``%`` count as separators too) followed by ``save.png``.  Any remaining
        character that is illegal in a Windows file name becomes ``_``.
        """
        self.root_path = Path(__file__).parent.joinpath(r'pictures')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        self.root_path.mkdir(parents=True, exist_ok=True)
        stem = url.replace("?", '/').replace("%", "/").split('/')[-1]
        stem = stem.translate(
            str.maketrans({ch: '_' for ch in self._UNSAFE_NAME_CHARS}))
        self.save_path = self.root_path.joinpath(stem + "save.png")

    def add_im(self, path):
        """Load the screenshot at ``path`` and append it to the tile list.

        The tile that follows ``page`` full screenshots is the partial last
        page and is cropped to its bottom ``over_flow_size`` rows.  Tiles beyond
        ``MAX_IMAGES`` are ignored.
        """
        # Check the limit first so no file is opened for a tile that is dropped.
        if len(self.im_list) >= self.MAX_IMAGES:
            return
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            im = Image.open(path)
            # Image.open is lazy; read the pixels now because the caller reuses
            # the same temp file for the next screenshot.
            im.load()
        im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(max_width, total_height)`` over all collected tiles."""
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self, ):
        """Stitch the collected tiles vertically and save them to ``save_path``.

        With no tiles there is nothing to write: a failure message is printed
        and the method returns instead of raising ``IndexError``.
        """
        if not self.im_list:
            print("页面截取失败")
            return
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            # White canvas.  A bare integer 255 in RGB mode is red, not white.
            # NOTE(review): the 15 px cut presumably removes the vertical
            # scrollbar strip on the right edge - confirm.
            new_img = Image.new(
                'RGB', (max_width - 15, total_height), (255, 255, 255))
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
            print(self.save_path)
        else:
            # A single tile is kept at full width and only converted to RGB.
            obj = self.im_list[0]
            width, height = obj.size
            new_img = Image.new('RGB', (width, height), (255, 255, 255))
            new_img.paste(obj, (0, 0, width, height))
        new_img.save(self.save_path)
        print('截图成功:', self.save_path)

    def reedit_image(self, path):
        """Open ``path`` and return only its bottom ``over_flow_size`` rows."""
        obj = Image.open(path)
        width, height = obj.size
        # Box is (left, top, right, bottom): full width, bottom strip only.
        box = (0, height - self.over_flow_size, width, height)
        return obj.crop(box)


class MainWindow(QMainWindow):
    """Frameless window that loads a URL and captures it as one long screenshot.

    Once the page has finished loading, a 400 ms timer alternates between
    grabbing the browser widget and scrolling down by one window height.  The
    tiles go to ``ScreenShotMerge``; when the last one is taken they are
    stitched and the window closes itself.
    """

    # Hard cap on screenshots per page (same value as the stitching limit).
    MAX_SHOTS = 15

    def __init__(self, url, parent=None):
        """Configure the window; loading starts in :meth:`urlScreenShot`.

        Args:
            url: Address of the page to capture; also used to name the output.
            parent: Optional parent widget.
        """
        super(MainWindow, self).__init__(parent)

        self.count = 0  # number of screenshots taken so far

        self.setWindowTitle('页面截图')
        self.temp_height = 0
        # Disable the minimize / maximize buttons.
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
        # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep window on top
        # No window frame.
        self.setWindowFlag(Qt.FramelessWindowHint, True)
        self.url = url

    def urlScreenShot(self, url):
        """Create the browser widget, start loading ``url`` and size the window."""
        self.browser = QWebEngineView()
        # Connect before loading so the finished signal cannot be missed.
        self.browser.loadFinished.connect(self.check_page)
        self.browser.load(QUrl(url))
        geometry = self.chose_screen()
        self.setGeometry(geometry)
        self.setCentralWidget(self.browser)

    def get_page_size(self):
        """Return ``(width, height)`` of the page contents and cache both values."""
        size = self.browser.page().contentsSize()
        self.set_height = size.height()
        self.set_width = size.width()
        return size.width(), size.height()

    def chose_screen(self):
        """Return the 1920x1080 rectangle the window should occupy.

        The first screen whose available area is strictly larger than
        1920x1080 in both dimensions is used; otherwise the rectangle is placed
        at the origin.
        """
        width, height = 1920, 1080
        desktop = QApplication.desktop()
        for index in range(desktop.screenCount()):
            rect = desktop.availableGeometry(index)
            if rect.width() > width and rect.height() > height:
                return QRect(rect.left(), rect.top(), width, height)
        return QRect(0, 0, width, height)

    def check_page(self):
        """Work out how many screenshots are needed and start the capture timer.

        ``page`` is the number of full-window screenshots and
        ``over_flow_size`` the height of the remaining partial one.
        """
        p_width, p_height = self.get_page_size()
        # contentsSize() yields floats; keep the page counter an integer.
        self.page, self.over_flow_size = divmod(int(p_height), self.height())
        if self.page == 0:
            # The content is shorter than the window: one screenshot shows all
            # of it, so there is no extra partial page to append.
            self.page = 1
            self.over_flow_size = 0
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size, self.url)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

    def exe_command(self):
        """Timer tick: take the next screenshot, or finish and merge.

        While full pages remain, grab one and scroll.  At ``page == 0`` grab
        the partial last page if there is one.  When everything is captured,
        or ``MAX_SHOTS`` screenshots were taken, stop the timer, stitch the
        tiles and close the window.
        """
        if self.page < 0 or self.count >= self.MAX_SHOTS:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
            return

        if self.page > 0:
            self.screen_shot()
            self.run_js()
        elif self.over_flow_size > 0:
            self.screen_shot()
        self.page -= 1

    def run_js(self):
        """Scroll the page down by one window height via injected JavaScript."""
        script = """
            var scroll = function (dHeight) {
            var t = document.documentElement.scrollTop
            var h = document.documentElement.scrollHeight
            dHeight = dHeight || 0
            var current = t + dHeight
            if (current > h) {
                window.scrollTo(0, document.documentElement.clientHeight)
              } else {
                window.scrollTo(0, current)
              }
            }
        """
        command = script + '\n scroll({})'.format(self.height())
        self.browser.page().runJavaScript(command)

    def screen_shot(self):
        """Grab the browser widget into ``temp.png`` and register it as a tile."""
        screen = QApplication.primaryScreen()
        winid = self.browser.winId()
        pix = screen.grabWindow(int(winid))
        # The same temp file is reused for every screenshot.
        name = r'{}/temp.png'.format(self.ssm.root_path)
        pix.save(name)
        self.ssm.add_im(name)
        self.count += 1


# One QApplication is shared by every get_picture() call.  It has to live at
# module level: creating it inside the function allows only a single capture.
# An already existing instance is reused instead of creating a second one.
app = QApplication.instance() or QApplication(sys.argv)


def get_picture(url):
    """Capture ``url`` as a long screenshot; blocks until the window closes.

    Args:
        url: Address of the page to capture.

    An ``OSError`` raised during the capture is reported with a printed
    message instead of being propagated.
    NOTE(review): exceptions raised inside Qt slots may not reach this
    handler - confirm.
    """
    try:
        win = MainWindow(url)
        win.urlScreenShot(url)
        win.show()
        # Run the event loop until the window closes itself after merging.
        app.exit(app.exec_())
    except OSError:
        print("页面截取失败")

可能出现的问题

No module named PIL

执行 pip install pillow 安装即可（注意不是 pip install PIL）；如果仍有这个提示，尝试重新启动 IDLE。

Memory Error

为了避免出现这类问题，我将图片拼接的数量限制在15个，若要修改或者对图片进行进一步的处理，可修改此部分及截图部分:

def add_im(self, path):
    """Load the screenshot at ``path`` and append it to ``self.im_list``.

    The tile that follows ``self.page`` full screenshots is cropped with
    ``reedit_image``.  At most 15 tiles are kept, to avoid MemoryError.
    """
    # Check the limit first so no file is opened for a tile that is dropped.
    if len(self.im_list) >= 15:
        return
    if len(self.im_list) == self.page:
        im = self.reedit_image(path)
    else:
        im = Image.open(path)
        # Image.open is lazy; read the pixels before the file is reused.
        im.load()
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)

报错Release of profile requested but WebEnginePage still not deleted. Expect troubles !

经过我的修改后，在我的函数中不会出现此问题。若修改代码出现此问题，可以参考:https://stackoverflow.com/questions/57789171/i-get-release-of-profile-requested-but-webenginepage-still-not-deleted-expect（英语）
https://teratail.com/questions/193394 （日语）

截图过程中出现 can not open file类错误

为了区分各种网页的图片，我本想将图片名保存成 url+".png" 的形式，奈何在图片拼接的过程中出现了这类错误，于是我将图片名保存成网址中最后一个 '/' 后面的字符串 + 'save.png' 的格式（经过测试，'?' 和 '%' 有时候也会出错，于是将它们也统一替换成 '/'）。代码如下所示：

def get_path(self, url):
    """Create the ``pictures`` directory and derive ``self.save_path`` from ``url``.

    The file name is the text after the last ``/`` of the URL (``?`` and ``%``
    count as separators too) followed by ``save.png``.  Any remaining character
    that is illegal in a Windows file name becomes ``_``.
    """
    self.root_path = Path(__file__).parent.joinpath(r'pictures')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    self.root_path.mkdir(parents=True, exist_ok=True)
    stem = url.replace("?", '/').replace("%", "/").split('/')[-1]
    stem = stem.translate(str.maketrans({ch: '_' for ch in '\\:*"<>|'}))
    self.save_path = self.root_path.joinpath(stem + "save.png")

如有更好的建议欢迎提出

截图一次后退出

为了实现多次截图，需要将app定义为global，不能定义在函数内部(定义在内部只能给一个网页截图)：

# One QApplication is shared by every get_picture() call.  It has to live at
# module level: creating it inside the function allows only a single capture.
# An already existing instance is reused instead of creating a second one.
app = QApplication.instance() or QApplication(sys.argv)


def get_picture(url):
    """Capture ``url`` as a long screenshot; blocks until the window closes.

    An ``OSError`` raised during the capture is reported with a printed
    message instead of being propagated.
    """
    try:
        win = MainWindow(url)
        win.urlScreenShot(url)
        win.show()
        # Run the event loop until the window closes itself after merging.
        app.exit(app.exec_())
    except OSError:
        print("页面截取失败")

报错类似于[10900:16264:0831/013951.389:ERROR:permission_manager_qt.cpp(82)] NOT IMPLEMENTEDUnsupported permission type: 13

简单来说：就是某些功能 Qt 的开发者还没有实现。

The problem has nothing to do with program execution permissions.

Qt WebEngine is written on the basis of chromium so the Qt developers do not currently implement all the functionalities but will add more functionalities little by little. In this case, the permissions available by chromium are:

// Permission types as listed for Chromium in the quoted answer. Presumably the
// number printed in "Unsupported permission type: N" is one of these values.
enum class PermissionType {
  MIDI_SYSEX = 1,
  // PUSH_MESSAGING = 2,
  NOTIFICATIONS = 3,
  GEOLOCATION = 4,
  PROTECTED_MEDIA_IDENTIFIER = 5,
  MIDI = 6,
  DURABLE_STORAGE = 7,
  AUDIO_CAPTURE = 8,
  VIDEO_CAPTURE = 9,
  BACKGROUND_SYNC = 10,
  FLASH = 11,
  SENSORS = 12,
  ACCESSIBILITY_EVENTS = 13,  // matches the "type: 13" in the error quoted in this article
  CLIPBOARD_READ = 14,
  CLIPBOARD_WRITE = 15,
  PAYMENT_HANDLER = 16,
  BACKGROUND_FETCH = 17,
  IDLE_DETECTION = 18,
  PERIODIC_BACKGROUND_SYNC = 19,
  WAKE_LOCK_SCREEN = 20,
  WAKE_LOCK_SYSTEM = 21,

  // Always keep this at the end.
  NUM,
};

详细可参考：https://stackoverflow.com/questions/57733039/errorpermission-manager-qt-cpp82-unsupported-permission-type-13
这里讲的很清楚

(五)Python3 pyqt5网页截图(长图)的方法和兼容、未完善问题

可能出现的问题

No module named PIL

Memory Error

报错Release of profile requested but WebEnginePage still not deleted. Expect troubles !

截图过程中出现 can not open file类错误

截图一次后退出

报错类似于[10900:16264:0831/013951.389:ERROR:permission_manager_qt.cpp(82)] NOT IMPLEMENTEDUnsupported permission type: 13

redis的key乱码问题和值自增问题

CORS error 但是 status code 是200 OK

一个开源且全面的C#算法实战教程

一款.NET开源、功能强大、跨平台的绘图库 - OxyPlot

压缩上传的GPU数据的方案

使用skopeo同步镜像

國慶七天更

（三）Python3 NLTK（Natural Language Toolkit）安裝和下載的常見問題

C++ 大數相乘算法(類似於列豎式）

（二）Python3 網頁正文提取的各種方法和技巧

（一）Python3 爬蟲基本知識和爬蟲常用的庫和方法

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結