Python圖像識別驗證碼處理之opencv切割驗證碼

切割驗證碼 >> 標註驗證碼 >> 訓練模型 >> 識別驗證碼

'''
用opencv做圖像處理，所以需要安裝下面兩個庫
pip3 install opencv-python
pip3 install numpy
'''

import cv2
import matplotlib.pyplot as plt
import time
import numpy as np
import pandas as pd
import os

# Directory that holds the raw captcha images to be split.
filePath1 = r'F://data/yzm/'
# Directory that receives the single-character crops.
filePath2 = r'F://data/yzm_split/'

# Running index used in the names of saved crops; it is reassigned from the
# existing output file names before the first crop is written.
n = 0

# Snapshot both directories once, up front (input first, then output).
fileNames1, fileNames2 = os.listdir(filePath1), os.listdir(filePath2)
def getIndex(fileName):
    fileNameIndex = fileName.split('_')[0]
    return fileNameIndex
def _leadingInt(text):
    """Return the integer formed by the leading ASCII digits of ``text``, or None."""
    digits = ''
    for ch in text:
        if ch not in '0123456789':
            break
        digits += ch
    return int(digits) if digits else None


def _sliceBox(x, y, w, h, parts):
    """Cut the rectangle (x, y, w, h) into ``parts`` equal-width boxes.

    Each box is a 4x2 integer array of corner points ordered top-left,
    top-right, bottom-right, bottom-left. Fractional split positions are
    truncated, and boxes are returned left to right.
    """
    edges = [int(x + w * k / parts) for k in range(parts + 1)]
    return [
        np.array([[left, y], [right, y], [right, y + h], [left, y + h]], dtype=np.intp)
        for left, right in zip(edges[:-1], edges[1:])
    ]


def _splitBoxes(shapes):
    """Turn contour bounding rectangles into per-character boxes.

    ``shapes`` is a list of (x, y, w, h) tuples. The heuristics target a
    four-character captcha:

    * 1 contour  -> split it into 4 equal parts.
    * 2 contours -> if one is at least twice as wide as the other, split the
      wide one into 3 and keep the narrow one; otherwise split each into 2.
    * 3 contours -> split the widest into 2 and keep the others.
    * 4 contours -> keep every contour as is.
    * 5 contours -> drop the narrowest (treated as noise) and keep the rest.
    * more than 5 -> give up on this image (a message is printed once).

    Returns a list of 4x2 integer corner arrays, in contour order.
    """
    boxes = []
    count = len(shapes)
    if count == 0:
        return boxes
    if count > 5:
        print('分割輪廓數大於 5 切換驗證碼!')
        return boxes

    # The widest/narrowest widths are the same for every contour of the
    # image, so compute them once instead of once per contour.
    widths = [w for _, _, w, _ in shapes]
    wMax, wMin = max(widths), min(widths)

    for x, y, w, h in shapes:
        if count == 1:
            parts = 4
        elif count == 2:
            if wMax >= wMin * 2:
                parts = 3 if w == wMax else 1
            else:
                parts = 2
        elif count == 3:
            parts = 2 if w == wMax else 1
        elif count == 4:
            parts = 1
        else:  # count == 5
            if w == wMin:
                continue
            parts = 1
        boxes.extend(_sliceBox(x, y, w, h, parts))
    return boxes


# Continue numbering after the highest index already present in the output
# directory. Indexes are compared as integers (so 10 > 9), names without a
# leading number are ignored, and an empty directory starts from 0. A leading
# underscore is stripped first because the names written below have the form
# '_<n>img_<timestamp>.png'.
n = max(
    (idx for idx in (_leadingInt(name.lstrip('_')) for name in fileNames2) if idx is not None),
    default=0,
)

# 3x3 Gaussian kernel used for denoising; it does not depend on the image.
kernel1 = 1/16*np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]])

for fileName in fileNames1:
    filePathIm = os.path.join(filePath1, fileName)
    im1 = cv2.imread(filePathIm)  # read the captcha image
    if im1 is None:
        # cv2.imread returns None for a missing or unreadable file; passing
        # that on to cvtColor fails with "error: (-215:Assertion failed)".
        print('cannot read image, skipped: {0}'.format(filePathIm))
        continue

    im_gray1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    ret1, im_inv1 = cv2.threshold(im_gray1, 127, 255, cv2.THRESH_BINARY_INV)  # inverse binarisation

    # Denoise by convolving the binary image with the Gaussian kernel, then
    # binarise again.
    im_blur1 = cv2.filter2D(im_inv1, -1, kernel1)
    ret2, im_inv2 = cv2.threshold(im_blur1, 127, 255, cv2.THRESH_BINARY)

    # Extract the outer contours of the binary image. OpenCV 3.x returns
    # (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); taking the last two items works for all of them.
    contours, hierarchy = cv2.findContours(im_inv2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    cv2.drawContours(im1, contours, -1, (0, 255, 0), 1)  # all contours, green, 1px
    cv2.namedWindow("im1_drawContour")
    cv2.imshow("im1_drawContour", im1)
    cv2.waitKey(50)

    # Bounding rectangle (x, y, w, h) of every contour, also drawn in red.
    shapes = [cv2.boundingRect(contour) for contour in contours]
    for x, y, w, h in shapes:
        cv2.rectangle(im1, (x, y), (x + w, y + h), (0, 0, 255), 2)

    result = _splitBoxes(shapes)
    if len(result) != 4:
        print('len(result) != 4 .....\nlen(result) != 4 .....\nlen(result) != 4 .....')

    for box in result:
        # Draw on the image that is actually displayed, so the split box is
        # visible in the preview window.
        cv2.drawContours(im1, [box], 0, (0, 0, 255), 1)
        cv2.namedWindow("im1")
        cv2.imshow("im1", im1)
        cv2.waitKey(100)  # 100 milliseconds
        time.sleep(3)
        cv2.destroyAllWindows()

        roi = im_inv2[box[0][1]:box[3][1], box[0][0]:box[1][0]]
        if roi.size == 0:
            # A very narrow contour can yield a zero-width slice, which
            # cv2.resize rejects.
            print('empty character region, skipped: {0}'.format(filePathIm))
            continue
        # Normalise every character crop to 30x30 pixels.
        roiStd = cv2.resize(roi, (30, 30))

        # The running index plus a timestamp keeps file names unique.
        n += 1
        fileSavePath = '{0}_{1}img_{2}.png'.format(filePath2, n, time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())))
        print(fileSavePath)
        if not cv2.imwrite(fileSavePath, roiStd):
            print('failed to write: {0}'.format(fileSavePath))

Python圖像識別驗證碼處理之opencv切割驗證碼

這個網絡爬蟲代碼，拿到數據之後如何存到csv文件中去？

.NET開源強大、易於使用的緩存框架 - FusionCache

面試，有時候是個運氣活

數據庫中表建立索引的優缺點

NLP之文本預處理

spark環境idea與sbt的配置

用本機VM虛擬機作爲網站的服務器

服務器搭建本地局域網下載文件（sz下載大文件總是出問題）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結