文檔掃描OCR（Python3.7）

假期水一波文章，介紹一個小項目：基於pytesseract實現圖片OCR掃描。簡單來講，就是有一張照片（可能是拍攝的PPT/發票/文檔），我們先識別出有效區域（文檔區域），然後識別該區域中的文字。
實驗環境：Python3.7+OpenCV3.4
測試圖片（小票圖片源自必應）：

需要注意的是本次項目針對有效區域爲四邊形的圖片，所以在挑選測試圖片的時候需要注意下。
環境準備好我們就可以愉快地碼代碼了！

輪廓檢測

本部分需要檢測出小票的區域。具體操作：灰度化->高斯濾波->Canny邊緣檢測->輪廓檢測->按面積排序取最大的前5個輪廓->找到第一個近似爲四邊形的輪廓

import cv2
#檢測輪廓
def find_contours(imgPath, show=True):
    """Locate the four-cornered outline of the document in a photo.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> external
    contours -> keep the 5 largest by area -> first one whose polygon
    approximation has exactly 4 vertices.

    The outline is drawn in red on the loaded image and the result is
    saved to ``./test_contours.jpg``.

    Args:
        imgPath: Path of the image file to analyse.
        show: When true, display the annotated image in a window and block
            until a key is pressed.

    Returns:
        The 4-vertex polygon produced by ``cv2.approxPolyDP`` for the
        selected contour.

    Raises:
        OSError: If the image cannot be read from ``imgPath``.
        ValueError: If none of the 5 largest contours approximates to a
            quadrilateral.
    """
    image = cv2.imread(imgPath)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {imgPath!r}")

    # Pre-processing: grayscale (this is not binarisation), smooth, edges.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    found = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); pick the contour list in either case.
    contours = found[0] if len(found) == 2 else found[1]

    # Only the 5 largest regions are considered as document candidates.
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    for candidate in candidates:
        peri = cv2.arcLength(candidate, True)  # perimeter of the closed curve
        # Tolerance is 2% of the perimeter: a larger value collapses the
        # contour to fewer vertices, a smaller one keeps more of them.
        approx = cv2.approxPolyDP(candidate, 0.02 * peri, True)
        if len(approx) == 4:
            screenCnt = approx
            break
    else:
        raise ValueError(
            f"no four-sided contour found among the largest contours of {imgPath!r}"
        )

    # -1 draws every contour in the list (here just the one), in red (BGR).
    img = cv2.drawContours(image, [screenCnt], -1, (0, 0, 255), 2)
    cv2.imwrite('./test_contours.jpg', img)
    if show:
        cv2.imshow("img", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return screenCnt
#測試
if __name__ == "__main__":
    # Smoke test: detect the document outline in the sample photo.
    contour = find_contours("./test.jpg")

結果：

變換

剛剛圈出來的區域雖然是一個四邊形，但並不正，這一步就是通過透視變換將找到的區域提取出來並擺正。

def transform(imgPath, contour, show=True):
    """Rectify the document region of a photo and binarise it.

    The region delimited by ``contour`` is warped to a straight, top-down
    view, converted to grayscale and thresholded. The result is saved to
    ``./test_transform.jpg``.

    Args:
        imgPath: Path of the original image file.
        contour: The four corner points of the document; it is reshaped to
            ``(4, 2)`` before being passed to ``four_point_transform``.
        show: When true, display the result in a window and block until a
            key is pressed.

    Returns:
        The thresholded image of the rectified region.

    Raises:
        OSError: If the image cannot be read from ``imgPath``.
    """
    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {imgPath!r}")

    # Perspective transform: straighten the quadrilateral into a rectangle.
    warped = four_point_transform(img, contour.reshape(4, 2))
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)  # grayscale
    # THRESH_BINARY: pixels brighter than 150 become 255 (white), the rest 0.
    ref = cv2.threshold(warped, 150, 255, cv2.THRESH_BINARY)[1]

    cv2.imwrite('./test_transform.jpg', ref)
    if show:
        cv2.imshow("img", ref)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return ref

if __name__ == "__main__":
    imgPath = "./test.jpg"
    # Find the document outline first, then rectify and threshold that region.
    contour = find_contours(imgPath)
    transform(imgPath, contour)

結果圖：

OCR識別

使用pytesseract，安裝參考：關於windows下pytesseract的安裝
需要注意一點tesseract要安裝3.0以上版本，博主直接安裝的目前最新的。

if __name__ == "__main__":
    imgPath = "./test.jpg"
    # Full pipeline: outline detection -> rectification -> OCR.
    contour = find_contours(imgPath)
    imgref = transform(imgPath, contour)
    text = pytesseract.image_to_string(imgref,lang='chi_sim')  # recognise with the Simplified Chinese language data
    print(text)

識別結果：

慶認技廣智站瞧廳
區迎光臨做起撤個喬妨逢下用由|
節揪類型: 結哩
位間寺:人
此臺是:229
項目必稱 人全 從星人魚燦
撲 一      1 5.00
| 5
 | 機 呂
找才:0.00
01574720 19:25:08
交飲行業伍墳背銷縣務疝
窗諜區飢旗路1671 0416
,MTG .NET
3; 400-811-5533
掃一掃，手機支付
弛單尋: 1133981
和
貳多領; 10，
讓 0.史
提供價: 10.00

效果有點不忍直視，需要自己訓練tesseract，後面再更新~

完整代碼

import cv2
from imutils.perspective import four_point_transform
import pytesseract

def find_contours(imgPath, show=False):
    """Locate the four-cornered outline of the document in a photo.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> external
    contours -> keep the 5 largest by area -> first one whose polygon
    approximation has exactly 4 vertices.

    The outline is drawn in red on the loaded image and the result is
    saved to ``./test_contours.jpg``.

    Args:
        imgPath: Path of the image file to analyse.
        show: When true, also display the annotated image in a window and
            block until a key is pressed. Off by default.

    Returns:
        The 4-vertex polygon produced by ``cv2.approxPolyDP`` for the
        selected contour.

    Raises:
        OSError: If the image cannot be read from ``imgPath``.
        ValueError: If none of the 5 largest contours approximates to a
            quadrilateral.
    """
    image = cv2.imread(imgPath)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {imgPath!r}")

    # Pre-processing: grayscale (this is not binarisation), smooth, edges.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    found = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); pick the contour list in either case.
    contours = found[0] if len(found) == 2 else found[1]

    # Only the 5 largest regions are considered as document candidates.
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    for candidate in candidates:
        peri = cv2.arcLength(candidate, True)  # perimeter of the closed curve
        # Tolerance is 2% of the perimeter: a larger value collapses the
        # contour to fewer vertices, a smaller one keeps more of them.
        approx = cv2.approxPolyDP(candidate, 0.02 * peri, True)
        if len(approx) == 4:
            screenCnt = approx
            break
    else:
        raise ValueError(
            f"no four-sided contour found among the largest contours of {imgPath!r}"
        )

    # -1 draws every contour in the list (here just the one), in red (BGR).
    img = cv2.drawContours(image, [screenCnt], -1, (0, 0, 255), 2)
    cv2.imwrite('./test_contours.jpg', img)
    if show:
        cv2.imshow("img", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return screenCnt

def transform(imgPath, contour, show=False):
    """Rectify the document region of a photo and binarise it.

    The region delimited by ``contour`` is warped to a straight, top-down
    view, converted to grayscale and thresholded. The result is saved to
    ``./test_transform.jpg``.

    Args:
        imgPath: Path of the original image file.
        contour: The four corner points of the document; it is reshaped to
            ``(4, 2)`` before being passed to ``four_point_transform``.
        show: When true, also display the result in a window and block
            until a key is pressed. Off by default.

    Returns:
        The thresholded image of the rectified region.

    Raises:
        OSError: If the image cannot be read from ``imgPath``.
    """
    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {imgPath!r}")

    # Perspective transform: straighten the quadrilateral into a rectangle.
    warped = four_point_transform(img, contour.reshape(4, 2))
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)  # grayscale
    # THRESH_BINARY: pixels brighter than 150 become 255 (white), the rest 0.
    ref = cv2.threshold(warped, 150, 255, cv2.THRESH_BINARY)[1]

    cv2.imwrite('./test_transform.jpg', ref)
    if show:
        cv2.imshow("img", ref)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return ref

if __name__ == "__main__":
    imgPath = "./test.jpg"
    # Full pipeline: outline detection -> rectification -> OCR.
    contour = find_contours(imgPath)
    imgref = transform(imgPath, contour)
    text = pytesseract.image_to_string(imgref,lang='chi_sim')  # recognise with the Simplified Chinese language data
    print(text)

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

文檔掃描OCR（Python3.7）

輪廓檢測

變換

OCR識別

完整代碼

《日本蠟燭圖》讀書筆記 & 技術分析回測

Python多線程編程深度探索：從入門到實戰

《期貨-市場技術分析》讀書筆記

mongodb處理json數據很好

頂級 Javaer 都在用的 20 個類庫，真香！

[轉帖]cpupower

google瀏覽器插件開發

35K*14 薪，入職了！這公司只要不裁員，我能一直呆下去！

DCGAN生成二次元頭像（Pytorch）

1020 Delete At Most Two Characters (35 分)（C++）

PAT頂級目錄（C++）

爬取知乎表情包

WaveNet筆記

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結