Standalone implementation of the darknet-ocr text detection stage (darknet_ocr)

When running the standalone detection part (darknet_detect) of chineseocr (yolov3 + crnn), a CUDA version mismatch produced:
OSError: libcudart.so.9.2: cannot open shared object file: No such file or directory
So I used the standalone detection part from the darknet_ocr repository instead.
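
As a quick sanity check (my own addition, not from the original post), Python's ctypes can ask the dynamic loader for the exact runtime the failing binary expects:

# Hypothetical diagnostic: try to load the CUDA runtime the binary links against.
import ctypes
try:
    ctypes.CDLL('libcudart.so.9.2')
    print('libcudart.so.9.2 is on the loader path')
except OSError as err:
    print('missing:', err)  # reproduces the OSError shown above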

That repository ships the darknet-based text detection script text.py under the dnn directory. Because I needed some extra output and ran into a few issues, I made some modifications; the complete code is attached below for reference.

import cv2
import numpy as np
import time
from config import textPath, anchors
from helper.image import resize_img, get_origin_box, soft_max, reshape
from helper.detectors import TextDetector
from config import scale, maxScale, TEXT_LINE_SCORE
from dnn.image import rotate_cut_img, sort_box
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


# Load the darknet text-detection network; the .cfg path is derived from the weights path
textNet = cv2.dnn.readNetFromDarknet(textPath.replace('weights', 'cfg'), textPath)

def detect_box(image, scale=600, maxScale=900):
    H, W = image.shape[:2]
    image, rate = resize_img(image, scale, maxScale=maxScale)
    h, w = image.shape[:2]
    # Build the network input blob at the resized resolution (pixel values unscaled, BGR order)
    inputBlob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(w, h), swapRB=False, crop=False)
    outputName = textNet.getUnconnectedOutLayersNames()
    textNet.setInput(inputBlob)
    out = textNet.forward(outputName)[0]
    # First 20 output channels are classification scores (softmaxed below);
    # the remaining channels are box regressions relative to the anchors
    clsOut = reshape(out[:, :20, ...])
    boxOut = reshape(out[:, 20:, ...])
    boxes = get_origin_box((w, h), anchors, boxOut[0])
    scores = soft_max(clsOut[0])
    # Clip box coordinates to the resized-image bounds
    boxes[:, 0:4][boxes[:, 0:4] < 0] = 0
    boxes[:, 0][boxes[:, 0] >= w] = w - 1
    boxes[:, 1][boxes[:, 1] >= h] = h - 1
    boxes[:, 2][boxes[:, 2] >= w] = w - 1
    boxes[:, 3][boxes[:, 3] >= h] = h - 1
    print('scores:', scores)
    print('boxes:', boxes)
    print('rate:', rate)
    print('w:', w)
    print('h:', h)
    return scores, boxes, rate, w, h

def detect_lines(image, scale=600,
                 maxScale=900,
                 MAX_HORIZONTAL_GAP=30,
                 MIN_V_OVERLAPS=0.6,
                 MIN_SIZE_SIM=0.6,
                 TEXT_PROPOSALS_MIN_SCORE=0.7,
                 TEXT_PROPOSALS_NMS_THRESH=0.3,
                 TEXT_LINE_NMS_THRESH=0.9,
                 TEXT_LINE_SCORE=0.9
                 ):
    # Enforce a minimum horizontal gap for merging proposals into lines
    MAX_HORIZONTAL_GAP = max(16, MAX_HORIZONTAL_GAP)
    detectors = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    scores, boxes, rate, w, h = detect_box(image, scale, maxScale)
    size = (h, w)
    text_lines, scores = detectors.detect(boxes, scores, size,
                                          TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
                                          TEXT_LINE_NMS_THRESH, TEXT_LINE_SCORE)
    if len(text_lines) > 0:
        # Map line coordinates back to the original image size
        text_lines = text_lines / rate
    print('text_lines:', text_lines)
    print('scores:', scores)
    return text_lines, scores



def detect(img):
    # Note: the caller passes a BGR image from cv2.imread, so this channel swap
    # actually feeds RGB data into the network
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    boxes, scores = detect_lines(image, scale=scale, maxScale=maxScale)
    data = []
    n = len(boxes)
    for i in range(n):
        box = boxes[i]
        box = [int(x) for x in box]
        if scores[i] > TEXT_LINE_SCORE:  # keep only lines above the confidence threshold
            data.append({'box': box, 'prob': round(float(scores[i]), 2), 'text': None})
    res = {'data': data, 'errCode': 0}
    return res

def ocr_batch(img, boxes, leftAdjustAlph=0.01, rightAdjustAlph=0.01):
    """
    Batch-crop the detected line regions for OCR.
    """
    im = Image.fromarray(img)
    newBoxes = []
    for index, box in enumerate(boxes):
        # Rotate and crop each line region, with small left/right padding
        partImg, box = rotate_cut_img(im, box, leftAdjustAlph, rightAdjustAlph)
        box['img'] = partImg.convert('L')
        newBoxes.append(box)
        cvPartImg = np.array(partImg)
        cvImg = cv2.cvtColor(cvPartImg, cv2.COLOR_RGB2BGR)
        #cv2.imshow('part', cvImg)   # disabled: no X server on the Linux host
        #cv2.waitKey(0)
    return newBoxes

def drawDetectBox(img, resJson):
    # Draw each detected quadrilateral on the image in green
    for idx in range(len(resJson['data'])):
        box = resJson['data'][idx]['box']
        [x1, y1, x2, y2, x3, y3, x4, y4] = box
        p1 = (int(x1), int(y1))
        p2 = (int(x2), int(y2))
        p3 = (int(x3), int(y3))
        p4 = (int(x4), int(y4))
        cv2.line(img, p1, p2, (0, 255, 0))
        cv2.line(img, p2, p3, (0, 255, 0))
        cv2.line(img, p3, p4, (0, 255, 0))
        cv2.line(img, p4, p1, (0, 255, 0))
        #cv2.putText(img, str(text_tags[idx]), (int(p1[0]), int(p1[1])), 1, 1, (0, 0, 255))
    #cv2.imshow('detect', img)   # disabled: no X server on the Linux host
    #cv2.waitKey(0)



def show_img(imgs: np.ndarray, color=True):
    # Accept a single image or a batch; promote a single image to a batch of one
    if (len(imgs.shape) == 3 and color) or (len(imgs.shape) == 2 and not color):
        imgs = np.expand_dims(imgs, axis=0)
    for img in imgs:
        plt.figure()
        # OpenCV images are BGR, so reverse the channel order for correct colors
        plt.imshow(img[:, :, ::-1] if color else img, cmap=None if color else 'gray')

# Run the pipeline on a test image and time the detection step
imgDir = './test/'
img = cv2.imread(imgDir + 'img.jpeg')
timeTake = time.time()
res = detect(img)
timeTake = time.time() - timeTake
print('It takes: {}s'.format(timeTake))
print(res)

boxes = []
for idx in range(len(res['data'])):
    boxes.append(res['data'][idx]['box'])

ocr_batch(img, boxes)
drawDetectBox(img, res)
cv2.imwrite('detect7.jpg', img)
show_img(img, color=True)
plt.show()
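
As an aside, the four cv2.line calls in drawDetectBox can be collapsed into a single cv2.polylines call per box; a minimal sketch (draw_boxes is a hypothetical helper name, not from the repo):

import cv2
import numpy as np

def draw_boxes(img, boxes, color=(0, 255, 0)):
    # Each box is [x1, y1, x2, y2, x3, y3, x4, y4]; reshape into four points
    # and draw the closed quadrilateral in one call.
    for box in boxes:
        pts = np.array(box, dtype=np.int32).reshape(-1, 1, 2)
        cv2.polylines(img, [pts], isClosed=True, color=color, thickness=1)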


Because cv2.imshow() fails on a headless Linux system with "cannot connect to X server", those statements had to be commented out and replaced with plt.show().
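
Note that with the Agg backend selected at the top of the script, plt.show() does not open a window either; if you want the matplotlib figure itself written to disk, plt.savefig is the headless-safe route. A minimal sketch (result.png is a hypothetical output path):

import cv2
import matplotlib
matplotlib.use('Agg')  # non-interactive backend, works without an X server
import matplotlib.pyplot as plt

img = cv2.imread('./test/img.jpeg')               # same test image as above
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # OpenCV loads images as BGR
plt.axis('off')
plt.savefig('result.png', bbox_inches='tight')    # hypothetical output file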
Running it produces the following results:
[detection result images]
The whole pipeline runs in the same Docker image as in the previous article, so text.py is launched with:

docker run -v /.../OCR-DARKNET/darknet-ocr:/chineseocr/darknet-ocr -w /chineseocr/darknet-ocr chineseocr:v2 python text.py
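
If your test images live outside the mounted repo, an extra -v mount can map them into the ./test directory the script reads from (the host path below is hypothetical):

docker run -v /.../OCR-DARKNET/darknet-ocr:/chineseocr/darknet-ocr -v /path/to/images:/chineseocr/darknet-ocr/test -w /chineseocr/darknet-ocr chineseocr:v2 python text.py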

I have made several versions of text.py for different needs; anyone interested is welcome to discuss. Environments differ and OpenCV builds vary, so if you spot any discrepancy or have updates, feel free to reach out!
