HALCON Example Analysis 13: OCR Text Recognition

Open HALCON and press Ctrl+E to open HALCON's built-in example programs. Navigate to Applications -> Optical Character Recognition -> ocrcolor.hdev.

* 
* OCR (numbers) with color segmentation
* 
read_image (Image, 'ocr/color_form_01')
get_image_pointer3 (Image, PointerRed, PointerGreen, PointerBlue, Type, Width, Height)
dev_close_window ()
dev_open_window (0, 0, Width, Height, 'black', WindowID)
dev_display (Image)
dev_set_line_width (3)
dev_set_draw ('margin')
dev_update_window ('off')
* Read the classifier to use for reading the text.
* It is easiest to use the pre-trained font Industrial_0-9_NoRej.  If you
* have run the program ocrcolort.hdev in this directory, you can activate
* the second line to use the font trained with this program.
*Read the pretrained classifier shipped with HALCON:
*read_ocr_class_mlp ('Industrial_0-9_NoRej', OCRHandle)
*Read the classifier trained previously (by ocrcolort.hdev):
read_ocr_class_mlp ('ocrcolor', OCRHandle)
* 
* LOOP: Process all Images
* 
NumImages := 8
for img := 1 to NumImages by 1
    read_image (Image, 'ocr/color_form_0' + img)
    * 
    * Detect foreground
    * 
    *Segment the text (foreground) region
    mean_image (Image, Mean, 3, 3)
    decompose3 (Mean, Red, Green, Blue)
    threshold (Green, ForegroundRaw, 0, 220)
    clip_region (ForegroundRaw, Foreground, 3, 3, Height - 4, Width - 4)
    * 
    * Divide colors
    * 
    reduce_domain (Red, Foreground, RedReduced)
    reduce_domain (Green, Foreground, GreenReduced)
    sub_image (RedReduced, GreenReduced, ImageSub, 2, 128)
    mean_image (ImageSub, ImageMean, 3, 3)
    binary_threshold (ImageMean, Cluster1, 'smooth_histo', 'dark', UsedThreshold)
    difference (Foreground, Cluster1, Cluster2)
    concat_obj (Cluster1, Cluster2, Cluster)
    opening_circle (Cluster, Opening, 2.5)
    smallest_rectangle1 (Opening, Row1, Column1, Row2, Column2)
    WidthCluster := Column2 - Column1 + 1
    if (WidthCluster[0] > WidthCluster[1])
        select_obj (Opening, NumberRegion, 2)
    else
        select_obj (Opening, NumberRegion, 1)
    endif
    * 
    * Expand Numbers
    * 
    closing_rectangle1 (NumberRegion, NumberCand, 1, 20)
    difference (Image, NumberCand, NoNumbers)
    connection (NumberRegion, NumberParts)
    intensity (NumberParts, Green, MeanIntensity, Deviation)
    expand_gray_ref (NumberParts, Green, NoNumbers, Numbers, 20, 'image', MeanIntensity, 48)
    union1 (Numbers, NumberRegion)
    connection (NumberRegion, Numbers)
    * 
    * Fine tuning
    * 
    fill_up_shape (Numbers, RegionFillUp, 'area', 1, 100)
    opening_circle (RegionFillUp, FinalNumbersUnsorted, 3.5)
    sort_region (FinalNumbersUnsorted, FinalNumbers, 'character', 'true', 'row')
    dev_set_color ('blue')
    dev_display (Image)
    dev_display (FinalNumbers)
    count_obj (FinalNumbers, NumNumbers)
    union1 (FinalNumbers, NumberRegion)
    difference (Image, NumberRegion, NoNumbers)
    paint_region (NoNumbers, Green, ImageOCRRaw, 255, 'fill')
    paint_region (NumberRegion, ImageOCRRaw, ImageOCR, 0, 'fill')
    * 
    * OCR
    * 
    *Recognize the characters:
    *  1st parameter: the segmented character regions
    *  2nd parameter: the preprocessed input image
    *  3rd parameter: the classifier handle read above (the trained model)
    *  4th parameter (output): the recognized characters
    *  5th parameter (output): the classification confidence
    do_ocr_multi_class_mlp (FinalNumbers, ImageOCR, OCRHandle, RecChar, Confidence)
    set_display_font (WindowID, 27, 'mono', 'true', 'false')
    disp_message (WindowID, sum(RecChar), 'window', 32, 24, 'blue', 'false')
    if (img < NumImages)
        set_display_font (WindowID, 16, 'mono', 'true', 'false')
        disp_continue_message (WindowID, 'black', 'true')
        stop ()
    endif
endfor
dev_update_window ('on')

Original image used for text recognition, together with the recognition result

The official example above contains a fair amount of extra clutter; the core of text training and recognition is actually quite simple and can be broken down into the following steps:

Training

1. Segment the image regions that correspond to the characters. Note that training only succeeds when the character regions are white (bright) characters on a black background; training on black characters against a white background fails, for reasons that are not yet clear to me.

2. Use append_ocr_trainf() to add each character region, together with its label text, to a training file. Nothing has been trained at this point; the .trf file is only a mapping from image regions to characters.

3. Create the classifier with create_ocr_class_mlp().

4. Train on the training file with trainf_ocr_class_mlp(), then save the result with write_ocr_class_mlp() as an .omc file; this file is the trained neural-network model we need and can be stored for later use. A minimal sketch of these training steps follows below.
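Below is a minimal sketch of the four training steps, assuming CharRegions already holds the segmented character regions (white characters on a black background), GrayImage is the single-channel training image, and Labels is a tuple with one label per region; the file names 'chars.trf' and 'chars.omc' are placeholders:

* Add each region with its label to the training file (the .trf file
* only stores the region/label pairs, no training happens yet).
count_obj (CharRegions, NumChars)
for I := 1 to NumChars by 1
    select_obj (CharRegions, CharRegion, I)
    append_ocr_trainf (CharRegion, GrayImage, Labels[I - 1], 'chars.trf')
endfor
* Create the MLP classifier for the classes found in the training file.
read_ocr_trainf_names ('chars.trf', CharacterNames, CharacterCount)
create_ocr_class_mlp (8, 10, 'constant', 'default', CharacterNames, 80, 'none', 10, 42, OCRHandle)
* Train on the .trf file and save the trained model as an .omc file.
trainf_ocr_class_mlp (OCRHandle, 'chars.trf', 200, 1, 0.01, Error, ErrorLog)
write_ocr_class_mlp (OCRHandle, 'chars.omc')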

Recognition

1. Segment the image regions that correspond to the characters.

2. Read the trained .omc file with read_ocr_class_mlp().

3. Recognize the characters with do_ocr_multi_class_mlp() or do_ocr_single_class_mlp(); see the sketch below.
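And a minimal sketch of the recognition steps, again assuming CharRegions holds the segmented character regions of the single-channel image GrayImage and that 'chars.omc' is the placeholder model file written above:

* Load the trained classifier, sort the regions into reading order,
* and classify all characters in one call.
read_ocr_class_mlp ('chars.omc', OCRHandle)
sort_region (CharRegions, SortedChars, 'character', 'true', 'row')
do_ocr_multi_class_mlp (SortedChars, GrayImage, OCRHandle, RecChar, Confidence)
* RecChar contains one recognized character per region, Confidence the
* corresponding classification confidence.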

 

Below is a simple example I wrote myself for training and recognizing Chinese characters; the characters are handwritten.

dev_open_window (0, 0, 512, 512, 'black', WindowHandle)
* Empty object to collect the character regions marked below.
gen_empty_obj (EmptyObject)
read_image (Image, 'C:/Users/Administrator/Desktop/2.PNG')
rgb1_to_gray (Image, GrayImage)
* Number of characters to mark in the training image.
charcount := 6
for Index := 1 to charcount by 1
    disp_message (WindowHandle, 'Draw a rectangle around a single character, right-click to confirm:', 'window', 12, 12, 'yellow', 'false')
    * Draw a rectangle interactively.
    draw_rectangle1 (WindowHandle, Row1, Column1, Row2, Column2)
    * Generate the corresponding rectangle region.
    gen_rectangle1 (Rectangle, Row1, Column1, Row2, Column2)
    * Crop the gray image to this rectangle.
    reduce_domain (GrayImage, Rectangle, ImageReduced1)
    * Threshold to get the (bright) character pixels.
    threshold (ImageReduced1, Region1, 128, 255)
    opening_circle (Region1, RegionOpening, 1.5)
    * Collect the character region in the container object.
    concat_obj (EmptyObject, RegionOpening, EmptyObject)
endfor
* Ground-truth labels for the marked regions. Note the order: read from top
* to bottom and left to right, the characters are 蘇林其, 蘇林其.
words := ['蘇','林','其','蘇','林','其']

* Sort the regions like characters: left to right, top to bottom.
sort_region (EmptyObject, SortedRegions1, 'character', 'true', 'row')

for Index1 := 1 to charcount by 1
    select_obj (SortedRegions1, ObjectSelected1, Index1)
    * Add each region with its ground-truth label to the training file 2.trf.
    append_ocr_trainf (ObjectSelected1, Image, words[Index1 - 1], '2.trf')
endfor

* Collect the class names from the training file, create and train the
* MLP classifier, and save it as 2.omc.
read_ocr_trainf_names ('2.trf', CharacterNames, CharacterCount)
create_ocr_class_mlp (8, 10, 'constant', 'default', CharacterNames, 80, 'none', 10, 42, OCRHandle)
trainf_ocr_class_mlp (OCRHandle, '2.trf', 200, 1, 0.01, Error, ErrorLog)
write_ocr_class_mlp (OCRHandle, '2.omc')

* Read a test image.
read_image (Image1, 'C:/Users/Administrator/Desktop/3.PNG')

* Threshold to extract the character region.
threshold (Image1, testwordregion, 200, 255)
* Load the trained classifier and recognize the single character
* (Num = 1 returns only the best class).
read_ocr_class_mlp ('2.omc', OCRHandle1)
do_ocr_single_class_mlp (testwordregion, Image1, OCRHandle1, 1, Class, Confidence)

* Display the result.
disp_message (WindowHandle, 'Chinese recognition result:', 'image', 50, 50, 'white', 'false')
disp_message (WindowHandle, Class, 'image', 50, 200, 'red', 'false')

Original training image

Recognition result
