打開halcon,按下ctrl+e打開halcon自帶例程。應用範圍->光學字符識別->ocrcolor.hdev
*
* OCR (numbers) with color segmentation
*
* Reads digits from a series of color form images: the colored
* foreground is split into two color clusters, the digit cluster is
* selected, cleaned up, and classified with an MLP OCR classifier.
read_image (Image, 'ocr/color_form_01')
* Query image dimensions (the pixel pointers themselves are not used further)
get_image_pointer3 (Image, PointerRed, PointerGreen, PointerBlue, Type, Width, Height)
dev_close_window ()
dev_open_window (0, 0, Width, Height, 'black', WindowID)
dev_display (Image)
dev_set_line_width (3)
dev_set_draw ('margin')
dev_update_window ('off')
* Read the classifier to use for reading the text.
* It is easiest to use the pre-trained font Industrial_0-9_NoRej. If you
* have run the program ocrcolort.hdev in this directory, you can activate
* the second line to use the font trained with this program.
* Option 1: read the pretrained model file shipped with HALCON
*read_ocr_class_mlp ('Industrial_0-9_NoRej', OCRHandle)
* Option 2: read the classifier file trained earlier (by ocrcolort.hdev)
read_ocr_class_mlp ('ocrcolor', OCRHandle)
*
* LOOP: Process all Images
*
NumImages := 8
for img := 1 to NumImages by 1
read_image (Image, 'ocr/color_form_0' + img)
*
* Detect foreground
*
* Segment the region containing the text (dark pixels in the green channel)
mean_image (Image, Mean, 3, 3)
decompose3 (Mean, Red, Green, Blue)
threshold (Green, ForegroundRaw, 0, 220)
* Remove a 3-pixel border to avoid image-edge artifacts
clip_region (ForegroundRaw, Foreground, 3, 3, Height - 4, Width - 4)
*
* Divide colors
*
reduce_domain (Red, Foreground, RedReduced)
reduce_domain (Green, Foreground, GreenReduced)
* The red-minus-green difference separates the two ink colors
sub_image (RedReduced, GreenReduced, ImageSub, 2, 128)
mean_image (ImageSub, ImageMean, 3, 3)
binary_threshold (ImageMean, Cluster1, 'smooth_histo', 'dark', UsedThreshold)
difference (Foreground, Cluster1, Cluster2)
concat_obj (Cluster1, Cluster2, Cluster)
opening_circle (Cluster, Opening, 2.5)
* Pick the narrower of the two color clusters as the digit region
* (the wider cluster is the pre-printed form text)
smallest_rectangle1 (Opening, Row1, Column1, Row2, Column2)
WidthCluster := Column2 - Column1 + 1
if (WidthCluster[0] > WidthCluster[1])
select_obj (Opening, NumberRegion, 2)
else
select_obj (Opening, NumberRegion, 1)
endif
*
* Expand Numbers
*
closing_rectangle1 (NumberRegion, NumberCand, 1, 20)
difference (Image, NumberCand, NoNumbers)
connection (NumberRegion, NumberParts)
intensity (NumberParts, Green, MeanIntensity, Deviation)
* Grow each digit part toward neighboring pixels of similar gray value,
* restricted so it cannot grow into the NoNumbers region
expand_gray_ref (NumberParts, Green, NoNumbers, Numbers, 20, 'image', MeanIntensity, 48)
union1 (Numbers, NumberRegion)
connection (NumberRegion, Numbers)
*
* Fine tuning
*
* Close small holes and smooth the digit shapes, then sort them
* into reading order for classification
fill_up_shape (Numbers, RegionFillUp, 'area', 1, 100)
opening_circle (RegionFillUp, FinalNumbersUnsorted, 3.5)
sort_region (FinalNumbersUnsorted, FinalNumbers, 'character', 'true', 'row')
dev_set_color ('blue')
dev_display (Image)
dev_display (FinalNumbers)
count_obj (FinalNumbers, NumNumbers)
union1 (FinalNumbers, NumberRegion)
* Build a clean OCR input image: everything outside the digits is
* painted white (255), the digit regions black (0)
difference (Image, NumberRegion, NoNumbers)
paint_region (NoNumbers, Green, ImageOCRRaw, 255, 'fill')
paint_region (NumberRegion, ImageOCRRaw, ImageOCR, 0, 'fill')
*
* OCR
*
* Classify all digit regions at once:
*   1st parameter: the segmented character regions
*   2nd parameter: the preprocessed gray image
*   3rd parameter: the trained classifier handle (loaded above)
*   4th parameter (output): the recognized characters
*   5th parameter (output): the classification confidences
do_ocr_multi_class_mlp (FinalNumbers, ImageOCR, OCRHandle, RecChar, Confidence)
set_display_font (WindowID, 27, 'mono', 'true', 'false')
disp_message (WindowID, sum(RecChar), 'window', 32, 24, 'blue', 'false')
if (img < NumImages)
set_display_font (WindowID, 16, 'mono', 'true', 'false')
disp_continue_message (WindowID, 'black', 'true')
stop ()
endif
endfor
dev_update_window ('on')
用於識別文字的原圖像與識別信息
以上官方例程帶有一些干擾信息,其實對於文字訓練與識別是很簡單的。簡單分爲以下幾步:
訓練
1,分割出文字對應圖像區域,這裏需要注意文字模型是要黑色背景白色字體才能訓練成功,如果是白色背景黑色字體則訓練會失敗,具體原因暫不清楚。
2,用append_ocr_trainf ()函數把像素區域與對應的文字信息加到訓練文件中,此時還沒有開始訓練,trf文件只是圖像與文字的映射
3,create_ocr_class_mlp()函數創建訓練模型
4,trainf_ocr_class_mlp()函數用訓練文件開始訓練,得到omc文件,此文件即爲我們需要的神經網絡模型,可以存儲起來
識別
1,分割出文字對應圖像區域
2,讀取訓練好的omc文件
3,do_ocr_multi_class_mlp()或do_ocr_single_class_mlp()識別文字
下面是我自己寫的一個簡單訓練識別中文的例子,文字是手寫的
* Minimal example: train an MLP OCR classifier on handwritten Chinese
* characters marked interactively, then recognize a character in a
* second test image.
WindowHandle:=3600
dev_open_window (0, 0, 512, 512, 'black', WindowHandle)
* Empty object tuple that will collect the training regions
gen_empty_obj (EmptyObject)
read_image (Image, 'C:/Users/Administrator/Desktop/2.PNG')
rgb1_to_gray (Image, GrayImage)
* Number of characters the user will mark for training
charcount:=6
for Index := 1 to charcount by 1
disp_message (WindowHandle, '請框選單個漢字區域,右鍵確認:','window', 12, 12, 'yellow', 'false')
* Interactively draw a rectangle around a single character
* (confirm with the right mouse button)
draw_rectangle1 (WindowHandle, Row1, Column1, Row2, Column2)
* Generate the region corresponding to the drawn rectangle
gen_rectangle1 (Rectangle, Row1, Column1, Row2, Column2)
* Restrict the gray image to the marked character
reduce_domain (GrayImage, Rectangle, ImageReduced1)
* Threshold the character pixels
* NOTE(review): 128..255 assumes bright strokes on a dark
* background -- confirm against the actual input image
threshold (ImageReduced1, Region1, 128, 255)
opening_circle (Region1, RegionOpening, 1.5)
* Append the character region to the collected training regions
concat_obj (EmptyObject, RegionOpening, EmptyObject)
endfor
* Ground-truth labels for the marked regions; the order must match the
* sorted region order produced below
words:=['蘇','林','其','蘇','林','其']
* Sort the regions into reading order (left to right, top to bottom)
sort_region (EmptyObject, SortedRegions1, 'character', 'true', 'row')
for Index1:=1 to charcount by 1
select_obj (SortedRegions1, ObjectSelected1, Index1)
* Append region+label pairs to the .trf training file; no training
* happens yet -- the file only maps image regions to characters
append_ocr_trainf (ObjectSelected1, Image, words[Index1-1], '2.trf')
endfor
* Read back the distinct character names stored in the training file
read_ocr_trainf_names ('2.trf', CharacterNames, CharacterCount)
* Create the MLP OCR classifier (8x10 character sampling, 80 hidden units)
create_ocr_class_mlp (8, 10, 'constant', 'default', CharacterNames, 80, 'none', 10, 42, OCRHandle)
* Train from the .trf file (up to 200 iterations); this produces the
* network weights held by OCRHandle
trainf_ocr_class_mlp (OCRHandle, '2.trf', 200, 1, 0.01, Error, ErrorLog)
* Persist the trained model as an .omc file for later reuse
write_ocr_class_mlp (OCRHandle, '2.omc')
* Read a test image
read_image (Image1, 'C:/Users/Administrator/Desktop/3.PNG')
* Binarize to get the character region
threshold (Image1, testwordregion, 200, 255)
* Recognition: load the trained model from disk
read_ocr_class_mlp ('2.omc', OCRHandle1)
* Classify the single character region
do_ocr_single_class_mlp (testwordregion, Image1, OCRHandle1,1, Class, Confidence)
* Display the recognition result
disp_message(WindowHandle, '中文識別結果:', 'image', 50, 50, 'white', 'false')
disp_message(WindowHandle, Class, 'image', 50, 200, 'red', 'false')
訓練原圖像
識別結果