import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')
from PIL import Image
import pytesseract
# Binarize a photo of digits and run Tesseract OCR on the result.
# The grayscale and thresholded stages are displayed and written to disk so
# the three images (original, gray, thresholded) can be compared.
im = Image.open('new_num.jpg')

# Convert to 8-bit grayscale ('L'); this is the input to the thresholding step.
gray = im.convert('L')
gray.show()
gray.save('new_num_gray.jpg')

# Lookup table for Image.point(): grey levels below the threshold map to 0,
# every other level maps to 1.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]

# Apply the table and produce a 1-bit ('1') image.
out = gray.point(table, '1')
out.show()
out.save('new_num_thresholded.jpg')

# Run OCR on the in-memory bilevel image rather than re-opening the JPEG copy:
# JPEG is lossy, so the file read back from disk is no longer strictly
# black/white. The JPEG is still written above for visual inspection.
print(pytesseract.image_to_string(out))
看處理前後的3張圖
但是識別的結果不是很準確,只識別出了「2」。
然後又把 --psm 參數從 3 到 13 全部試了一遍,準確率也只提高了一點點。
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')
from PIL import Image
import pytesseract
# Binarize a photo of digits, then run Tesseract OCR once for each page
# segmentation mode (psm) from 3 to 13, restricted to digit characters, and
# print every result so the modes can be compared.
im = Image.open('new_num.jpg')

# Convert to 8-bit grayscale ('L'); this is the input to the thresholding step.
gray = im.convert('L')
gray.show()
gray.save('new_num_gray.jpg')

# Lookup table for Image.point(): grey levels below the threshold map to 0,
# every other level maps to 1.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]

# Apply the table and produce a 1-bit ('1') image.
out = gray.point(table, '1')
out.show()
out.save('new_num_thresholded.jpg')

# Run OCR on the in-memory bilevel image rather than re-opening the JPEG copy:
# JPEG is lossy, so the file read back from disk is no longer strictly
# black/white. The JPEG is still written above for visual inspection.
for psm in range(3, 14):
    # Each option must be separated by a space. Without the space before
    # "--oem" the command line becomes "--psm 3--oem 3 ...", which corrupts
    # the psm value and drops the --oem flag.
    config = '--psm {} --oem 3 -c tessedit_char_whitelist=0123456789'.format(psm)
    print(pytesseract.image_to_string(out, config=config))