# Practice captcha recognition using tesseract.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pytesseract
import urllib
import urllib.request
from PIL import Image
# URL the captcha image is downloaded from.
url = 'https://so.gushiwen.org/RandCode.ashx'

# Save the download into the current working directory, then reopen it with
# Pillow so it can be inspected and processed.
captcha_path = './captcha.jpg'
urllib.request.urlretrieve(url, captcha_path)
image = Image.open(captcha_path)
# Display the raw captcha for a visual check before any preprocessing.
image.show()
# TODO: clean up the captcha image before running recognition.
def deal_captcha(image, threshold=100):
    """Binarize a captcha image so that only pure black and white remain.

    The input is first converted with ``convert('L')`` (single-channel
    grayscale); every pixel of that converted image is then forced to
    either 255 (white) or 0 (black).

    Args:
        image: Image object providing ``convert``; the converted result must
            provide ``load`` and ``size`` (e.g. a ``PIL.Image.Image``).
        threshold: Brightness cutoff in the 0-255 range. Pixels strictly
            brighter than this become white, all others become black.
            Defaults to 100, the value originally hard-coded here.

    Returns:
        The converted grayscale image after thresholding.
    """
    gray = image.convert('L')
    # Pixel accessor indexed as pixels[x, y]; values range from 0 (black)
    # to 255 (white).
    pixels = gray.load()
    width, height = gray.size
    for x in range(width):
        for y in range(height):
            pixels[x, y] = 255 if pixels[x, y] > threshold else 0
    return gray
# Binarize the captcha first, then hand the cleaned-up image to tesseract.
image = deal_captcha(image)
# Store the OCR result under a descriptive name; the previous name `str`
# shadowed the builtin for the rest of the module.
captcha_text = pytesseract.image_to_string(image)
print(captcha_text)