使用Python提取身份證上的信息

import pytesseract
import cv2
import matplotlib.pyplot as plt
import dlib
import matplotlib.patches as mpatches
from skimage import io,draw,transform,color
import numpy as np
import pandas as pd
import re

# Detect faces on the ID-card scan with dlib's frontal face detector.
detector = dlib.get_frontal_face_detector()
image = io.imread("img-0.png")
# `dets` holds the detection results. The 2 is presumably the number of
# upsampling passes applied before detection -- confirm against the dlib docs.
dets = detector(image, 2)

## Visualise the detections
plt.figure()
ax = plt.subplot(111)
# ax.imshow(image)
plt.axis("off")
for face in dets:
    # Outline each detected face in red.
    left = face.left()
    top = face.top()
    right = face.right()
    bottom = face.bottom()
    # The box is anchored at (left, bottom) and spans `top - bottom` vertically.
    rect = mpatches.Rectangle((left, bottom), right - left, top - bottom,
                              fill=False, edgecolor='red', linewidth=1)
    ax.add_patch(rect)
plt.show()

# Locate the facial landmarks of the first detected face.
predictor = dlib.shape_predictor("shape_predictor_5_face_landmarks.dat")
detected_landmarks = predictor(image, dets[0]).parts()
# One [x, y] row per landmark.
landmarks = np.array([[p.x, p.y] for p in detected_landmarks])
## 將眼睛位置可視化
# plt.figure()
# ax = plt.subplot(111)
# ax.imshow(image)
# plt.axis("off")
# plt.plot(landmarks[0:4,0],landmarks[0:4,1],'ro')
# for ii in np.arange(4):
# plt.text(landmarks[ii,0]-10,landmarks[ii,1]-15,ii)
# plt.show()

## 計算眼睛的傾斜角度,逆時針角度
def twopointcor(point1, point2):
    """Return the inclination, in degrees, of the line through two points.

    The result equals ``arctan(dy / dx)`` converted to degrees, so it always
    lies in [-90, 90] and does not depend on the order of the two points.

    Args:
        point1: ``(x1, y1)`` as a tuple, list or NumPy array.
        point2: ``(x2, y2)`` as a tuple, list or NumPy array.

    Returns:
        The angle in degrees. A vertical line gives +90 or -90 and two
        identical points give 0 (instead of a division by zero / NaN).
    """
    # asarray lets plain tuples be subtracted, as the signature promises.
    delta = np.asarray(point2, dtype=float) - np.asarray(point1, dtype=float)
    dx, dy = delta[0], delta[1]
    # Flip the direction vector so dx >= 0: arctan2 then reproduces
    # arctan(dy / dx) exactly, but without dividing by dx.
    if dx < 0:
        dx, dy = -dx, -dy
    corner = np.arctan2(dy, dx) * 180 / np.pi
    return corner

## Measure the tilt on three landmark pairs and average the results
corner10, corner23, corner20 = (
    twopointcor(landmarks[first, :], landmarks[second, :])
    for first, second in ((1, 0), (3, 2), (2, 0))
)
corner = np.mean([corner10, corner23, corner20])
# print(corner10)
# print(corner23)
# print(corner20)
# print(corner)

## 計算圖像的身份證傾斜的角度
def IDcorner(landmarks):
    """Estimate the tilt angle of the ID card from the face landmarks.

    Args:
        landmarks: array of detected face landmark coordinates, one
            ``[x, y]`` row per landmark (5 landmarks per the original note).

    Returns:
        The angle in degrees computed by ``twopointcor`` from landmark 2 to
        landmark 0. The original author found this pair the most suitable
        in testing.
    """
    corner20 = twopointcor(landmarks[2, :], landmarks[0, :])
    # Mean of a single value: kept so the return type stays a NumPy float.
    corner = np.mean([corner20])
    return corner


corner = IDcorner(landmarks)
# print(corner)

## 將照片轉正
def rotateIdcard(image):
    """Rotate an ID-card image so that the face on it is level.

    Args:
        image: the image to process, as an array accepted by the dlib
            detector and ``skimage.transform.rotate``.

    Returns:
        ``(image2, det)``: the rotated image converted to uint8, and the
        face detections computed again on that rotated image.

    Raises:
        IndexError: if no face is detected in ``image``.
    """
    ## Detect the face with dlib.get_frontal_face_detector
    detector = dlib.get_frontal_face_detector()
    dets = detector(image, 2)  # `dets` holds the detection results
    if len(dets) == 0:
        # Same exception type as the bare dets[0] lookup, with a usable message.
        raise IndexError("no face detected in the image; cannot estimate the tilt")
    ## Locate the eye landmarks of the first detected face
    predictor = dlib.shape_predictor("shape_predictor_5_face_landmarks.dat")
    detected_landmarks = predictor(image, dets[0]).parts()
    landmarks = np.array([[p.x, p.y] for p in detected_landmarks])
    corner = IDcorner(landmarks)
    ## Rotate the image by the measured angle
    image2 = transform.rotate(image, corner, clip=False)
    # The * 255 rescale implies rotate() returns floats in [0, 1].
    image2 = np.uint8(image2 * 255)
    ## Face position after the rotation
    det = detector(image2, 2)
    return image2, det

## Straighten the ID card
image = io.imread("img-0.png")
image2, dets = rotateIdcard(image)

## Plot the corrected result
plt.figure()
ax = plt.subplot(111)
# ax.imshow(image2)
plt.axis("off")

# Edges of the first detected face; width/high are reused for the photo box
left, top, right, bottom = (
    dets[0].left(), dets[0].top(), dets[0].right(), dets[0].bottom()
)
width = right - left
high = top - bottom

# Face box in red
rect = mpatches.Rectangle((left, bottom), width, high,
                          edgecolor='red', linewidth=1, fill=False)
ax.add_patch(rect)

## Rough position of the whole portrait photo (not very precise)
left2 = np.uint(left - 0.5 * width)
bottom2 = np.uint(bottom + 0.5 * width)
rect = mpatches.Rectangle((left2, bottom2), 1.8 * width, 2.2 * high,
                          edgecolor='blue', linewidth=1, fill=False)
ax.add_patch(rect)
plt.show()

## Crop the portrait photo out of the card
top2 = np.uint(bottom2 + 2.2 * high)
right2 = np.uint(left2 + 1.8 * width)
image3 = image2[top2:bottom2, left2:right2, :]
# plt.imshow(image3)
plt.axis("off")
plt.show()
# cv2.imshow('image3',image3)
# cv2.waitKey()

# ## 對圖像進行處理，轉化爲灰度圖像=>二值圖像
# imagegray = cv2.cvtColor(image2,cv2.COLOR_RGB2GRAY)
# cv2.imshow('imagegray',imagegray)
#
# cv2.waitKey()
# retval, imagebin = cv2.threshold(imagegray, 120, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
# ## 將照片去除
# imagebin[0:bottom2,left2:-1] = 255
# # 高斯雙邊濾波
# img_bilateralFilter = cv2.bilateralFilter(imagebin, 40, 75, 75)
#
# cv2.imshow('img_bilateralFilter',img_bilateralFilter)
# cv2.waitKey()
# # plt.imshow(img_bilateralFilter,cmap=plt.cm.gray)
# #
# # plt.axis("off")
# # plt.show()

img = cv2.imread('img-0.png')  # opened here but not used by the steps below
# image2 originates from skimage's io.imread, i.e. RGB channel order, so the
# RGB->gray conversion is the matching one (BGR2GRAY would swap the red and
# blue channel weights).
gray = cv2.cvtColor(image2, cv2.COLOR_RGB2GRAY)  # grayscale conversion
# cv2.imshow('gray', gray)
# NOTE(review): with THRESH_OTSU set, OpenCV picks the threshold itself, so
# the 50 passed here is presumably ignored -- confirm against the OpenCV docs.
retval, imagebin = cv2.threshold(gray, 50, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
## Blank out the portrait: rows above bottom2, columns from left2 up to (but
## excluding) the last column, are set to white
imagebin[0:bottom2, left2:-1] = 255
img_bilateralFilter = cv2.bilateralFilter(imagebin, 40, 100, 100)  # bilateral filtering

cv2.namedWindow("img_bilateralFilter", cv2.WINDOW_NORMAL)
cv2.imshow('img_bilateralFilter', img_bilateralFilter)

# Block until a key is pressed so the window stays visible.
cv2.waitKey(0)
本文章主要利用pytesseract，dlib，opencv3等庫提取身份證上的信息，主要分爲文字信息和照片信息。首先加載所需要的庫：

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import pytesseract
import cv2
import matplotlib.pyplot as plt
import dlib
import matplotlib.patches as mpatches
from skimage import io,draw,transform,color
import numpy as np
import pandas as pd
import re
針對所需要識別的身份證照片，可能會存在身份證圖像傾斜的情況，所以要對照片進行旋轉修正。主要通過dlib庫識別人臉，找到人臉眼睛特徵點，計算眼睛的傾斜角度，然後對照片進行旋轉。

## Detect faces with dlib.get_frontal_face_detector
detector = dlib.get_frontal_face_detector()
image = io.imread("奧巴馬2.jpeg")
dets = detector(image, 2)  # `dets` holds the detection results

## Visualise the detections on top of the image
plt.figure()
ax = plt.subplot(111)
ax.imshow(image)
plt.axis("off")
for face in dets:
    # Outline each detected face in red.
    left = face.left()
    top = face.top()
    right = face.right()
    bottom = face.bottom()
    # The box is anchored at (left, bottom) and spans `top - bottom` vertically.
    rect = mpatches.Rectangle((left, bottom), right - left, top - bottom,
                              fill=False, edgecolor='red', linewidth=1)
    ax.add_patch(rect)
plt.show()
得到的結果如下：

找到人臉後，尋找眼睛特徵點：

## Locate the eye landmarks of the first detected face
predictor = dlib.shape_predictor("shape_predictor_5_face_landmarks.dat")
detected_landmarks = predictor(image, dets[0]).parts()
# One [x, y] row per landmark.
landmarks = np.array([[p.x, p.y] for p in detected_landmarks])

## Plot the first four landmarks and label each with its index
plt.figure()
ax = plt.subplot(111)
ax.imshow(image)
plt.axis("off")
plt.plot(landmarks[0:4, 0], landmarks[0:4, 1], 'ro')
for ii in range(4):
    # Offset the label so it does not sit on top of the marker.
    plt.text(landmarks[ii, 0] - 10, landmarks[ii, 1] - 15, ii)
plt.show()

可以發現有四個特徵點被找到，計算特徵點之間逆時針旋轉的傾斜角度：

## Measure the tilt on three landmark pairs, average them, and print all four
corner10, corner23, corner20 = (
    twopointcor(landmarks[first, :], landmarks[second, :])
    for first, second in ((1, 0), (3, 2), (2, 0))
)
corner = np.mean([corner10, corner23, corner20])
for angle in (corner10, corner23, corner20, corner):
    print(angle)

-9.865806943084369
-7.765166018425334
-10.049348588124873
-9.226773849878192
經過驗證，使用第2個和第0個特徵點計算出的傾斜角度較合適，其結果如下：

-10.049348588124873
接下來是將照片旋轉：

## Straighten the ID card
image = io.imread("奧巴馬2.jpeg")
image2, dets = rotateIdcard(image)

## Plot the corrected result
plt.figure()
ax = plt.subplot(111)
ax.imshow(image2)
plt.axis("off")

# Edges of the first detected face; width/high are reused for the photo box
left, top, right, bottom = (
    dets[0].left(), dets[0].top(), dets[0].right(), dets[0].bottom()
)
width = right - left
high = top - bottom

# Face box in red
rect = mpatches.Rectangle((left, bottom), width, high,
                          edgecolor='red', linewidth=1, fill=False)
ax.add_patch(rect)

## Rough position of the whole portrait photo (not very precise)
left2 = np.uint(left - 0.3 * width)
bottom2 = np.uint(bottom + 0.4 * width)
rect = mpatches.Rectangle((left2, bottom2), 1.6 * width, 1.8 * high,
                          edgecolor='blue', linewidth=1, fill=False)
ax.add_patch(rect)
plt.show()

提取照片上的頭像：

## Crop the portrait photo of the card holder
# `high` was computed as top - bottom, so it is presumably negative in image
# coordinates; that would make top2 the smaller row index, matching the
# top2:bottom2 slice below -- confirm.
top2 = np.uint(bottom2+1.8*high)
right2 = np.uint(left2+1.6*width)
# Rows top2..bottom2 and columns left2..right2, all channels.
image3 = image2[top2:bottom2,left2:right2,:]
plt.imshow(image3)
plt.axis("off")
plt.show()

身份證經過轉正後，下面通過pytesseract庫直接識別上面的文字信息，查看效果：

## Run pytesseract directly on the straightened image to see how well it
## does; the result is not very good
# lang='chi_sim' selects the language data pytesseract uses for recognition.
text = pytesseract.image_to_string(image2,lang='chi_sim')
print(text)

町名奧巴馬

懂趴男炅濂肯尼亞
瑙藿 1961篆8坷 4H

‖ 剛華盛頓特區宜賓法尼亞
大道160o號白官

_二薹

儉民鼻份號蠍 1 234561 961 08047890
結果不是很好，主要原因是干擾信息太多，而且包含兩種大小不同的字體，下面將圖像轉化爲二值圖像，再次識別：

## Preprocess the image: convert to grayscale, then to a binary image
imagegray = cv2.cvtColor(image2,cv2.COLOR_RGB2GRAY)
# NOTE(review): with THRESH_OTSU set, OpenCV picks the threshold itself, so
# the 120 passed here is presumably ignored -- confirm against the OpenCV docs.
retval, imagebin = cv2.threshold(imagegray, 120, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
## Blank out the portrait: rows above bottom2, columns from left2 up to (but
## excluding) the last column, are set to white (255)
imagebin[0:bottom2,left2:-1] = 255
plt.imshow(imagebin,cmap=plt.cm.gray)
plt.axis("off")
plt.show()

## Run pytesseract again, this time on the binarized image
text = pytesseract.image_to_string(imagebin,lang='chi_sim')
print(text)

奧巴馬
男「肯尼亞
1961 8 4)

華盛頓特區宜賓法尼亞
大道1600號白宮

1 234561 961 08047890

`
這次的識別效果好很多，對識別結果進行處理：

# Split the OCR output into lines and keep them in a DataFrame, together
# with the length of each line.
textlist = text.split("\n")
textdf = pd.DataFrame({"text":textlist})
textdf["textlen"] = textdf.text.apply(len)
## Drop the lines whose length is <= 1
textdf = textdf[textdf.textlen > 1].reset_index(drop = True)
# Bare expression: shows the DataFrame when run as a notebook cell.
textdf

text   textlen
0   奧巴馬   3
1   男「肯尼亞   7
2   1961 8 4)   9
3   華盛頓特區宜賓法尼亞   10
4   大道1600號白宮   9
5   1 234561 961 08047890   21
提取更詳細的信息：

## Extract the individual fields from the cleaned OCR lines.
## Row 0 is read as the name, row 1 as "gender ethnicity", row 2 as the
## birth date, the last row as the ID number, and everything in between
## as the address.
print("姓名:", textdf.text[0])
print("=====================")
print("性別:", textdf.text[1].split(" ")[0])
print("=====================")
print("民族:", textdf.text[1].split(" ")[-1])
print("=====================")
# The date row is split on spaces; the digits of each piece are kept so
# OCR noise around the numbers is dropped.
birthparts = textdf.text[2].split(" ")
yearnum = re.findall(r"\d+", birthparts[0])[0]
print("出生年:", yearnum)
print("=====================")
monthnum = re.findall(r"\d+", birthparts[1])[0]
print("出生月:", monthnum)
print("=====================")
daynum = re.findall(r"\d+", birthparts[2])[0]
print("出生日:", daynum)
print("=====================")
IDnum = textdf.text.values[-1]
if len(IDnum) > 18:  ## longer than 18 characters: strip the stray spaces
    IDnum = IDnum.replace(" ", "")
print("公民身份證號:", IDnum)
print("=====================")
## Collect the address, which may span several rows
desstext = textdf.text.values[3:(textdf.shape[0] - 1)]
print("地址:", "".join(desstext))
print("=====================")

姓名: 奧巴馬
=====================
性別: 男
=====================
民族: 肯尼亞
=====================
出生年: 1961
=====================
出生月: 8
=====================
出生日: 4
=====================
公民身份證號: 123456196108047890
=====================
地址: 華盛頓特區宜賓法尼亞大道1600號白宮
=====================
對整個提取信息過程定義一個函數

## 定義身份證識別函數
def Idcard_im2str(image, threshod=120):
    """Straighten an ID-card image and run OCR on its text area.

    Args:
        image: the ID-card image to process.
        threshod: value handed to ``cv2.threshold``. NOTE(review): the call
            also sets ``THRESH_OTSU``, under which OpenCV presumably ignores
            this value and picks the threshold itself -- confirm.

    Returns:
        ``(image2, dets, rectangle, imagebin, textdf)``:
        the straightened image, the face detections on it, the portrait box
        as ``[(left2, bottom2), (top2, right2)]``, the binarized image with
        the portrait blanked out, and a DataFrame of the OCR lines longer
        than one character (columns ``text`` and ``textlen``).

    Raises:
        IndexError: if no face is found on the straightened image.
    """
    ## Straighten the ID card
    image2, dets = rotateIdcard(image)
    if len(dets) == 0:
        # Same exception type as the bare dets[0] lookup, with a usable message.
        raise IndexError("no face detected on the straightened image")
    ## Edges of the first detected face
    left = dets[0].left()
    top = dets[0].top()
    right = dets[0].right()
    bottom = dets[0].bottom()
    ## Rough position of the portrait photo (not very precise)
    width = right - left
    high = top - bottom
    left2 = np.uint(left - 0.3 * width)
    bottom2 = np.uint(bottom + 0.4 * width)
    ## Remaining two edges of the portrait photo
    top2 = np.uint(bottom2 + 1.8 * high)
    right2 = np.uint(left2 + 1.6 * width)
    ## [(left2,bottom2),(top2,right2)]
    rectangle = [(left2, bottom2), (top2, right2)]
    # Portrait crop; computed here but not part of the return value.
    imageperson = image2[top2:bottom2, left2:right2, :]
    ## Preprocess the image: grayscale, then binary
    imagegray = cv2.cvtColor(image2, cv2.COLOR_RGB2GRAY)
    retval, imagebin = cv2.threshold(imagegray, threshod, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
    ## Blank out the portrait: rows above bottom2, columns from left2 up to
    ## (but excluding) the last column, are set to white
    imagebin[0:bottom2, left2:-1] = 255
    ## Run OCR on the binarized image
    text = pytesseract.image_to_string(imagebin, lang='chi_sim')
    textlist = text.split("\n")
    textdf = pd.DataFrame({"text": textlist})
    textdf["textlen"] = textdf.text.apply(len)
    ## Drop the lines whose length is <= 1
    textdf = textdf[textdf.textlen > 1].reset_index(drop=True)
    return image2, dets, rectangle, imagebin, textdf
調用函數，查看結果：

## Recognise the information on the ID card
image = io.imread("奧巴馬2.jpeg")
image2,dets,rectangle,imagebin,textdf = Idcard_im2str(image,threshod = 120)

## Show the intermediate results in a 2x2 grid
plt.figure(figsize=(12,8))
## Original image
plt.subplot(2,2,1)
plt.imshow(image)
plt.axis("off")
## Straightened image
ax = plt.subplot(2,2,2)
ax.imshow(image2)
plt.axis("off")
# Outline the first detected face in red
left = dets[0].left()
top = dets[0].top()
right = dets[0].right()
bottom = dets[0].bottom()
rect = mpatches.Rectangle((left,bottom), (right - left), (top - bottom),
                          fill=False, edgecolor='red', linewidth=1)
ax.add_patch(rect)

## Portrait box (not very precise); rectangle = [(left2,bottom2),(top2,right2)]
# Idcard_im2str builds these values with np.uint. Subtracting unsigned
# integers wraps around when the result would be negative (top2 - bottom2),
# which produced a gigantic box height. Convert to plain ints first.
(left2, bottom2), (top2, right2) = [tuple(int(v) for v in pt) for pt in rectangle]
width = right2 - left2
high = top2 - bottom2
rect = mpatches.Rectangle((left2,bottom2), width, high,
                          fill=False, edgecolor='blue', linewidth=1)
ax.add_patch(rect)

## Show the portrait
plt.subplot(2,2,3)
## Crop the portrait photo of the card holder
image3 = image2[top2:bottom2,left2:right2,:]
plt.imshow(image3)
plt.axis("off")
## Show the binarized image
plt.subplot(2,2,4)
plt.imshow(imagebin,cmap=plt.cm.gray)
plt.axis("off")
plt.show()

使用Python提取身份證上的信息

DAPPER 事務 TRANSACTION

Java中線程的創建方式

一鍵自動化博客發佈工具,chrome和firefox詳細配置

bootstrap模態框模態框以外鼠標單擊不關閉

bootstrap datepicker

springboot讀取配置文件到靜態工具類

百度雲2核4g團購鏈接

springboot activemq 連接池

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結