1.候選框分組
2. 確定每個分組的座標
1.候選框的分組:
im_size[1]:傳入圖片的寬度
box[0]:細粒度候選窗口的左上角橫座標
boxes_table:把所有左上角橫座標相同的候選窗口放在一起,方便下面根據候選窗口左上角的橫座標定位候選窗口的索引。
# Bucket proposal indices by the integer x coordinate of their left edge:
# boxes_table[x] lists every proposal whose int(box[0]) equals x, with one
# (initially empty) bucket per value in range(self.im_size[1]).
boxes_table=[[] for _ in range(self.im_size[1])]
for index, box in enumerate(text_proposals):
boxes_table[int(box[0])].append(index)
# Keep the table on the instance; get_successions/get_precursors read it.
self.boxes_table=boxes_table
graph:方形矩陣,裏面元素值爲bool類型,行數和列數均爲候選窗口的個數。初始化爲False(0),如果兩個候選窗口相關聯則設置爲True(1)。
# Square boolean adjacency matrix with one row and one column per proposal
# (text_proposals.shape[0] is the number of proposals); every entry starts
# out False.
# The builtin ``bool`` is used as the dtype because the ``np.bool`` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where it raises
# AttributeError.
graph = np.zeros((text_proposals.shape[0], text_proposals.shape[0]), dtype=bool)
get_successions(index):找到跟index候選窗口相關聯的所有候選窗口
succession_index:這些候選窗口中得分最大的那個。
is_succession_node(index, succession_index):跟一個窗口相關聯的候選窗口可能有很多,前向搜索只保留分值最大的那個。
# For every proposal, add at most one outgoing edge: to its best-scoring
# successor, and only when is_succession_node confirms that this proposal
# scores at least as high as every precursor of that successor.
for index, box in enumerate(text_proposals):
# Indices of the candidate successors of proposal `index`
successions=self.get_successions(index)
if len(successions)==0:
continue
# Keep only the candidate with the highest score
succession_index=successions[np.argmax(scores[successions])]
if self.is_succession_node(index, succession_index):
# NOTE: a box can have multiple successions(precursors) if multiple successions(precursors)
# have equal scores.
graph[index, succession_index]=True
# Wrap the adjacency matrix in Graph (defined outside this excerpt).
return Graph(graph)
前面調用的get_successions(index)函數。找到最靠近該候選窗口的一組候選窗口。meet_v_iou爲比較的方法。
def get_successions(self, index):
    """Return the closest right-hand neighbours of proposal ``index``.

    Columns to the right of the proposal's left edge are scanned in
    increasing order, starting at ``int(box[0]) + 1`` and stopping after
    ``TextLineCfg.MAX_HORIZONTAL_GAP`` columns or before ``self.im_size[1]``,
    whichever comes first. The scan stops at the first column of
    ``self.boxes_table`` that holds at least one proposal accepted by
    ``self.meet_v_iou``.

    Returns:
        A list with the accepted proposal indices from that first matching
        column, or an empty list when no column in the window matches.
    """
    anchor = self.text_proposals[index]
    first_column = int(anchor[0]) + 1
    stop_column = min(
        int(anchor[0]) + TextLineCfg.MAX_HORIZONTAL_GAP + 1,
        self.im_size[1],
    )
    for column in range(first_column, stop_column):
        matches = [
            candidate
            for candidate in self.boxes_table[column]
            if self.meet_v_iou(candidate, index)
        ]
        if matches:
            # Only the nearest column with a match is reported.
            return matches
    return []
size_similarity(index1, index2):比較兩個窗口的高度之比。
overlaps_v(index1, index2):高度上重合部分與短高之比。
def meet_v_iou(self, index1, index2):
    """Tell whether two proposals are vertically compatible.

    Two checks are applied, in this order:

    * vertical overlap: the shared y extent of the two boxes (columns 1 and
      3 of ``self.text_proposals``, counted with a ``+ 1``) divided by the
      smaller of the two ``self.heights`` entries must reach
      ``TextLineCfg.MIN_V_OVERLAPS``;
    * size similarity: the smaller height divided by the larger height must
      reach ``TextLineCfg.MIN_SIZE_SIM``.

    NOTE(review): the original inline note gives 0.6 for both thresholds;
    confirm against TextLineCfg.

    Returns:
        A truthy value when both checks pass, a falsy value otherwise. The
        size check is only evaluated when the overlap check passes.
    """
    height1 = self.heights[index1]
    height2 = self.heights[index2]
    box1 = self.text_proposals[index1]
    box2 = self.text_proposals[index2]

    # Shared vertical span: lowest of the bottoms minus highest of the tops.
    shared_top = max(box2[1], box1[1])
    shared_bottom = min(box2[3], box1[3])
    overlap_ratio = max(0, shared_bottom - shared_top + 1) / min(height1, height2)

    return (
        overlap_ratio >= TextLineCfg.MIN_V_OVERLAPS
        and min(height1, height2) / max(height1, height2) >= TextLineCfg.MIN_SIZE_SIM
    )
is_succession_node(index, succession_index)前面所調用的函數。與get_successions(index)同理。
def is_succession_node(self, index, succession_index):
    """Check that ``index`` is a top-scoring precursor of ``succession_index``.

    The precursors of ``succession_index`` are looked up with
    ``self.get_precursors`` and their entries in ``self.scores`` are
    compared against the score of ``index``.

    Returns:
        ``True`` when the score of ``index`` is greater than or equal to the
        highest precursor score (so ties count), ``False`` otherwise.
    """
    rival_scores = self.scores[self.get_precursors(succession_index)]
    return bool(self.scores[index] >= np.max(rival_scores))
def get_precursors(self, index):
    """Return the closest left-hand neighbours of proposal ``index``.

    Columns to the left of the proposal's left edge are scanned in
    decreasing order, from ``int(box[0]) - 1`` down to
    ``max(int(box[0] - TextLineCfg.MAX_HORIZONTAL_GAP), 0)`` inclusive. The
    scan stops at the first column of ``self.boxes_table`` that holds at
    least one proposal accepted by ``self.meet_v_iou``.

    Returns:
        A list with the accepted proposal indices from that first matching
        column, or an empty list when no column in the window matches.
    """
    anchor = self.text_proposals[index]
    nearest_column = int(anchor[0]) - 1
    farthest_column = max(int(anchor[0] - TextLineCfg.MAX_HORIZONTAL_GAP), 0)
    for column in range(nearest_column, farthest_column - 1, -1):
        matches = [
            candidate
            for candidate in self.boxes_table[column]
            if self.meet_v_iou(candidate, index)
        ]
        if matches:
            # Only the nearest column with a match is reported.
            return matches
    return []
2. 確定每個分組的座標
def get_text_lines(self, text_proposals, scores, im_size):
    """Merge grouped proposals into scored text-line quadrilaterals.

    Args:
        text_proposals: array of proposal boxes; columns 0-3 are read as
            left x, top y, right x and bottom y.
        scores: per-proposal scores, indexed with a list of proposal indices.
        im_size: forwarded unchanged to ``self.group_text_proposals`` and
            ``clip_boxes``.

    Returns:
        Whatever ``clip_boxes`` returns for a float32 array holding one row
        per text line: ``x1, y1, x2, y2, x3, y3, x4, y4, score``, where the
        corners are top-left, top-right, bottom-left and bottom-right.
    """
    # Build the graph first to learn which proposals make up each text line.
    groups = self.group_text_proposals(text_proposals, scores, im_size)

    # Pass 1: per-line summary
    # (x_min, top, x_max, bottom, score, slope, intercept, height).
    text_lines = np.zeros((len(groups), 8), np.float32)
    for row, members in enumerate(groups):
        member_ids = list(members)
        boxes = text_proposals[member_ids]  # all proposals of this text line
        lefts, tops = boxes[:, 0], boxes[:, 1]
        rights, bottoms = boxes[:, 2], boxes[:, 3]

        # Least-squares straight line through the proposal centres.
        slope, intercept = np.polyfit((lefts + rights) / 2, (tops + bottoms) / 2, 1)

        x_min = np.min(lefts)
        x_max = np.max(rights)
        # Half the width of the first proposal; the fitted edges are
        # evaluated this far inside the line's horizontal extent.
        half_width = (boxes[0, 2] - boxes[0, 0]) * 0.5

        # Fit a line through the top-left corners, then through the
        # bottom-left corners, and evaluate both near the two ends.
        lt_y, rt_y = self.fit_y(lefts, tops, x_min + half_width, x_max - half_width)
        lb_y, rb_y = self.fit_y(lefts, bottoms, x_min + half_width, x_max - half_width)

        # The line score is the mean of its proposals' scores.
        mean_score = scores[member_ids].sum() / float(len(member_ids))

        text_lines[row] = (
            x_min,
            min(lt_y, rt_y),  # smaller y of the fitted top edge
            x_max,
            max(lb_y, rb_y),  # larger y of the fitted bottom edge
            mean_score,
            slope,
            intercept,
            np.mean(bottoms - tops) + 2.5,  # mean proposal height plus a fixed margin
        )

    # Pass 2: turn each summary into four corner points plus the score.
    text_recs = np.zeros((len(text_lines), 9), np.float32)
    for row, line in enumerate(text_lines):
        slope = line[5]
        left_x, right_x = line[0], line[2]

        # Shift the centre line's intercept by half the height to obtain the
        # intercepts of the top and bottom edges.
        top_b = line[6] - line[7] / 2
        bottom_b = line[6] + line[7] / 2

        x1, y1 = left_x, slope * left_x + top_b  # top-left
        x2, y2 = right_x, slope * right_x + top_b  # top-right
        x3, y3 = left_x, slope * left_x + bottom_b  # bottom-left
        x4, y4 = right_x, slope * right_x + bottom_b  # bottom-right

        run = x2 - x1
        rise = y2 - y1
        top_edge_len = np.sqrt(run * run + rise * rise)  # length of the top edge

        # NOTE(review): corner compensation for sloped lines; the original
        # author flagged this step as unexplained. It appears to shear the
        # parallelogram towards a rectangle - confirm before relying on it.
        projected = (y3 - y1) * rise / top_edge_len
        shift_x = np.fabs(projected * run / top_edge_len)
        shift_y = np.fabs(projected * rise / top_edge_len)
        if slope < 0:
            x1 -= shift_x
            y1 += shift_y
            x4 += shift_x
            y4 -= shift_y
        else:
            x2 += shift_x
            y2 += shift_y
            x3 -= shift_x
            y3 -= shift_y

        text_recs[row] = (x1, y1, x2, y2, x3, y3, x4, y4, line[4])

    return clip_boxes(text_recs, im_size)