端對端文字識別訓練集生成

借用了網上的方法並稍作修改,在此做個備份:

from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import glob
import numpy as np
import os,re
import cv2


'''
1. 從文字庫隨機選擇10個字符
2. 生成圖片
3. 隨機使用函數
'''

def sto_choice_from_info_str(quantity=10, corpus=None):
    """Return a random run of ``quantity`` consecutive characters.

    Args:
        quantity: Number of consecutive characters to return.
        corpus: Text to sample from. When omitted, the module-level
            ``info_str`` is used, which is what this function always read
            before the parameter existed.

    Returns:
        A substring of the corpus that is exactly ``quantity`` characters
        long.

    Raises:
        ValueError: If ``quantity`` is negative or the corpus holds fewer
            than ``quantity`` characters.
    """
    if corpus is None:
        corpus = info_str
    # Highest start index that still leaves room for a full window.
    last_start = len(corpus) - quantity
    if quantity < 0 or last_start < 0:
        raise ValueError(
            'cannot pick %r characters from a corpus of length %d'
            % (quantity, len(corpus))
        )
    start = random.randint(0, last_start)
    return corpus[start:start + quantity]

def random_word_color():
    """Pick a grey text colour with a small random offset per channel.

    Returns:
        A 3-tuple of ints. The base is a uniform grey of 54 or 105 (54 is
        listed twice, so it is chosen twice as often), and each channel
        independently gets an extra 0-10 added.
    """
    base_palette = ([54, 54, 54], [54, 54, 54], [105, 105, 105])
    base = random.choice(base_palette)
    # One jitter draw per channel, taken in channel order.
    return tuple(channel + random.randint(0, 10) for channel in base)

def create_an_image(bground_path, width, height):
    """Return a random ``width`` x ``height`` crop of a random background.

    Args:
        bground_path: Directory holding the background images. A trailing
            path separator is optional.
        width: Crop width in pixels.
        height: Crop height in pixels.

    Returns:
        The cropped image, as returned by the opened image's ``crop``.

    Raises:
        ValueError: If the chosen background is smaller than the requested
            crop in either dimension.
    """
    bground_choice = random.choice(os.listdir(bground_path))
    # os.path.join works whether or not bground_path ends with a separator.
    bground = Image.open(os.path.join(bground_path, bground_choice))

    max_x = bground.size[0] - width
    max_y = bground.size[1] - height
    if max_x < 0 or max_y < 0:
        raise ValueError(
            'background %r is %dx%d, smaller than the requested %dx%d crop'
            % (bground_choice, bground.size[0], bground.size[1], width, height)
        )

    x, y = random.randint(0, max_x), random.randint(0, max_y)
    return bground.crop((x, y, x + width, y + height))

def random_choice_in_process_func():
    """Placeholder for picking a post-processing function; does nothing yet."""
    return None

def darken_func(image):
    """Blur ``image`` with one randomly chosen smoothing filter.

    Despite its name, this function only applies a blur: the candidates are
    ``SMOOTH``, ``SMOOTH_MORE`` and a Gaussian blur of radius 1.3.

    Args:
        image: Object exposing a PIL-style ``filter`` method.

    Returns:
        Whatever ``image.filter`` returns for the chosen filter.
    """
    # Other options noted by the original author: GaussianBlur with
    # radius 1 or 2, and MedianFilter(size=3).
    candidates = [
        ImageFilter.SMOOTH,
        ImageFilter.SMOOTH_MORE,
        ImageFilter.GaussianBlur(radius=1.3),
    ]
    chosen = random.choice(candidates)
    return image.filter(chosen)


def random_x_y(bground_size, font_size):
    """Pick a random top-left position for the text on the background.

    Args:
        bground_size: ``(width, height)`` of the background.
        font_size: Font size used to reserve room for the text.

    Returns:
        An ``(x, y)`` tuple. ``x`` leaves ten font sizes of width free to its
        right; ``y`` lies in the top quarter of the height left over after
        one font size.
    """
    bg_width, bg_height = bground_size
    # Reserve width and height so the text does not spill out of the image.
    max_x = bg_width - font_size * 10
    max_y = int((bg_height - font_size) / 4)
    pos_x = random.randint(0, max_x)
    pos_y = random.randint(0, max_y)
    return pos_x, pos_y

def random_font_size():
    """Return a random integer font size between 20 and 26 inclusive."""
    return random.randint(20, 26)

def random_font(font_path):
    """Return the path of a randomly chosen entry in ``font_path``.

    Args:
        font_path: Directory holding the font files. A trailing path
            separator is optional.

    Returns:
        ``font_path`` joined with the chosen file name.

    Raises:
        IndexError: If the directory is empty (raised by ``random.choice``).
    """
    chosen = random.choice(os.listdir(font_path))
    # os.path.join inserts the separator only when font_path lacks one, so
    # both './font' and './font/' produce a valid path.
    return os.path.join(font_path, chosen)

def createfont(fontlabels, fontsize, width, height):
    """Rotate one randomly chosen image by a small random angle.

    Args:
        fontlabels: Sequence of image arrays; one is picked at random.
        fontsize: Amount added to ``width`` and ``height`` to get the output
            size.
        width: Width used for the rotation centre, and with ``fontsize`` the
            output width.
        height: Height used for the rotation centre, and with ``fontsize``
            the output height.

    Returns:
        The result of ``cv2.warpAffine``: the chosen image rotated by an
        angle between -1 and 1 (in hundredths) about
        ``(width / 2, height / 2)``, with replicated borders.
    """
    # Keep the three draws in this order: magnitude, sign, image index.
    magnitude = random.randint(0, 100)
    sign_flag = random.randint(0, 1)
    picked = random.randint(0, len(fontlabels) - 1)

    angle = magnitude / 100 if sign_flag == 0 else -magnitude / 100

    pivot = (width / 2, height / 2)  # rotation centre
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1)
    out_size = (width + fontsize, height + fontsize)
    return cv2.warpAffine(
        fontlabels[picked],
        matrix,
        out_size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )


def readfile(filename):
    """Load a UTF-8 text file into a ``{line_index: line_text}`` dict.

    Args:
        filename: Path of the file to read.

    Returns:
        A dict mapping each zero-based line number to that line with its
        newline characters removed.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        return {
            index: text.replace('\n', '')
            for index, text in enumerate(handle)
        }

def getnum(words, dic):
    """Encode ``words`` as a space-separated string of dictionary keys.

    Args:
        words: Sequence of characters to encode.
        dic: Mapping from key (code) to character.

    Returns:
        The keys whose value equals each character, in character order and
        joined by single spaces. A character with no matching value adds
        nothing; a character matched by several keys adds all of them in the
        dictionary's iteration order.
    """
    # Build the reverse index once instead of rescanning the whole
    # dictionary for every character.
    codes_by_char = {}
    for code, char in dic.items():
        codes_by_char.setdefault(char, []).append(code)

    codes = []
    for char in words:
        codes.extend(str(code) for code in codes_by_char.get(char, ()))
    return ' '.join(codes)


def main(save_path, num, file, dic):
    """Render one synthetic text-line image and append its label line.

    Steps: pick 10 characters from the corpus, crop a 280x150 background,
    draw the text at x=10 and vertically centred, blur, rotate slightly,
    write ``"<num>.png <codes>"`` to ``file``, then save a 32-row strip
    around the text as ``<save_path><num>.png``.

    Args:
        save_path: Directory prefix for the PNG; it is concatenated directly
            with the file name.
        num: Sample identifier used as the file stem.
        file: Writable text file object that receives the label line.
        dic: Code-to-character mapping passed to ``getnum``.
    """
    canvas_w, canvas_h = 280, 150

    # Random ingredients, drawn in this fixed order.
    text = sto_choice_from_info_str(10)
    canvas = create_an_image('./background/', canvas_w, canvas_h)
    glyph_size = random_font_size()
    font_file = random_font('./font/')
    ink = random_word_color()

    # Centre the text vertically and paste it onto the background.
    text_top = (canvas_h - glyph_size) // 2
    typeface = ImageFont.truetype(font_file, glyph_size)
    painter = ImageDraw.Draw(canvas)
    painter.text((10, text_top), text, fill=ink, font=typeface)

    # Blur, then rotate; the rotation output is canvas_w x canvas_h again.
    blurred = darken_func(canvas)
    # NOTE(review): for an RGB background this array is presumably in RGB
    # order while OpenCV encoders assume BGR, so red and blue may end up
    # swapped in the saved PNG - confirm before relying on colours.
    rotated = createfont(
        [np.array(blurred)],
        glyph_size,
        canvas_w - glyph_size,
        canvas_h - glyph_size,
    )

    # Record the label before the image is written, as the original did.
    label = getnum(text, dic)
    file.write(''.join([str(num), '.png ', label, '\n']))

    # Keep 32 rows starting one pixel above the text baseline offset.
    strip = rotated[text_top - 1:text_top + 31, 0:canvas_w]
    encoded = cv2.imencode('.png', strip)[1]
    encoded.tofile(save_path + str(num) + '.png')

if __name__ == '__main__':
    # Share of the samples (the first ones generated) sent to the
    # validation set.
    rint = 0.1
    dic = readfile('char_std_5990.txt')
    # Load the corpus: strip surrounding whitespace from every line, drop
    # tabs, and join everything into one string read by
    # sto_choice_from_info_str.
    with open('info.txt', 'r', encoding='utf-8') as corpus_file:
        info_list = [part.strip().replace('\t', '') for part in corpus_file]
    info_str = ''.join(info_list)

    total = 400000
    # Label files. The with-statement closes (and flushes) both of them,
    # including the training file, even if generation fails part-way.
    with open('data_set/val_set.txt', 'a+', encoding='utf-8') as val_labels, \
            open('data_set/train_set.txt', 'a+', encoding='utf-8') as train_labels:
        for num in range(total):
            numname = '{}_{}'.format(random.randint(10000, 99999999), 10000000 + num)
            if num < total * rint:
                main('data_set/val_set/', numname, val_labels, dic)
            else:
                main('data_set/train_set/', numname, train_labels, dic)
            if num % 1000 == 0:
                print('[%d/%d]' % (num, total))


效果如下:

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章