GitHub Reproduction: Tiramisu (the sweet dessert network, do you like it?)

Repository: https://github.com/bfortuner/pytorch_tiramisu
The original network is multi-class, and its data-input code is tailored to public datasets such as CamVid, so using your own data takes some work. Here I adapted it to my own data pipeline and changed it to binary classification.
Environment: CUDA 10.0, cuDNN 7.6.0, PyTorch 1.2.0 (try your existing environment first, switching environments is a hassle), 12 GB of GPU memory.
Note: the project provides three model sizes: FCDenseNet57, FCDenseNet67, and FCDenseNet103 (see /models/tiramisu.py for details). Limited by my hardware, I used FCDenseNet67; it still has quite a few parameters, so I trained with 256x256 images and batch_size=2.
If you find this helpful, please leave a like, don't just lurk!
Evaluation results:
acc: 0.9275658925374349
acc_cls: 0.8512592612858325
iou: [0.91613816 0.65293152]
miou: 0.7845348416766565
fwavacc: 0.8669676138438737
class_accuracy: 0.7215300562956269
class_recall: 0.6494413368605654
accuracy: 0.850057297858639
f1_score: 0.6835904041249753
Note: these are preliminary results from the adapted code, trained for only 20 epochs.
Data link: https://pan.baidu.com/s/17I831_hpnfEOEMFzthKChg
Extraction code: kgep
Results:
[Image: source image]
[Image: ground-truth label]
[Image: predicted result]
Data layout:
The train, val, and test splits follow the same convention: each image crop and its label share the same tile number, with _sat appended to the image name and _mask appended to the label name.
[Image: data directory structure]
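
For concreteness, the layout assumed by the scripts below looks roughly like this (the tile ids are illustrative):

Build256/
    train/
        1_sat.png
        1_mask.png
        ...
    val/
        7_sat.png
        7_mask.png
        ...
    test/
        9_sat.png
        9_mask.png
        ...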
Referring to the trainUnet.ipynb file and the data-input code from my previous post, this project's train.py is as follows:

import os
import time
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from models import tiramisu
from datasets import camvid
from datasets import joint_transforms
import utils.imgs
import utils.training as train_utils
from utils.data import ImageFolder

batch_size = 2

train_root = 'D:/pytorch_tiramisu-master/data/Build256/train/'
# keep only the *_sat images, then strip the '_sat.png' suffix (8 chars) to get tile ids
imagelist = filter(lambda x: x.find('sat') != -1, os.listdir(train_root))
trainlist = list(map(lambda x: x[:-8], imagelist))

val_root = 'D:/pytorch_tiramisu-master/data/Build256/val/'
imagelist = filter(lambda x: x.find('sat') != -1, os.listdir(val_root))
vallist = list(map(lambda x: x[:-8], imagelist))

train_dataset = ImageFolder(trainlist, train_root)
val_dataset = ImageFolder(vallist, val_root)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False)


# inputs, targets = next(iter(train_loader))


LR = 1e-4
LR_DECAY = 0.995
DECAY_EVERY_N_EPOCHS = 1
N_EPOCHS = 20
torch.cuda.manual_seed(0)

model = tiramisu.FCDenseNet67(n_classes=2).cuda()
model.apply(train_utils.weights_init)
optimizer = torch.optim.RMSprop(model.parameters(), lr=LR, weight_decay=1e-4)
# criterion = nn.NLLLoss2d(weight=camvid.class_weight.cuda()).cuda()  # original loss
criterion = nn.CrossEntropyLoss().cuda()  # switched to cross-entropy loss

for epoch in range(1, N_EPOCHS+1):
    since = time.time()

    ### Train ###
    trn_loss, trn_err = train_utils.train(
        model, train_loader, optimizer, criterion, epoch)
    print('Epoch {:d}: Train - Loss: {:.4f}, Acc: {:.4f}'.format(
        epoch, trn_loss, 1-trn_err))    
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    ### Test ###
    val_loss, val_err = train_utils.test(model, val_loader, criterion, epoch)    
    print('Val - Loss: {:.4f} | Acc: {:.4f}'.format(val_loss, 1-val_err))
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(
        time_elapsed // 60, time_elapsed % 60))
    
    ### Checkpoint ###  
    train_utils.save_weights(model, epoch, val_loss, val_err)

    ### Adjust Lr ###
    train_utils.adjust_learning_rate(LR, LR_DECAY, optimizer, 
                                     epoch, DECAY_EVERY_N_EPOCHS)
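
For reference, the manual decay above (adjust_learning_rate in training.py) is plain exponential decay, new_lr = LR * LR_DECAY ** epoch. A minimal sketch of the same schedule with PyTorch's built-in scheduler, in case you prefer it (an alternative on my part, not what the original code does):

from torch.optim.lr_scheduler import ExponentialLR

# lr <- lr * LR_DECAY after every epoch, matching DECAY_EVERY_N_EPOCHS = 1
scheduler = ExponentialLR(optimizer, gamma=LR_DECAY)
for epoch in range(1, N_EPOCHS + 1):
    # ... train and validate as above ...
    scheduler.step()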

The data.py file feeds data to the network. No augmentation is done here; if you want augmentation, refer to the data-input file from my previous post, which includes augmentation functions. Put this file in the utils folder.

import os
import cv2
import numpy as np
import torch
import torch.utils.data as data

def data_loader(id, root):
    img = cv2.imread(os.path.join(root, '{}_sat.png'.format(id)))
    mask = cv2.imread(os.path.join(root, '{}_mask.png'.format(id)), 0)  # read the label as grayscale
    mask = np.expand_dims(mask, axis=2)
    # binarize: masks are stored as 0/255, map everything positive to class 1
    mask[mask >= 0.5] = 1
    mask[mask < 0.5] = 0
    img = np.array(img, np.float32).transpose(2, 0, 1)   # HWC -> CHW
    mask = np.array(mask, np.float32).transpose(2, 0, 1)
    return img, mask

class ImageFolder(data.Dataset):
    def __init__(self, trainlist, root):
        self.ids = list(trainlist)
        self.loader = data_loader
        self.root = root

    def __getitem__(self, index):
        id = self.ids[index]
        img, mask = self.loader(id, self.root)
        img = torch.Tensor(img)
        mask = torch.Tensor(mask)
        return img, mask

    def __len__(self):
        return len(self.ids)
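
As a quick sanity check (a hypothetical snippet using the train_loader defined in train.py), one batch should come out as float tensors of shape (B, 3, 256, 256) and (B, 1, 256, 256), with the mask containing only 0 and 1:

imgs, masks = next(iter(train_loader))
print(imgs.shape, masks.shape)   # torch.Size([2, 3, 256, 256]) torch.Size([2, 1, 256, 256])
print(masks.unique())            # tensor([0., 1.])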

The file ./utils/training.py needs the following modifications:

import os
import sys
import math
import string
import random
import shutil
import time
from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.utils import save_image
import torch.nn.functional as F

from . import imgs as img_utils

RESULTS_PATH = '../results/'
WEIGHTS_PATH = '../weights/'


def save_weights(model, epoch, loss, err):
    weights_fname = 'weights-%d-%.3f-%.3f.pth' % (epoch, loss, err)
    weights_fpath = os.path.join(WEIGHTS_PATH, weights_fname)
    torch.save({
            'startEpoch': epoch,
            'loss':loss,
            'error': err,
            'state_dict': model.state_dict()
        }, weights_fpath)
    shutil.copyfile(weights_fpath, WEIGHTS_PATH+'latest.th')

def load_weights(model, fpath):
    print("loading weights '{}'".format(fpath))
    weights = torch.load(fpath)
    startEpoch = weights['startEpoch']
    model.load_state_dict(weights['state_dict'])
    print("loaded weights (lastEpoch {}, loss {}, error {})"
          .format(startEpoch-1, weights['loss'], weights['error']))
    return startEpoch

def get_predictions(output_batch):
    bs,c,h,w = output_batch.size()
    tensor = output_batch.data
    values, indices = tensor.cpu().max(1)
    indices = indices.view(bs,h,w)
    return indices

def error(preds, targets):
    assert preds.size() == targets.size()
    bs, h, w = preds.size()
    n_pixels = bs * h * w
    incorrect = preds.ne(targets).cpu().sum()
    # cast to float: integer division here truncated to 0 and made the printed Accuracy a constant 1
    err = incorrect.float() / n_pixels
    return round(err.item(), 5)

def train(model, trn_loader, optimizer, criterion, epoch):  # heavily modified: binary-segmentation targets plus a tqdm progress bar
    model.train()
    trn_loss = 0
    trn_error = 0
    try:
        with tqdm(trn_loader, ncols=10) as t:
            for idx, data in enumerate(t):
                inputs = data[0].cuda()
                # (B,1,H,W) float mask -> (B,H,W) long, as CrossEntropyLoss expects
                targets = data[1].cuda().long().squeeze(1)
                optimizer.zero_grad()
                output = model(inputs)
                loss = criterion(output, targets)
                loss.backward()
                optimizer.step()
                trn_loss += loss.item()
                pred = get_predictions(output)
                trn_error += error(pred, targets.data.cpu())
    except KeyboardInterrupt:
        t.close()
        raise

    trn_loss /= len(trn_loader)
    trn_error /= len(trn_loader)
    return trn_loss, trn_error

def test(model, test_loader, criterion, epoch=1):  # heavily modified for the binary setup
    model.eval()
    test_loss = 0
    test_error = 0
    try:
        with torch.no_grad():  # replaces the deprecated volatile=True
            with tqdm(test_loader, ncols=10) as t:
                for data, target in t:
                    data = data.cuda()
                    # (B,1,H,W) float mask -> (B,H,W) long
                    target = target.cuda().long().squeeze(1)
                    output = model(data)
                    test_loss += criterion(output, target).item()
                    pred = get_predictions(output)
                    test_error += error(pred, target.data.cpu())
    except KeyboardInterrupt:
        t.close()
        raise

    test_loss /= len(test_loader)
    test_error /= len(test_loader)
    return test_loss, test_error

def adjust_learning_rate(lr, decay, optimizer, cur_epoch, n_epochs):
    """Sets the learning rate to the initially
        configured `lr` decayed by `decay` every `n_epochs`"""
    new_lr = lr * (decay ** (cur_epoch // n_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_uniform_(m.weight)  # underscore version; the old alias is deprecated
        m.bias.data.zero_()

def predict(model, input_loader, n_batches=1):
    # input_loader.batch_size = 1
    pre = []
    model.eval()
    with torch.no_grad():  # replaces the deprecated volatile=True
        for input, target in input_loader:
            output = model(input.cuda())
            pred = get_predictions(output)
            pre.append(pred)
            # predictions.append([input, target, pred])  # commented out: only the masks are needed
    # return predictions  # commented out
    return pre

def view_sample_predictions(model, loader, n):
    inputs, targets = next(iter(loader))
    with torch.no_grad():  # replaces the deprecated volatile=True
        output = model(inputs.cuda())
    pred = get_predictions(output)
    batch_size = inputs.size(0)
    for i in range(min(n, batch_size)):
        img_utils.view_image(inputs[i])
        img_utils.view_annotated(targets[i])
        img_utils.view_annotated(pred[i])
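
For completeness, resuming from a checkpoint written by save_weights could look like this (a sketch, assuming the WEIGHTS_PATH above and the epoch loop from train.py):

# Hypothetical resume: load the latest checkpoint, then continue the epoch loop
model = tiramisu.FCDenseNet67(n_classes=2).cuda()
start_epoch = load_weights(model, WEIGHTS_PATH + 'latest.th')
for epoch in range(start_epoch, N_EPOCHS + 1):
    trn_loss, trn_err = train(model, train_loader, optimizer, criterion, epoch)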

The model file ./models/tiramisu.py has small changes, mainly around the binary-classification output. The edits are minor, but here is the file anyway:

import torch
import torch.nn as nn
#import torch.nn.functional as F
from .layers import *

class FCDenseNet(nn.Module):
    def __init__(self, in_channels=3, down_blocks=(5,5,5,5,5),
                 up_blocks=(5,5,5,5,5), bottleneck_layers=5,
                 growth_rate=16, out_chans_first_conv=48, n_classes=12):
        super().__init__()
        self.down_blocks = down_blocks
        self.up_blocks = up_blocks
        cur_channels_count = 0
        skip_connection_channel_counts = []

        ## First Convolution ##

        self.add_module('firstconv', nn.Conv2d(in_channels=in_channels,
                  out_channels=out_chans_first_conv, kernel_size=3,
                  stride=1, padding=1, bias=True))
        cur_channels_count = out_chans_first_conv

        #####################
        # Downsampling path #
        #####################

        self.denseBlocksDown = nn.ModuleList([])
        self.transDownBlocks = nn.ModuleList([])
        for i in range(len(down_blocks)):
            self.denseBlocksDown.append(
                DenseBlock(cur_channels_count, growth_rate, down_blocks[i]))
            cur_channels_count += (growth_rate*down_blocks[i])
            skip_connection_channel_counts.insert(0,cur_channels_count)
            self.transDownBlocks.append(TransitionDown(cur_channels_count))

        #####################
        #     Bottleneck    #
        #####################

        self.add_module('bottleneck',Bottleneck(cur_channels_count,
                                     growth_rate, bottleneck_layers))
        prev_block_channels = growth_rate*bottleneck_layers
        cur_channels_count += prev_block_channels

        #######################
        #   Upsampling path   #
        #######################

        self.transUpBlocks = nn.ModuleList([])
        self.denseBlocksUp = nn.ModuleList([])
        for i in range(len(up_blocks)-1):
            self.transUpBlocks.append(TransitionUp(prev_block_channels, prev_block_channels))
            cur_channels_count = prev_block_channels + skip_connection_channel_counts[i]

            self.denseBlocksUp.append(DenseBlock(
                cur_channels_count, growth_rate, up_blocks[i],
                    upsample=True))
            prev_block_channels = growth_rate*up_blocks[i]
            cur_channels_count += prev_block_channels

        ## Final DenseBlock ##
        self.transUpBlocks.append(TransitionUp(
            prev_block_channels, prev_block_channels))
        cur_channels_count = prev_block_channels + skip_connection_channel_counts[-1]

        self.denseBlocksUp.append(DenseBlock(
            cur_channels_count, growth_rate, up_blocks[-1],
                upsample=False))
        cur_channels_count += growth_rate*up_blocks[-1]

        ## Final 1x1 Conv (softmax removed) ##
        self.finalConv = nn.Conv2d(in_channels=cur_channels_count,
               out_channels=n_classes, kernel_size=1, stride=1,
                   padding=0, bias=True)
        # self.softmax = nn.LogSoftmax(dim=1)  # commented out for the binary setup

    def forward(self, x):
        out = self.firstconv(x)

        skip_connections = []
        for i in range(len(self.down_blocks)):
            out = self.denseBlocksDown[i](out)
            skip_connections.append(out)
            out = self.transDownBlocks[i](out)

        out = self.bottleneck(out)
        for i in range(len(self.up_blocks)):
            skip = skip_connections.pop()
            out = self.transUpBlocks[i](out, skip)
            out = self.denseBlocksUp[i](out)

        out = self.finalConv(out)
        # out = self.softmax(out)  # commented out
        out = torch.sigmoid(out)  # added; note CrossEntropyLoss applies log-softmax internally, so raw logits would also train
        return out

def FCDenseNet57(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(4, 4, 4, 4, 4),
        up_blocks=(4, 4, 4, 4, 4), bottleneck_layers=4,
        growth_rate=12, out_chans_first_conv=48, n_classes=n_classes)

def FCDenseNet67(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(5, 5, 5, 5, 5),
        up_blocks=(5, 5, 5, 5, 5), bottleneck_layers=5,
        growth_rate=16, out_chans_first_conv=48, n_classes=n_classes)

def FCDenseNet103(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(4,5,7,10,12),
        up_blocks=(12,10,7,5,4), bottleneck_layers=15,
        growth_rate=16, out_chans_first_conv=48, n_classes=n_classes)
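
Since hardware limits are what pushed me to FCDenseNet67, here is a quick way to compare the parameter counts of the three variants (a sanity-check sketch; run it from the project root):

from models import tiramisu

for build in (tiramisu.FCDenseNet57, tiramisu.FCDenseNet67, tiramisu.FCDenseNet103):
    net = build(n_classes=2)
    n_params = sum(p.numel() for p in net.parameters())
    print(build.__name__, '{:.1f}M parameters'.format(n_params / 1e6))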

The prediction script predict.py:

import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from models import tiramisu
from utils.data import ImageFolder
import utils.training as train_utils

batch_size = 1
test_root = 'D:/pytorch_tiramisu-master/data/Build256/test/'
imagelist = filter(lambda x: x.find('sat')!=-1, os.listdir(test_root))
testlist = map(lambda x: x[:-8], imagelist)
testlist = list(testlist)

test_dataset = ImageFolder(testlist, test_root)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

model = tiramisu.FCDenseNet67(n_classes=2).cuda()
model.load_state_dict(torch.load('./weights/latest.th')['state_dict'])
pre = train_utils.predict(model, test_loader, n_batches=1)

save_path = './results1'
if not os.path.exists(save_path):  # make sure the output folder exists
    os.makedirs(save_path)
for i in range(len(pre)):
    pre_path = os.path.join(save_path, testlist[i] + '_mask.png')
    pre_temp = pre[i].numpy().astype(np.uint8)  # int64 indices -> uint8 so cv2.imwrite accepts them
    pre_temp[pre_temp > 0] = 255
    # print(pre_temp.shape)
    cv2.imwrite(pre_path, pre_temp[0])  # batch_size=1, so index 0 is the single mask
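
For a quick visual check of a single prediction, a hypothetical overlay of the mask on its source tile (assuming the paths above) could look like:

# Hypothetical overlay: blend the predicted mask over the source image
img = cv2.imread(os.path.join(test_root, testlist[0] + '_sat.png'))
mask = cv2.imread(os.path.join(save_path, testlist[0] + '_mask.png'), 0)
overlay = img.copy()
overlay[mask > 0] = (0, 0, 255)   # paint the predicted class red (BGR)
blend = cv2.addWeighted(img, 0.6, overlay, 0.4, 0)
cv2.imwrite('overlay_sample.png', blend)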

The evaluation script eval.py (I have posted this code several times before):

# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix

class IOUMetric:
    """
    Class to calculate mean-iou using fast_hist method
    """
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.hist = np.zeros((num_classes, num_classes))
    def _fast_hist(self, label_pred, label_true):
        mask = (label_true >= 0) & (label_true < self.num_classes)        
        hist = np.bincount(
            self.num_classes * label_true[mask].astype(int) +
            label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes)
        return hist

    def evaluate(self, predictions, gts):
        for lp, lt in zip(predictions, gts):
            assert len(lp.flatten()) == len(lt.flatten())
            self.hist += self._fast_hist(lp.flatten(), lt.flatten())    
        # miou
        iou = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist))
        miou = np.nanmean(iou) 
        # mean acc
        acc = np.diag(self.hist).sum() / self.hist.sum()
        acc_cls = np.nanmean(np.diag(self.hist) / self.hist.sum(axis=1))
        freq = self.hist.sum(axis=1) / self.hist.sum()
        fwavacc = (freq[freq > 0] * iou[freq > 0]).sum()
        return acc, acc_cls, iou, miou, fwavacc


if __name__ == '__main__':
    label_path = 'D:/pytorch_tiramisu-master/results/label/'
    predict_path = 'D:/pytorch_tiramisu-master/results/pre/'
    pres = os.listdir(predict_path)
    labels = []
    predicts = []
    for im in pres:
        if im[-4:] == '.png':
            label_name = im.split('.')[0] + '.png'
            lab_path = os.path.join(label_path, label_name)
            pre_path = os.path.join(predict_path, im)
            label = cv2.imread(lab_path,0)
            pre = cv2.imread(pre_path,0)
            label[label>0] = 1
            pre[pre>0] = 1
            labels.append(label)
            predicts.append(pre)
    el = IOUMetric(2)
    acc, acc_cls, iou, miou, fwavacc = el.evaluate(predicts, labels)
    print('acc: ',acc)
    print('acc_cls: ',acc_cls)
    print('iou: ',iou)
    print('miou: ',miou)
    print('fwavacc: ',fwavacc)

    pres = os.listdir(predict_path)
    init = np.zeros((2,2))
    for im in pres:
        lb_path = os.path.join(label_path, im)
        pre_path = os.path.join(predict_path, im)
        lb = cv2.imread(lb_path,0)
        pre = cv2.imread(pre_path,0)
        lb[lb>0] = 1
        pre[pre>0] = 1
        lb = lb.flatten()
        pre = pre.flatten()
        confuse = confusion_matrix(lb, pre, labels=[0, 1])  # pin the labels so single-class tiles still yield a 2x2 matrix
        init += confuse

    precision = init[1][1]/(init[0][1] + init[1][1]) 
    recall = init[1][1]/(init[1][0] + init[1][1])
    accuracy = (init[0][0] + init[1][1])/init.sum()
    f1_score = 2*precision*recall/(precision + recall)
    print('class_accuracy: ', precision)
    print('class_recall: ', recall)
    print('accuracy: ', accuracy)
    print('f1_score: ', f1_score)
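
To make the second block concrete, here is a tiny worked example with hypothetical counts, showing how the 2x2 confusion matrix turns into the printed metrics:

# Hypothetical counts: init = [[TN, FP], [FN, TP]] = [[90, 5], [3, 2]]
TN, FP, FN, TP = 90, 5, 3, 2
precision = TP / (TP + FP)                                 # 2/7  ~= 0.2857
recall = TP / (TP + FN)                                    # 2/5  =  0.4
accuracy = (TN + TP) / (TN + FP + FN + TP)                 # 92/100 = 0.92
f1_score = 2 * precision * recall / (precision + recall)   # ~= 0.3333
iou_fg = TP / (TP + FP + FN)                               # 2/10 = 0.2 (foreground IoU)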

That wraps up training and testing. One heads-up: if the printed Accuracy is stuck at exactly 1, it comes from integer division in the original error() function (a LongTensor count divided by an int truncates to 0 in PyTorch 1.2); the .float() cast shown in training.py above fixes it. Either way it is only an auxiliary readout and does not affect the saved predictions.

Below is some post-processing exploration. Many people want to run a CRF as post-processing, so here is crf.py. It runs, but it brought no improvement at all for me. I am posting it anyway, since many people cannot even get it to run; it has tunable parameters, so feel free to experiment and see whether you can raise the evaluation scores.
pydensecrf needs to be installed. Package link: https://pan.baidu.com/s/19HjFxyqDd1PffukFGyPj9g
Extraction code: ke6g. My Python is 3.6; for other versions, see https://www.lfd.uci.edu/~gohlke/pythonlibs/#pydensecrf
Reference: https://www.aiuai.cn/aifarm418.html
The PyTorch sample code in that reference already says it plainly; note this sentence: "You're using CRF. Are you sure? In our previous experiments, it has never improved the performance."

# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
import pydensecrf.densecrf as dcrf

def unary_from_labels(labels, n_labels, gt_prob, zero_unsure=True):
    assert 0 < gt_prob < 1, "gt_prob must be in (0, 1)."

    labels = labels.flatten()

    n_energy = -np.log((1.0 - gt_prob) / (n_labels - 1))
    p_energy = -np.log(gt_prob)

    U = np.full((n_labels, len(labels)), n_energy, dtype='float32')
    U[labels - 1 if zero_unsure else labels, np.arange(U.shape[1])] = p_energy

    if zero_unsure:
        U[:, labels == 0] = -np.log(1.0 / n_labels)

    return U

def dense_crf(img, output_probs):
    h = output_probs.shape[0]
    w = output_probs.shape[1]

    output_probs = np.expand_dims(output_probs, 0)
    output_probs = np.append(1 - output_probs, output_probs, axis=0)
    # the mask passed in below is hard 0/1, so clip to keep -np.log finite
    output_probs = np.clip(output_probs.astype(np.float32), 1e-8, 1.0)

    d = dcrf.DenseCRF2D(w, h, 2)
    U = -np.log(output_probs)
    U = U.reshape((2, -1))
    U = np.ascontiguousarray(U)
    img = np.ascontiguousarray(img)

    U = U.astype(np.float32)
    d.setUnaryEnergy(U)

    d.addPairwiseGaussian(sxy=20, compat=3)
    d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10)

    Q = d.inference(5)
    Q = np.argmax(np.array(Q), axis=0).reshape((h, w))

    return Q

if __name__ == "__main__":
    img_path = "./results/imgs/" #原圖路徑
    pre_path = "./results/pre/" #預測結果路徑
    out_path = "./results/crf/" #輸出路徑

    imgs = os.listdir(img_path)
    for im in imgs:
        name = im[:-7]  # strip 'sat.png' (7 chars), keeping the trailing underscore
        pre_name = name + 'mask.png'
        im_full_path = os.path.join(img_path, im)
        pre_full_path = os.path.join(pre_path, pre_name)
        out_full_path = os.path.join(out_path, pre_name)

        img = cv2.imread(im_full_path)
        pre = cv2.imread(pre_full_path,0)
        pre[pre>0] = 1
        # print(pre.shape)
        crf_re = dense_crf(img, pre)
        cv2.imwrite(out_full_path, crf_re)
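
One likely reason the CRF does not help here is that dense_crf is fed a hard 0/1 mask, so the unary term carries almost no information. A sketch of feeding soft probabilities instead (hypothetical: you would first have to save the foreground probability map, e.g. the class-1 channel of the network output, from predict.py as float32):

# Hypothetical: refine a soft probability map instead of a binarized mask
prob = np.load('sample_prob.npy')    # (H, W) float32 in [0, 1], hypothetical file
img = cv2.imread('sample_sat.png')   # matching source tile, hypothetical file
refined = dense_crf(img, prob)       # same function as above
cv2.imwrite('sample_crf.png', (refined * 255).astype(np.uint8))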