caffe python layer

Most of Caffe's layers are written in C++, and thanks to the efficiency of C++ a network can be trained quickly. But sometimes we need to write our own input layer to handle different kinds of data input. For example, you may want to sample patches from images rather than convert everything to LMDB; in that case, consider writing the layer directly in Python. An input layer does not need GPU acceleration anyway, so it is fairly easy to write.

How to use a Python layer

Let's first look at an example from the web (from http://chrischoy.github.io/research/caffe-python-layer/):

layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}

Here type is always Python. top and bottom work the same as in ordinary layers. module is the name of your Python module, which is usually the filename, and layer is the name of the class defined in that module.
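
To make the correspondence concrete, here is a minimal identity-layer sketch (the module and class names are hypothetical): if this is saved as mymodule.py somewhere on $PYTHONPATH, the prototxt above would use module: 'mymodule' and layer: 'MyLayer'.

import caffe

class MyLayer(caffe.Layer):
    """A pass-through layer: top[0] is a copy of bottom[0]."""

    def setup(self, bottom, top):
        # one-time checks and initialization
        if len(bottom) != 1:
            raise Exception("Need exactly one bottom.")

    def reshape(self, bottom, top):
        # declare the output shape before every forward pass
        top[0].reshape(*bottom[0].data.shape)

    def forward(self, bottom, top):
        top[0].data[...] = bottom[0].data

    def backward(self, top, propagate_down, bottom):
        # pass the gradient straight through
        if propagate_down[0]:
            bottom[0].diff[...] = top[0].diff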

How to write a Python layer

Here I use the code released with the paper Fully Convolutional Networks for Semantic Segmentation as an example to explain how a Python layer is written.

import caffe
import numpy as np
from PIL import Image
import random
class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.
    Use this to feed data to a fully convolutional network.
    """
    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:
        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)
        for PASCAL VOC semantic segmentation.
        example
        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")
        """
        # config
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)
        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")
        # load indices for images and labels
        split_f  = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0
        # make eval deterministic
        if 'train' not in self.split:
            self.random = False
        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label
        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.
    Use this to feed data to a fully convolutional network.
    """
    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:
        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)
        for SBDD semantic segmentation.
        N.B. seg11valid is the subset of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
        example
        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)
        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")
        # load indices for images and labels
        split_f  = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0
        # make eval deterministic
        if 'train' not in self.split:
            self.random = False
        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label
        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label

Each class is a layer, and the class name is what goes in the layer parameter. Both classes here are data input layers; since they need to output a data blob and a label blob, they have two tops and no bottom.
The class inherits directly from caffe.Layer and must override the setup(), reshape(), forward(), and backward() methods; any other methods you define are up to you, with no restrictions.
setup() performs one-time initialization when the layer is created, such as preparing the data the layer needs.
reshape() fetches the data and shapes it into a four-dimensional blob; it is called every time data is fetched.
forward() is the forward pass of the network; here it simply passes the loaded data on, since there is no other computation.
backward() is the backward pass; a data layer has nothing to backpropagate, so it simply passes.
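
To wire VOCSegDataLayer into a net, the constructor parameters go through param_str; since setup() calls eval() on it, the string is simply a Python dict literal. A sketch, assuming the code above is saved as voc_layers.py and with a placeholder dataset path:

layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "voc_layers"
    layer: "VOCSegDataLayer"
    param_str: "dict(voc_dir='/path/to/PASCAL/VOC2011', mean=(104.00698793, 116.66876762, 122.67891434), split='val')"
  }
}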

PS

This section gathers some related material for reference.
1. The official Caffe site now has a little pycaffe documentation, but since Caffe is updated frequently and the material could be removed at any time, I copy it here.
File: pyloss.py

import caffe
import numpy as np

class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
    to demonstrate the class interface for developing layers in Python.
    """
    def setup(self, bottom, top):
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")
    def reshape(self, bottom, top):
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)
    def forward(self, bottom, top):
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.
    def backward(self, top, propagate_down, bottom):
        for i in range(2):
            if not propagate_down[i]:
                continue
            if i == 0:
                sign = 1
            else:
                sign = -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num
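
For reference, forward() computes E = sum((x1 - x2)^2) / (2N), where N is the batch size bottom[0].num, so the gradient with respect to the first input is (x1 - x2) / N and its negation for the second. That is exactly what backward() writes into bottom[i].diff as sign * self.diff / bottom[i].num.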

The following shows how to use this layer:
linreg.prototxt

name: 'LinearRegressionExample'
# define a simple network for linear regression on dummy data
# that computes the loss by a PythonLayer.
layer {
  type: 'DummyData'
  name: 'x'
  top: 'x'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
layer {
  type: 'DummyData'
  name: 'y'
  top: 'y'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
# include InnerProduct layers for parameters
# so the net will need backward
layer {
  type: 'InnerProduct'
  name: 'ipx'
  top: 'ipx'
  bottom: 'x'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'InnerProduct'
  name: 'ipy'
  top: 'ipy'
  bottom: 'y'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer.
  # since PythonLayer inherits directly from Layer, this isn't automatically
  # known to Caffe
  loss_weight: 1
}
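
A quick way to check the Python loss layer is to load this net in pycaffe and run one forward/backward pass (a sketch, assuming pyloss.py is on $PYTHONPATH and linreg.prototxt is in the working directory):

import caffe

caffe.set_mode_cpu()
net = caffe.Net('linreg.prototxt', caffe.TRAIN)
# forward: DummyData fills x and y, the Python layer computes the loss
print(net.forward()['loss'])
# backward exercises EuclideanLossLayer.backward() through the InnerProduct layers
net.backward()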

pascal_multilabel_datalayers.py

# imports
import json
import time
import pickle
import scipy.misc
import scipy.sparse  # used by load_pascal_annotation's csr_matrix below
import skimage.io
import caffe
import numpy as np
import os.path as osp
from xml.dom import minidom
from random import shuffle
from threading import Thread
from PIL import Image
from tools import SimpleTransformer

class PascalMultilabelDataLayerSync(caffe.Layer):
    """
    This is a simple syncronous datalayer for training a multilabel model on
    PASCAL.
    """
    def setup(self, bottom, top):
        self.top_names = ['data', 'label']
        # === Read input parameters ===
        # params is a python dictionary with layer parameters.
        params = eval(self.param_str)
        # Check the parameters for validity.
        check_params(params)
        # store input as class variables
        self.batch_size = params['batch_size']
        # Create a batch loader to load the images.
        self.batch_loader = BatchLoader(params, None)
        # === reshape tops ===
        # since we use a fixed input image size, we can shape the data layer
        # once. Else, we'd have to do it in the reshape call.
        top[0].reshape(
            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
        # Note the 20 channels (because PASCAL has 20 classes.)
        top[1].reshape(self.batch_size, 20)
        print_info("PascalMultilabelDataLayerSync", params)
    def forward(self, bottom, top):
        """
        Load data.
        """
        for itt in range(self.batch_size):
            # Use the batch loader to load the next image.
            im, multilabel = self.batch_loader.load_next_image()
            # Add directly to the caffe data layer
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = multilabel
    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (rows and columns)
        """
        pass
    def backward(self, top, propagate_down, bottom):
        """
        These layers do not backpropagate
        """
        pass

class BatchLoader(object):
    """
    This class abstracts away the loading of images.
    Images can either be loaded singly, or in a batch. The latter is used for
    the asyncronous data layer to preload batches while other processing is
    performed.
    """
    def __init__(self, params, result):
        self.result = result
        self.batch_size = params['batch_size']
        self.pascal_root = params['pascal_root']
        self.im_shape = params['im_shape']
        # get list of image indexes.
        list_file = params['split'] + '.txt'
        self.indexlist = [line.rstrip('\n') for line in open(
            osp.join(self.pascal_root, 'ImageSets/Main', list_file))]
        self._cur = 0  # current image
        # this class does some simple data-manipulations
        self.transformer = SimpleTransformer()
        print "BatchLoader initialized with {} images".format(
            len(self.indexlist))
    def load_next_image(self):
        """
        Load the next image in a batch.
        """
        # Did we finish an epoch?
        if self._cur == len(self.indexlist):
            self._cur = 0
            shuffle(self.indexlist)
        # Load an image
        index = self.indexlist[self._cur]  # Get the image index
        image_file_name = index + '.jpg'
        im = np.asarray(Image.open(
            osp.join(self.pascal_root, 'JPEGImages', image_file_name)))
        im = scipy.misc.imresize(im, self.im_shape)  # resize
        # do a simple horizontal flip as data augmentation
        flip = np.random.choice(2)*2-1
        im = im[:, ::flip, :]
        # Load and prepare ground truth
        multilabel = np.zeros(20).astype(np.float32)
        anns = load_pascal_annotation(index, self.pascal_root)
        for label in anns['gt_classes']:
            # in the multilabel problem we don't care how MANY instances
            # there are of each class. Only if they are present.
            # The "-1" is b/c we are not interested in the background
            # class.
            multilabel[label - 1] = 1
        self._cur += 1
        return self.transformer.preprocess(im), multilabel

def load_pascal_annotation(index, pascal_root):
    """
    This code is borrowed from Ross Girshick's FAST-RCNN code
    (https://github.com/rbgirshick/fast-rcnn).
    It parses the PASCAL .xml metadata files.
    See publication for further details: (http://arxiv.org/abs/1504.08083).
    Thanks Ross!
    """
    classes = ('__background__',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')
    class_to_ind = dict(zip(classes, xrange(21)))
    filename = osp.join(pascal_root, 'Annotations', index + '.xml')
    # print 'Loading: {}'.format(filename)
    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data
    with open(filename) as f:
        data = minidom.parseString(f.read())
    objs = data.getElementsByTagName('object')
    num_objs = len(objs)
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, 21), dtype=np.float32)
    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        cls = class_to_ind[
            str(get_data_from_tag(obj, "name")).lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
    overlaps = scipy.sparse.csr_matrix(overlaps)
    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'index': index}

def check_params(params):
    """
    A utility function to check the parameters for the data layers.
    """
    assert 'split' in params.keys(
    ), 'Params must include split (train, val, or test).'
    required = ['batch_size', 'pascal_root', 'im_shape']
    for r in required:
        assert r in params.keys(), 'Params must include {}'.format(r)

def print_info(name, params):
    """
    Output some info regarding the class
    """
    print "{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
        name,
        params['split'],
        params['batch_size'],
        params['im_shape'])
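
The layer is declared in a prototxt like any other Python layer, with the parameters that check_params() expects passed through param_str (a sketch with a placeholder dataset path):

layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "pascal_multilabel_datalayers"
    layer: "PascalMultilabelDataLayerSync"
    param_str: "{'batch_size': 128, 'pascal_root': '/path/to/VOCdevkit/VOC2007', 'split': 'train', 'im_shape': [227, 227]}"
  }
}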

caffenet.py

from __future__ import print_function
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
# helper function for common structures
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                                num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)
def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)
def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def caffenet(lmdb, batch_size=256, include_acc=False):
    data, label = L.Data(source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
        transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True))
    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)
    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)
def make_net():
    with open('train.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-train-lmdb'), file=f)
    with open('test.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-val-lmdb', batch_size=50, include_acc=True), file=f)
if __name__ == '__main__':
    make_net()

tools.py

import numpy as np

class SimpleTransformer:
    """
    SimpleTransformer is a simple class for preprocessing and deprocessing
    images for caffe.
    """
    def __init__(self, mean=[128, 128, 128]):
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0
    def set_mean(self, mean):
        """
        Set the mean to subtract for centering the data.
        """
        self.mean = mean
    def set_scale(self, scale):
        """
        Set the data scaling.
        """
        self.scale = scale
    def preprocess(self, im):
        """
        preprocess() emulates the pre-processing occurring in the VGG16 Caffe
        prototxt.
        """
        im = np.float32(im)
        im = im[:, :, ::-1]  # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))
        return im
    def deprocess(self, im):
        """
        inverse of preprocess()
        """
        im = im.transpose((1, 2, 0))
        im /= self.scale
        im += self.mean
        im = im[:, :, ::-1]  # change to RGB
        return np.uint8(im)
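
A quick round trip through SimpleTransformer looks like this (a sketch; image.jpg is a placeholder and the mean values are the usual ImageNet BGR means):

import numpy as np
from PIL import Image
from tools import SimpleTransformer

transformer = SimpleTransformer(mean=[104, 117, 123])
im = np.asarray(Image.open('image.jpg'))  # H x W x 3, RGB, uint8
blob = transformer.preprocess(im)         # 3 x H x W, BGR, float32, mean-subtracted
restored = transformer.deprocess(blob)    # back to H x W x 3, RGB, uint8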

class CaffeSolver:
    """
    CaffeSolver is a class for creating a solver.prototxt file. It sets default
    values and can export a solver parameter file.
    Note that all parameters are stored as strings; string-valued parameters
    are therefore quoted strings (strings within strings).
    """
    def __init__(self, testnet_prototxt_path="testnet.prototxt",
                 trainnet_prototxt_path="trainnet.prototxt", debug=False):
        self.sp = {}
        # critical:
        self.sp['base_lr'] = '0.001'
        self.sp['momentum'] = '0.9'
        # speed:
        self.sp['test_iter'] = '100'
        self.sp['test_interval'] = '250'
        # looks:
        self.sp['display'] = '25'
        self.sp['snapshot'] = '2500'
        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!
        # learning rate policy
        self.sp['lr_policy'] = '"fixed"'
        # important, but rare:
        self.sp['gamma'] = '0.1'
        self.sp['weight_decay'] = '0.0005'
        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'
        # pretty much never change these.
        self.sp['max_iter'] = '100000'
        self.sp['test_initialization'] = 'false'
        self.sp['average_loss'] = '25'  # this has to do with the display.
        self.sp['iter_size'] = '1'  # this is for accumulating gradients
        if (debug):
            self.sp['max_iter'] = '12'
            self.sp['test_iter'] = '1'
            self.sp['test_interval'] = '4'
            self.sp['display'] = '1'
    def add_from_file(self, filepath):
        """
        Reads a caffe solver prototxt file and updates the Caffesolver
        instance parameters.
        """
        with open(filepath, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                splitLine = line.split(':')
                self.sp[splitLine[0].strip()] = splitLine[1].strip()
    def write(self, filepath):
        """
        Export solver parameters to INPUT "filepath". Sorted alphabetically.
        """
        with open(filepath, 'w') as f:
            for key, value in sorted(self.sp.items()):
                if not(type(value) is str):
                    raise TypeError('All solver parameters must be strings')
                f.write('%s: %s\n' % (key, value))
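
Typical usage: tweak a few parameters, export solver.prototxt, then hand it to Caffe (a sketch; trainnet.prototxt and testnet.prototxt are assumed to exist):

import caffe
from tools import CaffeSolver

solver = CaffeSolver(trainnet_prototxt_path="trainnet.prototxt",
                     testnet_prototxt_path="testnet.prototxt")
solver.sp['base_lr'] = '0.01'  # note: all values are strings
solver.write('solver.prototxt')
sgd_solver = caffe.SGDSolver('solver.prototxt')
sgd_solver.solve()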