[AI Notes] Part 14: Implementing Disout in TensorFlow 2.0 to Prevent Overfitting

A TensorFlow 2.0 implementation of the Disout algorithm, open-sourced by Huawei's Noah's Ark Lab.

Instead of masking out selected features the way Dropout does, the algorithm fights overfitting by injecting perturbations into randomly chosen feature blocks.
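
Read as a formula (notation mine, summarizing the implementation below rather than the paper's exact definition): with $M$ the binary block mask, $\alpha$ the blend factor, and $r$ random values drawn from each feature map's value range, every activation is updated as

$$x' = x \odot (1 - M) + \big(\alpha \, x + (1 - \alpha) \, r\big) \odot M$$

Outside the blocks ($M = 0$) the features pass through unchanged; inside them, a noisy blend of the original value and the perturbation takes over.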

Paper: https://arxiv.org/abs/2002.11022

Implementation code:

import tensorflow as tf


class Disout(tf.keras.layers.Layer):
    '''
    Disout for 4-D feature maps (batch, height, width, channels).
    Paper: https://arxiv.org/abs/2002.11022
    '''

    def __init__(self, dist_prob, block_size=5, alpha=0.5, **kwargs):
        super(Disout, self).__init__(**kwargs)
        # dist_prob: target fraction of feature-map positions to perturb
        self.dist_prob = dist_prob
        # alpha: how much of the original activation is kept inside a block
        self.alpha = alpha
        # block_size: side length of each square perturbed block
        self.block_size = block_size

    def build(self, input_shape):
        pass

    @tf.function
    def call(self, x, training=False):
        '''x: (batch_size, h, w, c)'''
        # perturb only during training; the Keras `training` flag is used here
        # instead of `self.trainable`, which stays True at inference time
        if not training:
            return x
        else:
            if tf.math.equal(tf.rank(x),4):
                x_shape = tf.shape(x)
                x_size = x_shape[1:3]
                x_size_f = tf.cast(x_size, tf.float32)
                # block size as float and int tensors
                x_block_size_f = tf.constant((self.block_size, self.block_size), tf.float32)
                x_block_size = tf.cast(x_block_size_f, tf.int32)
                # number of blocks needed so that about dist_prob of all positions are perturbed
                x_block_num = (x_size_f[0] * x_size_f[1]) * self.dist_prob / (x_block_size_f[0] * x_block_size_f[1])
                # probability of a block center appearing at each valid (non-border) position
                x_block_rate = x_block_num / ((x_size_f[0] - x_block_size_f[0] + 1) * (x_size_f[1] - x_block_size_f[1] + 1))
                # sample block centers with that probability; uniform noise is used
                # (P(u < rate) == rate, which does not hold for normal noise), and the
                # threshold is applied *before* zero-padding so the padded border
                # can never be selected as a block center
                x_block_center = tf.random.uniform((x_shape[0], x_size[0] - x_block_size[0] + 1, x_size[1] - x_block_size[1] + 1, x_shape[3]), dtype=tf.float32)
                x_block_center = tf.cast(x_block_center < x_block_rate, tf.float32)
                x_block_padding_t = x_block_size[0] // 2
                x_block_padding_b = x_block_size[0] - 1 - x_block_padding_t
                x_block_padding_l = x_block_size[1] // 2
                x_block_padding_r = x_block_size[1] - 1 - x_block_padding_l
                x_block_padding = tf.pad(x_block_center, [[0, 0], [x_block_padding_t, x_block_padding_b], [x_block_padding_l, x_block_padding_r], [0, 0]])
                # grow each center into a block_size x block_size block
                x_block = tf.nn.max_pool2d(x_block_padding, ksize=[self.block_size, self.block_size], strides=[1, 1], padding='SAME')
                # build the perturbation: random values inside each feature map's
                # value range (uniform so they stay within [min, max]), blended
                # with the original activations by alpha
                x_max = tf.reduce_max(x, axis=(1, 2), keepdims=True)
                x_min = tf.reduce_min(x, axis=(1, 2), keepdims=True)
                x_block_random = tf.random.uniform(x_shape, dtype=x.dtype) * (x_max - x_min) + x_min
                x_block_random = x_block_random * (1.0 - self.alpha) + x * self.alpha
                # keep original values outside blocks, perturbed values inside
                x = x * (1.0 - x_block) + x_block_random * x_block
                return x
            else:
                return x

    def compute_output_shape(self, input_shape):
        '''Output shape matches the input shape.'''
        return input_shape

class Disout1D(tf.keras.layers.Layer):
    '''
    Disout for 2-D inputs (batch, features), e.g. after a Dense layer.
    Paper: https://arxiv.org/abs/2002.11022
    '''

    def __init__(self, dist_prob, block_size=5, alpha=0.5, **kwargs):
        super(Disout1D, self).__init__(**kwargs)
        # same hyper-parameters as Disout above, with 1-D blocks of length block_size
        self.dist_prob = dist_prob
        self.alpha = alpha
        self.block_size = block_size

    def build(self, input_shape):
        pass

    @tf.function
    def call(self, x, training=False):
        '''x: (batch_size, features)'''
        # perturb only during training, as in Disout above
        if not training:
            return x
        else:
            if tf.math.equal(tf.rank(x),2):
                x_shape = tf.shape(x)
                x_size = x_shape[1]
                x_size_f = tf.cast(x_size, tf.float32)
                # block size as float and int tensors
                x_block_size_f = tf.constant(self.block_size, tf.float32)
                x_block_size = tf.cast(x_block_size_f, tf.int32)
                # number of blocks needed so that about dist_prob of all positions are perturbed
                x_block_num = x_size_f * self.dist_prob / x_block_size_f
                # probability of a block center appearing at each valid position
                x_block_rate = x_block_num / (x_size_f - x_block_size_f + 1)
                # sample and threshold block centers before zero-padding, as in the 2-D layer
                x_block_center = tf.random.uniform((x_shape[0], x_size - x_block_size + 1), dtype=tf.float32)
                x_block_center = tf.cast(x_block_center < x_block_rate, tf.float32)
                x_block_padding_t = x_block_size // 2
                x_block_padding_b = x_block_size - 1 - x_block_padding_t
                x_block_padding = tf.pad(x_block_center, [[0, 0], [x_block_padding_t, x_block_padding_b]])
                # grow each center into a block_size-wide segment
                x_block = tf.expand_dims(x_block_padding, axis=-1)
                x_block = tf.nn.max_pool1d(x_block, ksize=self.block_size, strides=1, padding='SAME')
                x_block = tf.reshape(x_block, x_shape)
                # build the perturbation, as in the 2-D layer above
                x_max = tf.reduce_max(x, axis=1, keepdims=True)
                x_min = tf.reduce_min(x, axis=1, keepdims=True)
                x_block_random = tf.random.uniform(x_shape, dtype=x.dtype) * (x_max - x_min) + x_min
                x_block_random = x_block_random * (1.0 - self.alpha) + x * self.alpha
                x = x * (1.0 - x_block) + x_block_random * x_block
                return x
            else:
                return x

    def compute_output_shape(self, input_shape):
        '''Output shape matches the input shape.'''
        return input_shape
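
A minimal usage sketch (the model below is illustrative, not from the original repo): place Disout after convolutional feature maps and Disout1D after Dense layers. Keras passes training=True to each layer during fit(), so the layers perturb while training and act as the identity at inference.

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
    Disout(dist_prob=0.1, block_size=5, alpha=0.5),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    Disout1D(dist_prob=0.1, block_size=3, alpha=0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# model.fit(x_train, y_train, epochs=5)   # Disout active (training=True)
# preds = model.predict(x_test)           # Disout is a no-op (training=False)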

Source code: https://github.com/tfwcn/tensorflow2-disout

Related resources:

Huawei's official source: https://github.com/huawei-noah/Disout
