參考文章:
1、https://blog.csdn.net/roguesir/article/details/79763204
2、論文:https://arxiv.org/abs/1708.05123
3、https://www.jianshu.com/p/77719fc252fa
4、https://zhuanlan.zhihu.com/p/55234968 edition=yidianzixun&utm_source=yidianzixun&yidian_docid=0L8oiFUo
概述
上篇主要是採用Keras實現,在實現過程中無法進行模塊化,因此決定採用tensorflow2.0的keras api來實現。
具體的DCN模型介紹看上篇的介紹
2.代碼復現
該部分主要是按照網絡結構圖,用代碼的方式實現。在代碼實現的過程中,我們需要緊緊結合數學公式體會其中的含義以及如何用代碼來實現這些數學公式。
該模型主要分爲三部分:1 deep部分、2.cross layer 部分、3.合併部分。
我是基於數據集:https://www.kaggle.com/c/porto-seguro-safe-driver-prediction
1.合併部分
class DCN(tf.keras.Model):
    """Deep & Cross Network (https://arxiv.org/abs/1708.05123).

    Runs the embedded input through a cross network (explicit feature
    interactions) and a deep MLP tower in parallel, concatenates both
    outputs and applies a sigmoid head for binary prediction.
    """

    def __init__(self, num_feat, num_field, dropout_deep, deep_layer_sizes, embedding_size=10):
        """
        Args:
            num_feat: F — size of the feature vocabulary (embedding rows).
            num_field: N — number of fields per sample.
            dropout_deep: list of dropout rates, one per deep layer.
            deep_layer_sizes: list of hidden-layer sizes for the deep tower.
            embedding_size: M — embedding dimension.
        """
        super().__init__()
        self.num_feat = num_feat    # F = number of features
        self.num_field = num_field  # N = number of fields of a sample
        self.dropout_deep = dropout_deep

        # Shared embedding table of size F * M.
        self.feat_embeddings = tf.keras.layers.Embedding(
            num_feat, embedding_size, embeddings_initializer='uniform')

        self.crosslayer = CrossLayer(output_dim=128, num_layer=8)
        self.deep = Deep(dropout_deep, deep_layer_sizes)
        self.fc = tf.keras.layers.Dense(1, activation='sigmoid', use_bias=True)

    def call(self, feat_index, feat_value):
        """Forward pass.

        Args:
            feat_index: int tensor, Batch * N — feature id of each field.
            feat_value: float tensor, Batch * N — value of each field.

        Returns:
            Batch * 1 tensor of sigmoid probabilities.
        """
        # Embedding lookup: Batch * N * M.
        feat_embedding = self.feat_embeddings(feat_index)
        # Scale each field's embedding vector by the field's value.
        feat_embedding = tf.einsum('bnm,bn->bnm', feat_embedding, feat_value)

        # Flatten to Batch * 1 * (N*M). The original created a new
        # tf.keras.layers.Reshape layer object on every call; use the
        # tf.reshape op instead so no layer is rebuilt per forward pass.
        batch_size = tf.shape(feat_embedding)[0]
        stack_input = tf.reshape(feat_embedding, (batch_size, 1, -1))

        x_cross = self.crosslayer(stack_input)
        x_deep = self.deep(stack_input)
        # Merge cross and deep outputs along the last axis (tf.concat op
        # instead of instantiating a concatenate layer inside call).
        merged = tf.concat([x_cross, x_deep], axis=-1)
        return self.fc(merged)
2.Cross Layer
該部分主要是定義交叉層代碼,實現論文中的公式3。需要注意的是,公式3若按原始順序先計算外積 $x_0 x_l^T$,會得到一個 $d \times d$ 的矩陣;在實現代碼時我們改變乘法的結合順序,先計算標量 $x_l^T w_l$,使中間結果從 $d \times d$ 降爲 $1 \times 1$,大大節省內存,降低計算量。
$(x_0 x_l^T) w_l = x_0 (x_l^T w_l)$
在這裏面採用tf.einsum函數來實現公式的乘法。
class CrossLayer(tf.keras.layers.Layer):
    """Cross network of the DCN model (paper Eq. 3).

    Each layer computes x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l, using the
    associativity trick (x_0 x_l^T) w_l = x_0 (x_l^T w_l): the scalar
    x_l^T w_l is computed first, so the d x d outer product x_0 x_l^T is
    never materialized — this saves memory and computation.
    """

    def __init__(self, output_dim, num_layer, **kwargs):
        # output_dim is kept for interface compatibility; the cross network
        # actually preserves the input dimension d.
        self.output_dim = output_dim
        self.num_layer = num_layer
        super(CrossLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Input is Batch * 1 * d; one weight and one bias vector per layer.
        self.input_dim = input_shape[2]
        self.W = []
        self.bias = []
        for i in range(self.num_layer):
            self.W.append(self.add_weight(
                shape=[1, self.input_dim], initializer='glorot_uniform',
                name='w_{}'.format(i), trainable=True))
            self.bias.append(self.add_weight(
                shape=[1, self.input_dim], initializer='zeros',
                name='b_{}'.format(i), trainable=True))
        self.built = True

    def call(self, input):
        # x_0 (= input) stays fixed across all layers; `cross` carries x_l.
        cross = input
        for i in range(self.num_layer):
            # x_l^T: Batch * d * 1
            x_t = tf.einsum('bij->bji', cross)
            # Scalar x_l^T w_i per sample: Batch * 1 * 1
            xw = tf.einsum('bmn,km->bnk', x_t, self.W[i])
            # BUG FIX: the original used `cross` as the first operand in
            # iterations after the first, computing x_l (x_l^T w) instead of
            # the paper's x_0 (x_l^T w). The first factor must always be the
            # original input x_0.
            cross = tf.einsum('bkm,bnk->bnm', input, xw) + self.bias[i] + cross
        return cross
3.Deep layer
在這部分主要是進行的常規的全連接層,在裏面採用setattr函數、getattr函數這兩個定義dense層。
輸入 input爲平鋪的[embeddings1,embeddings2,embeddings3,… dense1,dense2,dense3,…]。輸出爲128維的dense。
class Deep(tf.keras.layers.Layer):
    """Deep (MLP) tower of the DCN model.

    A stack of Dense -> BatchNorm -> ReLU -> Dropout blocks, one per entry
    in deep_layer_sizes, followed by a final 128-unit linear projection.
    Sub-layers are registered via setattr under the names dense_i,
    batchNorm_i, activation_i, dropout_i (kept for compatibility).
    """

    def __init__(self, dropout_deep, deep_layer_sizes):
        """
        Args:
            dropout_deep: list of dropout rates, one per hidden block.
            deep_layer_sizes: list of hidden-layer widths.
        """
        super(Deep, self).__init__()
        self.dropout_deep = dropout_deep
        self.deep_layer_sizes = deep_layer_sizes
        for i in range(len(deep_layer_sizes)):
            setattr(self, 'dense_' + str(i), tf.keras.layers.Dense(deep_layer_sizes[i]))
            setattr(self, 'batchNorm_' + str(i), tf.keras.layers.BatchNormalization())
            setattr(self, 'activation_' + str(i), tf.keras.layers.Activation('relu'))
            setattr(self, 'dropout_' + str(i), tf.keras.layers.Dropout(dropout_deep[i]))
        # Final projection to a 128-dim representation (no activation).
        self.fc = tf.keras.layers.Dense(128, activation=None, use_bias=True)

    def call(self, input):
        # The original duplicated the first block's four lines before the
        # loop; iterating all blocks from index 0 is equivalent and removes
        # the repetition.
        y_deep = input
        for i in range(len(self.deep_layer_sizes)):
            y_deep = getattr(self, 'dense_' + str(i))(y_deep)
            y_deep = getattr(self, 'batchNorm_' + str(i))(y_deep)
            y_deep = getattr(self, 'activation_' + str(i))(y_deep)
            y_deep = getattr(self, 'dropout_' + str(i))(y_deep)
        return self.fc(y_deep)
3.總結
你看,通過這樣一步步將公式與代碼對應起來,就好實現多了,對於不同的計算公式採用不同的函數需要多看文檔,這樣纔可以選用正確的api。
最後,如果需要獲取全部代碼,請看下我的github上倉庫:https://github.com/Snail110/recsys
這裏面是用tensorflow2.0框架來寫。如果覺得我實現的還不錯,記得給我一個星星哦。