FM (Factorization Machines)
The traditional LR (logistic regression) model is linear; to capture nonlinear relationships, one must either use GBDT to extract nonlinear features or construct nonlinear features by hand.
FM instead builds the cross features explicitly, modeling second-order interactions directly.
The formula is:

$$\hat{y}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n}\sum_{j=i+1}^{n} w_{ij}\, x_i x_j$$

where $w_{ij}$ are the second-order interaction parameters; there are $\frac{n(n-1)}{2}$ of them, giving a complexity of $O(n^2)$.
To reduce the time complexity, matrix factorization offers a way out: substitute $w_{ij} = \langle v_i, v_j \rangle$ into the formula above. Then:

$$\hat{y}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n}\sum_{j=i+1}^{n} \langle v_i, v_j \rangle\, x_i x_j$$

where $v_i$ is the latent vector of the $i$-th feature and $\langle \cdot, \cdot \rangle$ denotes the dot product. Each latent vector has length $k$ ($k \ll n$), i.e. $k$ factors describing the feature.
At first glance FM's complexity is $O(kn^2)$, but an algebraic simplification brings it down to $O(kn)$. The derivation:

$$\begin{aligned}
\sum_{i=1}^{n}\sum_{j=i+1}^{n} \langle v_i, v_j \rangle x_i x_j
&= \frac{1}{2}\sum_{i=1}^{n}\sum_{j=1}^{n} \langle v_i, v_j \rangle x_i x_j - \frac{1}{2}\sum_{i=1}^{n} \langle v_i, v_i \rangle x_i x_i \\
&= \frac{1}{2}\sum_{f=1}^{k}\left[\left(\sum_{i=1}^{n} v_{i,f}\, x_i\right)\left(\sum_{j=1}^{n} v_{j,f}\, x_j\right) - \sum_{i=1}^{n} v_{i,f}^2\, x_i^2\right] \\
&= \frac{1}{2}\sum_{f=1}^{k}\left[\left(\sum_{i=1}^{n} v_{i,f}\, x_i\right)^2 - \sum_{i=1}^{n} v_{i,f}^2\, x_i^2\right]
\end{aligned}$$
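This identity is easy to check numerically. Below is a quick NumPy sketch (array names and sizes are arbitrary, chosen only for illustration):

import numpy as np

n, k = 6, 4                        # n features, k latent factors
rng = np.random.default_rng(0)
V = rng.normal(size=(n, k))        # latent vectors v_i
x = rng.normal(size=n)             # one input sample

# Brute-force O(k * n^2): sum over all pairs i < j of <v_i, v_j> x_i x_j
brute = sum(V[i] @ V[j] * x[i] * x[j]
            for i in range(n) for j in range(i + 1, n))

# O(k * n) form: 0.5 * sum_f [(sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2]
vx = V * x[:, None]                # v_i * x_i, shape (n, k)
fast = 0.5 * np.sum(np.sum(vx, axis=0) ** 2 - np.sum(vx ** 2, axis=0))

print(np.allclose(brute, fast))    # True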
DeepFM
Paper [IJCAI 2017]:
DeepFM: A Factorization-Machine based Neural Network for CTR Prediction
Network architecture diagrams (see the paper): the overall structure, the FM component, and the Deep component.
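In the paper, the FM component and the Deep component share the same feature embeddings, and their outputs are combined as

$$\hat{y} = \mathrm{sigmoid}(y_{FM} + y_{DNN})$$

The implementation below deviates slightly from this: it concatenates the first-order, second-order, and deep outputs and passes them through a final Dense layer.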
Code Implementation (Variant 1)
Data processing
The model takes two inputs: input_idxs and input_values.
- input_idxs is the sparse encoding: every distinct value of each categorical field gets its own id, while each continuous field is encoded as a single fixed id.
- input_values holds the feature values: a categorical field's value becomes 1, while a continuous field keeps its original value.
For example (a hypothetical schema, chosen for illustration; the encoding matches the test code at the bottom):
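Suppose there are two continuous fields, age and price, and one categorical field, gender ∈ {male, female}. The ids are assigned as: age → 0, price → 1, gender=male → 2, gender=female → 3. A sample (age=25, price=3.5, gender=female) then becomes:
- input_idxs = [0, 1, 3]
- input_values = [25.0, 3.5, 1.0]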
Points to note:
- In second_order_part, categorical fields and continuous fields are crossed with one another.
- In deep_part, the embeddings are multiplied by the feature values before being fed into the Dense layers.
Code
import tensorflow as tf


def dnn(params):
    # Fully-connected ReLU stack followed by a single linear output unit.
    dnn_model = tf.keras.Sequential()
    for size in params['dnn_hidden_units']:
        dnn_model.add(tf.keras.layers.Dense(size, activation='relu', use_bias=False))
    dnn_model.add(tf.keras.layers.Dense(1, activation=None, use_bias=False))
    return dnn_model
class DeepFM(tf.keras.Model):
    def __init__(self, params):
        '''
        :param params:
            feature_size: size of the encoded id vocabulary
            factor_size: embedding dimension, the k in the formula
            field_size: number of input fields, the f in the formula
        '''
        super(DeepFM, self).__init__()
        self.params = params
        # First-order weights w_i: one scalar per feature id.
        self.embeddings_1 = tf.keras.layers.Embedding(params['feature_size'], 1)
        # Latent vectors v_i: shared by the second-order part and the deep part.
        self.embeddings_2 = tf.keras.layers.Embedding(params['feature_size'], params['factor_size'],
                                                      embeddings_regularizer=tf.keras.regularizers.l2(0.00001),
                                                      embeddings_initializer=tf.initializers.RandomNormal(
                                                          mean=0.0, stddev=0.0001, seed=1024)
                                                      )
        self.deep_dnn = dnn(params)
        self.dense_output = tf.keras.layers.Dense(params['class_num'], activation=params['last_activation'])
    def first_order_part(self, idxs, values):
        '''
        :return: (n, 1)
        '''
        x = self.embeddings_1(idxs)                           # (n, f, 1)
        x = tf.multiply(x, tf.expand_dims(values, axis=-1))   # (n, f, 1)
        x = tf.reduce_sum(x, axis=1)                          # (n, 1)
        return x
    def second_order_part(self, idxs, values):
        '''Uses the identity 2ab = (a+b)^2 - (a^2 + b^2).
        :return: (n, 1)
        '''
        x = self.embeddings_2(idxs)                           # (n, f, k)
        x = tf.multiply(x, tf.expand_dims(values, axis=-1))   # (n, f, k)
        sum_square = tf.square(tf.reduce_sum(x, axis=1))      # (n, k)
        square_sum = tf.reduce_sum(tf.square(x), axis=1)      # (n, k)
        output = 0.5 * tf.subtract(sum_square, square_sum)    # (n, k)
        return tf.reduce_sum(output, axis=1, keepdims=True)   # (n, 1)
    def deep_part(self, idxs, values):
        '''
        :return: (n, 1)
        '''
        x = self.embeddings_2(idxs)                           # (n, f, k)
        x = tf.multiply(x, tf.expand_dims(values, axis=-1))   # (n, f, k)
        x = tf.reshape(x, (-1, self.params['field_size'] * self.params['factor_size']))
        x = self.deep_dnn(x)
        return x
    def call(self, idxs, values):
        '''
        :param idxs: (n, f)
        :param values: (n, f)
        :return: (n, class_num)
        '''
        first_order_output = self.first_order_part(idxs, values)
        second_order_output = self.second_order_part(idxs, values)
        deep_output = self.deep_part(idxs, values)
        combined_output = tf.concat([first_order_output, second_order_output, deep_output], axis=1)
        output = self.dense_output(combined_output)
        return output
if __name__ == '__main__':
    import numpy as np

    params = {
        'field_size': 12,
        'feature_size': 5 + 3,
        'factor_size': 4,
        'class_num': 1,
        'last_activation': 'sigmoid',
        'dnn_hidden_units': (128, 128)
    }
    print('Generate fake data...')
    x_dense = np.random.random((1000, 5))
    x_sparse = np.random.randint(0, 3, (1000, 7))
    # The index construction is kept deliberately simple here:
    # each dense field gets a fixed id (0..4) and sparse values are offset by 5.
    dense_idxs = np.zeros(x_dense.shape)
    for i in range(dense_idxs.shape[1]):
        dense_idxs[:, i] = i
    x_idxs = np.concatenate([dense_idxs, x_sparse + 5], axis=1)
    x_values = np.concatenate([x_dense, np.ones(x_sparse.shape)], axis=1)
    x_idxs = tf.convert_to_tensor(x_idxs, dtype=tf.int64)
    x_values = tf.convert_to_tensor(x_values, dtype=tf.float32)
    y = np.random.randint(0, 2, (1000, 1))
    model = DeepFM(params)
    pred = model(x_idxs, x_values)
    print(pred.shape)  # (1000, 1)
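To go beyond a forward pass, a custom GradientTape loop is the most direct way to train this model, since call takes two positional tensors. A minimal sketch, assuming binary cross-entropy, Adam, and an illustrative batch size and epoch count (none of these are from the original post):

    loss_fn = tf.keras.losses.BinaryCrossentropy()
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    dataset = tf.data.Dataset.from_tensor_slices(
        (x_idxs, x_values, y.astype('float32'))).batch(256)

    for epoch in range(3):
        for batch_idxs, batch_values, batch_y in dataset:
            with tf.GradientTape() as tape:
                pred = model(batch_idxs, batch_values)
                # Include the embedding l2 regularization collected in model.losses.
                loss = loss_fn(batch_y, pred) + tf.add_n(model.losses)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        print('epoch', epoch, 'loss', float(loss))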