一、wide_deep模型
- Wide部分的輸入特徵:
離散特徵
離散特徵之間做組合
不輸入連續值特徵,至少在W&D的paper裏面是這樣使用的。
- Deep部分的輸入特徵:
raw input + embedding處理
對連續值之外的特徵(即離散特徵)做embedding處理,這裏都是策略特徵,就是乘以一個embedding-matrix。在TensorFlow裏對應的接口是tf.feature_column.embedding_column。
注:訓練時,Wide部分用FTRL來訓練;Deep部分用AdaGrad來訓練。
代碼如下:
此處生成WDL文件
import tensorflow as tf
# Steps implemented by this script:
#   1. Build the input data from TFRecords.
#   2. Declare the feature columns the model consumes.
#   3. Train the model and run evaluation.
# Names of the four model inputs, in the order in which the record parser
# zips them with the tensors it extracts.
FEATURE_COLUMN = ['channel_id', 'vector', 'user_weights', 'article_weights']
class WDL(object):
    """Wide & Deep ranking model trained on click-through TFRecords.

    The wide (linear) side receives ``channel_id`` as an identity
    categorical column; the deep (DNN) side receives an embedding of
    ``channel_id`` plus the three numeric columns ``vector``,
    ``user_weights`` and ``article_weights``.
    """

    @staticmethod
    def get_tfrecords_data():
        """Build the ``tf.data`` pipeline used as the estimator ``input_fn``.

        Reads ``datas/train_ctr_20190605.tfrecords``, parses every record,
        groups the records into batches of 64 and repeats the batched
        dataset 10 times.

        :return: dataset of ``(feature_dict, label)`` pairs, where
            ``feature_dict`` is keyed by the names in ``FEATURE_COLUMN``.
        """

        def parse_example_function(example):
            """Turn one serialized ``tf.Example`` into ``(features, label)``.

            :param example: one serialized record from the TFRecord file.
            :return: ``(feature_dict, label)``; ``label`` is cast to int32.
            """
            # Each record stores an int64 label and the features as raw bytes.
            features = {
                'label': tf.FixedLenFeature([], tf.int64),
                'feature': tf.FixedLenFeature([], tf.string)
            }
            parsed = tf.parse_single_example(example, features)

            # The bytes are decoded as float64, cast to float32 and viewed as
            # a single row of 121 numbers.
            raw = tf.decode_raw(parsed['feature'], tf.float64)
            feature = tf.reshape(tf.cast(raw, tf.float32), [1, 121])

            # Row layout: [0] channel id, [1:101] vector,
            # [101:111] user weights, [111:121] article weights.
            channel_id = tf.cast(tf.slice(feature, [0, 0], [1, 1]), tf.int32)

            # Collapse each group to its mean. This used to be a sum, which
            # contradicted the accompanying comment ("average") and the
            # online scoring client, which averages the same groups with
            # np.mean -- a training/serving mismatch.
            vector = tf.reduce_mean(
                tf.slice(feature, [0, 1], [1, 100]), axis=1)
            user_weights = tf.reduce_mean(
                tf.slice(feature, [0, 101], [1, 10]), axis=1)
            article_weights = tf.reduce_mean(
                tf.slice(feature, [0, 111], [1, 10]), axis=1)

            # Name the four tensors so they match the feature columns.
            data = [channel_id, vector, user_weights, article_weights]
            feature_dict = dict(zip(FEATURE_COLUMN, data))

            label = tf.cast(parsed['label'], tf.int32)
            return feature_dict, label

        # Read the TFRecord file, parse each record, then batch and repeat.
        dataset = tf.data.TFRecordDataset(['datas/train_ctr_20190605.tfrecords'])
        dataset = dataset.map(parse_example_function)
        dataset = dataset.batch(64)
        dataset = dataset.repeat(10)
        return dataset

    def train_eval(self):
        """Train the model, evaluate it, and export it for serving.

        Checkpoints are written to ``./ckpt/wide_and_deep/`` and the
        SavedModel is exported under ``./serving_model/wdl/``. The
        evaluation metrics are printed.

        :return: None
        """
        # Wide side: channel_id is already an integer category id, so an
        # identity column is used; num_buckets is mandatory for it.
        channel_id = tf.feature_column.categorical_column_with_identity(
            'channel_id', num_buckets=25)
        wide_columns = [channel_id]

        # Deep side: the categorical id is embedded before entering the DNN;
        # the remaining inputs are plain numeric columns.
        vector = tf.feature_column.numeric_column('vector')
        user_weights = tf.feature_column.numeric_column('user_weights')
        article_weights = tf.feature_column.numeric_column('article_weights')
        deep_columns = [
            tf.feature_column.embedding_column(channel_id, dimension=25),
            vector,
            user_weights,
            article_weights,
        ]

        model = tf.estimator.DNNLinearCombinedClassifier(
            model_dir="./ckpt/wide_and_deep/",
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[1024, 512, 256])

        # Only a single training step is run here.
        model.train(WDL.get_tfrecords_data, steps=1)
        # Evaluation reuses the training input pipeline.
        result = model.evaluate(WDL.get_tfrecords_data)
        # Reference output recorded from a one-step run of the earlier
        # sum-based features (values will differ now):
        # {'accuracy': 0.9046435, 'accuracy_baseline': 0.9046434, 'auc': 0.57673496, 'auc_precision_recall': 0.12006451, 'average_loss': 0.38107494, 'label/mean': 0.095356554, 'loss': 24.18823, 'precision': 0.0, 'prediction/mean': 0.2390636, 'recall': 0.0, 'global_step': 1}
        print(result)

        # Export the trained model as a SavedModel whose serving signature
        # parses serialized tf.Example protos with the same feature columns.
        columns = wide_columns + deep_columns
        feature_spec = tf.feature_column.make_parse_example_spec(columns)
        serving_input_receiver_fn = \
            tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
        model.export_savedmodel("./serving_model/wdl/", serving_input_receiver_fn)
if __name__ == '__main__':
    # Script entry point: train, evaluate and export the Wide & Deep model.
    WDL().train_eval()
二、基於TF Serving的模型服務
TensorFlow Serving是一種靈活的高性能服務系統,適用於機器學習模型,專爲生產環境而設計。TensorFlow Serving可以輕鬆部署新算法和實驗,同時保持相同的服務器架構和API。TensorFlow Serving提供與TensorFlow模型的開箱即用集成,但可以輕鬆擴展以提供其他類型的模型和數據。
TensorFlow Serving部署
- 1、獲取最新TF Serving docker鏡像
docker pull tensorflow/serving
- 2、查看docker鏡像
docker images
- 3、運行tf serving(即創建一個docker容器來運行)
docker run -p 8501:8501 -p 8500:8500 --mount type=bind,source=/home/ubuntu/detectedmodel/commodity,target=/models/commodity -e MODEL_NAME=commodity -t tensorflow/serving
說明:
-p 8501:8501 爲端口映射,-p 主機端口:docker容器程序(tf serving)使用端口,訪問主機8501端口就相當於訪問了tf serving程序的8501端口
tf serving 使用8501端口對外提供HTTP服務,使用8500對外提供gRPC服務,這裏同時開放了兩個端口的使用
--mount type=bind,source=/home/ubuntu/detectedmodel/commodity,target=/models/commodity 爲文件映射,將主機(source)的模型文件映射到docker容器程序(target)的位置,以便tf serving使用模型,target參數爲/models/我的模型
-e MODEL_NAME=commodity設置了一個環境變量,名爲MODEL_NAME,此變量被tf serving讀取,用來按名字尋找模型,與上面target參數中我的模型對應
-t 爲tf serving創建一個僞終端,供程序運行
tensorflow/serving爲鏡像名
wdl模型服務運行
- 1、運行命令
docker run -p 8501:8501 -p 8500:8500 --mount type=bind,source=/root/toutiao_project/reco_sys/server/models/serving_model/wdl,target=/models/wdl -e MODEL_NAME=wdl -t tensorflow/serving
- 2、查看是否運行
itcast:~$ docker ps
三、在線預測
import os
import sys

import grpc
import numpy as np
import tensorflow as tf
from grpc.beta import implementations
from tensorflow_serving.apis import classification_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

from server import pool
from server.utils import HBaseUtils
# Project root: the directory two levels above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print(BASE_DIR)
# Put the project root first on the import search path.
# NOTE(review): this runs after the `server` imports at the top of the
# script, so it cannot help resolve them -- confirm the intended order.
sys.path.insert(0, BASE_DIR)
def wdl_sort_service():
    """Score a fixed set of candidate articles with the served W&D model.

    Reads one user feature row from the HBase table ``ctr_feature_user``
    and, for each candidate article, one row from ``ctr_feature_article``.
    One ``tf.train.Example`` is built per article and all of them are sent
    in a single ``Classify`` request to the model named ``wdl`` at
    ``127.0.0.1:8500``. The response is printed.

    The user id, channel id and candidate article ids are hard-coded in the
    body. If the user feature row cannot be read, or is empty, no request
    is sent.

    :return: None
    """
    hbu = HBaseUtils(pool)

    # 1. Read the user's features from the feature centre.
    # NOTE(review): eval() executes whatever text is stored in HBase. If
    # those rows can ever hold untrusted content, switch to a safe parser
    # such as ast.literal_eval.
    try:
        user_feature = eval(hbu.get_table_row('ctr_feature_user',
                                              '{}'.format(1115629498121846784).encode(),
                                              'channel:{}'.format(18).encode()))
    except Exception:
        # Best effort: a failed read means there is nothing to score.
        user_feature = []

    if not user_feature:
        return None

    # The user weight is the same for every candidate, so average it once
    # here; the loop used to re-average and rebind it on each iteration.
    user_weights = np.mean(user_feature)

    # 2. Read each article's features and build one Example per article.
    examples = []
    for article_id in [17749, 17748, 44371, 44368]:
        try:
            article_feature = eval(hbu.get_table_row('ctr_feature_article',
                                                     '{}'.format(article_id).encode(),
                                                     'article:{}'.format(article_id).encode()))
        except Exception:
            # Fall back to an all-zero row of 111 values when the read fails.
            article_feature = [0.0] * 111

        # Indexing as used below: [0] channel id, [1:11] article weights,
        # [11:] article vector.
        channel_id = int(article_feature[0])
        vector = np.mean(article_feature[11:])
        article_weights = np.mean(article_feature[1:11])

        # Keys must match the model's feature-column names exactly. The
        # user-weights key used to be misspelled 'user_weigths', so that
        # feature never reached the model under the expected name.
        example = tf.train.Example(features=tf.train.Features(feature={
            "channel_id": tf.train.Feature(int64_list=tf.train.Int64List(value=[channel_id])),
            "vector": tf.train.Feature(float_list=tf.train.FloatList(value=[vector])),
            'user_weights': tf.train.Feature(float_list=tf.train.FloatList(value=[user_weights])),
            'article_weights': tf.train.Feature(float_list=tf.train.FloatList(value=[article_weights])),
        }))
        examples.append(example)

    # 3. Send all examples in one classification request over gRPC.
    with grpc.insecure_channel('127.0.0.1:8500') as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        request = classification_pb2.ClassificationRequest()
        request.model_spec.name = 'wdl'
        request.input.example_list.examples.extend(examples)

        # Second positional argument is the call timeout.
        response = stub.Classify(request, 10.0)
        print(response)

    return None
# Script entry point: send one hard-coded scoring request when run directly.
if __name__ == '__main__':
    wdl_sort_service()