Every blog has its motto:
0. 前言
tf1.0 自定義estimator
1. 代碼部分
1. 導入模塊
# Standard imports for the tutorial: plotting, numerics, data frames, TF/Keras.
import matplotlib as mpl
import matplotlib.pyplot as plt
# NOTE: IPython magic — valid only inside a Jupyter notebook, not a plain .py file.
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# Print interpreter/library versions for reproducibility.
print(tf.__version__)
print(sys.version_info)
for module in mpl,np,pd,sklearn,tf,keras:
    print(module.__name__,module.__version__)
2. 讀取數據
# Load the Titanic train/eval CSV files into DataFrames.
train_file = './data/train.csv'
eval_file = './data/eval.csv'
train_df = pd.read_csv(train_file)
eval_df = pd.read_csv(eval_file)
print(train_df.head())
print(eval_df.head())
# Pop the 'survived' label column off each frame, leaving only features.
y_train = train_df.pop('survived')
y_eval = eval_df.pop('survived')
print(train_df.head())
print(eval_df.head())
print(y_train.head())
print(y_eval.head())
3. feature_columns
# Build the tf.feature_column list that feeds the estimator's input layer.
# Discrete (categorical) features are one-hot encoded; continuous features
# become plain numeric columns.
categorical_columns = ['sex','n_siblings_spouses','parch','class','deck','embark_town','alone']
numeric_columns = ['age','fare']
feature_columns = []
# Categorical: vocabulary list -> one-hot via indicator_column.
for col in categorical_columns:
    vocab = train_df[col].unique()  # all distinct values observed in training data
    print(col, vocab)
    indicator = tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(col, vocab))
    feature_columns.append(indicator)
# Continuous: direct float32 numeric columns.
for col in numeric_columns:
    feature_columns.append(tf.feature_column.numeric_column(col, dtype=tf.float32))
4. 構建dataset
# 構建dataset
def make_dataset(data_df,label_df,epochs=10,shuffle=True,batch_size = 32):
dataset = tf.data.Dataset.from_tensor_slices((dict(data_df),label_df))
if shuffle:
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(epochs).batch(batch_size)
return dataset.make_one_shot_iterator().get_next()
5. 自定義estimator
# Directory where the estimator writes checkpoints and event files.
output_dir = "customized_estimator"
# makedirs(..., exist_ok=True) replaces the exists()+mkdir() pair: it is
# race-free and also creates missing parent directories.
os.makedirs(output_dir, exist_ok=True)
def model_fn(features, labels, mode, params):
    """Custom estimator model_fn: a simple fully-connected DNN classifier.

    Handles the three runtime states: TRAIN, EVAL, PREDICT.

    params keys: 'feature_columns' (input features), 'hidden_units'
    (list of layer widths), 'n_classes' (output size).
    """
    # Dense input tensor from the feature columns, then the hidden stack.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for n_unit in params['hidden_units']:
        net = tf.layers.dense(net, units=n_unit, activation=tf.nn.relu)
    logits = tf.layers.dense(net, params['n_classes'], activation=None)
    predicted_classes = tf.argmax(logits, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            # BUG FIX: original wrote predicted_classes[:tf.newaxis], which
            # slices the tensor instead of adding a trailing axis.
            # NOTE(review): key is conventionally "class_ids"; kept as-is so
            # existing consumers of this dict keep working.
            "classes_ids": predicted_classes[:, tf.newaxis],
            "probabilities": tf.nn.softmax(logits),
            "logits": logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name="acc_op")
    metrics = {"accuracy": accuracy}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer()
    # BUG FIX: 'minimzer' -> minimize (AttributeError otherwise).
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    # BUG FIX: ModeKeys.TRAINS -> ModeKeys.TRAIN (AttributeError otherwise).
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
# Wire the custom model_fn into an Estimator and kick off training.
hyperparams = {
    "feature_columns": feature_columns,
    "hidden_units": [100, 100],
    "n_classes": 2,
}
estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   model_dir=output_dir,
                                   params=hyperparams)
# Feed training data via the one-shot-iterator input function.
estimator.train(input_fn=lambda: make_dataset(train_df, y_train, epochs=100))