Every blog has a motto:
0. 前言
預定義estimator的使用
1. 代碼部分
1. 導入模塊
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
# Environment report: TensorFlow version, Python interpreter info,
# then the version of every imported library (for reproducibility).
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)
2. 讀取數據
# Load the train/eval CSV splits, then split off the 'survived' label column.
train_file, eval_file = './data/train.csv', './data/eval.csv'
train_df = pd.read_csv(train_file)
eval_df = pd.read_csv(eval_file)
for frame in (train_df, eval_df):
    print(frame.head())
# pop() removes the label column from the features in place and returns it.
y_train = train_df.pop('survived')
y_eval = eval_df.pop('survived')
for frame in (train_df, eval_df, y_train, y_eval):
    print(frame.head())
3. feature_columns
# Feature engineering: one-hot (indicator) columns for the categorical
# features, plain float columns for the numeric ones.
categorical_cols = ['sex','n_siblings_spouses','parch','class','deck','embark_town','alone']
numeric_cols = ['age','fare']
feature_columns = []
for col in categorical_cols:
    vocab = train_df[col].unique()  # every distinct value seen in training data
    print(col, vocab)
    # Vocabulary column wrapped in an indicator column -> one-hot encoding.
    cat_col = tf.feature_column.categorical_column_with_vocabulary_list(col, vocab)
    feature_columns.append(tf.feature_column.indicator_column(cat_col))
for col in numeric_cols:
    feature_columns.append(tf.feature_column.numeric_column(col, dtype=tf.float32))
4. 構建dataset
# 構建dataset
def make_dataset(data_df, label_df, epochs=10, shuffle=True, batch_size=32):
    """Build a batched tf.data.Dataset of (feature-dict, label) pairs.

    Args:
        data_df: DataFrame of features; converted to a dict of column tensors.
        label_df: Series/DataFrame of labels aligned with data_df.
        epochs: how many times to repeat the data.
        shuffle: whether to shuffle with a 10000-element buffer.
        batch_size: examples per batch.

    Returns:
        A tf.data.Dataset yielding batches of (features, labels).
    """
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
        ds = ds.shuffle(10000)
    return ds.repeat(epochs).batch(batch_size)
5. 保存輸出模型
5.1.1 baseline_estimator
# Train a BaselineClassifier (predicts from the label distribution alone,
# ignoring features) and persist checkpoints under output_dir.
output_dir = 'baseline_model'
# makedirs(exist_ok=True) replaces the race-prone exists()/mkdir() pair.
os.makedirs(output_dir, exist_ok=True)
baseline_estimator = tf.estimator.BaselineClassifier(model_dir=output_dir, n_classes=2)
baseline_estimator.train(input_fn=lambda: make_dataset(train_df, y_train, epochs=100))
5.1.2 evaluate
# Evaluate the baseline on the held-out set: a single pass (epochs=1), no shuffling.
baseline_estimator.evaluate(input_fn = lambda : make_dataset(eval_df,y_eval,epochs=1,shuffle=False,batch_size=20))
5.2.1 linear
# Train a LinearClassifier (logistic regression) over the engineered
# feature columns; checkpoints go to linear_output_dir.
linear_output_dir = 'linear_model'  # fixed 'ouput' -> 'output' typo (local name only)
# makedirs(exist_ok=True) replaces the race-prone exists()/mkdir() pair.
os.makedirs(linear_output_dir, exist_ok=True)
linear_estimator = tf.estimator.LinearClassifier(model_dir=linear_output_dir,
                                                 n_classes=2,
                                                 feature_columns=feature_columns)
linear_estimator.train(input_fn=lambda: make_dataset(train_df, y_train, epochs=100))
5.2.2 evaluate
# Evaluate the linear model on the held-out set: one pass, no shuffling (default batch size).
linear_estimator.evaluate(input_fn=lambda :make_dataset(eval_df,y_eval,epochs=1,shuffle=False))
5.3.1 dnn_estimator
# Train a DNNClassifier: two hidden layers of 128 ReLU units, Adam optimizer,
# over the same feature columns; checkpoints go to dnn_output_dir.
dnn_output_dir = './dnn_model'
# makedirs(exist_ok=True) replaces the race-prone exists()/mkdir() pair.
os.makedirs(dnn_output_dir, exist_ok=True)
dnn_estimator = tf.estimator.DNNClassifier(model_dir=dnn_output_dir,
                                           n_classes=2,
                                           feature_columns=feature_columns,
                                           hidden_units=[128, 128],
                                           activation_fn=tf.nn.relu,
                                           optimizer='Adam')
dnn_estimator.train(input_fn=lambda: make_dataset(train_df, y_train, epochs=100))
5.3.2 evaluate
# Evaluate the DNN model on the held-out set: one pass, no shuffling (default batch size).
dnn_estimator.evaluate(input_fn=lambda : make_dataset(eval_df,y_eval,epochs=1,shuffle=False))