筆記摘自《Google老師親授 TensorFlow2.0 入門到進階_課程》
1. 常量 tf.constant()
定義
# Create a constant tensor with two rows and three columns.
t = tf.constant([
    [1., 2., 3.],
    [4., 5., 6.],
])
# Tensors support NumPy-style indexing: print the whole tensor, then every
# row without its first column.
print(t)
print(t[..., 1:])
tf.Tensor(
[[1. 2. 3.]
[4. 5. 6.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[2. 3.]
[5. 6.]], shape=(2, 2), dtype=float32)
tf常量與numpy格式的互相轉換
# Tensor -> NumPy: read the values out with .numpy(), or hand the tensor
# straight to a NumPy function.
print(t.numpy())
print(np.square(t))
# NumPy -> tensor: tf.constant accepts an ndarray as its value.
np_t = np.array([
    [1., 2., 3.],
    [4., 5., 6.],
])
print(tf.constant(np_t))
2. 變量 tf.Variable()
變量的定義
# Create a variable holding a 2x3 float matrix and inspect it three ways.
# (The snippet was pasted twice verbatim; one copy is enough.)
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)          # the variable object itself
print(v.value())  # result of value()
print(v.numpy())  # result of numpy()
賦值
使用assign賦值
# Overwrite the variable in place with twice its current value.
v.assign(v * 2)
print(v.numpy())
[[ 2. 4. 6.]
[ 8. 10. 12.]]
3. 自定義損失函數
def customized_mse(y_true, y_pred):
    """Return the mean of the squared differences between y_pred and y_true."""
    squared_error = tf.square(y_pred - y_true)
    return tf.reduce_mean(squared_error)
# Regression network: a 30-unit ReLU hidden layer followed by one output unit.
model = keras.models.Sequential()
model.add(keras.layers.Dense(
    30, activation='relu', input_shape=x_train.shape[1:]))
model.add(keras.layers.Dense(1))
model.summary()

# Pass the custom loss function object directly instead of a string name.
model.compile(
    loss=customized_mse,
    optimizer="sgd",
    metrics=["mean_squared_error"],
)
# Early-stopping callback configured with patience=5 and min_delta=1e-2.
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
4. 自定義層
調用庫中的層
# Dense layer with 100 units. input_shape describes a single sample and does
# not include the batch axis, so rows of 5 features are declared as (5,).
layer = tf.keras.layers.Dense(100, input_shape=(5,))
# Calling the layer on a batch of ten 5-feature rows returns a (10, 100)
# tensor.
layer(tf.zeros([10, 5]))
<tf.Tensor: id=29, shape=(10, 100), dtype=float32, numpy=array(…)>
我們知道這個層做的是 x * w + b 的運算,其中 x 是輸入矩陣,上面的代碼中 x 就是 tf.zeros([10, 5])。按照矩陣乘法的規則,w 的維度是 [5, 100],b 的維度是 [100]。
直接查看這個層中的變量:
layer.trainable_variables
<tf.Variable 'dense_1/kernel:0' shape=(5, 100) dtype=float32, numpy=array(…)>
自定義激活函數及dense層
# Softplus activation, log(1 + e^x), wrapped in a Lambda layer so that it can
# be stacked in a model like any other layer.
def _softplus(x):
    return tf.nn.softplus(x)


customized_softplus = keras.layers.Lambda(_softplus)
# Customized dense layer.
class CustomizedDenseLayer(keras.layers.Layer):
    """Fully connected layer computing ``activation(x @ kernel + bias)``.

    Args:
        units: Number of output features.
        activation: Activation identifier handed to
            ``keras.layers.Activation``; defaults to ``None``.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, **kwargs):
        # Initialise the base Layer first: it sets up attribute tracking, so
        # the sub-layer below must only be assigned to ``self`` afterwards.
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.layers.Activation(activation)

    def build(self, input_shape):
        """Create the kernel and bias once the input shape is known."""
        # x * w + b with x: [None, a], w: [a, units] -> output: [None, units].
        # The feature axis is the last one; for the 2-D inputs used here
        # index -1 is the same axis as index 1.
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[-1], self.units),
            initializer='uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Run the forward computation."""
        return self.activation(x @ self.kernel + self.bias)
# Same regression network as before, built from the custom dense layer and
# finished with the custom softplus layer.
model = keras.models.Sequential()
model.add(CustomizedDenseLayer(
    30, activation='relu', input_shape=x_train.shape[1:]))
model.add(CustomizedDenseLayer(1))
model.add(customized_softplus)
# Built-in alternatives for the last two layers, noted for comparison:
#   keras.layers.Dense(1, activation="softplus")
#   keras.layers.Dense(1) followed by keras.layers.Activation('softplus')
model.summary()

model.compile(loss="mean_squared_error", optimizer="sgd")
# Early-stopping callback configured with patience=5 and min_delta=1e-2.
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
6. tf.function()
能夠將python定義的函數轉爲tensorflow形式的函數,能夠提高執行效率。
例子:
# tf.function and auto-graph.
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Return ``scale * z`` where z >= 0, else ``scale * alpha * tf.nn.elu(z)``."""
    negative_branch = alpha * tf.nn.elu(z)
    return scale * tf.where(tf.greater_equal(z, 0.0), z, negative_branch)


print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3., -2.5])))
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
第一種方式直接使用tf.function()將其轉爲tf函數
# Wrap the plain Python function with tf.function.
scaled_elu_tf = tf.function(scaled_elu)
for _z in (tf.constant(-3.), tf.constant([-3., -2.5])):
    print(scaled_elu_tf(_z))
# The wrapper exposes the original Python function as .python_function.
print(scaled_elu_tf.python_function is scaled_elu)
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
True
第二種方式使用修飾器@tf.function將其轉爲tf函數
# Compute 1 + 1/2 + 1/2^2 + ... + 1/2^(n_iters - 1), i.e. the first n_iters
# terms of the series.
@tf.function
def converge_to_2(n_iters):
    """Return the sum of the first ``n_iters`` terms of 1 + 1/2 + 1/4 + ..."""
    total = tf.constant(0.)
    increment = tf.constant(1.)
    for _ in range(n_iters):
        total = total + increment
        increment = increment / 2.0
    return total


print(converge_to_2(20))
若函數中要用到 tf.Variable,需要將變量定義在被 @tf.function 修飾的函數外面。
給tf.function()添加限定
這個例子中限定輸入數據的類型爲tf.int32。
@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    """Return ``z`` raised to the third power; the signature admits int32 input."""
    return tf.pow(z, 3)


# A float tensor does not match the int32 signature and is rejected. The
# exception type depends on the TensorFlow release (ValueError in older
# versions, TypeError in newer ones), so both are handled.
try:
    print(cube(tf.constant([1., 2., 3.])))
except (ValueError, TypeError) as ex:
    print(ex)
print(cube(tf.constant([1, 2, 3])))
Python inputs incompatible with input_signature:
inputs: (
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
input_signature: (
TensorSpec(shape=(None,), dtype=tf.int32, name='x'))
tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)
7. 圖結構
上述的例子中,將python的函數轉爲tf函數之後,能夠獲取其對應的圖結構。
使用get_concrete_function()取得具體函數後,可以透過其graph屬性查看被轉換的函數中做了哪些操作:
# Obtain the concrete function of cube for a 1-D int32 input spec.
_int32_vector_spec = tf.TensorSpec([None], tf.int32)
cube_func_int32 = cube.get_concrete_function(_int32_vector_spec)
print(cube_func_int32)
<tensorflow.python.eager.function.ConcreteFunction object at 0x7f766a385cf8>
cube_func_int32.graph.get_operations()
[<tf.Operation 'x' type=Placeholder>,
<tf.Operation 'Pow/y' type=Const>,
<tf.Operation 'Pow' type=Pow>,
<tf.Operation 'Identity' type=Identity>]
cube_func_int32.graph.get_operation_by_name("x")
<tf.Operation 'x' type=Placeholder>
cube_func_int32.graph.get_tensor_by_name("x:0")
<tf.Tensor 'x:0' shape=(None,) dtype=int32>
查看圖定義
cube_func_int32.graph.as_graph_def()
8. 自定義近似求導
自定義一元近似求導
def f(x):
    """Evaluate the quadratic 3x^2 + 2x - 1 at x."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1


def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of f at x with a central difference.

    Args:
        f: Function of one argument.
        x: Point at which to estimate the derivative.
        eps: Offset applied on each side of x.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps).
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    return (ahead - behind) / (2. * eps)


print(approximate_derivative(f, 1.))
自定義二元求一階偏導數
def g(x1, x2):
    """Evaluate (x1 + 5) * x2^2."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared
def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both first-order partial derivatives of g at (x1, x2).

    Each partial derivative is obtained by fixing the other argument and
    passing the resulting one-argument function to approximate_derivative.

    Returns:
        A tuple (dg/dx1, dg/dx2).
    """
    def along_x1(value):
        return g(value, x2)

    def along_x2(value):
        return g(x1, value)

    dg_x1 = approximate_derivative(along_x1, x1, eps)
    dg_x2 = approximate_derivative(along_x2, x2, eps)
    return dg_x1, dg_x2


print(approximate_gradient(g, 2., 3.))
# (8.999999999993236, 41.999999999994486)
使用tf.GradientTape()求導
def g(x1, x2):
    """Evaluate (x1 + 5) * (x2 ** 2)."""
    return (x1 + 5) * (x2 ** 2)
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# A tape is closed after it has been used once; passing persistent=True keeps
# it available for further gradient() calls.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

# Partial derivatives with respect to x1 and x2, requested separately.
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)
# tf.Tensor(9.0, shape=(), dtype=float32)
# tf.Tensor(42.0, shape=(), dtype=float32)

# Both partial derivatives in a single call.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]

# Delete the persistent tape once it is no longer needed.
del tape
求兩個函數中同一個變量的導數
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
# With a list of targets the result is the sum of their derivatives:
# dz1/dx + dz2/dx = 3 + 2 * 5 = 13.
tape.gradient([z1, z2], x)
# <tf.Tensor: id=140, shape=(), dtype=float32, numpy=13.0>
求二階導數
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # First-order gradients are computed inside the outer tape so that the
    # outer tape records them and can differentiate them again.
    inner_grads = inner_tape.gradient(z, [x1, x2])

# Differentiate each first-order gradient with respect to both variables.
outer_grads = []
for inner_grad in inner_grads:
    outer_grads.append(outer_tape.gradient(inner_grad, [x1, x2]))
print(outer_grads)
del inner_tape
del outer_tape
# [[None, <tf.Tensor: id=177, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=188, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=186, shape=(), dtype=float32, numpy=14.0>]]
結果是一個海森矩陣(Hessian),二階導數共有四個。其中的 None 對應 ∂²z/∂x1²:∂z/∂x1 = x2² 與 x1 無關,因此該項導數爲 0,TensorFlow 以 None 表示。
要求更高階的導數,只需繼續嵌套 with tf.GradientTape(persistent=True) as tape 即可。
模擬梯度下降
def f(x):
    """Evaluate 3x^2 + 2x - 1, the function minimised below."""
    return 3. * x ** 2 + 2. * x - 1
learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    # Record the forward computation, then ask the tape for dz/dx.
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # Gradient-descent step: x <- x - learning_rate * dz/dx.
    x.assign_sub(learning_rate * dz_dx)
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
模擬梯度下降使用不同的更新策略
def f(x):
    """Evaluate the objective 3x^2 + 2x - 1 at x."""
    value = 3. * x ** 2 + 2. * x - 1
    return value
learning_rate = 0.1
x = tf.Variable(0.0)
# Use the `learning_rate` keyword; `lr` is a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # The optimizer takes (gradient, variable) pairs and applies the update.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
對於多元函數,使用tape.gradient()求出各變量的導數,然後修改apply_gradients([(dz_dx, x)])中的參數列表即可。
9. tf.keras中使用tf.GradientTape()求導進行訓練
# Training plan:
# 1. Iterate over the training set batch by batch, updating the metric.
#    1.1 Compute gradients automatically.
# 2. At the end of each epoch, evaluate the metric on the validation set.
epochs, batch_size = 100, 32
steps_per_epoch = len(x_train_scaled) // batch_size
# MeanSquaredError tracks the squared error between predictions and targets.
metric = keras.metrics.MeanSquaredError()
optimizer = keras.optimizers.SGD()
# Loose batch sampling: rows are drawn at random rather than by a strict pass
# over the data.
def random_batch(x, y, batch_size=32):
    """Return ``batch_size`` randomly chosen, aligned rows of x and y.

    Indices come from ``np.random.randint`` over ``[0, len(x))``, so the same
    row can appear more than once in a batch.
    """
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]
# Build the model with tf.keras: a 30-unit ReLU hidden layer and one output.
model = keras.models.Sequential()
model.add(keras.layers.Dense(
    30, activation='relu', input_shape=x_train.shape[1:]))
model.add(keras.layers.Dense(1))
# Iterative training.
for epoch in range(epochs):
    # Reset the MeanSquaredError metric at the start of every epoch.
    metric.reset_states()
    for _ in range(steps_per_epoch):
        # Fetch a batch.
        x_batch, y_batch = random_batch(x_train_scaled, y_train,
                                        batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            y_pred = tf.squeeze(y_pred, 1)
            # Objective to differentiate.
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
        # The metric update is bookkeeping only, so it stays off the tape.
        metric(y_batch, y_pred)
        # Differentiate with respect to the trainable weights only; asking
        # for every variable would include non-trainable ones, which have no
        # gradient.
        trainable_vars = model.trainable_variables
        grads = tape.gradient(loss, trainable_vars)
        # Each gradient must be paired with its own variable, so zip them.
        grads_and_vars = zip(grads, trainable_vars)
        # Apply the update.
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, " train mse:",
              metric.result().numpy(), end="")
    y_valid_pred = model(x_valid_scaled)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    # Arguments are (y_true, y_pred), matching the training loss above.
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t", "valid mse: ", valid_loss.numpy())