笔记摘自《Google老师亲授 TensorFlow2.0 入门到进阶_课程》
1. 常量 tf.constant()
定义
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
# index
print(t)
print(t[:, 1:])
tf.Tensor(
[[1. 2. 3.]
[4. 5. 6.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[2. 3.]
[5. 6.]], shape=(2, 2), dtype=float32)
tf常量与转为numpy格式
print(t.numpy())
print(np.square(t))
np_t = np.array([[1., 2., 3.], [4., 5., 6.]])
print(tf.constant(np_t))
2. 变量 tf.Variable()
变量的定义
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)
print(v.value())
print(v.numpy())
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)
print(v.value())
print(v.numpy())
赋值
使用assign赋值
v.assign(2*v)
print(v.numpy())
[[ 2. 4. 6.]
[ 8. 10. 12.]]
3. 自定义损失函数
def customized_mse(y_true, y_pred):
    """Custom MSE loss: mean over all elements of the squared residuals."""
    squared_residuals = tf.square(y_pred - y_true)
    return tf.reduce_mean(squared_residuals)
# Small regression MLP: one hidden layer of 30 ReLU units, linear output.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
model.summary()
# Pass the custom loss customized_mse directly to compile(); keras accepts
# any callable with signature (y_true, y_pred).
model.compile(loss=customized_mse, optimizer="sgd",
              metrics=["mean_squared_error"])
# Stop training when the monitored loss improves by less than 1e-2
# for 5 consecutive epochs.
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]
4. 自定义层
调用库中的层
layer = tf.keras.layers.Dense(100, input_shape=(None, 5))
layer(tf.zeros([10, 5]))
<tf.Tensor: id=29, shape=(10, 100), dtype=float32, numpy=array(…)>
我们知道在这个层中做了x * w + b的运算,其中x是输入矩阵,上面的代码中x就是tf.zeros([10, 5])。按矩阵运算的规则,w的维度就是[5, 100],b的维度是[100]。
直接查看这个层中的变量:
layer.trainable_variables
<tf.Variable ‘dense_1/kernel:0’ shape=(5, 100) dtype=float32, numpy=array(…)>
自定义激活函数及dense层
# tf.nn.softplus : log(1+e^x) — wrapped in a Lambda layer so it can be used
# as a standalone activation layer inside a Sequential model.
customized_softplus = keras.layers.Lambda(lambda x : tf.nn.softplus(x))
# customized dense layer.
class CustomizedDenseLayer(keras.layers.Layer):
    """A hand-written fully connected layer: output = activation(x @ w + b)."""

    def __init__(self, units, activation=None, **kwargs):
        self.units = units
        self.activation = keras.layers.Activation(activation)
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable parameters once the input shape is known."""
        # x: [None, in_dim], kernel: [in_dim, units] -> output: [None, units]
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[1], self.units),
            initializer='uniform',
            trainable=True)
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Forward pass."""
        return self.activation(x @ self.kernel + self.bias)
# Same regression model, but built from the custom layer defined above;
# customized_softplus serves as a final activation layer.
model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu',
                         input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
    customized_softplus,
    # Equivalent built-in alternatives:
    # keras.layers.Dense(1, activation="softplus"),
    # keras.layers.Dense(1), keras.layers.Activation('softplus'),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
# Early stopping: patience 5 epochs, minimum improvement 1e-2.
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]
6. tf.function()
能够将python定义的函数转为tensorflow形式的函数,能够提高执行效率。
例子:
# tf.function and auto-graph.
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Scaled ELU: scale * z where z >= 0, else scale * alpha * elu(z)."""
    non_negative = tf.greater_equal(z, 0.0)
    elu_branch = alpha * tf.nn.elu(z)
    return scale * tf.where(non_negative, z, elu_branch)
print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3., -2.5])))
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
第一种方式直接使用tf.function()将其转为tf函数
# Convert the python function into a tf graph function.
scaled_elu_tf = tf.function(scaled_elu)
print(scaled_elu_tf(tf.constant(-3.)))
print(scaled_elu_tf(tf.constant([-3., -2.5])))
# The original python function is still reachable on the wrapper.
print(scaled_elu_tf.python_function is scaled_elu)
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
True
第二种方式使用修饰器@tf.function将其转为tf函数
# Compute 1 + 1/2 + 1/2^2 + ... + 1/2^n (converges to 2).
@tf.function
def converge_to_2(n_iters):
    """Sum the first n_iters terms of the geometric series with ratio 1/2."""
    running_sum = tf.constant(0.)
    term = tf.constant(1.)
    for _ in range(n_iters):
        running_sum += term
        term /= 2.0
    return running_sum
print(converge_to_2(20))
这种情况下要将变量定义在函数外面。
给tf.function()添加限定
这个例子中限定输入数据的类型为tf.int32。
# input_signature restricts the argument to a rank-1 int32 tensor; calls
# with any other dtype or shape raise ValueError.
@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    """Element-wise z ** 3."""
    return tf.pow(z, 3)
try:
    # Float input violates the int32 signature -> ValueError.
    print(cube(tf.constant([1., 2., 3.])))
except ValueError as ex:
    print(ex)
# int32 input matches the signature and succeeds.
print(cube(tf.constant([1, 2, 3])))
Python inputs incompatible with input_signature:
inputs: (
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
input_signature: (
TensorSpec(shape=(None,), dtype=tf.int32, name=‘x’))
tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)
7. 图结构
上述的例子中,将python的函数转为tf函数之后,能够获取其对应的图结构。
使用get_concrete_function()可以查看被转换函数中做了哪些操作。
cube_func_int32 = cube.get_concrete_function(
tf.TensorSpec([None], tf.int32))
print(cube_func_int32)
<tensorflow.python.eager.function.ConcreteFunction object at 0x7f766a385cf8>
cube_func_int32.graph.get_operations()
[<tf.Operation ‘x’ type=Placeholder>,
<tf.Operation ‘Pow/y’ type=Const>,
<tf.Operation ‘Pow’ type=Pow>,
<tf.Operation ‘Identity’ type=Identity>]
cube_func_int32.graph.get_operation_by_name("x")
<tf.Operation ‘x’ type=Placeholder>
cube_func_int32.graph.get_tensor_by_name("x:0")
<tf.Tensor ‘x:0’ shape=(None,) dtype=int32>
查看图定义
cube_func_int32.graph.as_graph_def()
8. 自定义近似求导
自定义一元近似求导
def f(x):
    """Quadratic test function: 3x^2 + 2x - 1."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1
def approximate_derivative(f, x, eps=1e-3):
    """Central-difference estimate of f'(x) with step eps."""
    forward = f(x + eps)
    backward = f(x - eps)
    return (forward - backward) / (2. * eps)
print(approximate_derivative(f, 1.))
自定义二元求一阶偏导数
def g(x1, x2):
    """Two-variable test function: (x1 + 5) * x2^2."""
    shifted = x1 + 5
    return shifted * (x2 ** 2)
def approximate_gradient(g, x1, x2, eps=1e-3):
    """Numerically estimate (dg/dx1, dg/dx2) at (x1, x2) via central differences."""
    partial_x1 = approximate_derivative(lambda v: g(v, x2), x1, eps)
    partial_x2 = approximate_derivative(lambda v: g(x1, v), x2, eps)
    return partial_x1, partial_x2
print(approximate_gradient(g, 2., 3.))
# (8.999999999993236, 41.999999999994486)
使用tf.GradientTape()求导
def g(x1, x2):
    """g(x1, x2) = (x1 + 5) * x2^2."""
    quadratic = x2 ** 2
    return (x1 + 5) * quadratic
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# A tape is consumed after one gradient() call; persistent=True keeps it
# alive so it can be queried several times.
with tf.GradientTape(persistent = True) as tape:
    z = g(x1, x2)
# Query the partial derivatives one at a time.
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)
# tf.Tensor(9.0, shape=(), dtype=float32)
# tf.Tensor(42.0, shape=(), dtype=float32)
# Or ask for both partials in a single call by passing a list of sources.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]
# A persistent tape must be released manually.
del tape
求两个函数中同一个变量的导数
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
# Passing a list of targets sums their gradients:
# d(z1)/dx + d(z2)/dx = 3 + 2*5 = 13.
tape.gradient([z1, z2], x)
# <tf.Tensor: id=140, shape=(), dtype=float32, numpy=13.0>
求二阶导数
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# Nest two tapes for second-order derivatives: the outer tape records the
# gradient computation performed with the inner tape.
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape
# [[None, <tf.Tensor: id=177, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=188, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=186, shape=(), dtype=float32, numpy=14.0>]]
结果是一个海森矩阵,2阶导数有四个。
求更高阶的导数可以不断嵌套 with tf.GradientTape(persistent=True) as tape 即可。
模拟梯度下降
def f(x):
    """Objective to minimize: 3x^2 + 2x - 1 (minimum at x = -1/3)."""
    value = 3. * x ** 2
    value = value + 2. * x
    return value - 1
learning_rate = 0.1
x = tf.Variable(0.0)
# Plain gradient descent: recompute f and its gradient each step, then
# update x in place with x -= learning_rate * df/dx.
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)
# Converges to the minimum of 3x^2 + 2x - 1 at x = -1/3:
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
模拟梯度下降使用不同的更新策略
def f(x):
    """Objective: 3x^2 + 2x - 1; its gradient 6x + 2 is zero at x = -1/3."""
    square_part = x ** 2
    return 3. * square_part + 2. * x - 1
learning_rate = 0.1
x = tf.Variable(0.0)
# Use a keras optimizer for the update rule instead of a manual assign_sub.
# `learning_rate` is the canonical keyword; the original `lr` is a
# deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate = learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients expects a list of (gradient, variable) pairs.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
对于多元函数,使用tape.gradient()求出导数,然后修改apply_gradients([(dz_dx, x)])中的参数列表即可。
9. tf.keras中使用tf.GradientTape()求导进行训练
# Manual training loop outline:
# 1. per batch over the training set: update the metric
#    1.1 automatic differentiation
# 2. at each epoch end: evaluate the metric on the validation set
epochs = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
# MeanSquaredError accumulates squared error between predictions and targets.
metric = keras.metrics.MeanSquaredError()
# Loose sampling: draw a random batch rather than strictly iterating the data.
def random_batch(x, y, batch_size=32):
    """Return a random batch of (x, y) pairs sampled with replacement."""
    indices = np.random.randint(0, len(x), size=batch_size)
    return x[indices], y[indices]
# Build the regression model with tf.keras.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
# Training loop: random batches, manual gradient computation and updates.
for epoch in range(epochs):
    # Reset the accumulated MeanSquaredError at the start of every epoch.
    metric.reset_states()
    for step in range(steps_per_epoch):
        # Draw a random training batch.
        x_batch, y_batch = random_batch(x_train_scaled, y_train,
                                        batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            # Drop the trailing size-1 axis so y_pred matches y_batch's shape.
            y_pred = tf.squeeze(y_pred, 1)
            # Objective to differentiate.
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
            metric(y_batch, y_pred)
        # Differentiate the loss w.r.t. all model variables.
        grads = tape.gradient(loss, model.variables)
        # As in the previous section, gradients must stay aligned with their
        # variables; zip pairs them up for apply_gradients.
        grads_and_vars = zip(grads, model.variables)
        # Apply the update.
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, " train mse:",
              metric.result().numpy(), end="")
    # Epoch end: evaluate on the validation set.
    y_valid_pred = model(x_valid_scaled)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    # Keep the (y_true, y_pred) argument order consistent with the training
    # loss above (MSE is symmetric, so the value is unchanged).
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t", "valid mse: ", valid_loss.numpy())