笔记摘自《Google老师亲授 TensorFlow2.0 入门到进阶_课程》
1. 常量 tf.constant()
定义
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
# index
print(t)
print(t[:, 1:])
tf.Tensor(
[[1. 2. 3.]
[4. 5. 6.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[2. 3.]
[5. 6.]], shape=(2, 2), dtype=float32)
tf常量与转为numpy格式
print(t.numpy())
print(np.square(t))
np_t = np.array([[1., 2., 3.], [4., 5., 6.]])
print(tf.constant(np_t))
2. 变量 tf.Variable()
变量的定义
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)
print(v.value())
print(v.numpy())
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)
print(v.value())
print(v.numpy())
赋值
使用assign赋值
v.assign(2*v)
print(v.numpy())
[[ 2. 4. 6.]
[ 8. 10. 12.]]
3. 自定义损失函数
def customized_mse(y_true, y_pred):
    """Custom MSE loss: mean over all elements of the squared residuals."""
    squared_residuals = tf.square(y_pred - y_true)
    return tf.reduce_mean(squared_residuals)
# Small regression MLP: one hidden layer of 30 ReLU units, linear output.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
model.summary()
# Pass the custom loss customized_mse directly to compile(); keras accepts
# any callable with signature (y_true, y_pred).
model.compile(loss=customized_mse, optimizer="sgd",
              metrics=["mean_squared_error"])
# Stop training when the monitored loss improves by less than 1e-2
# for 5 consecutive epochs.
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]
4. 自定义层
调用库中的层
layer = tf.keras.layers.Dense(100, input_shape=(None, 5))
layer(tf.zeros([10, 5]))
<tf.Tensor: id=29, shape=(10, 100), dtype=float32, numpy=array(…)>
我们知道在这个层中做了x * w + b的运算,其中x是输入矩阵,上面的代码中x就是tf.zeros([10, 5])。按矩阵运算的规则,w的维度就是[5, 100],b的维度是[100]。
直接查看这个层中的变量:
layer.trainable_variables
<tf.Variable ‘dense_1/kernel:0’ shape=(5, 100) dtype=float32, numpy=array(…)>
自定义激活函数及dense层
# tf.nn.softplus : log(1+e^x) — wrapped in a Lambda layer so it can be used
# as a standalone activation layer inside a Sequential model.
customized_softplus = keras.layers.Lambda(lambda x : tf.nn.softplus(x))
# customized dense layer.
class CustomizedDenseLayer(keras.layers.Layer):
    """A hand-written fully connected layer: output = activation(x @ w + b)."""

    def __init__(self, units, activation=None, **kwargs):
        self.units = units
        self.activation = keras.layers.Activation(activation)
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable parameters once the input shape is known."""
        # x: [None, in_dim], kernel: [in_dim, units] -> output: [None, units]
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[1], self.units),
            initializer='uniform',
            trainable=True)
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Forward pass."""
        return self.activation(x @ self.kernel + self.bias)
# Same regression model, but built from the custom layer defined above;
# customized_softplus serves as a final activation layer.
model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu',
                         input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
    customized_softplus,
    # Equivalent built-in alternatives:
    # keras.layers.Dense(1, activation="softplus"),
    # keras.layers.Dense(1), keras.layers.Activation('softplus'),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
# Early stopping: patience 5 epochs, minimum improvement 1e-2.
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]
6. tf.function()
能够将python定义的函数转为tensorflow形式的函数,能够提高执行效率。
例子:
# tf.function and auto-graph.
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Scaled ELU: scale * z where z >= 0, else scale * alpha * elu(z)."""
    non_negative = tf.greater_equal(z, 0.0)
    elu_branch = alpha * tf.nn.elu(z)
    return scale * tf.where(non_negative, z, elu_branch)
print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3., -2.5])))
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
第一种方式直接使用tf.function()将其转为tf函数
# Convert the python function into a tf graph function.
scaled_elu_tf = tf.function(scaled_elu)
print(scaled_elu_tf(tf.constant(-3.)))
print(scaled_elu_tf(tf.constant([-3., -2.5])))
# The original python function is still reachable on the wrapper.
print(scaled_elu_tf.python_function is scaled_elu)
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
True
第二种方式使用修饰器@tf.function将其转为tf函数
# Compute 1 + 1/2 + 1/2^2 + ... + 1/2^n (converges to 2).
@tf.function
def converge_to_2(n_iters):
    """Sum the first n_iters terms of the geometric series with ratio 1/2."""
    running_sum = tf.constant(0.)
    term = tf.constant(1.)
    for _ in range(n_iters):
        running_sum += term
        term /= 2.0
    return running_sum
print(converge_to_2(20))
这种情况下要将变量定义在函数外面。
给tf.function()添加限定
这个例子中限定输入数据的类型为tf.int32。
# input_signature restricts the argument to a rank-1 int32 tensor; calls
# with any other dtype or shape raise ValueError.
@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    """Element-wise z ** 3."""
    return tf.pow(z, 3)
try:
    # Float input violates the int32 signature -> ValueError.
    print(cube(tf.constant([1., 2., 3.])))
except ValueError as ex:
    print(ex)
# int32 input matches the signature and succeeds.
print(cube(tf.constant([1, 2, 3])))
Python inputs incompatible with input_signature:
inputs: (
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
input_signature: (
TensorSpec(shape=(None,), dtype=tf.int32, name=‘x’))
tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)
7. 图结构
上述的例子中,将python的函数转为tf函数之后,能够获取其对应的图结构。
使用get_concrete_function()可以查看被转换函数中做了哪些操作。
cube_func_int32 = cube.get_concrete_function(
tf.TensorSpec([None], tf.int32))
print(cube_func_int32)
<tensorflow.python.eager.function.ConcreteFunction object at 0x7f766a385cf8>
cube_func_int32.graph.get_operations()
[<tf.Operation ‘x’ type=Placeholder>,
<tf.Operation ‘Pow/y’ type=Const>,
<tf.Operation ‘Pow’ type=Pow>,
<tf.Operation ‘Identity’ type=Identity>]
cube_func_int32.graph.get_operation_by_name("x")
<tf.Operation ‘x’ type=Placeholder>
cube_func_int32.graph.get_tensor_by_name("x:0")
<tf.Tensor ‘x:0’ shape=(None,) dtype=int32>
查看图定义
cube_func_int32.graph.as_graph_def()
8. 自定义近似求导
自定义一元近似求导
def f(x):
    """Quadratic test function: 3x^2 + 2x - 1."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1
def approximate_derivative(f, x, eps=1e-3):
    """Central-difference estimate of f'(x) with step eps."""
    forward = f(x + eps)
    backward = f(x - eps)
    return (forward - backward) / (2. * eps)
print(approximate_derivative(f, 1.))
自定义二元求一阶偏导数
def g(x1, x2):
    """Two-variable test function: (x1 + 5) * x2^2."""
    shifted = x1 + 5
    return shifted * (x2 ** 2)
def approximate_gradient(g, x1, x2, eps=1e-3):
    """Numerically estimate (dg/dx1, dg/dx2) at (x1, x2) via central differences."""
    partial_x1 = approximate_derivative(lambda v: g(v, x2), x1, eps)
    partial_x2 = approximate_derivative(lambda v: g(x1, v), x2, eps)
    return partial_x1, partial_x2
print(approximate_gradient(g, 2., 3.))
# (8.999999999993236, 41.999999999994486)
使用tf.GradientTape()求导
def g(x1, x2):
    """g(x1, x2) = (x1 + 5) * x2^2."""
    quadratic = x2 ** 2
    return (x1 + 5) * quadratic
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# A tape is consumed after one gradient() call; persistent=True keeps it
# alive so it can be queried several times.
with tf.GradientTape(persistent = True) as tape:
    z = g(x1, x2)
# Query the partial derivatives one at a time.
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)
# tf.Tensor(9.0, shape=(), dtype=float32)
# tf.Tensor(42.0, shape=(), dtype=float32)
# Or ask for both partials in a single call by passing a list of sources.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]
# A persistent tape must be released manually.
del tape
求两个函数中同一个变量的导数
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
# Passing a list of targets sums their gradients:
# d(z1)/dx + d(z2)/dx = 3 + 2*5 = 13.
tape.gradient([z1, z2], x)
# <tf.Tensor: id=140, shape=(), dtype=float32, numpy=13.0>
求二阶导数
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# Nest two tapes for second-order derivatives: the outer tape records the
# gradient computation performed with the inner tape.
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape
# [[None, <tf.Tensor: id=177, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=188, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=186, shape=(), dtype=float32, numpy=14.0>]]
结果是一个海森矩阵,2阶导数有四个。
求更高阶的导数可以不断嵌套 with tf.GradientTape(persistent=True) as tape 即可。
模拟梯度下降
def f(x):
    """Objective to minimize: 3x^2 + 2x - 1 (minimum at x = -1/3)."""
    value = 3. * x ** 2
    value = value + 2. * x
    return value - 1
learning_rate = 0.1
x = tf.Variable(0.0)
# Plain gradient descent: recompute f and its gradient each step, then
# update x in place with x -= learning_rate * df/dx.
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)
# Converges to the minimum of 3x^2 + 2x - 1 at x = -1/3:
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
模拟梯度下降使用不同的更新策略
def f(x):
    """Objective: 3x^2 + 2x - 1; its gradient 6x + 2 is zero at x = -1/3."""
    square_part = x ** 2
    return 3. * square_part + 2. * x - 1
learning_rate = 0.1
x = tf.Variable(0.0)
# Use a keras optimizer for the update rule instead of a manual assign_sub.
# `learning_rate` is the canonical keyword; the original `lr` is a
# deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate = learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients expects a list of (gradient, variable) pairs.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
对于多元函数,使用tape.gradient()求出导数,然后修改apply_gradients([(dz_dx, x)])中的参数列表即可。
9. tf.keras中使用tf.GradientTape()求导进行训练
# Manual training loop outline:
# 1. per batch over the training set: update the metric
#    1.1 automatic differentiation
# 2. at each epoch end: evaluate the metric on the validation set
epochs = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
# MeanSquaredError accumulates squared error between predictions and targets.
metric = keras.metrics.MeanSquaredError()
# Loose sampling: draw a random batch rather than strictly iterating the data.
def random_batch(x, y, batch_size=32):
    """Return a random batch of (x, y) pairs sampled with replacement."""
    indices = np.random.randint(0, len(x), size=batch_size)
    return x[indices], y[indices]
# Build the regression model with tf.keras.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
# Training loop: random batches, manual gradient computation and updates.
for epoch in range(epochs):
    # Reset the accumulated MeanSquaredError at the start of every epoch.
    metric.reset_states()
    for step in range(steps_per_epoch):
        # Draw a random training batch.
        x_batch, y_batch = random_batch(x_train_scaled, y_train,
                                        batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            # Drop the trailing size-1 axis so y_pred matches y_batch's shape.
            y_pred = tf.squeeze(y_pred, 1)
            # Objective to differentiate.
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
            metric(y_batch, y_pred)
        # Differentiate the loss w.r.t. all model variables.
        grads = tape.gradient(loss, model.variables)
        # As in the previous section, gradients must stay aligned with their
        # variables; zip pairs them up for apply_gradients.
        grads_and_vars = zip(grads, model.variables)
        # Apply the update.
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, " train mse:",
              metric.result().numpy(), end="")
    # Epoch end: evaluate on the validation set.
    y_valid_pred = model(x_valid_scaled)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    # Keep the (y_true, y_pred) argument order consistent with the training
    # loss above (MSE is symmetric, so the value is unchanged).
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t", "valid mse: ", valid_loss.numpy())