TensorFlow 2.0 Basic APIs

Notes excerpted from the course 《Google老師親授 TensorFlow2.0 入門到進階_課程》.

1. Constants: tf.constant()

Definition

import numpy as np
import tensorflow as tf

t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
# indexing and slicing
print(t)
print(t[:, 1:])

tf.Tensor(
[[1. 2. 3.]
[4. 5. 6.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[2. 3.]
[5. 6.]], shape=(2, 2), dtype=float32)

TF constants and conversion to NumPy

print(t.numpy())     # convert the tensor to a NumPy array
print(np.square(t))  # NumPy operations accept tensors directly
np_t = np.array([[1., 2., 3.], [4., 5., 6.]])
print(tf.constant(np_t))  # build a tensor from a NumPy array

2. Variables: tf.Variable()

Defining a variable

v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
print(v)            # the Variable object
print(v.value())    # its value as a tf.Tensor
print(v.numpy())    # its value as a NumPy array

Assignment

Use assign() to update a variable's value (plain assignment with = would only rebind the Python name, not update the variable).

v.assign(2*v)
print(v.numpy())

[[ 2. 4. 6.]
[ 8. 10. 12.]]
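
For reference, tf.Variable also provides other in-place updates; a brief sketch using standard tf.Variable methods:

v.assign_add(tf.ones((2, 3)))   # element-wise: v = v + 1
v.assign_sub(tf.ones((2, 3)))   # element-wise: v = v - 1
v[0, 1].assign(42.)             # sliced assignment of a single element
print(v.numpy())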

3. Custom Loss Functions

from tensorflow import keras

def customized_mse(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true))

# x_train is the training split prepared earlier in the course notes
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
model.summary()
# pass the custom loss function customized_mse directly to compile
model.compile(loss=customized_mse, optimizer="sgd",
              metrics=["mean_squared_error"])
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]
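
A minimal sketch of training with this setup (assuming x_train_scaled, y_train, x_valid_scaled and y_valid are the prepared data splits, as in section 9):

history = model.fit(x_train_scaled, y_train,
                    validation_data=(x_valid_scaled, y_valid),
                    epochs=100,
                    callbacks=callbacks)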

4. Custom Layers

Using a built-in layer

# a Dense layer with 100 units
layer = tf.keras.layers.Dense(100, input_shape=(None, 5))
# call it on a batch of 10 samples with 5 features each
layer(tf.zeros([10, 5]))

<tf.Tensor: id=29, shape=(10, 100), dtype=float32, numpy=array(…)>

This layer computes x * w + b, where x is the input matrix; in the code above x is tf.zeros([10, 5]). For the matrix multiplication to work out, w has shape [5, 100] and b has shape [100].
Inspect the layer's variables directly:

layer.trainable_variables

<tf.Variable 'dense_1/kernel:0' shape=(5, 100) dtype=float32, numpy=array(…)>
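
To confirm these shapes, the kernel and bias can also be read off the layer directly (a quick check, not in the original notes):

print(layer.kernel.shape)   # (5, 100)
print(layer.bias.shape)     # (100,)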

Custom activation function and custom dense layer

# tf.nn.softplus : log(1+e^x)
customized_softplus = keras.layers.Lambda(lambda x : tf.nn.softplus(x))

# customized dense layer.
class CustomizedDenseLayer(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        self.units = units
        self.activation = keras.layers.Activation(activation)
        super(CustomizedDenseLayer, self).__init__(**kwargs)
    
    def build(self, input_shape):
        """Create the layer's trainable parameters."""
        # x * w + b. input_shape: [None, a], kernel: [a, b], output_shape: [None, b]
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.units),
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.units, ),
                                    initializer='zeros',
                                    trainable=True)
        super(CustomizedDenseLayer, self).build(input_shape)
    
    def call(self, x):
        """Forward computation."""
        return self.activation(x @ self.kernel + self.bias)

model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu',
                         input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
    customized_softplus,
    # keras.layers.Dense(1, activation="softplus"),
    # keras.layers.Dense(1), keras.layers.Activation('softplus'),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]

6. tf.function()

tf.function converts a Python-defined function into a TensorFlow graph function, which can improve execution efficiency.
Example:

# tf.function and auto-graph.
def scaled_elu(z, scale=1.0, alpha=1.0):
    # z >= 0 ? scale * z : scale * alpha * tf.nn.elu(z)
    is_positive = tf.greater_equal(z, 0.0)
    return scale * tf.where(is_positive, z, alpha * tf.nn.elu(z))

print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3., -2.5])))

tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)

Method 1: call tf.function() directly to convert it into a TF function

# convert the Python function into a TF function
scaled_elu_tf = tf.function(scaled_elu)
print(scaled_elu_tf(tf.constant(-3.)))
print(scaled_elu_tf(tf.constant([-3., -2.5])))

print(scaled_elu_tf.python_function is scaled_elu)

tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915 ], shape=(2,), dtype=float32)
True
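
As a side note (not from the original notes), tf.autograph.to_code prints the Python source that autograph generates for such a function:

print(tf.autograph.to_code(scaled_elu))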

Method 2: use the @tf.function decorator to convert it into a TF function

# compute 1 + 1/2 + 1/2^2 + ... + 1/2^n

@tf.function
def converge_to_2(n_iters):
    total = tf.constant(0.)
    increment = tf.constant(1.)
    for _ in range(n_iters):
        total += increment
        increment /= 2.0
    return total

print(converge_to_2(20))

When the converted function uses tf.Variable objects, the variables must be created outside the function.
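
A minimal sketch of this pattern (a hypothetical example, not from the original notes):

var = tf.Variable(0.)

@tf.function
def add_21():
    # the variable is created outside and only updated inside the converted function
    return var.assign_add(21.)

print(add_21())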

Adding an input signature to tf.function()

In this example the input is restricted to dtype tf.int32.

@tf.function(input_signature=[tf.TensorSpec([None], tf.int32, name='x')])
def cube(z):
    return tf.pow(z, 3)

try:
    print(cube(tf.constant([1., 2., 3.])))
except ValueError as ex:
    print(ex)
    
print(cube(tf.constant([1, 2, 3])))

Python inputs incompatible with input_signature:
inputs: (
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
input_signature: (
TensorSpec(shape=(None,), dtype=tf.int32, name='x'))
tf.Tensor([ 1 8 27], shape=(3,), dtype=int32)

7. Graph Structure

In the example above, once a Python function has been converted to a TF function, we can access its underlying graph.
get_concrete_function() returns a concrete function whose graph shows which operations the converted function performs:

cube_func_int32 = cube.get_concrete_function(
    tf.TensorSpec([None], tf.int32))
print(cube_func_int32)

<tensorflow.python.eager.function.ConcreteFunction object at 0x7f766a385cf8>

cube_func_int32.graph.get_operations()

[<tf.Operation 'x' type=Placeholder>,
<tf.Operation 'Pow/y' type=Const>,
<tf.Operation 'Pow' type=Pow>,
<tf.Operation 'Identity' type=Identity>]

cube_func_int32.graph.get_operation_by_name("x")

<tf.Operation 'x' type=Placeholder>

cube_func_int32.graph.get_tensor_by_name("x:0")

<tf.Tensor 'x:0' shape=(None,) dtype=int32>

View the graph definition

cube_func_int32.graph.as_graph_def()

8. Custom Approximate Differentiation

Approximate derivative of a univariate function

def f(x):
    return 3. * x ** 2 + 2. * x - 1

def approximate_derivative(f, x, eps=1e-3):
    return (f(x + eps) - f(x - eps)) / (2. * eps)

print(approximate_derivative(f, 1.))

Approximate first-order partial derivatives of a bivariate function

def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))

# (8.999999999993236, 41.999999999994486)

Differentiation with tf.GradientTape()

def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)
    
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
# the tape is released after a single gradient() call; pass persistent=True to keep it
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
# partial derivatives w.r.t. x1 and x2, computed separately
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)
# tf.Tensor(9.0, shape=(), dtype=float32) 
# tf.Tensor(42.0, shape=(), dtype=float32)
# or compute both partial derivatives in a single call
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]

# release the persistent tape
del tape

Gradient of two functions with respect to the same variable

x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)
# <tf.Tensor: id=140, shape=(), dtype=float32, numpy=13.0>
# the gradients of the two targets are summed: 3 + 2 * 5 = 13

Second-order derivatives

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape
# [[None, <tf.Tensor: id=177, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=188, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=186, shape=(), dtype=float32, numpy=14.0>]]

The result is the Hessian matrix: four second-order derivatives in total. The None entry corresponds to ∂²z/∂x1², which is zero because ∂z/∂x1 = x2² does not depend on x1.
Higher-order derivatives can be obtained by nesting further tf.GradientTape(persistent=True) contexts, as sketched below.
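
For example, a third-order derivative of y = x ** 3 with three nested tapes (a minimal sketch, not from the original notes):

x = tf.Variable(2.0)
with tf.GradientTape() as tape3:
    with tf.GradientTape() as tape2:
        with tf.GradientTape() as tape1:
            y = x ** 3
        dy = tape1.gradient(y, x)   # 3 * x^2 = 12
    d2y = tape2.gradient(dy, x)     # 6 * x = 12
d3y = tape3.gradient(d2y, x)        # 6
print(dy, d2y, d3y)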

Simulating gradient descent

def f(x):
    return 3. * x ** 2 + 2. * x - 1
    
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)

# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
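# the analytic minimum is at x = -1/3 (f'(x) = 6x + 2 = 0), matching the result above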

Simulating gradient descent with a different update strategy (a Keras optimizer)

def f(x):
    return 3. * x ** 2 + 2. * x - 1
    
learning_rate = 0.1
x = tf.Variable(0.0)

optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])
print(x)

# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>

For multivariate functions, compute the gradients with tape.gradient() and adjust the list of (gradient, variable) pairs passed to apply_gradients(), as sketched below.
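
A minimal sketch for the bivariate function g defined earlier, pairing each gradient with its variable:

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
optimizer = keras.optimizers.SGD(learning_rate=0.1)

with tf.GradientTape() as tape:
    z = g(x1, x2)
# gradients w.r.t. both variables
grads = tape.gradient(z, [x1, x2])
# one (gradient, variable) pair per variable
optimizer.apply_gradients(zip(grads, [x1, x2]))
print(x1.numpy(), x2.numpy())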

9. Training with tf.GradientTape() in tf.keras

# 1. iterate over the training set in batches and track a metric
#    1.1 use automatic differentiation for the updates
# 2. at the end of each epoch, evaluate the metric on the validation set

epochs = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
# MeanSquaredError accumulates the squared error between predictions and targets
metric = keras.metrics.MeanSquaredError()
# loose batching: sample random indices instead of a strict pass over the data
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]
# build the model with tf.keras
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])
# training loop
for epoch in range(epochs):
    # reset the accumulated MeanSquaredError at the start of each epoch
    metric.reset_states()
    for step in range(steps_per_epoch):
        # sample a batch
        x_batch, y_batch = random_batch(x_train_scaled, y_train,
                                        batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            y_pred = tf.squeeze(y_pred, 1)
            # objective to differentiate
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
            metric(y_batch, y_pred)
        # gradients w.r.t. all model variables
        grads = tape.gradient(loss, model.variables)
        # as in the previous section, each gradient must be paired with its variable; zip packs them together
        grads_and_vars = zip(grads, model.variables)
        # apply the update
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, " train mse:",
              metric.result().numpy(), end="")
    y_valid_pred = model(x_valid_scaled)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\t", "valid mse: ", valid_loss.numpy())
