1. 查看可用計算資源
from tensorflow.python.client import device_lib
# List every compute device TensorFlow can see (CPUs and GPUs).
print(device_lib.list_local_devices())
from keras import backend as K
# NOTE(review): `_get_available_gpus` is a private API of the standalone
# Keras TF1 backend — confirm your Keras version still exposes it.
K.tensorflow_backend._get_available_gpus()
如果運行上述代碼沒有顯示GPU,則可通過升級tensorflow-gpu來進行解決(如果是多個環境的話,加上--user):
pip3 install --index-url http://pypi.douban.com/simple --trusted-host pypi.douban.com --upgrade tensorflow-gpu
2. 使用空閒的GPU
該方法主要應用於多塊GPU上。
import os
import numpy as np
# Query per-GPU free memory (MiB) via nvidia-smi, then expose the GPU with
# the most headroom to this process through CUDA_VISIBLE_DEVICES.
os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
with open('tmp', 'r') as f:
    memory_gpu = [int(line.split()[2]) for line in f]
# Clean up the temp file unconditionally — the original leaked it when no
# GPU qualified, because the rm only ran in the success branch.
os.system('rm tmp')
memory_gpu_value = np.max(memory_gpu)
memory_gpu_index = np.argmax(memory_gpu)
if memory_gpu_value > 15000:
    # At least ~15 GB free: claim that GPU for this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(memory_gpu_index)
else:
    # `assert` is stripped under `python -O`; raise explicitly instead of
    # the original `assert 1==2, 'No available GPU'`.
    raise RuntimeError('No available GPU')
3. tensorflow.keras.layers.CuDNNLSTM
使用該庫將會比原生的LSTM快得多,但它只能在GPU環境下運行,並且可能會導致效果的下降。但如果是原生的LSTM,在CPU上運行的速度可能會超出GPU。
可參考討論:
https://stackoverflow.com/questions/52481006/why-is-keras-lstm-on-cpu-three-times-faster-than-gpu
4. 保存和加載模型
4.1 CPU和GPU版本
在GPU版本上保存(save)的模型,是無法在CPU版本上加載(load_model)的。反之亦然。
4.2 自定義層
自定義層需要定義__init__、build、call、compute_output_shape和get_config方法。
from tensorflow.keras import backend as K
from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import load_model
class Attention(Layer):
    """Soft attention pooling over the time axis.

    Takes a (batch, steps, features) tensor, learns one scalar score per
    timestep (via a learned vector W and an optional per-step bias b),
    softmax-normalizes the scores, and returns the attention-weighted sum
    of the timesteps: a (batch, features) tensor.
    """

    def __init__(self, step_dim=350,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """
        Args:
            step_dim: sequence length (number of timesteps) of the input.
            W_regularizer, b_regularizer: optional regularizers for W / b.
            W_constraint, b_constraint: optional constraints for W / b.
            bias: whether to learn a per-timestep bias added to the scores.
        """
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0  # filled in by build()
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # Expect a 3-D input: (batch, steps, features).
        assert len(input_shape) == 3
        # Scoring vector W: one weight per feature.
        self.W = self.add_weight(shape=(int(input_shape[-1]),),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]
        if self.bias:
            # One bias per timestep (shape: (steps,)).
            self.b = self.add_weight(shape=(int(input_shape[1]),),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        # BUG FIX: the original wrote `self.build = True`, which replaced
        # this very method with a bool. Keras expects the `built` flag.
        self.built = True

    def compute_mask(self, input, input_mask=None):
        # Attention collapses the time axis, so do not propagate the mask
        # to downstream layers.
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        # Per-timestep score e[b, t] = x[b, t, :] . W, done via reshapes
        # (equivalent to K.dot(x, self.W)).
        e = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                            K.reshape(self.W, (features_dim, 1))),
                      (-1, step_dim))
        if self.bias:
            e += self.b
        e = K.tanh(e)
        # Manual softmax over timesteps, so the mask can be applied
        # between the exp and the normalization.
        a = K.exp(e)
        if mask is not None:
            # Zero out padded positions; cast avoids float64 upcasting.
            a *= K.cast(mask, K.floatx())
        # epsilon guards against a near-zero denominator early in
        # training, which would otherwise yield NaNs.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        # Weighted sum over the time axis -> (batch, features).
        c = K.sum(a * x, axis=1)
        return c

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.features_dim

    def get_config(self):
        # NOTE(review): only step_dim is serialized; regularizers,
        # constraints, and bias fall back to defaults on reload.
        config = {"step_dim": self.step_dim}
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Pass the custom layer class via custom_objects so deserialization can
# resolve the 'Attention' name stored in the HDF5 file.
# NOTE(review): r'\cnn_lstm.h5' is a drive-root-relative Windows path —
# confirm this is the intended location.
model = load_model(r'\cnn_lstm.h5', custom_objects={'Attention': Attention})
5. No module named tensorflow.keras.engine
from tensorflow.python.keras.layers import Layer, InputSpec
6. from tensorflow.keras import initializers
7. tensorflow.keras.preprocessing.sequence.pad_sequences
默認值爲在前面截斷和填充,向前填充相對來說還好,因爲對CNN和LSTM來說都是比較好的方案(LSTM如果是向後填充,效果就會比較差)。但是向前截斷的話,有時候就不是很好的選擇了。
from tensorflow.keras.preprocessing.sequence import pad_sequences
list_value = [[1, 2, 3, 4, 5]]
# Defaults are padding='pre' and truncating='pre', so this keeps the LAST
# three elements ([[3, 4, 5]]); pass truncating='post' to keep the head.
pad_sequences(list_value, maxlen=3)
只需做下列修改即可:
truncating='post'
8.多個GPU並行使用
from tensorflow.keras.utils import multi_gpu_model
# Replicate the model across 4 GPUs, splitting each batch between them.
# NOTE(review): multi_gpu_model was deprecated and removed in TF 2.4+;
# prefer tf.distribute.MirroredStrategy on newer TensorFlow — confirm
# your TF version before relying on this API.
parallel_model = multi_gpu_model(model, gpus = 4)