Extracting sentence-vector features with BERT:
import numpy as np
from bert_serving.client import BertClient

# `message` holds one batch (batch_size sentences) of data
def _get_message_text(self, message):
    all_tokens = []
    # `msg` is a single sentence
    for msg in message:
        msg_tokens = []
        for t in msg.get("tokens"):
            text = self._replace_number_blank(t.text)
            if text != '':
                # msg_tokens: all tokens of the sentence, e.g. [你,能,不能,查下,餘額]
                msg_tokens.append(text)
        # Join the tokens and split into single characters
        # (character-level input for Chinese BERT)
        a = ''.join(msg_tokens)
        # all_tokens: the token lists of every sentence in the batch
        all_tokens.append(list(a))
    # encode() is an instance method, so a client must be created first
    bc = BertClient()
    bert_embedding = bc.encode(all_tokens, is_tokenized=True)
    return np.squeeze(bert_embedding)
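BertClient only talks to an already running bert-serving-server. Below is a minimal usage sketch, assuming a server started locally with a Chinese BERT-base checkpoint; the model path and worker count are illustrative, not from the source:

# Shell: start the server first, e.g.
#   bert-serving-start -model_dir /path/to/chinese_L-12_H-768_A-12 -num_worker=2
from bert_serving.client import BertClient

bc = BertClient()  # connects to localhost:5555/5556 by default
# With is_tokenized=True the client expects pre-tokenized input
vecs = bc.encode([['你', '能', '不能', '查下', '餘額']], is_tokenized=True)
print(vecs.shape)  # (1, 768) for BERT-base with default mean pooling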
Structure of the fully connected layers:
import tensorflow as tf

def conv_net(x, n_classes, num_layers, layer_size, C2, dropout, is_training):
    # Define a scope for reusing the variables
    # layer_size == [1024, 256]
    # num_layers == 2
    with tf.variable_scope('ConvNet'):
        # Flatten the data to a 1-D vector for the fully connected layers
        x = tf.contrib.layers.flatten(x)
        reg = tf.contrib.layers.l2_regularizer(C2)
        name = 'dense'
        for i in range(num_layers):
            x = tf.layers.dense(inputs=x,
                                units=layer_size[i],
                                activation=tf.nn.relu,
                                kernel_regularizer=reg,
                                name='hidden_layer_{}_{}'.format(name, i))
            x = tf.layers.dropout(x, rate=dropout, training=is_training)
        out = tf.layers.dense(inputs=x,
                              units=n_classes,
                              kernel_regularizer=reg,
                              name='dense_layer_{}'.format(name))
        return out
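Note that tf.layers.dropout accepts either a Python bool or a boolean tensor for `training`, so the same graph can serve both training and prediction by feeding the flag at run time. A sketch of that variant follows; the placeholder names, class count, and hyperparameter values are illustrative, and 768 matches the BERT-base sentence vectors above:

x_in = tf.placeholder(tf.float32, shape=[None, 768], name='x_in')
# Dropout defaults to off; feed True during training steps
training_flag = tf.placeholder_with_default(False, shape=(), name='training_flag')
logits = conv_net(x_in, n_classes=10, num_layers=2,
                  layer_size=[1024, 256], C2=0.002,
                  dropout=0.5, is_training=training_flag)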
logits_train = conv_net(self.a_in, num_classes, self.num_hidden_layers,
                        self.hidden_layer_size, self.C2, self.drop_out,
                        is_training=True)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits_train,
                                                           labels=self.b_in)
# get_regularization_loss(): fetch the total regularization loss
loss = tf.reduce_mean(cross_entropy) + tf.losses.get_regularization_loss()
self.y_predict = tf.nn.softmax(logits_train)
train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
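To exercise the ops above, here is a minimal training-loop sketch; `num_epochs` and `batches` are stand-in names for the actual epoch count and an iterable of (BERT features, one-hot labels) pairs, neither of which appears in the source:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(num_epochs):          # num_epochs: assumed hyperparameter
        for X_batch, Y_batch in batches:     # batches: assumed data iterator
            _, batch_loss = sess.run([train_op, loss],
                                     feed_dict={self.a_in: X_batch,
                                                self.b_in: Y_batch})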