本來還想開開心心地結束第五章,誰知5.4節遇上大坑了。。
import tensorflow as tf
import tensorlayer as tl
from stringclean import *
import numpy as np
vocabulary_size = 50000
embedding_size = 128
model_file_name = "model_word2vec_50k_128"
batch_size = None
_UNK = "_UNK"
sess = tf.InteractiveSession()
all_var = tl.files.load_npy_to_any(name=model_file_name + '.npy')
data = all_var['data']
count = all_var['count']
dictionary = all_var['dictionary']
reverse_dictionary = all_var['reverse_dictionary']
print("~~~~~~~Loading npy successfully~~~~~~~~~~~~")
tl.nlp.save_vocab(count, name='vocab_' + model_file_name + '.txt')
del all_var, data, count
# load_params= tl.files.load_npz(name=model_file_name + '.npz')
load_params= tl.files.load_npz(name='53model.npz')
print("~~~~~~~Loading npz successfully~~~~~~~~~~~~")
x = tf.placeholder(tf.int32, shape=[batch_size])
emb_net = tl.layers.EmbeddingInputlayer(inputs=x, vocabulary_size=vocabulary_size, embedding_size=embedding_size, name='embedding_layer')
tl.files.assign_params(sess, load_params, emb_net)
tl.layers.initialize_global_variables(sess)
emb_net.print_params()
emb_net.print_layers()
print('~~~~~~~~~~單詞~~~~~~~~~~~~')
word = 'hello'
word_id = dictionary[word]
print('word_id::::', word_id)
print('~~~~~~~~~~拆詞~~~~~~~~~~~~')
word = 'by'
word_id = tl.nlp.words_to_word_ids(word, dictionary, _UNK)
print('word_id::::', word_id)
context = tl.nlp.word_ids_to_words(word_id, reverse_dictionary)
print('context::::', context)
print('~~~~~~~~多詞~~~~~~~~~~~')
words = ['i', 'am', 'tensor', 'layer']
word_ids = tl.nlp.words_to_word_ids(words, dictionary, _UNK)
print('word_ids::::', word_ids)
context = tl.nlp.word_ids_to_words(word_ids, reverse_dictionary)
print('context::::', context)
vectors = sess.run(emb_net.outputs, feed_dict={x: word_ids})
print('vectors::::', vectors.shape)
輸出如下:
~~~~~~~Loading npy successfully~~~~~~~~~~~~
[TL] 50000 vocab saved to vocab_model_word2vec_50k_128.txt in C:\bbbb\學習\python教材\jfj\一起玩轉Tensorlayer
~~~~~~~Loading npz successfully~~~~~~~~~~~~
[TL] EmbeddingInputlayer embedding_layer: (50000, 128)
[TL] param 0: embedding_layer/embeddings:0 (50000, 128) float32_ref (mean: -3.736475628102198e-05, median: -4.611164331436157e-05, std: 0.057736434042453766)
[TL] num of params: 6400000
[TL] layer 0: embedding_layer/embedding_lookup:0 (?, 128) float32
~~~~~~~~~~單詞~~~~~~~~~~~~
word_id:::: 6436
~~~~~~~~~~拆詞~~~~~~~~~~~~
word_id:::: [73, 495]
context:::: ['b', 'y']
~~~~~~~~多詞~~~~~~~~~~~
word_ids:::: [72, 1226, 13297, 1987]
context:::: ['i', 'am', 'tensor', 'layer']
vectors:::: (4, 128)
我一開始運行時是有報錯的如下:
~~~~~~~Loading npy successfully~~~~~~~~~~~~
[TL] 50000 vocab saved to vocab_model_word2vec_50k_128.txt in C:\bbbb\學習\python教材\jfj\一起玩轉Tensorlayer
Traceback (most recent call last):
File "C:\bbbb\學習\python教材\jfj\一起玩轉Tensorlayer\5.4.py", line 22, in <module>
load_params = tl.files.load_npz(name=model_file_name + '.npz')
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorlayer\files.py", line 1207, in load_npz
return d['params']
File "C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 239, in __getitem__
raise KeyError("%s is not a file in the archive" % key)
KeyError: 'params is not a file in the archive'
[Finished in 4.4s]
拆騰了一天,發現主要問題是第5.3節上有個差錯,
load_params= tl.files.load_npz(name=model_file_name + '.npz')
這行是導入不了npz的,會報上面的錯。
load_params= tl.files.load_npz(name='53model.npz')
這行是我改的,53model.npz 是我後來生成的,我把5.3的代碼這裏改了,注意一下:
if (step % (print_freq * 5) == 0) and (step != 0):
print("******Save model, data and dictionaries***" + "!" * 10)
# Save to ckpt or npz file
# saver = tf.train.Saver()
# save_path = saver.save(sess, model_file_name+'.ckpt')
tl.files.save_npz_dict(emb_net.all_params, name=model_file_name + '.npz', sess=sess)
tl.files.save_npz(save_list=None, name='53model.npz', sess=sess)
tl.files.save_any_to_npy(
save_dict={
'data': data,
'count': count,
'dictionary': dictionary,
'reverse_dictionary': reverse_dictionary
}, name=model_file_name + '.npy'
)
step += 1
對的,我新加了 tl.files.save_npz(save_list=None, name='53model.npz', sess=sess)這行。因爲tl.files.save_npz_dict(emb_net.all_params, name=model_file_name + '.npz', sess=sess)這行生成的npz是不符合load npz規則的,因爲npz起始文件名要params才行。就是因爲這個害我重新run了一遍5.3的程序。。。。。
爲什麼,因爲源碼是這麼要求的啊~~下面是源碼
def load_npz(path='', name='model.npz'):
"""Load the parameters of a Model saved by tl.files.save_npz().
Parameters
----------
path : str
Folder path to `.npz` file.
name : str
The name of the `.npz` file.
Returns
--------
list of array
A list of parameters in order.
Examples
--------
- See ``tl.files.save_npz``
References
----------
- `Saving dictionary using numpy <http://stackoverflow.com/questions/22315595/saving-dictionary-of-header-information-using-numpy-savez>`__
"""
d = np.load(path + name)
return d['params']