Deep-Learning-Based Music Recommendation System (Part 3): Extracting Spectrogram Features with a Trained CNN and Computing Image Similarity

This module consists of several parts:

  1. Load the trained, saved CNN model (only the four-convolutional-layer part)
  2. Read the elements of the tfrecords files one by one, feed them into the trained CNN, and extract 128 features per image
  3. Each song has 11 images, i.e. 11*128 features; compute cosine similarity between songs over these features
  4. For each song, return the names of its three most similar songs, as a list
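All snippets below assume a TensorFlow 1.x environment. The imports they rely on (tf, np, and Counter all appear further down) are collected here so each fragment can be read on its own:

import tensorflow as tf
import numpy as np
from collections import Counter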
  • Load the trained, saved CNN model (only the four-convolutional-layer part)

  • Define the CNN model's parameters
lr = tf.Variable(0.001, dtype=tf.float32)                       # learning rate (only used during training)
x = tf.placeholder(tf.float32, [None, 256, 256, 1], name='x')   # spectrogram input
y_ = tf.placeholder(tf.float32, [None], name='y_')              # labels (not needed for feature extraction)
keep_prob = tf.placeholder(tf.float32)                          # dropout keep probability
  • CNN model structure definition

def weight_variable(shape,name):
	initial = tf.truncated_normal(shape, stddev=0.1)
	return tf.Variable(initial,name=name)


def bias_variable(shape,name):
	initial = tf.constant(0.1, shape=shape)
	return tf.Variable(initial,name=name)

def conv2d(x, W):
	# stride [1, x_movement, y_movement, 1]; must have strides[0] = strides[3] = 1
	with tf.name_scope('conv2d'):
		return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
	# 2x2 window, stride 2: halves each spatial dimension
	with tf.name_scope('max_pool_2x2'):
		return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def max_pool_4x4(x):
	# 4x4 window, stride 4: quarters each spatial dimension
	with tf.name_scope('max_pool_4x4'):
		return tf.nn.max_pool(x, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')


def define_predict_y(x):
	with tf.variable_scope("conv1"):
		## conv1 layer ##
		W_conv1 = weight_variable([3, 3, 1, 64], 'W_conv1')     # patch 3x3, in 1 channel, out 64
		b_conv1 = bias_variable([64], 'b_conv1')
		h_conv1 = tf.nn.elu(conv2d(x, W_conv1) + b_conv1)       # output size 256x256x64
		h_pool1 = max_pool_2x2(h_conv1)                         # output size 128x128x64
	with tf.variable_scope("conv2"):
		## conv2 layer ##
		W_conv2 = weight_variable([3, 3, 64, 128], 'W_conv2')   # patch 3x3, in 64, out 128
		b_conv2 = bias_variable([128], 'b_conv2')
		h_conv2 = tf.nn.elu(conv2d(h_pool1, W_conv2) + b_conv2) # output size 128x128x128
		h_pool2 = max_pool_4x4(h_conv2)                         # output size 32x32x128
	with tf.variable_scope("conv3"):
		## conv3 layer ##
		W_conv3 = weight_variable([3, 3, 128, 256], 'W_conv3')  # patch 3x3, in 128, out 256
		b_conv3 = bias_variable([256], 'b_conv3')
		h_conv3 = tf.nn.elu(conv2d(h_pool2, W_conv3) + b_conv3) # output size 32x32x256
		h_pool3 = max_pool_4x4(h_conv3)                         # output size 8x8x256
	with tf.variable_scope("conv4"):
		## conv4 layer ##
		W_conv4 = weight_variable([3, 3, 256, 512], 'W_conv4')  # patch 3x3, in 256, out 512
		b_conv4 = bias_variable([512], 'b_conv4')
		h_conv4 = tf.nn.elu(conv2d(h_pool3, W_conv4) + b_conv4) # output size 8x8x512
		h_pool4 = max_pool_4x4(h_conv4)                         # output size 2x2x512

	with tf.variable_scope("fc1"):
		## fc1 layer ##
		W_fc1 = weight_variable([2*2*512, 128], 'W_fc1')
		b_fc1 = bias_variable([128], 'b_fc1')
		# [n_samples, 2, 2, 512] ->> [n_samples, 2*2*512]
		h_pool4_flat = tf.reshape(h_pool4, [-1, 2*2*512])
		h_fc1 = tf.nn.elu(tf.matmul(h_pool4_flat, W_fc1) + b_fc1)
		h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

	# ## fc2 layer (classification head, not needed for feature extraction) ##
	# with tf.variable_scope("fc2"):
	# 	W_fc2 = weight_variable([128, 10], 'W_fc2')
	# 	b_fc2 = bias_variable([10], 'b_fc2')
	# 	predict_y = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

	return h_fc1_drop

prediction = define_predict_y(x)
# Saver for saving and restoring the model
new_saver = tf.train.Saver()
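As a quick sanity check (a sketch, not part of the original post), a randomly initialized graph can confirm that the extractor really emits 128 features per image:

with tf.Session() as sess:
	sess.run(tf.global_variables_initializer())
	# hypothetical dummy spectrogram, only to verify tensor shapes
	dummy = np.random.rand(1, 256, 256, 1).astype(np.float32)
	feat = sess.run(prediction, feed_dict={x: dummy, keep_prob: 1.0})
	print(feat.shape)  # expected: (1, 128)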
  • Load the saved model parameters
with tf.Session() as sess:
	new_saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Administrator/Desktop/ckpt/'))
	print("Parameters restored successfully!")
  • Read the elements of the tfrecords files one by one, feed them into the trained CNN, and extract 128 features per image

1. Read the elements of the tfrecords files one by one

def _parse_record(example_proto):
	features = {
				'encoded': tf.FixedLenFeature((), tf.string),
				'fname': tf.FixedLenFeature((), tf.string),
				'width': tf.FixedLenFeature((), tf.int64),
				'height': tf.FixedLenFeature((), tf.int64),
				'label': tf.FixedLenFeature((), tf.int64),}
	parsed_features = tf.parse_single_example(example_proto, features=features)
	return parsed_features
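For reference, _parse_record assumes each record was serialized with exactly these five keys. A hypothetical writer-side sketch (the actual writer belongs to an earlier part of this series) would look like:

def _make_example(img_bytes, fname, width, height, label):
	# Hypothetical helper mirroring the schema _parse_record expects
	return tf.train.Example(features=tf.train.Features(feature={
		'encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_bytes])),
		'fname': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fname.encode()])),
		'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
		'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
		'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
	}))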



### 1. Read every image out of the tfrecords files
img_vec_list = []  # all images, stored in order, as [image_array, fname, label]



def read_test(input_file):

	# Read the tfrecord file with the Dataset API
	dataset = tf.data.TFRecordDataset(input_file)
	dataset = dataset.map(_parse_record)  # parse every record via Dataset.map
	#dataset = dataset.repeat(epochs).shuffle(buffer_size).batch(batch_size)
	iterator = dataset.make_one_shot_iterator()

	# Build the decode ops once, outside the session loop, so the graph does
	# not grow with every record
	features = iterator.get_next()
	img = tf.decode_raw(features['encoded'], tf.uint8)
	img = tf.reshape(img, [256, 256, 1])
	img = tf.cast(img, tf.float32) / 255.0   # normalize pixel values to [0, 1]
	label = tf.cast(features['label'], tf.int32)

	with tf.Session() as sess:
		try:
			i = 0
			while True:
				img_val, fname, label_val = sess.run([img, features['fname'], label])
				i = i + 1
				print(i)
				one = [img_val, fname.decode(), label_val]
				print(one[1])
				img_vec_list.append(one)
		except tf.errors.OutOfRangeError:
			print("..")
		print("-------------", len(img_vec_list))
		img_vec_list.sort(key=lambda x: x[1])   # keep images ordered by filename
		print("over..")
for i in range(10):
	read_test('F:/data/test%d.tfrecords' % i)
	read_test('F:/data/train%d.tfrecords' % i)
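Since every song contributes 11 spectrogram slices, a cheap consistency check (a sketch, not in the original post) after loading is:

assert len(img_vec_list) % 11 == 0, len(img_vec_list)
print("songs:", len(img_vec_list) // 11)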

2. Feed them into the trained CNN

vector_list = []

def get_vector():
	with tf.Session() as sess:
		print("there..")
		# Initialize variables (the restore below overwrites them)
		sess.run(tf.global_variables_initializer())
		print("222")
		# Create a coordinator to manage threads
		coord = tf.train.Coordinator()
		print("333")
		# Start the QueueRunners; the filename queue is filled from here on
		threads = tf.train.start_queue_runners(sess=sess, coord=coord)
		print("444")

		new_saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Administrator/Desktop/ckpt/'))
		print("Parameters restored successfully!")

		for i in range(len(img_vec_list)):
			# keep_prob must be 1.0 at inference time: dropout would otherwise
			# randomly zero features and make the similarities noisy
			vector = sess.run(prediction,
			                  feed_dict={x: np.expand_dims(img_vec_list[i][0], 0),
			                             keep_prob: 1.0})
			vector_list.append(vector)
			#print("vector is :", len(vector[0]))

get_vector()
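There should now be one feature vector per image; note that sess.run returns a batch of one, so each entry has shape (1, 128). A quick check (a sketch, not in the original post):

assert len(vector_list) == len(img_vec_list)
print(vector_list[0].shape)  # (1, 128)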
  • Each song has 11 images, i.e. 11*128 features; compute cosine similarity between songs over these features

def cos_sim(vector_a, vector_b):
    """
    Compute the cosine similarity between two vectors.
    :param vector_a: vector a
    :param vector_b: vector b
    :return: sim, the cosine mapped from [-1, 1] to [0, 1]
    """
    vector_a = np.mat(vector_a)
    vector_b = np.mat(vector_b)
    num = float(vector_a * vector_b.T)
    denom = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    cos = num / denom
    sim = 0.5 + 0.5 * cos   # map cosine from [-1, 1] to [0, 1]
    return sim
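A quick illustration of the mapping: identical vectors score 1.0, opposite vectors 0.0, and orthogonal vectors 0.5:

a = np.array([1.0, 0.0])
print(cos_sim(a, a))             # 1.0
print(cos_sim(a, -a))            # 0.0
print(cos_sim(a, [0.0, 1.0]))    # 0.5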

### 3. For every image, find the most similar image belonging to a different song
cos_list = []  # one entry per image: [own song index, most similar song index, similarity]

def get_all_vec_cos():
	for i in range(len(img_vec_list)):
		max_cos = 0
		max_index = int(i/11)   # fallback: the song's own index
		for j in range(len(img_vec_list)):
			if int(i/11) == int(j/11):
				# skip images belonging to the same song
				continue
			else:
				temp_cos = cos_sim(vector_list[i], vector_list[j])

				if temp_cos > max_cos:
					print("temp_cos:", temp_cos, "max_cos", max_cos)
					max_cos = temp_cos
					max_index = int(j/11)
		cos_list.append([int(i/11), max_index, max_cos])
		print("cos:", i, "  ", cos_list[i])
	print("cos_list:", len(cos_list))

get_all_vec_cos()
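The double loop above computes O(n²) similarities one pair at a time, which is slow in pure Python. An optional vectorized sketch (not in the original post) that yields the same per-image best-song indices with a single matrix product:

feats = np.vstack(vector_list)                         # (num_images, 128)
unit = feats / np.linalg.norm(feats, axis=1, keepdims=True)
sim = 0.5 + 0.5 * unit.dot(unit.T)                     # same [0, 1] mapping as cos_sim
song = np.arange(len(feats)) // 11
sim[song[:, None] == song[None, :]] = -1.0             # mask out images of the same song
best_song = sim.argmax(axis=1) // 11                   # most similar song index per image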
  • For each song, return the names of its three most similar songs, as a list

most_video = []

# Returns video (song) indices
def get_most_video():
	# Split cos_list into chunks of 11, one chunk per song
	#cos_list = [cos_list[i:i+11] for i in range(0,len(cos_list),11)]
	print("cos_list:", cos_list)
	split_cos_list = []
	for j in range(0, len(cos_list), 11):
		split_cos_list.append(cos_list[j:j+11])
	print("split_cos_list:", split_cos_list)
	for i in range(len(split_cos_list)):
		index = []
		for item in split_cos_list[i]:
			index.append(item[1])   # the most-similar song index for each of the 11 images
		# the three songs voted for most often across the 11 images
		most_index = Counter(index).most_common(3)
		most_video.append(most_index)
	#print("most_video:", len(most_video))

get_most_video()
#print(most_video)
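The step heading promises song names, while most_video holds (song_index, count) pairs from Counter.most_common(3). A hypothetical final mapping, assuming a song_names list (not shown in this post) ordered the same way as the sorted img_vec_list:

def get_most_names(song_names):
	# song_names is a hypothetical list: one name per song, in sorted order
	return [[song_names[idx] for idx, _ in top3] for top3 in most_video]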

 
