《TensorFlow實戰Google深度學習架構》——Tensorflow實現遷移學習

參考：https://blog.csdn.net/nnnnnnnnnnnny/article/details/70244232
遷移學習的一種常見做法是：把一個已經訓練好的神經網絡（本文使用Inception-v3）的某個隱層（瓶頸層）的輸出當做圖片的特徵向量，然後用自己的標記數據只訓練一個新的全連接層，用於分類。
# %load transfer_flower.py
#!/usr/bin/env python3

# glob，文件路徑查找模塊
import glob
import os.path
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# Number of nodes in the bottleneck layer of the Inception-v3 model.
BOTTLENECK_TENSOR_SIZE = 2048

# Name of the tensor that holds the bottleneck-layer result in Inception-v3.
# In the Inception-v3 model published by Google this tensor is named
# 'pool_3/_reshape:0'. While training a model, the name of a tensor can be
# obtained from tensor.name.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'

# Name of the tensor that receives the image input.
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'

# Directory holding the downloaded, pre-trained Inception-v3 model file.
MODEL_DIR = 'model/'

# File name of the downloaded, pre-trained Inception-v3 model.
MODEL_FILE = 'tensorflow_inception_graph.pb'

# Every training image is used many times, so the feature vector computed by
# Inception-v3 for an image is saved to a file to avoid recomputing it.
# This is the directory where those feature-vector files are stored.
CACHE_DIR = 'tmp/bottleneck/'

# Image data directory.
# Each sub-directory represents one class to be distinguished and contains
# the images that belong to that class.
INPUT_DATA = 'flower_data/'

# Percentage of the data used for validation.
VALIDATION_PERCENTAGE = 10
# Percentage of the data used for testing.
TEST_PERCENTAGE = 10

# Neural-network training settings.
LEARNING_RATE = 0.01
STEPS = 4000
BATCH = 100

def create_image_lists(testing_percentage, validation_percentage):
    """Collect the images under INPUT_DATA and split them into three sets.

    Every directory yielded by os.walk(INPUT_DATA) after the root is treated
    as one class: its basename is looked up directly under INPUT_DATA and all
    files with a JPEG extension found there are randomly assigned to the
    training, testing or validation set. Directories without matching images
    are skipped.

    Args:
        testing_percentage: share of images (0-100) that goes to the testing
            set.
        validation_percentage: share of images (0-100) that goes to the
            validation set.

    Returns:
        A dict keyed by the lower-cased directory name. Each value is a dict
        with the keys 'dir' (the directory name as found on disk) and
        'training', 'testing', 'validation' (lists of image base names).
        The split is random, so a list may be empty for a small class.
    """
    result = {}

    # glob matching ignores case on some platforms (os.path.normcase lowers
    # the pattern there), so 'jpg' and 'JPG' would match the same files and
    # every image would be listed twice -- possibly once in the training set
    # and once in the testing set. Normalising and de-duplicating the
    # extensions keeps each file listed exactly once.
    extensions = sorted(set(
        os.path.normcase(ext) for ext in ['jpg', 'jpeg', 'JPG', 'JPEG']))

    sub_dirs = [entry[0] for entry in os.walk(INPUT_DATA)]
    # The first entry yielded by os.walk is INPUT_DATA itself, not a class.
    for sub_dir in sub_dirs[1:]:
        dir_name = os.path.basename(sub_dir)

        # Gather every image file of this class.
        file_list = []
        for extension in extensions:
            file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extension)
            file_list.extend(glob.glob(file_glob))
        # Nothing to classify in this directory: move on to the next one.
        if not file_list:
            continue

        training_images = []
        testing_images = []
        validation_images = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            # Draw a number in [0, 100) and use the percentage thresholds to
            # pick the set this image belongs to.
            chance = np.random.randint(100)
            if chance < validation_percentage:
                validation_images.append(base_name)
            elif chance < (testing_percentage + validation_percentage):
                testing_images.append(base_name)
            else:
                training_images.append(base_name)

        # The class name is the lower-cased directory name.
        result[dir_name.lower()] = {
            'dir': dir_name,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images,
        }
    return result


def get_image_path(image_lists, image_dir, label_name, index, category):
    """Build the path of one image selected by class, data set and index.

    Args:
        image_lists: dict describing all images; image_lists[label_name] must
            hold a 'dir' entry and one list of file names per data set.
        image_dir: root directory to prefix. The image root and the
            feature-vector cache root differ, so the caller chooses.
        label_name: name of the class to look in.
        index: number of the wanted image; it is wrapped with a modulo, so it
            may exceed the number of images available.
        category: which data set to pick from ('training', 'testing' or
            'validation').

    Returns:
        image_dir joined with the class directory and the image file name.

    Raises:
        ZeroDivisionError: if the selected data set of that class is empty.
    """
    class_info = image_lists[label_name]
    candidates = class_info[category]
    # Wrap the index so that any number maps onto an existing list entry.
    chosen_name = candidates[index % len(candidates)]
    return os.path.join(image_dir, class_info['dir'], chosen_name)


def get_bottlenect_path(image_lists, label_name, index, category):
    """Return the path of the cached feature-vector file for one image.

    The image is selected by class name, data set and index exactly as in
    get_image_path; the result is the corresponding path under CACHE_DIR with
    '.txt' appended to the image file name.
    """
    cached_image_path = get_image_path(
        image_lists, CACHE_DIR, label_name, index, category)
    return cached_image_path + '.txt'


def run_bottleneck_on_image(sess, image_data, image_data_tensor, bottleneck_tensor):
    """Evaluate the bottleneck tensor for one image and flatten the result.

    Args:
        sess: session object whose run() evaluates the loaded model.
        image_data: raw content of the image, fed to image_data_tensor.
        image_data_tensor: input tensor of the loaded model.
        bottleneck_tensor: tensor whose value is used as the image's new
            feature vector.

    Returns:
        The evaluated bottleneck value after np.squeeze, i.e. with every
        length-1 axis removed.
    """
    feed = {image_data_tensor: image_data}
    raw_output = sess.run(bottleneck_tensor, feed)
    # NOTE(review): the original author was unsure about the rank of the raw
    # output; squeezing is what turns it into a plain vector -- confirm the
    # exact shape against the model if it matters.
    return np.squeeze(raw_output)


def get_or_create_bottleneck(sess, image_lists, label_name, index, category, jpeg_data_tensor, bottleneck_tensor):
    """Return the feature vector of one image, using the file cache if possible.

    The vector is looked up in a text file under CACHE_DIR that stores the
    values as comma-separated numbers. When that file is missing, or its
    content cannot be parsed as floats, the vector is computed with
    run_bottleneck_on_image and the cache file is (re)written.

    Args:
        sess: session used to evaluate the loaded model.
        image_lists: dict describing all images (see create_image_lists).
        label_name: class of the wanted image.
        index: number of the wanted image inside its data set.
        category: data set to pick from ('training', 'testing', 'validation').
        jpeg_data_tensor: input tensor that receives the raw image content.
        bottleneck_tensor: tensor whose value is the feature vector.

    Returns:
        The feature vector: a list of floats when it was read from the cache,
        otherwise the value returned by run_bottleneck_on_image.
    """
    sub_dir = image_lists[label_name]['dir']
    # exist_ok=True replaces the former exists()/makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(os.path.join(CACHE_DIR, sub_dir), exist_ok=True)
    bottleneck_path = get_bottlenect_path(image_lists, label_name, index, category)

    if os.path.exists(bottleneck_path):
        # Read the cached vector: comma-separated values converted to float.
        with open(bottleneck_path, 'r') as bottleneck_file:
            bottleneck_string = bottleneck_file.read()
        try:
            return [float(x) for x in bottleneck_string.split(',')]
        except ValueError:
            # An empty or truncated cache file (for example after an
            # interrupted run) must not abort training: fall through and
            # rebuild it from the original image.
            print('Invalid bottleneck cache file %s, recomputing it' % bottleneck_path)

    # No usable cache entry: compute the vector from the original image.
    image_path = get_image_path(image_lists, INPUT_DATA, label_name, index, category)
    # The context manager closes the image file once its content is read.
    with gfile.FastGFile(image_path, 'rb') as image_file:
        image_data = image_file.read()
    bottleneck_values = run_bottleneck_on_image(
        sess, image_data, jpeg_data_tensor, bottleneck_tensor)
    # Store the values as one comma-separated line for later runs.
    with open(bottleneck_path, 'w') as bottleneck_file:
        bottleneck_file.write(','.join(str(x) for x in bottleneck_values))
    return bottleneck_values


def get_random_cached_bottlenecks(sess, n_classes, image_lists, how_many, category,
                                  jpeg_data_tensor, bottleneck_tensor):
    """Randomly draw a batch of feature vectors together with their labels.

    Args:
        sess: session used to evaluate the loaded model when a vector is not
            cached yet.
        n_classes: number of classes; also the length of each label vector.
        image_lists: dict describing all images (see create_image_lists).
        how_many: number of images to draw.
        category: data set to draw from ('training', 'testing', 'validation').
        jpeg_data_tensor: input tensor that receives the raw image content.
        bottleneck_tensor: tensor whose value is the feature vector.

    Returns:
        A tuple (bottlenecks, ground_truths): two lists of length how_many
        holding the feature vectors and the matching one-hot label vectors.
    """
    bottlenecks = []
    ground_truths = []
    # The class names do not change while sampling, so build the list once
    # instead of once per drawn image.
    label_names = list(image_lists.keys())
    for _ in range(how_many):
        # Pick a random class (0 .. n_classes-1) and a random image number
        # (0 .. 65535); the image number is wrapped to the size of the data
        # set when the path is resolved.
        label_index = random.randrange(n_classes)
        label_name = label_names[label_index]
        image_index = random.randrange(65536)
        # Fetch the feature vector of the chosen image (cached or computed).
        bottleneck = get_or_create_bottleneck(sess, image_lists, label_name, image_index, category,
                                              jpeg_data_tensor, bottleneck_tensor)
        # One-hot label vector with one entry per class.
        ground_truth = np.zeros(n_classes, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
    return bottlenecks, ground_truths


def get_test_bottlenecks(sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor):
    """Collect the feature vectors and labels of every testing image.

    Used for the final evaluation, where accuracy is computed over the whole
    testing set instead of a random sample.

    Args:
        sess: session used to evaluate the loaded model when a vector is not
            cached yet.
        image_lists: dict describing all images (see create_image_lists).
        n_classes: number of classes; also the length of each label vector.
        jpeg_data_tensor: input tensor that receives the raw image content.
        bottleneck_tensor: tensor whose value is the feature vector.

    Returns:
        A tuple (bottlenecks, ground_truths) of two equally long lists: the
        feature vectors and the matching one-hot label vectors.
    """
    features = []
    labels = []
    # Walk over the classes in dictionary order; the position of a class in
    # that order is the index set in its one-hot label.
    for class_position, class_name in enumerate(list(image_lists.keys())):
        test_count = len(image_lists[class_name]['testing'])
        for image_position in range(test_count):
            # Feature vector of this testing image (cached or computed).
            feature = get_or_create_bottleneck(
                sess, image_lists, class_name, image_position, 'testing',
                jpeg_data_tensor, bottleneck_tensor)
            one_hot = np.zeros(n_classes, dtype=np.float32)
            one_hot[class_position] = 1.0
            features.append(feature)
            labels.append(one_hot)
    return features, labels


def main(_):
    """Train a new final classification layer on Inception-v3 features.

    Splits the images, loads the pre-trained graph, builds one fully
    connected softmax layer on top of the bottleneck features, trains it for
    STEPS steps with batches of BATCH images, prints the validation accuracy
    every 100 steps and finally prints the accuracy on the testing set.
    The single positional argument is ignored.
    """
    # Read all the images and split them into the three data sets.
    image_lists = create_image_lists(TEST_PERCENTAGE, VALIDATION_PERCENTAGE)
    n_classes = len(image_lists.keys())
    # Read the pre-trained Inception-v3 model.
    # Google's trained model is stored as a GraphDef protocol buffer, which
    # holds how every node is computed as well as the values of the variables.
    # (Model persistence in TensorFlow is covered in chapter 5 of the book.)
    with gfile.FastGFile(os.path.join(MODEL_DIR, MODEL_FILE), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import the Inception-v3 graph and get back the tensor holding the
    # bottleneck-layer result and the tensor that receives the image input.
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(graph_def, return_elements=[BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])
    # Input of the new network: the node values reached at the bottleneck
    # layer when a new image is propagated forward through Inception-v3.
    # This can be thought of as a form of feature extraction.
    bottleneck_input = tf.placeholder(tf.float32, [None, BOTTLENECK_TENSOR_SIZE], name='BottleneckInputPlaceholder')
    # Input for the expected answers (one-hot labels).
    ground_truth_input = tf.placeholder(tf.float32, [None, n_classes], name='GroundTruthInput')
    # One fully connected layer solves the new classification problem;
    # transfer learning only trains the parameters of this last layer.
    # The trained Inception-v3 model already turns the raw image into a
    # feature vector that is easier to classify, so no complex network has to
    # be trained for the new task.
    with tf.name_scope('final_training_ops'):
        weights = tf.Variable(tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, n_classes], stddev=0.001))
        biases = tf.Variable(tf.zeros([n_classes]))
        logits = tf.matmul(bottleneck_input, weights) + biases
        final_tensor = tf.nn.softmax(logits)
    # Cross-entropy loss, averaged over the batch, minimised by gradient descent.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=ground_truth_input)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy_mean)
    # Accuracy: share of examples whose predicted class equals the label.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(final_tensor, 1), tf.argmax(ground_truth_input, 1))
        evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Training loop.
        for i in range(STEPS):
            # Fetch one batch of training data: feature vectors and labels.
            train_bottlenecks, train_ground_truth = get_random_cached_bottlenecks(
                sess, n_classes, image_lists, BATCH, 'training', jpeg_data_tensor, bottleneck_tensor)
            # Feed the feature vectors and labels to the fully connected layer.
            sess.run(train_step, feed_dict={bottleneck_input: train_bottlenecks, ground_truth_input: train_ground_truth})
            # Measure accuracy on a random validation batch every 100 steps
            # and after the last step.
            if i%100 == 0 or i+1 == STEPS:
                validation_bottlenecks, validation_ground_truth = get_random_cached_bottlenecks(
                    sess, n_classes, image_lists, BATCH, 'validation', jpeg_data_tensor, bottleneck_tensor)
                validation_accuracy = sess.run(evaluation_step, feed_dict={
                    bottleneck_input:validation_bottlenecks, ground_truth_input: validation_ground_truth})
                print('Step %d: Validation accuracy on random sampled %d examples = %.1f%%'
                      % (i, BATCH, validation_accuracy*100))
        # Measure accuracy on the complete testing set at the end.
        test_bottlenecks, test_ground_truth = get_test_bottlenecks(sess, image_lists, n_classes,
                                                                       jpeg_data_tensor, bottleneck_tensor)
        test_accuracy = sess.run(evaluation_step, feed_dict={bottleneck_input: test_bottlenecks,
                                                                 ground_truth_input: test_ground_truth})
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))


# Script entry point: only runs when the file is executed directly.
# tf.app.run() (TensorFlow 1.x) is expected to call the main() function of
# this module -- see the tf.app.run documentation for the argument it passes.
if __name__ == '__main__':
    tf.app.run()
《TensorFlow實戰Google深度學習架構》——Tensorflow實現遷移學習

vue項目獲取富文本編輯器wangEditor內容導出爲word（html轉word格式並下載）

dotnet C# 創建 X11 應用時設置窗口背景顏色

Navicat安裝與激活教程

TDengine docker安裝方法

vue3組件通信與props

sapui5

Alpine Linux apk add DNS lookup error

部分JDK版本的發佈時間

工作中用到的腳本合集

合併代碼時Beyond Compare設置

《Sparse and Redundant Representations：From Theory to Applications in Signal and Image Processing》序論

《Sparse and Redundant Representations》第六章迭代收縮算法

《Sparse and Redundant Representations》第五章從精確解到近似解

Matlab運行時出現提示 too many input arguments問題

《Sparse and Redundant Representations》第二章：唯一性與不確定性

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結