caffe中其實已經自帶了這樣的小工具
caffe-master/tools/extra/parse_log.sh
caffe-master/tools/extra/extract_seconds.py和
caffe-master/tools/extra/plot_training_log.py.example;
拷貝以上文件到當前log目錄下(訓練日誌保存目錄下)
並將plot_training_log.py.example改名爲plot_training_log.py
第一步保存日誌文件,用重定向即可:
已有log文件忽略
$TOOLS/caffe train --solver=$SOLVERFILE 2>&1 |tee out.log
第二步直接繪製:
python plot_training_log.py 2 testloss.png out.log
注意,採用的是Python2.7不是Python3
參數2-是選擇畫哪種類型的圖片,具體數字是代表哪個類型可以查看幫助信息看到:
Notes:
1. Supporting multiple logs.
2. Log file name must end with the lower-cased “.log”.
Supported chart types:
0: Test accuracy vs. Iters
1: Test accuracy vs. Seconds
2: Test loss vs. Iters
3: Test loss vs. Seconds
4: Train learning rate vs. Iters
5: Train learning rate vs. Seconds
6: Train loss vs. Iters
7: Train loss vs. Seconds
讀者根據自身需求進行選擇
當遇到如下情況
/home/th/data/TH/CARlog/parse_log.sh: 行 47: /home/th/data/TH/CARlog/extract_seconds.py: 權限不夠
paste: aux4.txt: 沒有那個文件或目錄
rm: 無法刪除'aux4.txt': 沒有那個文件或目錄
/home/th/data/TH/CARlog/parse_log.sh: 行 47: /home/th/data/TH/CARlog/extract_seconds.py: 權限不夠
paste: aux3.txt: 沒有那個文件或目錄
rm: 無法刪除'aux3.txt': 沒有那個文件或目錄
因爲aux4.txt是由aux3.txt來的,這樣就無法生成aux4.txt,也就報錯說不能paste和rm aux4.txt。
在extract_seconds.py中也是通過尋找Solving來確定開始時間的。如果單獨用parse_log.py生成日誌文件,不會報aux4.txt的錯誤,但會報extract_seconds.py權限不夠的錯誤。
在parse_log.sh
中修改如下兩行代碼:
grep '] Solving ' $1 > aux3.txt
# grep 'Testing net' $1 >> aux3.txt
grep 'Train net' $1 >> aux3.txt
同時需要修改plot_training_log.py
文件中的load_data()
函數
之所以修改這個函數,因爲原函數是從*.log.test和*.log.train的第一行讀取數據,但是第一行是單詞,無法轉換成浮點數,必須從第二行開始讀取數據。
修改代碼如下:
def load_data(data_file, field_idx0, field_idx1):
    """Load two numeric columns from a parsed-log data file.

    Args:
        data_file: path to a ``*.log.train`` / ``*.log.test`` file produced
            by parse_log.sh or parse_log.py.
        field_idx0: zero-based index of the column for the x axis.
        field_idx1: zero-based index of the column for the y axis.

    Returns:
        A pair of lists ``[xs, ys]`` holding the float values of the two
        requested columns, in file order.
    """
    data = [[], []]
    with open(data_file, 'r') as f:
        for line in f:
            line = line.strip()
            # Skip blank lines (the original indexed line[0], which raises
            # IndexError on an empty line) and '#' comment lines.
            if not line or line[0] == '#':
                continue
            # split() collapses runs of whitespace, matching the original's
            # split-on-space / filter-empties / rejoin dance.
            fields = line.split()
            try:
                x = float(fields[field_idx0].strip())
                y = float(fields[field_idx1].strip())
            except ValueError:
                # Header row: column names cannot be parsed as floats, so
                # skip it -- the data proper starts on the next line.
                continue
            data[0].append(x)
            data[1].append(y)
    return data
如果爲Windows平臺,沒有shell可以採用parse_log.py
工具
源代碼如下:
#!/usr/bin/env python
"""
Parse training log
Evolved from parse_log.sh
"""
import os
import re
import extract_seconds
import argparse
import csv
from collections import OrderedDict
def parse_log(path_to_log):
    """Parse log file

    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows
    """
    # One group: the iteration number.
    regex_iteration = re.compile('Iteration (\d+)')
    # Three groups: output index, output name, numeric value.
    regex_train_output = re.compile('Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile('Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    # Matches floats in plain or exponent notation after 'lr = '.
    regex_learning_rate = re.compile('lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1          # -1 means "no Iteration line seen yet"
    learning_rate = float('NaN')  # NaN until the first 'lr = ...' line
    train_dict_list = []
    test_dict_list = []
    train_row = None        # row currently being filled with train outputs
    test_row = None         # row currently being filled with test outputs

    # extract_seconds is the project-local helper shipped next to this
    # script; it infers timestamps from the glog-format lines.
    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the first
                # iteration
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(line,
                                                                  logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming solver
                continue

            # Elapsed wall-clock time since the solver started.
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            # Feed every line to both accumulators; each one ignores lines
            # that do not match its regex.
            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    # The first rows were parsed before any 'lr = ...' line appeared; patch
    # their NaN learning rates from the second row.
    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_line_for_net_output(regex_obj, row, row_dict_list,
                              line, iteration, seconds, learning_rate):
    """Parse a single line for training or test output

    Returns a tuple (row_dict_list, row):
    row: either a new row or an augmented version of the current row
    row_dict_list: either the current row_dict_list or an augmented
    version of it
    """
    match = regex_obj.search(line)
    if match:
        starting_new_row = not row or row['NumIters'] != iteration
        if starting_new_row:
            if row:
                # Flush the previous row before opening a fresh one.  This
                # normally only fires for the very first row; later rows are
                # flushed by the full-row check below.
                row_dict_list.append(row)
            row = OrderedDict((('NumIters', iteration),
                               ('Seconds', seconds),
                               ('LearningRate', learning_rate)))
        # group(1) is the output index (currently unused); group(2) and
        # group(3) are the output's name and numeric value.
        row[match.group(2)] = float(match.group(3))

    row_is_full = (bool(row)
                   and len(row_dict_list) >= 1
                   and len(row) == len(row_dict_list[0]))
    if row_is_full:
        # Same column count as the first row => every output was seen;
        # append it and reset for the next iteration's outputs.
        row_dict_list.append(row)
        row = None
    return row_dict_list, row
def fix_initial_nan_learning_rate(dict_list):
    """Backfill the first row's learning rate from the second row.

    The solver only prints `lr = ...` after the initial test/train step, so
    the first parsed row carries LearningRate = NaN.  Copy the value from
    row two when it exists; mutates `dict_list` in place.
    """
    if len(dict_list) <= 1:
        return
    dict_list[0]['LearningRate'] = dict_list[1]['LearningRate']
def save_csv_files(logfile_path, output_dir, train_dict_list, test_dict_list,
                   delimiter=',', verbose=False):
    """Save CSV files to output_dir

    If the input log file is, e.g., caffe.INFO, the names will be
    caffe.INFO.train and caffe.INFO.test
    """
    base = os.path.basename(logfile_path)
    # Train first, then test, matching the original call order.
    for suffix, rows in (('.train', train_dict_list),
                         ('.test', test_dict_list)):
        write_csv(os.path.join(output_dir, base + suffix),
                  rows, delimiter, verbose)
def write_csv(output_filename, dict_list, delimiter, verbose=False):
    """Write a CSV file.

    Args:
        output_filename: destination path.
        dict_list: list of row dicts; the first row's keys become the
            CSV header and define the column order.
        delimiter: column separator character.
        verbose: when True, print what was (or was not) written.

    Writes no file at all when dict_list is empty.
    """
    if not dict_list:
        if verbose:
            print('Not writing %s; no lines to write' % output_filename)
        return

    with open(output_filename, 'w') as f:
        # Pass the delimiter as a format parameter instead of assigning to
        # csv.excel.delimiter: the original mutated the shared dialect
        # class, silently changing the delimiter for every later user of
        # csv.excel in the process.
        dict_writer = csv.DictWriter(f, fieldnames=dict_list[0].keys(),
                                     dialect=csv.excel, delimiter=delimiter)
        dict_writer.writeheader()
        dict_writer.writerows(dict_list)
    if verbose:
        # print() form runs on both Python 2 and 3; the original used a
        # Python 2 print statement here, inconsistent with the call above.
        print('Wrote %s' % output_filename)
def parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description=('Parse a Caffe training log into two CSV files '
                     'containing training and testing information'))
    parser.add_argument('logfile_path',
                        help='Path to log file')
    parser.add_argument('output_dir',
                        help='Directory in which to place output CSV files')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Print some extra info (e.g., output filenames)')
    parser.add_argument('--delimiter',
                        default=',',
                        help=("Column delimiter in output files "
                              "(default: '%(default)s')"))
    return parser.parse_args()
def main():
    """Entry point: parse the log named on the command line into CSVs."""
    args = parse_args()
    train_rows, test_rows = parse_log(args.logfile_path)
    save_csv_files(args.logfile_path, args.output_dir,
                   train_rows, test_rows,
                   delimiter=args.delimiter)
# Standard script entry guard: run main() only when executed directly,
# not when this module is imported by another script.
if __name__ == '__main__':
    main()
來生成*****log.test,*****log.train兩個文件
Windows後續操作可參考該博客:https://blog.csdn.net/sunshine_in_moon/article/details/53541573
本文在此只講述Ubuntu16.04下caffe訓練日誌繪製loss曲線以及accuracy
修改後的完整代碼請到此處下載:https://download.csdn.net/download/tanghong1996/10601483