前言
本節使用百度飛槳(PaddlePaddle)的預訓練模型 ResNet-50 訓練一個圖像分類模型,實現的效果是:攝像頭識別到科比時切換到 pycharm 窗口,識別到庫裏時切換到 chrome 窗口。
效果圖
運行環境
ubuntu16.04
pycharm2019
paddlepaddle1.8.1
命令行切換窗口
首先安裝工具wmctrl 在linux的終端下執行:
sudo apt-get install wmctrl
切換窗口的命令:
wmctrl -a "pycharm"
主要任務
- 任務一:使用opencv讀取本地攝像頭
- 任務二:讀取攝像頭數據幀放入網絡識別匹配識別結果
- 任務三:根據識別結果切換窗口
因爲 opencv 讀取攝像頭數據和數據幀識別都比較佔用資源,所以我開啓了兩個線程分別處理這兩個任務,並定義了一個標誌位 flag 來同步:只有上一幀識別完成後(flag 爲 0)才把新的攝像頭數據幀寫入本地,寫入完成後(flag 爲 1)才對該幀進行識別。
訓練網絡代碼
這個就是配置網絡的一些參數,導入網絡模型,基本是固定格式。 train.py文件如下:
import paddlehub as hub
from paddlehub.dataset.base_cv_dataset import BaseCVDataset
class DemoDataset(BaseCVDataset):
    """Classification dataset described by the list files under ``dataset/``."""

    def __init__(self):
        # Dataset root directory handed to the base class as ``base_path``;
        # it is left empty here.
        self.dataset_dir = ""
        list_files = {
            "train_list_file": "dataset/train_list.txt",
            "validate_list_file": "dataset/validate_list.txt",
            "test_list_file": "dataset/test_list.txt",
            "label_list_file": "dataset/label_list.txt",
        }
        super().__init__(base_path=self.dataset_dir, **list_files)
# Pre-trained ResNet-50 (ImageNet) module from PaddleHub, and the custom dataset.
module = hub.Module(name="resnet_v2_50_imagenet")
dataset = DemoDataset()

# The reader takes the image size and normalisation statistics reported by
# the pre-trained module.
data_reader = hub.reader.ImageClassificationReader(
    image_width=module.get_expected_image_width(),
    image_height=module.get_expected_image_height(),
    images_mean=module.get_pretrained_images_mean(),
    images_std=module.get_pretrained_images_std(),
    dataset=dataset,
)

config = hub.RunConfig(
    # Whether to train on the GPU; defaults to False.
    use_cuda=False,
    # Number of fine-tune epochs.
    num_epoch=5,
    # Where model checkpoints are saved; generated automatically when the
    # user does not specify one.
    checkpoint_dir="cv_finetune_turtorial_demo",
    # Training batch size; adjust to the hardware when training on a GPU.
    batch_size=10,
    # Evaluation interval in steps (the default evaluates on the validation
    # set every 100 steps).
    eval_interval=10,
    # Fine-tune optimisation strategy. Alternative left by the author:
    # hub.finetune.strategy.AdamWeightDecayStrategy()
    strategy=hub.finetune.strategy.DefaultFinetuneStrategy(),
)

# Fetch the module's input/output variables with its parameters trainable.
input_dict, output_dict, program = module.context(trainable=True)

# Classification task fed by the module's "image" input and built on its
# "feature_map" output, with one class per dataset label.
task = hub.ImageClassifierTask(
    data_reader=data_reader,
    feed_list=[input_dict["image"].name],
    feature=output_dict["feature_map"],
    num_classes=dataset.num_labels,
    config=config,
)

# Fine-tune and evaluate using the settings in ``config``.
run_states = task.finetune_and_eval()
任務一代碼
顯示攝像頭數據並且寫入本地
def showImg():
    """Show the live camera feed and hand frames over to the recognizer.

    Reads frames from camera 0 and displays each one in a window named
    "cap". Whenever the module-level ``flag`` is 0 the current frame is
    written to ``./temp_out/cap.jpg`` and ``flag`` is set to 1 so that the
    recognizer knows a fresh frame is available. The loop ends when 'q' is
    pressed in the window or when a frame can no longer be read.
    """
    import os  # local import: only needed to create the output directory

    global flag
    # cv2.imwrite cannot create missing directories, so make sure it exists.
    os.makedirs("./temp_out", exist_ok=True)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera missing or disconnected);
                # passing it on to imshow/imwrite would raise.
                print("showImg: failed to read a frame from camera 0, stopping.")
                break
            cv2.imshow("cap", frame)
            # Compare by value; ``is`` on an int literal tests identity.
            if flag == 0:
                # Only signal the recognizer when the frame really was saved.
                if cv2.imwrite("./temp_out/cap.jpg", frame):
                    flag = 1
            if cv2.waitKey(100) & 0xff == ord('q'):
                break
    finally:
        # Release the camera and close the window even if an error occurred.
        cap.release()
        cv2.destroyAllWindows()
任務二代碼
識別數據幀並切換窗口
def recognize(image_path=None):
    """Classify the frame saved by the camera thread and switch windows.

    Builds the same PaddleHub image-classification task as the training
    script (same module, reader and run configuration, including the
    checkpoint directory "cv_finetune_turtorial_demo"; presumably the
    fine-tuned weights are picked up from there -- confirm against the
    PaddleHub version in use). It then loops forever: whenever the
    module-level ``flag`` is 1 it predicts the label of the saved frame,
    raises the matching window through ``wmctrl`` and resets ``flag`` to 0.

    Args:
        image_path: Path of the frame to classify. Defaults to the absolute
            form of ``./temp_out/cap.jpg`` (resolved against the current
            working directory), i.e. the file the camera thread writes.
    """
    import time  # local import: used only to idle while waiting for a frame

    global flag
    if image_path is None:
        # Absolute path, derived from the location the camera thread writes
        # to, instead of a path that only exists on one machine.
        image_path = os.path.abspath("./temp_out/cap.jpg")

    module = hub.Module(name="resnet_v2_50_imagenet")
    dataset = DemoDataset()
    data_reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)
    config = hub.RunConfig(
        use_cuda=False,  # whether to use the GPU; defaults to False
        num_epoch=5,  # number of fine-tune epochs
        checkpoint_dir="cv_finetune_turtorial_demo",  # checkpoint directory
        batch_size=10,  # batch size; adjust to the hardware when using a GPU
        eval_interval=10,  # evaluation interval in steps
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy())  # fine-tune strategy
    input_dict, output_dict, program = module.context(trainable=True)
    img = input_dict["image"]
    feature_map = output_dict["feature_map"]
    feed_list = [img.name]
    task = hub.ImageClassifierTask(
        data_reader=data_reader,
        feed_list=feed_list,
        feature=feature_map,
        num_classes=dataset.num_labels,
        config=config)
    label_map = dataset.label_dict()

    while True:
        # Compare by value; ``is`` on an int literal tests identity.
        if flag != 1:
            # No fresh frame yet: sleep briefly instead of spinning a core.
            time.sleep(0.01)
            continue
        run_states = task.predict(data=[image_path])
        results = [run_state.run_results for run_state in run_states]
        for batch_result in results:
            # Index of the highest-scoring class for each input in the batch.
            batch_result = np.argmax(batch_result, axis=2)[0]
            for result in batch_result:
                result = label_map[result]
                if "科比" in result:
                    os.system("wmctrl -a \"pycharm\"")
                elif "庫裏" in result:
                    os.system("wmctrl -a \"chrome\"")
        # Frame handled: let the camera thread write the next one.
        flag = 0
完整識別代碼
import paddlehub as hub
from paddlehub.dataset.base_cv_dataset import BaseCVDataset
import numpy as np
import cv2
import threading
import os
# Hand-off flag shared by the two threads: showImg() sets it to 1 after
# writing a frame to disk, recognize() sets it back to 0 once that frame has
# been classified.
flag = 0
class DemoDataset(BaseCVDataset):
    """Classification dataset described by the list files under ``dataset/``."""

    def __init__(self):
        # Dataset root directory handed to the base class as ``base_path``;
        # it is left empty here.
        self.dataset_dir = ""
        list_files = {
            "train_list_file": "dataset/train_list.txt",
            "validate_list_file": "dataset/validate_list.txt",
            "test_list_file": "dataset/test_list.txt",
            "label_list_file": "dataset/label_list.txt",
        }
        super().__init__(base_path=self.dataset_dir, **list_files)
def showImg():
    """Show the live camera feed and hand frames over to the recognizer.

    Reads frames from camera 0 and displays each one in a window named
    "cap". Whenever the module-level ``flag`` is 0 the current frame is
    written to ``./temp_out/cap.jpg`` and ``flag`` is set to 1 so that
    ``recognize`` knows a fresh frame is available. The loop ends when 'q'
    is pressed in the window or when a frame can no longer be read.
    """
    global flag
    # cv2.imwrite cannot create missing directories, so make sure it exists.
    os.makedirs("./temp_out", exist_ok=True)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera missing or disconnected);
                # passing it on to imshow/imwrite would raise.
                print("showImg: failed to read a frame from camera 0, stopping.")
                break
            cv2.imshow("cap", frame)
            # Compare by value; ``is`` on an int literal tests identity.
            if flag == 0:
                # Only signal the recognizer when the frame really was saved.
                if cv2.imwrite("./temp_out/cap.jpg", frame):
                    flag = 1
            if cv2.waitKey(100) & 0xff == ord('q'):
                break
    finally:
        # Release the camera and close the window even if an error occurred.
        cap.release()
        cv2.destroyAllWindows()
def recognize(image_path=None):
    """Classify the frame saved by ``showImg`` and switch windows accordingly.

    Builds the same PaddleHub image-classification task as the training
    script (same module, reader and run configuration, including the
    checkpoint directory "cv_finetune_turtorial_demo"; presumably the
    fine-tuned weights are picked up from there -- confirm against the
    PaddleHub version in use). It then loops forever: whenever the
    module-level ``flag`` is 1 it predicts the label of the saved frame,
    raises the matching window through ``wmctrl`` and resets ``flag`` to 0.

    Args:
        image_path: Path of the frame to classify. Defaults to the absolute
            form of ``./temp_out/cap.jpg`` (resolved against the current
            working directory), i.e. the file ``showImg`` writes.
    """
    import time  # local import: used only to idle while waiting for a frame

    global flag
    if image_path is None:
        # Absolute path, derived from the location showImg writes to,
        # instead of a path that only exists on one machine.
        image_path = os.path.abspath("./temp_out/cap.jpg")

    module = hub.Module(name="resnet_v2_50_imagenet")
    dataset = DemoDataset()
    data_reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)
    config = hub.RunConfig(
        use_cuda=False,  # whether to use the GPU; defaults to False
        num_epoch=5,  # number of fine-tune epochs
        checkpoint_dir="cv_finetune_turtorial_demo",  # checkpoint directory
        batch_size=10,  # batch size; adjust to the hardware when using a GPU
        eval_interval=10,  # evaluation interval in steps
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy())  # fine-tune strategy
    input_dict, output_dict, program = module.context(trainable=True)
    img = input_dict["image"]
    feature_map = output_dict["feature_map"]
    feed_list = [img.name]
    task = hub.ImageClassifierTask(
        data_reader=data_reader,
        feed_list=feed_list,
        feature=feature_map,
        num_classes=dataset.num_labels,
        config=config)
    label_map = dataset.label_dict()

    while True:
        # Compare by value; ``is`` on an int literal tests identity.
        if flag != 1:
            # No fresh frame yet: sleep briefly instead of spinning a core.
            time.sleep(0.01)
            continue
        run_states = task.predict(data=[image_path])
        results = [run_state.run_results for run_state in run_states]
        for batch_result in results:
            # Index of the highest-scoring class for each input in the batch.
            batch_result = np.argmax(batch_result, axis=2)[0]
            for result in batch_result:
                result = label_map[result]
                if "科比" in result:
                    os.system("wmctrl -a \"pycharm\"")
                elif "庫裏" in result:
                    os.system("wmctrl -a \"chrome\"")
        # Frame handled: let the camera thread write the next one.
        flag = 0
if __name__ == '__main__':
    # One thread shows the camera feed and saves frames, the other
    # classifies the saved frame and switches windows.
    t1 = threading.Thread(target=showImg)
    # recognize() never returns, so it runs as a daemon thread: otherwise
    # the process would stay alive after the camera window is closed.
    t2 = threading.Thread(target=recognize, daemon=True)
    t1.start()
    t2.start()
    # The program ends when the camera loop ends (user pressed 'q').
    t1.join()
完整項目以及數據集以及訓練好的模型下載
github地址
使用代碼時記得修改代碼裏的路徑。因爲使用相對路徑時 paddle 總是報錯找不到文件……我是真鬱悶,所以所有路徑都是寫的絕對路徑。