siamese-fc matlab tracking代碼解讀

siamese-fc matlab tracking代碼解讀

tracking部分包含以下代碼
(原文此處為一張截圖,列出 tracking 部分包含的代碼文件;圖片在轉載中遺失,僅剩佔位文字。)
1.run_tracker.m

function run_tracker(video, visualization)
% RUN_TRACKER  External entry point of the tracker.
%   Performs environment initialization and delegates to tracker.m.
%
%   video         - name/path of the video sequence to track
%   visualization - whether to display tracking results while running

% Set up matconvnet, util and tracking paths.
startup;

% Bundle the run-time options into a single parameter struct.
% (Parameters that should have no effect on the tracking result.)
params = struct( ...
    'video',         video, ...         % video sequence path
    'visualization', visualization, ... % show annotated frames
    'gpus',          1);                % GPU device to use

%% Parameters that should be recorded.
% params.foo = 'blah';

% Hand off to the main tracking routine.
tracker(params);
end

2.tracker.m

% TRACKER  SiamFC main tracking routine.
function bboxes = tracker(varargin)
%TRACKER Track a target through a video sequence with SiamFC.
%   bboxes = TRACKER(...) returns an nImgs-by-4 matrix of per-frame
%   bounding boxes [x, y, w, h] in frame coordinates. Defaults below can
%   be overridden by name/value pairs or a struct in varargin.

% ---- Hyper-parameters ----
p.numScale = 3;            % number of scales in the search pyramid
p.scaleStep = 1.0375;      % ratio between adjacent scales
p.scalePenalty = 0.9745;   % penalty applied to non-identity-scale responses
p.scaleLR = 0.59;          % damping factor (learning rate) for scale updates
p.responseUp = 16;         % upsample the 17x17 response map for finer localization
p.windowing = 'cosine';    % window type used to penalize large displacements
p.wInfluence = 0.176;      % window influence (convex combination weight)
p.net = '2016-08-17_gray025.net.mat'; % pretrained network file name

% ---- Execution, visualization and benchmark parameters ----
p.video = 'vot15_bag';     % sequence name (subfolder under the sequence base path)
p.visualization = false;   % display tracking output
p.gpus = 1;                % GPU device to use
p.bbox_output = false;     % write per-frame bounding boxes to p.fout
p.fout = -1;               % file id used when p.bbox_output is true

% ---- Parameters tied to the network architecture; must match training ----
p.exemplarSize = 127;  % exemplar (template) input size
p.instanceSize = 255;  % search-region input size
p.scoreSize = 17;      % size of the output score/confidence map
p.totalStride = 8;     % total stride of the network
p.contextAmount = 0.5; % context amount for the exemplar
p.subMean = false;     % subtract dataset RGB mean from the exemplar crop

% ---- SiamFC layer prefixes and variable ids ----
p.prefix_z = 'a_';        % layers belonging to the exemplar branch
p.prefix_x = 'b_';        % layers belonging to the instance branch
p.prefix_join = 'xcorr';  % cross-correlation join layer
p.prefix_adj = 'adjust';  % final adjust layer
p.id_feat_z = 'a_feat';   % variable holding the exemplar features
p.id_score = 'score';     % variable holding the score map
p = vl_argparse(p, varargin); % override defaults with caller-supplied values

% Resolve environment-specific paths (network model, video sequences).
p = env_paths_tracking(p);
% Load ImageNet-video statistics if available (used e.g. for mean subtraction).
 if exist(p.stats_path,'file')
     stats = load(p.stats_path);
 else
     warning('No stats found at %s', p.stats_path);
     stats = [];
 end
 
% Load the pretrained model twice: net_z with the GPU option, net_x without.
% NOTE(review): the GPU/CPU asymmetry is kept from the original code;
% presumably net_x is moved to the GPU elsewhere (or handled by x_crops
% already being gpuArrays) — confirm against load_pretrained/tracker_eval.
 net_z = load_pretrained([p.net_base_path p.net], p.gpus);
 net_x = load_pretrained([p.net_base_path p.net], []);
 % Load the video: frame images, initial target position and size.
 [imgFiles, targetPosition, targetSize] = load_video_info(p.seq_base_path, p.video);
 nImgs = numel(imgFiles); % number of frames in the sequence
 startFrame = 1;          % first frame index

% Split the network into its two branches.
% The exemplar branch computes the target features (used once per video).
remove_layers_from_prefix(net_z, p.prefix_x);    % drop instance layers from net_z
remove_layers_from_prefix(net_z, p.prefix_join); % drop the xcorr join from net_z
remove_layers_from_prefix(net_z, p.prefix_adj);  % drop the adjust layer from net_z
% The instance branch computes the search-region features and their
% cross-correlation with the exemplar features.
remove_layers_from_prefix(net_x, p.prefix_z);    % drop exemplar layers from net_x
zFeatId = net_z.getVarIndex(p.id_feat_z); % index of the exemplar-feature variable
scoreId = net_x.getVarIndex(p.id_score);  % index of the score-map variable

% Read the first frame, convert to single precision and move it to the GPU.
im = gpuArray(single(imgFiles{startFrame}));

% Grayscale images are replicated to three channels to match the filters.
if(size(im, 3)==1)
      im = repmat(im, [1 1 3]);
end

% Initialize the video player used for visualization, when available.
videoPlayer = [];
if p.visualization && isToolboxAvailable('Computer Vision System Toolbox')
      videoPlayer = vision.VideoPlayer('Position', [100 100 [size(im,2), size(im,1)]+30]);
end

% Per-channel mean of the first frame, used for padding the crops.
avgChans = gather([mean(mean(im(:,:,1))) mean(mean(im(:,:,2))) mean(mean(im(:,:,3)))]);

wc_z = targetSize(2) + p.contextAmount*sum(targetSize); % w + 0.5*(w+h)
hc_z = targetSize(1) + p.contextAmount*sum(targetSize); % h + 0.5*(w+h)
s_z = sqrt(wc_z*hc_z);          % exemplar region side length in frame pixels
scale_z = p.exemplarSize / s_z; % 127 / s_z

% Initialize the exemplar: crop the region around the initial target
% position and resize it to exemplarSize x exemplarSize x 3 (127x127x3).
[z_crop, ~] = get_subwindow_tracking(im, targetPosition, [p.exemplarSize p.exemplarSize], [round(s_z) round(s_z)], avgChans);
if p.subMean
     z_crop = bsxfun(@minus, z_crop, reshape(stats.z.rgbMean, [1 1 3]));
end
d_search = (p.instanceSize - p.exemplarSize)/2; % (255-127)/2 = 64
pad = d_search/scale_z;                         % search margin in frame pixels
s_x = s_z + 2*pad;                              % search region side length
% Allowed range for the search-region size.
min_s_x = 0.2*s_x;
max_s_x = 5*s_x;

switch p.windowing
        case 'cosine'
            window = single(hann(p.scoreSize*p.responseUp) * hann(p.scoreSize*p.responseUp)');
        case 'uniform'
            window = single(ones(p.scoreSize*p.responseUp, p.scoreSize*p.responseUp));
    end
% Normalize the window to sum to one.
window = window / sum(window(:));
% e.g. for numScale = 3, scaleStep = 1.0375: scales = [1/1.0375, 1, 1.0375]
% (ceil rounds up, floor rounds down).
scales = (p.scaleStep .^ ((ceil(p.numScale/2)-p.numScale) : floor(p.numScale/2)));

% Evaluate the offline-trained exemplar branch once to get the template
% features, then replicate them across the scale dimension.
net_z.eval({'exemplar', z_crop});
z_features = net_z.vars(zFeatId).value;
z_features = repmat(z_features, [1 1 1 p.numScale]);
% One [x, y, w, h] result row per frame.
bboxes = zeros(nImgs, 4);

% ---- Main tracking loop ----
tic;
    for i = startFrame:nImgs
        if i>startFrame
            % Load the frame onto the GPU.
            im = gpuArray(single(imgFiles{i}));
            % Grayscale frames are replicated to three channels.
            if(size(im, 3)==1)
                im = repmat(im, [1 1 3]);
            end
            % Search-region size at each scale.
            scaledInstance = s_x .* scales;
            % Target size at each scale.
            scaledTarget = [targetSize(1) .* scales; targetSize(2) .* scales];
            % Extract the multi-scale crop pyramid around the previous position.
            x_crops = make_scale_pyramid(im, targetPosition, scaledInstance, p.instanceSize, avgChans, stats, p);
            % Evaluate the network: position and scale of the peak response.
            [newTargetPosition, newScale] = tracker_eval(net_x, round(s_x), scoreId, z_features, x_crops, targetPosition, window, p);
            targetPosition = gather(newTargetPosition);
            % Damped update of the search-region size, clamped to its range.
            s_x = max(min_s_x, min(max_s_x, (1-p.scaleLR)*s_x + p.scaleLR*scaledInstance(newScale)));
            % Damped update of the target size.
            targetSize = (1-p.scaleLR)*targetSize + p.scaleLR*[scaledTarget(1,newScale) scaledTarget(2,newScale)];
        else
            % at the first frame output position and size passed as input (ground truth)
        end
        % Convert the [row, col] center + size into an [x, y, w, h] rectangle.
        rectPosition = [targetPosition([2,1]) - targetSize([2,1])/2, targetSize([2,1])];
        % output bbox in the original frame coordinates
        oTargetPosition = targetPosition; % .* frameSize ./ newFrameSize;
        oTargetSize = targetSize; % .* frameSize ./ newFrameSize;
        bboxes(i, :) = [oTargetPosition([2,1]) - oTargetSize([2,1])/2, oTargetSize([2,1])];

        if p.visualization
            if isempty(videoPlayer)
                figure(1), imshow(im/255);
                figure(1), rectangle('Position', rectPosition, 'LineWidth', 4, 'EdgeColor', 'y');
                drawnow
                % BUGFIX: print the actual frame index. The original printed
                % startFrame+i, which over-counts by startFrame (the loop
                % variable i already starts at startFrame).
                fprintf('Frame %d\n', i);
            else
                im = gather(im)/255;
                im = insertShape(im, 'Rectangle', rectPosition, 'LineWidth', 4, 'Color', 'yellow');
                % Display the annotated video frame using the video player object.
                step(videoPlayer, im);
            end
        end

        if p.bbox_output
            fprintf(p.fout,'%.2f,%.2f,%.2f,%.2f\n', bboxes(i, :));
        end

    end

    bboxes = bboxes(startFrame : i, :);

end
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章