Caffe SourceCode Learning-solve()

原創

2018-12-08 00:38

初始化完Solver類之後，調用基類Solver::Solve()進行迭代

/**
 * Runs the optimization to completion.
 *
 * Optionally restores solver state from `resume_file`, runs Step() for the
 * iterations remaining up to param_.max_iter(), takes a final snapshot when
 * configured, and then performs one last forward/test pass for reporting.
 *
 * @param resume_file Path of a solver state to restore before training, or
 *                    a null pointer to start from the current state.
 */
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
  CHECK(Caffe::root_solver());
  LOG(INFO) << "Solving " << net_->name();
  LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();

  // Initialize to false every time we start solving.
  requested_early_exit_ = false;

  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }

  // Remember the iteration this run starts from (after any restore); it is
  // passed to UpdateSmoothedLoss() for the final reporting pass below.
  const int start_iter = iter_;

  // Run the remaining training iterations.
  Step(param_.max_iter() - iter_);

  // If we haven't already, save a snapshot after optimization, unless
  // overridden by setting snapshot_after_train := false
  if (param_.snapshot_after_train()
      && (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
    Snapshot();
  }
  if (requested_early_exit_) {
    LOG(INFO) << "Optimization stopped early.";
    return;
  }
  // After the optimization is done, run an additional train and test pass to
  // display the train and test loss/outputs if appropriate (based on the
  // display and test_interval settings, respectively). Unlike in the rest of
  // training, for the train net we only run a forward pass as we've already
  // updated the parameters "max_iter" times -- this final pass is only done to
  // display the loss, which is computed in the forward pass.
  if (param_.display() && iter_ % param_.display() == 0) {
    int average_loss = this->param_.average_loss();
    Dtype loss;
    net_->Forward(&loss);

    UpdateSmoothedLoss(loss, start_iter, average_loss);

    LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
  }
  if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
    TestAll();
  }
  LOG(INFO) << "Optimization Done.";
}

/**
 * Runs up to `iters` training iterations starting from the current iter_.
 *
 * Each iteration clears the parameter gradients, optionally runs the test
 * nets, accumulates loss and gradients over param_.iter_size() batches,
 * updates the smoothed loss, optionally logs the outputs, applies the weight
 * update, and optionally snapshots. The loop ends early when a STOP action
 * is requested, in which case requested_early_exit_ is set.
 *
 * @param iters Number of iterations to run.
 */
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;

  // Number of recent losses to average for reporting, read from the solver
  // parameters (the original annotation states the default is 1).
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;

  while (iter_ < stop_iter) {
    // zero-init the params
    // (clears the gradient data left over from the previous backward pass)
    net_->ClearParamDiffs();

    // Run a test pass every test_interval iterations; at iteration 0 only
    // when test_initialization is set.
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())
        && Caffe::root_solver()) {
      TestAll();
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());

    // accumulate the loss and gradient
    Dtype loss = 0;
    // Sum over iter_size batches, so one weight update can cover more data
    // than a single batch that fits in memory.
    for (int i = 0; i < param_.iter_size(); ++i) {
      // One forward pass followed by one backward pass.
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();

    // average the loss across iterations for smoothed reporting
    // NOTE(review): UpdateSmoothedLoss is defined elsewhere. Per the original
    // annotation: with average_loss == 1, losses_ holds only the current loss
    // and smoothed_loss_ equals it; with average_loss == n, losses_ holds the
    // last n losses and smoothed_loss_ is their mean -- confirm against the
    // helper.
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (display) {
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << ", loss = " << smoothed_loss_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          // Only outputs with a non-zero loss weight get the weighted-loss
          // suffix.
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << " Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }
    }
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_gradients_ready();
    }

    // Update the weights.
    ApplyUpdate();

    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;

    SolverAction::Enum request = GetRequestedAction();

    // Save a snapshot if needed.
    if ((param_.snapshot()
         && iter_ % param_.snapshot() == 0
         && Caffe::root_solver()) ||
         (request == SolverAction::SNAPSHOT)) {
      Snapshot();
    }
    if (SolverAction::STOP == request) {
      requested_early_exit_ = true;
      // Break out of training loop.
      break;
    }
  }
}

# net.hpp（ForwardBackward() 以內聯方式定義在 Net 類的頭文件中）

// Runs one forward pass followed by one backward pass and returns the loss
// written by the forward pass.
Dtype ForwardBackward() {
  Dtype loss;
  // Call chain per the original annotation:
  // Net<Dtype>::Forward(Dtype* loss) -> Net<Dtype>::ForwardFromTo(int start, int end)
  Forward(&loss);
  // Call chain per the original annotation:
  // Net<Dtype>::Backward() -> Net<Dtype>::BackwardFromTo(int start, int end)
  Backward();
  return loss;
}

前向計算中卷積的實現原理（即 im2col 加矩陣乘法）：以每個像素位置作爲卷積模板的中心，把模板覆蓋的區域展開成一維數組（K*K），再把各通道（如BGR）依次拼接（C*K*K）。這樣的一維數組一共有H*W個，構成一個（H*W）*（C*K*K）的矩陣。每一組卷積模板同樣展開成一維形式（K*K），並按通道依次拼接成一維數組（C*K*K），共Cout個，構成Cout*（C*K*K）的矩陣。兩個矩陣相乘（即逐行做點積）之後，再reshape就可以得到Cout*(H*W)的特徵圖。

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Caffe SourceCode Learning-solve()

關於遊戲付費的一點想法

我通過CKA和CKS啦！

AI知識基礎筆記

Pytorch-讀數據 or 模型

目標檢測-Review

Caffe SourceCode Learning-solve()

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結