TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀三

前言

接上上篇及上篇，本篇將繼續介紹跟trtcaffe::BlobProto有關的函數。

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.cpp

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "caffeMacros.h"
#include "caffeWeightFactory.h"
#include "half.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;

//...

/*
trtcaffe::BlobProto
定義於TensorRT/parsers/caffe/proto/trtcaffe.proto
message BlobProto {
  optional BlobShape shape = 7;
  repeated float data = 5 [packed = true]; //必為float型別
  repeated float diff = 6 [packed = true];
  repeated double double_data = 8 [packed = true]; //必為double型別
  repeated double double_diff = 9 [packed = true];
  // New raw storage (faster and takes 1/2 of space for FP16)
  optional Type raw_data_type = 10;
  optional Type raw_diff_type = 11;
  optional bytes raw_data = 12 [packed = false]; //有可能是半，單，雙精度
  optional bytes raw_diff = 13 [packed = false];
  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
*/
/*
 * Returns a pointer to blobMsg's payload expressed as `type`, paired with the
 * number of elements it holds.
 *
 * The payload is taken from raw_data, data or double_data:
 *   - If it is already stored as `type`, the returned pointer refers to blobMsg's
 *     own storage and nothing is allocated.
 *   - Otherwise a new buffer is malloc'ed, appended to tmpAllocs, filled with the
 *     converted values and returned. Presumably the owner of tmpAllocs frees these
 *     buffers later; that code is not visible here.
 *
 * Returns std::make_pair(nullptr, 0UL) when blobMsg holds no elements, when `type`
 * is neither FLOAT nor FLOAT16 (no conversion to other types is implemented), or
 * when the conversion buffer cannot be allocated.
 */
// The size returned here is the number of array entries, not bytes
std::pair<const void*, size_t> CaffeWeightFactory::getBlobProtoData(const trtcaffe::BlobProto& blobMsg,
                                                                        trtcaffe::Type type, std::vector<void*>& tmpAllocs)
{
    // NVCaffe new binary format. It may carry any type.
    // has_raw_data()/has_raw_data_type() are presumably the accessors protoc generates
    // for the optional raw_data/raw_data_type fields of BlobProto.
    if (blobMsg.has_raw_data())
    {
        assert(blobMsg.has_raw_data_type());
        // Only the "stored type == requested type" case is handled here; a mismatch
        // falls through to the conversion code below.
        if (blobMsg.raw_data_type() == type)
        {
            // Start of the byte string plus its length in elements. data() is used
            // rather than &front() because front() requires a non-empty string.
            return std::make_pair(blobMsg.raw_data().data(),
                                    blobMsg.raw_data().size() / sizeOfCaffeType(type));
        }
    }
    // Old BVLC format.
    // Single-precision values stored in `data` can be handed out directly.
    if (blobMsg.data_size() > 0 && type == trtcaffe::FLOAT)
    {
        return std::make_pair(&blobMsg.data().Get(0), blobMsg.data_size());
    }

    // Converting to the target type otherwise
    // Element count of the source: raw_data length in elements, else data_size,
    // else double_data_size.
    const int count = blobMsg.has_raw_data() ? blobMsg.raw_data().size() / sizeOfCaffeType(blobMsg.raw_data_type()) : (blobMsg.data_size() > 0 ? blobMsg.data_size() : blobMsg.double_data_size());

    if (count > 0)
    {
        void* new_memory = malloc(count * sizeOfCaffeType(type));
        // malloc returns a null pointer on failure; report "no data" instead of
        // writing through it in the conversion loops below.
        if (new_memory == nullptr)
        {
            return std::make_pair(nullptr, 0UL);
        }
        // Record the new buffer in tmpAllocs.
        tmpAllocs.push_back(new_memory);

        if (type == trtcaffe::FLOAT)
        {
            // Convert the source to single precision into dst (i.e. new_memory).
            auto* dst = reinterpret_cast<float*>(new_memory);
            // Source is raw_data.
            if (blobMsg.has_raw_data())
            {
                // raw_data holds half-precision values.
                if (blobMsg.raw_data_type() == trtcaffe::FLOAT16)
                {
                    // View the bytes as float16 and widen each element.
                    const auto* src = reinterpret_cast<const float16*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        dst[i] = float(src[i]);
                    }
                }
                // raw_data holds double-precision values.
                else if (blobMsg.raw_data_type() == trtcaffe::DOUBLE)
                {
                    // View the bytes as double and narrow each element.
                    const auto* src = reinterpret_cast<const double*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        dst[i] = float(src[i]);
                    }
                }
                // raw_data already stored as FLOAT was returned by the
                // "NVCaffe new binary format" branch above.
                // NOTE(review): any other raw_data_type leaves dst unwritten, yet
                // the buffer is still returned below.
            }
            // Source is double_data (BlobProto::double_data is always double).
            else if (blobMsg.double_data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    dst[i] = float(blobMsg.double_data(i));
                }
            }
            // `data` already stored as float was returned by the "Old BVLC format"
            // branch above.
            return std::make_pair(new_memory, count);
        }
        if (type == trtcaffe::FLOAT16)
        {
            // Convert the source to half precision into dst (i.e. new_memory).
            // float16 is declared in TensorRT/parsers/common/half.h as
            //   typedef half_float::half float16;
            auto* dst = reinterpret_cast<float16*>(new_memory);

            // Source is raw_data.
            if (blobMsg.has_raw_data())
            {
                if (blobMsg.raw_data_type() == trtcaffe::FLOAT)
                {
                    const auto* src = reinterpret_cast<const float*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        dst[i] = float16(src[i]);
                    }
                }
                else if (blobMsg.raw_data_type() == trtcaffe::DOUBLE)
                {
                    const auto* src = reinterpret_cast<const double*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        // double -> float -> float16
                        dst[i] = float16(float(src[i]));
                    }
                }
                // NOTE(review): any other raw_data_type leaves dst unwritten, yet
                // the buffer is still returned below.
            }
            // Source is data (BlobProto::data is always float).
            else if (blobMsg.data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    dst[i] = float16(blobMsg.data(i));
                }
            }
            // Source is double_data.
            else if (blobMsg.double_data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    dst[i] = float16(float(blobMsg.double_data(i)));
                }
            }
            return std::make_pair(new_memory, count);
        }
        // No conversion is implemented for any other target type (e.g. DOUBLE): the
        // buffer stays recorded in tmpAllocs and the failure value below is returned.
    }
    return std::make_pair(nullptr, 0UL);
}

// Reports the element type of the payload stored in blobMsg.
// The payload may live in raw_data, double_data or data.
trtcaffe::Type CaffeWeightFactory::getBlobProtoDataType(const trtcaffe::BlobProto& blobMsg)
{
    if (!blobMsg.has_raw_data())
    {
        // Without raw_data the type follows from which repeated field is populated:
        // double_data is always double, data is always float.
        const bool storedAsDouble = blobMsg.double_data_size() > 0;
        return storedAsDouble ? trtcaffe::DOUBLE : trtcaffe::FLOAT;
    }

    // raw_data is an untyped byte string; only raw_data_type says what it holds.
    assert(blobMsg.has_raw_data_type());
    return blobMsg.raw_data_type();
}

0UL

在getBlobProtoData函數中用到了0UL，詳見C 常數0L，0LL，0UL，0.0f，0.0L。

NVCaffe v.s. old BVLC

在getBlobProtoData函數裡，分成NVCaffe及old BVLC兩種情況來考慮。

根據NVCaffe User Guide：

Caffe is a deep-learning framework made with flexibility, speed, 
and modularity in mind. 
NVCaffe is an NVIDIA-maintained fork of BVLC Caffe 
tuned for NVIDIA GPUs, particularly in multi-GPU configurations.

我們可以知道，BVLC Caffe是原版的Caffe，NVCaffe則是NVIDIA所維護的fork，特別針對NVIDIA GPU（尤其是多GPU的配置）做了調校。

reinterpret_cast

在getBlobProtoData中將void*轉為float*時用到了reinterpret_cast：

void* new_memory = malloc(count * sizeOfCaffeType(type));
//...
auto* dst = reinterpret_cast<float*>(new_memory);

但是根據Should I use static_cast or reinterpret_cast when casting a void* to whatever，事實上對於這種情況，我們能用較安全的static_cast來取代，而不必用到reinterpret_cast。

在CaffeWeightFactory::checkForNans中將const void*轉為const T*時也用到了reinterpret_cast：

//const void* values
const T* v = reinterpret_cast<const T*>(values);

根據How to convert const void* to unsigned char*?，這種情況下同樣也能用static_cast來取代。

static member function

getBlobProtoDataType，sizeOfCaffeType及getBlobProtoData等三個函數是static member function。類別裡定義的static member function表示這些function屬於class本身，而不是屬於class所產生的instance。

為何要將它們定義為static而非一般的成員函數呢？一個可能的原因是它們不需要用到non-static的成員變數：static member function沒有this指標，因此不必先建立CaffeWeightFactory的instance就能呼叫，編譯器也會禁止它們存取non-static成員，等於由語言來保證「這些函數不依賴物件狀態」。

Follow Up：CaffeWeightFactory::checkForNans同樣也未使用到non-static的成員變數，為何沒被定義成static呢？

參考連結

C 常數0L，0LL，0UL，0.0f，0.0L

NVCaffe User Guide

Should I use static_cast or reinterpret_cast when casting a void* to whatever

How to convert const void* to unsigned char*?

C/C++ 中的 static, extern 的變數

Can static function access non static variables in C++?

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀一

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀二

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀三

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀三

前言

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.cpp

0UL

NVCaffe v.s. old BVLC

reinterpret_cast

static member function

參考連結

如何使用 JS 判斷用戶是否處於活躍狀態

通過HPA+CronHPA組合應對業務複雜彈性伸縮場景

❤️‍🔥 Solon Cloud Event 新的事務特性與應用

TensorRT/parsers/caffe/caffeParser/opParsers/opParsers.h源碼研讀

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀一

C size_t

C 常數0L，0LL，0UL，0.0f，0.0L

TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h，caffeWeightFactory.cpp源碼研讀三

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結