商湯科技(2018 COCO 目標檢測挑戰賽冠軍)和香港中文大學最近開源了一個基於Pytorch實現的深度學習目標檢測工具箱mmdetection,支持Faster-RCNN,Mask-RCNN,Fast-RCNN等主流的目標檢測框架,後續會加入Cascade-RCNN以及其他一系列目標檢測框架。
相比於Facebook開源的Detectron框架,作者聲稱mmdetection有三點優勢:performance稍高、訓練速度稍快、所需顯存稍小。
我很早就聽說了這個工具箱,但是一直沒有開源。現在總算是開源了,發現官方沒有對Windows系統進行適配,於是就迫不及待地對win10 進行了適配。下面將記錄一下適配的具體過程。
首先官方給出的編譯的方法是./compile.sh 我們發現這裏面其實是執行了4 個python腳本,但是這4個setup.py 在win下執行會報錯,我修改了一個版本。
首先dcn 目錄下的setup.py 修改爲兩個文件,否則鏈接時候會出現錯誤。分別爲setup_conv.py setup_pool.py
import os
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension,CppExtension,CUDA_HOME
import torch
def get_extensions():
    """Build the setuptools extension list for deform_conv.

    Uses CUDAExtension when a CUDA toolkit is detected, otherwise falls
    back to CppExtension.  NOTE(review): the .cu kernel source is passed
    even on the CPU fallback path, so a CUDA-less build will fail at
    compile time -- this mirrors the original behavior.

    Returns:
        list: one CppExtension/CUDAExtension named 'deform_conv_cuda'.
    """
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []
    sources = [
        'src/deform_conv_cuda.cpp',
        'src/deform_conv_cuda_kernel.cu',
    ]
    # Prefer the CUDA build whenever a toolkit is available.
    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        # nvcc flags: the -D__CUDA_NO_HALF* family disables CUDA
        # half-precision operator overloads.
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    return [
        extension(
            "deform_conv_cuda",
            sources,
            # Bug fix: define_macros was computed but never passed.
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        ),
    ]
# Register the deformable-convolution extension; BuildExtension supplies
# the custom build_ext that understands mixed C++/CUDA sources.
modules = get_extensions()
setup(
    name='deform_conv',
    cmdclass={'build_ext': BuildExtension},
    ext_modules=modules,
)
import os
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension,CppExtension,CUDA_HOME
import torch
def get_extensions():
    """Build the setuptools extension list for deform_pool.

    Uses CUDAExtension when a CUDA toolkit is detected, otherwise falls
    back to CppExtension.  NOTE(review): the .cu kernel source is passed
    even on the CPU fallback path, so a CUDA-less build will fail at
    compile time -- this mirrors the original behavior.

    Returns:
        list: one CppExtension/CUDAExtension named 'deform_pool_cuda'.
    """
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []
    sources = [
        'src/deform_pool_cuda.cpp',
        'src/deform_pool_cuda_kernel.cu',
    ]
    # Prefer the CUDA build whenever a toolkit is available.
    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        # nvcc flags: the -D__CUDA_NO_HALF* family disables CUDA
        # half-precision operator overloads.
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    return [
        extension(
            "deform_pool_cuda",
            sources,
            # Bug fix: define_macros was computed but never passed.
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        ),
    ]
# Register the deformable-pooling extension.  Bug fix: the distribution
# name was copy-pasted as 'deform_conv' from the sibling setup script;
# corrected to 'deform_pool' so the two builds don't collide.
setup(
    name='deform_pool',
    ext_modules=get_extensions(),
    cmdclass={'build_ext': BuildExtension})
接着 nms 目錄下,
import os.path as osp
from setuptools import setup, Extension
import numpy as np
from Cython.Build import cythonize
from Cython.Distutils import build_ext
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
ext_args = dict(
include_dirs=[np.get_include()],
language='c++',
extra_compile_args={
'cc': ['-Wno-unused-function', '-Wno-write-strings'],
'nvcc': ['-c', '--compiler-options', '-fPIC'],
},
)
extensions = [
Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args),
]
def customize_compiler_for_nvcc(self):
    """Inject deep into distutils to customize how the dispatch
    to cc/nvcc works.

    Monkey-patches *self* (a distutils CCompiler instance) so that
    ``.cu`` sources are routed to nvcc while everything else keeps the
    default compiler.  The instance is patched instead of subclassed
    because distutils.sysconfig.customize_compiler would not run on a
    subclass.
    """
    # Tell the compiler it can process .cu files.
    self.src_extensions.append('.cu')

    # Keep a reference to the stock per-object compile hook.
    # (Renamed from `super`, which shadowed the builtin.)
    default_compile = self._compile

    # Redefine the _compile method: distutils cannot switch compilers
    # based on source extension, so we add that dispatch here.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        if osp.splitext(src)[1] == '.cu':
            # Bug fix: set_executable takes (key, value); the original
            # call `self.set_executable('nvcc')` was missing the
            # 'compiler_so' key and raised TypeError.
            self.set_executable('compiler_so', 'nvcc')
            # Use only the nvcc subset of extra_postargs, which map 1-1
            # from the dict-valued extra_compile_args in the Extension.
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['cc']
        default_compile(obj, src, ext, cc_args, postargs, pp_opts)

    # Inject our redefined _compile method into the instance.
    self._compile = _compile
class custom_build_ext(build_ext):
    """Cython build_ext that also teaches the compiler about nvcc."""

    def build_extensions(self):
        # Patch the freshly-created compiler object before delegating
        # to the stock Cython build.
        customize_compiler_for_nvcc(self.compiler)
        super().build_extensions()
# Build the Cython soft-NMS extension using the nvcc-aware build_ext.
cython_modules = cythonize(extensions)
setup(
    name='soft_nms',
    ext_modules=cython_modules,
    cmdclass={'build_ext': custom_build_ext},
)
# CUDA NMS plus a CPU fallback.  NOTE(review): nms_cpu has no .cu source
# yet is still declared as a CUDAExtension, matching the original script.
nms_modules = [
    CUDAExtension('nms_cuda', ['src/nms_cuda.cpp', 'src/nms_kernel.cu']),
    CUDAExtension('nms_cpu', ['src/nms_cpu.cpp']),
]
setup(
    name='nms_cuda',
    cmdclass={'build_ext': BuildExtension},
    ext_modules=nms_modules,
)
roi_align 目錄下
import os
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension,CppExtension,CUDA_HOME
import torch
def get_extensions():
    """Build the setuptools extension list for roi_align.

    Uses CUDAExtension when a CUDA toolkit is detected, otherwise falls
    back to CppExtension.  NOTE(review): the .cu kernel source is passed
    even on the CPU fallback path, so a CUDA-less build will fail at
    compile time -- this mirrors the original behavior.

    Returns:
        list: one CppExtension/CUDAExtension named 'roi_align_cuda'.
    """
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []
    sources = [
        'src/roi_align_cuda.cpp',
        'src/roi_align_kernel.cu',
    ]
    # Prefer the CUDA build whenever a toolkit is available.
    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        # nvcc flags: the -D__CUDA_NO_HALF* family disables CUDA
        # half-precision operator overloads.
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    return [
        extension(
            "roi_align_cuda",
            sources,
            # Bug fix: define_macros was computed but never passed.
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        ),
    ]
# Register the RoI-Align extension; BuildExtension supplies the
# custom build_ext that understands mixed C++/CUDA sources.
modules = get_extensions()
setup(
    name='roi_align_cuda',
    cmdclass={'build_ext': BuildExtension},
    ext_modules=modules,
)
roi_pool 目錄下
import os
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension,CppExtension,CUDA_HOME
import torch
def get_extensions():
    """Build the setuptools extension list for roi_pool.

    Uses CUDAExtension when a CUDA toolkit is detected, otherwise falls
    back to CppExtension.  NOTE(review): the .cu kernel source is passed
    even on the CPU fallback path, so a CUDA-less build will fail at
    compile time -- this mirrors the original behavior.

    Returns:
        list: one CppExtension/CUDAExtension named 'roi_pool_cuda'.
    """
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []
    sources = [
        'src/roi_pool_cuda.cpp',
        'src/roi_pool_kernel.cu',
    ]
    # Prefer the CUDA build whenever a toolkit is available.
    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        # nvcc flags: the -D__CUDA_NO_HALF* family disables CUDA
        # half-precision operator overloads.
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]
    return [
        extension(
            "roi_pool_cuda",
            sources,
            # Bug fix: define_macros was computed but never passed.
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        ),
    ]
# Register the RoI-Pool extension; BuildExtension supplies the
# custom build_ext that understands mixed C++/CUDA sources.
modules = get_extensions()
setup(
    name='roi_pool_cuda',
    cmdclass={'build_ext': BuildExtension},
    ext_modules=modules,
)
至此,就修改完了全部的setup.py 然後去各個目錄分別執行python setup.py build_ext --inplace
執行完成之後,到項目根目錄下執行 python setup.py install 即可。
注意,VC++ 版本要和cuda 版本對應上,pytorch版本1.0 以上,
Windows 不支持 torch.distributed 庫,訓練時候,可能需要相應的修改源碼,否則會報錯。