Gluon基礎
1、data模塊:數據讀取接口和數據增強操作
2、nn模塊:定義網絡結構的層結構
3、model zoo模塊:提供豐富的網絡結構定義,加載預訓練模型和直接使用定義好的模型;還可以修改現有的網絡結構;
1、data模塊
(1)vision模塊–>datasets模塊和transforms模塊
(2)使用datasets下載mnist、fashionMnist和cifar10等
MXNet官網的講解:官方鏈接
裏面包含具體的使用方法和具體的參數說明;講解的很簡單明瞭
gluon.data.transforms 的源代碼可在 MXNet 的 GitHub 倉庫中查看(python/mxnet/gluon/data/vision/transforms.py)
from mxnet.gluon.data import vision
import matplotlib.pyplot as plt

# Human-readable names for the ten CIFAR-10 classes; the dataset's integer
# label is the index into this list.
label_text = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

# Load CIFAR-10.  `root` is where the data is downloaded/stored (here a
# path relative to the script, but any location works); train=True selects
# the training split, False the test split.  transform=None returns raw
# (image, label) pairs.
cifar10 = vision.datasets.CIFAR10(root='data/', train=True, transform=None)

fig = plt.figure()
img_num = 6
for i in range(img_num):
    # Place the i-th image in a 2x3 grid (subplot indices start at 1).
    fig.add_subplot(2, 3, i + 1)
    # Each item is (image, label); label is an int 0-9, mapped to its
    # class-name string for the title.
    img, label = cifar10[i]
    plt.imshow(img.asnumpy())
    plt.title(label_text[label])
    # Hide the axis tick marks.
    plt.xticks([])
    plt.yticks([])
# Save once, after all six subplots are drawn.
plt.savefig('cifar10_image.png')
(圖:cifar10_image.png——上面代碼繪製的 6 張 CIFAR-10 樣例,按 2 行 3 列排列)
數據增強模塊transform
1、常用的數據增強的接口
import mxnet as mx
from mxnet.gluon.data import vision
# Build a random uint8 "image" with values in [0, 255]; the ToTensor
# transform below will map these into [0, 1].
input_data = mx.nd.random.uniform(low=0, high=255, shape=(2, 4, 3)).astype('uint8')
print(input_data)
[[[139 151 182]
[215 153 218]
[138 216 108]
[159 164 98]]
[[111 75 227]
[ 14 245 69]
[ 97 121 201]
[207 134 122]]]
<NDArray 2x4x3 @cpu(0)>
# ToTensor: convert an HxWxC uint8 image into a CxHxW float32 tensor with
# values scaled into [0, 1] (MXNet's batch layout is [N, C, H, W]).
import mxnet as mx
to_tensor = vision.transforms.ToTensor()
tensor_data = to_tensor(input_data)
print(tensor_data)
[[[0.54509807 0.84313726 0.5411765 0.62352943]
[0.43529412 0.05490196 0.38039216 0.8117647 ]]
[[0.5921569 0.6 0.84705883 0.6431373 ]
[0.29411766 0.9607843 0.4745098 0.5254902 ]]
[[0.7137255 0.85490197 0.42352942 0.38431373]
[0.8901961 0.27058825 0.7882353 0.47843137]]]
<NDArray 3x2x4 @cpu(0)>
# Normalize: subtract the given mean and divide by the given standard
# deviation, applied to the tensor produced by ToTensor above.
normalizer = vision.transforms.Normalize(mean=0.13, std=0.31)
normal_data = normalizer(tensor_data)
print(normal_data)
[[[ 1.339026 2.3004427 1.3263758 1.5920304 ]
[ 0.98481977 -0.24225174 0.80771667 2.199241 ]]
[[ 1.4908286 1.5161291 2.313093 1.6552815 ]
[ 0.5294118 2.6799493 1.1113219 1.275775 ]]
[[ 2.3023403 2.7577484 1.3662239 1.2397217 ]
[ 2.8716002 0.8728653 2.5426946 1.543327 ]]]
<NDArray 3x2x4 @cpu(0)>
# CenterCrop: cut a patch of size (width=1000, height=500) out of the
# centre of the image.
import numpy as np
center_crop = vision.transforms.CenterCrop(size=(1000, 500))
image = mx.nd.random.uniform(0, 255, (2321, 3485, 3)).astype(dtype=np.uint8)
center_crop(image)
[[[ 88 175 202]
[193 50 201]
[ 4 162 113]
...
[148 132 64]
[138 117 194]
[162 38 155]]
...
[ 36 119 187]
[ 34 212 226]
[137 165 90]]]
<NDArray 500x1000x3 @cpu(0)>
# Compose: chain several transforms, applied in list order.  The input
# must satisfy the first transform's size requirements; the output shape
# is determined by the last transform.
# NOTE: the transforms submodule must be imported explicitly — referring
# to a bare `transforms` after only `from mxnet.gluon.data import vision`
# raises NameError.
from mxnet.gluon.data.vision import transforms
transformer = transforms.Compose([transforms.Resize(300),
                                  transforms.CenterCrop(256),
                                  transforms.ToTensor()])
image = mx.nd.random.uniform(0,255,(244,244,3)).astype(dtype=np.uint8)
transformer(image)
注意:若只執行了 from mxnet.gluon.data import vision 而直接寫 transforms.Resize(...),會拋出
NameError: name 'transforms' is not defined。
解決辦法:先 from mxnet.gluon.data.vision import transforms,或改用 vision.transforms.Resize(...) 等完整寫法。
# CropResize: crop a fixed region from an image.
# x and y are the offsets of the crop's top-left corner (x = distance from
# the left edge, y = distance from the top edge); width and height give
# the size of the cropped region.
transformer = vision.transforms.CropResize(x=0,y=0,width=100,height=100)
image = mx.nd.random.uniform(0,255,(224,224,3)).astype(dtype=np.uint8)
transformer(image)
# A leading batch dimension is passed through unchanged — note the
# output shape below.
image = mx.nd.random.uniform(0,255,(3,224,224,3)).astype(dtype=np.uint8)
transformer(image)
# Optionally resize the cropped patch to `size`; interpolation=1 selects
# bilinear interpolation.
transformer = vision.transforms.CropResize(x=0,y=0,width=100,height=100,size=(50,50),interpolation=1)
transformer(image)
[[[[140 137 132]
[167 117 108]
[110 98 133]
...
[168 140 91]
[141 111 97]
[153 177 124]]
...
[130 210 129]
[122 85 156]
[106 115 124]]
[[170 158 132]
[207 74 170]
[100 85 165]
...
[ 90 139 178]
[175 203 78]
[175 92 112]]]]
<NDArray 3x50x50x3 @cpu(0)>
# Normalize with per-channel statistics: for each channel i,
# output[i] = (input[i] - mean[i]) / std[i].
# mean and std accept either a single float or a per-channel tuple.
normalize = vision.transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
image = mx.nd.random.uniform(0, 1, (3, 4, 2))
normalize(image)
[[[ 8.5686486e-05 1.7225291e-01]
[ 1.7098707e-01 2.8412196e-01]
[ 1.8576695e-01 4.0903598e-02]
[ 6.7590825e-02 4.6748843e-02]]
[[-1.9833636e-01 -5.0588846e-03]
[-3.8575363e-01 -4.3460625e-01]
[-3.5424203e-01 -1.8465784e-01]
[-1.3795346e-02 -1.8049380e-01]]
[[-1.4975955e+00 -1.6129224e+00]
[-1.8787965e+00 -1.5643482e+00]
[-1.1881535e+00 -1.4829817e+00]
[-1.9154357e+00 -1.5232810e+00]]]
<NDArray 3x4x2 @cpu(0)>
# RandomBrightness: scale brightness by a random factor drawn from
# [max(0, 1 - 0.1), 1 + 0.1].
brightness_aug = vision.transforms.RandomBrightness(0.1)
image = mx.nd.random.uniform(0, 255, (2, 4, 3)).astype(dtype=np.uint8)
brightness_aug(image)
[[[188 161 76]
[ 16 217 58]
[ 27 145 127]
[144 18 61]]
[[221 23 240]
[ 53 181 136]
[222 97 40]
[ 75 117 12]]]
<NDArray 2x4x3 @cpu(0)>
# RandomColorJitter: randomly jitter brightness, contrast, saturation
# and hue together.
# The brightness factor is drawn from [max(0, 1 - brightness), 1 + brightness];
# the other three parameters work the same way.
transformer = vision.transforms.RandomColorJitter(brightness=0,contrast=0,
saturation=0,hue=0)
# RandomContrast: factor drawn from [max(0, 1 - contrast), 1 + contrast];
# essentially all of these random transforms compute their range this way.
transformer = vision.transforms.RandomContrast(contrast=0)
# Randomly flip the image left-right, with probability 0.5 by default.
transformer = vision.transforms.RandomFlipLeftRight()
# Randomly flip the image top-bottom, also probability 0.5 by default.
transformer = vision.transforms.RandomFlipTopBottom()
# Randomly adjust the hue.
transformer = vision.transforms.RandomHue(hue=1)
# Add AlexNet-style PCA-based noise to an image.
transformer = vision.transforms.RandomLighting(alpha=0.1)
# Crop with a random area scale and aspect ratio, then resize to `size`.
transformer = vision.transforms.RandomResizedCrop(size=(224,224),scale=(0.08,1.0),ratio=(0.75,1.333333333333333),
interpolation=1)
# Randomly adjust the saturation.
transformer = vision.transforms.RandomSaturation(saturation=1)
# Resize the image to (width=1000, height=500).
# NOTE(review): Resize operates on HWC images, so in a pipeline it should
# come BEFORE ToTensor, not after (the original note had the order
# reversed).
transformer = vision.transforms.Resize(size=(1000,500))
image = mx.nd.random.uniform(0,255,(224,224,3)).astype(dtype=np.uint8)
transformer(image)
# A leading batch dimension is passed through unchanged — note the
# output shape below.
image = mx.nd.random.uniform(0,255,(3,224,224,3)).astype(dtype=np.uint8)
transformer(image)
[[[[ 45 177 222]
[ 45 177 222]
[ 46 181 219]
...
[136 222 98]
[140 223 100]
[140 223 100]]
[[ 63 156 197]
[ 63 156 197]
[ 63 160 195]
[[ 16 226 10]
[ 16 226 10]
[ 26 215 16]
...
[133 153 195]
[141 156 200]
[141 156 200]]]]
<NDArray 3x500x1000x3 @cpu(0)>
# ToTensor: converts an image NDArray of shape (H x W x C) in the range
# [0, 255] to a float32 tensor NDArray of shape (C x H x W) in [0, 1].
to_tensor = vision.transforms.ToTensor()
image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
to_tensor(image)
[[[0.12941177 0.8901961 ]
[0.74509805 0.24705882]
[0.2784314 0.1764706 ]
[0.8352941 0.64705884]]
[[0.9882353 0.8901961 ]
[0.6039216 0.6 ]
[0.3254902 0.7490196 ]
[0.07843138 0.8392157 ]]
[[0.77254903 0.69411767]
[0.9490196 0.22352941]
[0.8666667 0.76862746]
[0.5254902 0.8901961 ]]]
<NDArray 3x4x2 @cpu(0)>