python3使用pickle讀取文件提示TypeError或者UnicodeDecodeError的解決辦法

一、分割數據成train和test 代碼：

# data segmentation code # resize origin code to match classifier model and split the data set into training and test set
import sys
import os
import shutil
import csv
import subprocess
import random
import time
import itertools
from PIL import Image

# UCM path
# Input tree (one sub-directory per category) and output root for the split.
imagesPath = 'data/EuroSAT-databack'
converted_path = 'gen'

# NUPW Path
#imagesPath = '/home/hpc-126/remote-host/NUPW-45/NWPU-RESISC45'
#converted_path ='/home/hpc-126/remote-host/NUPW-45/train224x224'

train_path = ''
test_path = ''
# Every image is resized to imageWidth x imageHeight before being saved.
imageWidth = 32
imageHeight = 32
split_ratio = 0.80  # ratio of train and test set size
# Selects which category -> numeric label table is used below.
datatype = 'euro'
labels = ''
if datatype == 'euro':
    labels = {
        'SeaLake': 9,
        'River': 8,
        'PermanentCrop': 6,
        'AnnualCrop': 0,
        'Pasture': 5,
        'Forest': 1,
        'HerbaceousVegetation': 2,
        'Highway': 3,
        'Residential': 7,
        'Industrial': 4,
    }
elif datatype == 'NUPW':
    labels = {
        'airplane': 0,
        'airport': 1,
        'baseball_diamond': 2,
        'basketball_court': 3,
        'beach': 4,
        'bridge': 5,
        'chaparral': 6,
        'church': 7,
        'circular_farmland': 8,
        'cloud': 9,
        'commercial_area': 10,
        'dense_residential': 11,
        'desert': 12,
        'forest': 13,
        'freeway': 14,
        'golf_course': 15,
        'ground_track_field': 16,
        'harbor': 17,
        'industrial_area': 18,
        'intersection': 19,
        'island': 20,
        'lake': 21,
        'meadow': 22,
        'medium_residential': 23,
        'mobile_home_park': 24,
        'mountain': 25,
        'overpass': 26,
        'palace': 27,
        'parking_lot': 28,
        'railway': 29,
        'railway_station': 30,
        'rectangular_farmland': 31,
        'river': 32,
        'roundabout': 33,
        'runway': 34,
        'sea_ice': 35,
        'ship': 36,
        'snowberg': 37,
        'sparse_residential': 38,
        'stadium': 39,
        'storage_tank': 40,
        'tennis_court': 41,
        'terrace': 42,
        'thermal_power_station': 43,
        'wetland': 44,
    }
else:
    # Unknown datatype: `labels` stays '' and later category lookups will fail.
    print('please specify the data type : euro NUPW')

def remove_dir(path):
    """Recursively delete the directory tree at *path*.

    A path that does not exist is silently ignored; any other OSError
    raised by ``shutil.rmtree`` is propagated to the caller.
    """
    # Local import so this block does not depend on a new file-level import.
    import errno

    try:
        shutil.rmtree(path)
    except OSError as e:  # "except OSError, e" is Python-2-only syntax
        # ENOENT (errno 2) means there was nothing to delete.
        if e.errno != errno.ENOENT:
            raise

def convert_images(path):
    """Resize every image under *path* and split it into train/test folders.

    Each sub-directory of *path* is treated as one category; its base name
    is looked up in the module-level ``labels`` table and the numeric label
    is used as the output folder name under ``<converted_path>/train`` and
    ``<converted_path>/test``.  Files are shuffled per category and the
    first ``int(len(files) * split_ratio)`` of them go to the training set,
    the rest to the test set.  A ``.tif`` in the file name is replaced by
    ``.jpg`` for the saved copy.

    Returns:
        An empty list (kept for interface compatibility; nothing is ever
        appended to it).

    Raises:
        KeyError: if a sub-directory name is not a key of ``labels``.
        OSError: if an output directory already exists or an image cannot
            be read or written.
    """
    images = []
    train_path = os.path.join(converted_path, 'train')
    test_path = os.path.join(converted_path, 'test')
    os.mkdir(train_path)
    os.mkdir(test_path)
    target_size = (imageWidth, imageHeight)
    for root, dirs, files in os.walk(path):
        # The top-level directory only holds the category folders.
        if root == path:
            continue
        category = os.path.basename(root)
        label = labels[category]
        UCMjpgpath_train = os.path.join(train_path, str(label))
        UCMjpgpath_test = os.path.join(test_path, str(label))
        os.mkdir(UCMjpgpath_train)
        os.mkdir(UCMjpgpath_test)
        random.shuffle(files)
        # Loop-invariant: number of files that go to the training split.
        train_count = int(len(files) * split_ratio)
        for count, name in enumerate(files):
            # str.replace is a no-op when '.tif' does not occur in the name.
            jpeg_name = name.replace(".tif", ".jpg")
            if count < train_count:
                target_dir = UCMjpgpath_train
            else:
                target_dir = UCMjpgpath_test
            # Context manager closes the source file even if saving fails.
            with Image.open(os.path.join(root, name)) as im:
                if im.size != target_size:
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
                    # the same filter under its current name.
                    resized = im.resize(target_size, Image.LANCZOS)
                else:
                    resized = im
                resized.save(os.path.join(target_dir, jpeg_name))
    return images

def main(argv):
    """Rebuild the output tree from scratch and run the conversion.

    Any existing ``converted_path`` directory is deleted first, then it is
    recreated empty and ``convert_images(imagesPath)`` fills it.

    Args:
        argv: command-line arguments; accepted but not used.
    """
    if os.path.exists(converted_path):
        remove_dir(converted_path)
    os.mkdir(converted_path)
    convert_images(imagesPath)


if __name__ == "__main__":
    main(sys.argv)
二、執行轉化pickle文件操作代碼：（python3下用pickle庫 python2 用cpickle 但pickle讀取時候出錯）

import PIL.Image as Image
from scipy.misc import imsave
import numpy as np
import random
import pickle
import os

#
def initPKL(imgSet_shuffle, train_or_test):
    """Pickle a sample collection as an ``(images, labels, label_names)`` tuple.

    Args:
        imgSet_shuffle: iterable of samples; each sample is indexed as
            ``sample[0]`` (image data), ``sample[1]`` (label) and
            ``sample[2]`` (label name).
        train_or_test: ``'train'`` writes ``trainSet.pkl``; any other value
            writes ``testSet.pkl``.  The file is created in the current
            working directory.
    """
    if train_or_test == 'train':
        set_name = 'trainSet.pkl'
    else:
        set_name = 'testSet.pkl'

    # Materialise once so a one-shot iterable is not consumed three times.
    samples = list(imgSet_shuffle)
    imgSet = np.array([sample[0] for sample in samples])
    labels = np.array([sample[1] for sample in samples])
    label_names = np.array([sample[2] for sample in samples])

    data = (imgSet, labels, label_names)
    # `with` guarantees the file is flushed and closed even if dump() raises.
    with open(set_name, 'wb') as output:
        pickle.dump(data, output)

def initArr(folders_path, num_classes=10):
    """Load every image below *folders_path* with a one-hot class label.

    Each sub-folder of *folders_path* is one class.  Folders are visited in
    sorted order (numeric names by value, then other names alphabetically)
    and the n-th folder gets the one-hot vector with a 1 at index n, so the
    mapping is the same on every run and for every directory that has the
    same folder names.  ``os.listdir`` alone gives no ordering guarantee.

    Args:
        folders_path: directory containing one sub-folder per class.
        num_classes: length of the one-hot label vector (default 10).

    Returns:
        A list of ``(img_arr, label, folder)`` tuples, where ``img_arr`` is
        the scaled pixel array, ``label`` the one-hot list and ``folder``
        the sub-folder name.

    Raises:
        IndexError: if there are more sub-folders than ``num_classes``.
    """

    def _folder_key(name):
        # Numeric folder names sort by value ('2' before '10').
        if name.isdigit():
            return (0, int(name), '')
        return (1, 0, name)

    imgSet = []
    folders = sorted(os.listdir(folders_path), key=_folder_key)

    for i, folder in enumerate(folders):
        folder_path = os.path.join(folders_path, folder)
        files = os.listdir(folder_path)
        # One list per folder, shared by all samples of that folder.
        label = [0] * num_classes
        label[i] = 1
        for file in files:
            img_path = os.path.join(folder_path, file)
            with Image.open(img_path) as im:
                # NOTE(review): assuming 8-bit pixels, this maps 0..255 onto
                # [-1, 3], not [-1, 1]; presumably `/ 127.5 - 1.0` was
                # intended - confirm before changing the stored values.
                img_arr = np.array(im) / 127.5 * 2.0 - 1.0
            print(img_path)
            imgSet.append((img_arr, label, folder))
    return imgSet

#
# Folders produced by the train/test split step.
train_folders_path = 'gen/train'
test_folders_path = 'gen/test/'

train_imgSet = initArr(train_folders_path)
test_imgSet = initArr(test_folders_path)

# Shuffle so that samples of one class are not stored contiguously.
random.shuffle(train_imgSet)
random.shuffle(test_imgSet)

# The samples are ragged (ndarray, list, str) tuples; wrapping them in
# np.array() raises ValueError on NumPy >= 1.24.  initPKL only iterates and
# indexes each sample, so the plain lists are passed through unchanged.
train_set_shuffle = train_imgSet
test_set_shuffle = test_imgSet

# Write trainSet.pkl / testSet.pkl into the current directory.
initPKL(train_set_shuffle, 'train')
initPKL(test_set_shuffle, 'test')

# Sanity check: read the training pickle back and show one sample.
with open('./trainSet.pkl', 'rb') as f:
    x, y, z = pickle.load(f)

print(np.shape(x[3]), y[3], z[3])

三、Mgan中調用代碼：

def main(_):
    # Load the three-element tuple stored in trainSet.pkl
    # (this call originally read cifar10_train.pkl).
    with open("trainSet.pkl", "rb") as f:
        tmp, label, lname = pickle.load(f)
    #print(tmp)
    # The original line was:
    #   x_train = tmp['data'].astype(np.float32).reshape([-1, 32, 32, 3]) / 127.5 - 1.
    # That pickle was probably generated with a 'data' key.  The pickle
    # produced by the script above has no such key, so tmp is used directly.
    # The "/ 127.5 - 1." scaling is left out here; it can be regarded as a
    # normalization step (TODO confirm).
    x_train = tmp.astype(np.float32).reshape([-1, 32, 32, 3])

四、python3下讀取python2生成的pickle文件時會出錯。解決方法是在python3下重新生成該文件，這樣就可以用python3直接讀取而不出錯；或者按下面的方法解決。

python的pickle模塊實現了基本的數據序列和反序列化。通過pickle模塊的序列化操作我們能夠將程序中運行的對象信息保存到文件中去，永久存儲；通過pickle模塊的反序列化操作，我們能夠從文件中創建上一次程序保存的對象。

python2使用的是cPickle模塊，而在python3中cPickle已經被取消，取而代之的是pickle模塊。

開發過程中，我曾經遇到一個奇怪的問題，在讀取一個文件時候，使用python2的如下方式：

import cPickle
# Python 2 only: cPickle does not exist in Python 3.
train, test, dicts = cPickle.load(open("./dataset/atis.pkl"))

是可以正常讀取文件的。
可是當換做python3的方式讀取文件時候，如下：

import pickle
# Fails on Python 3 with TypeError: the file is opened in text mode.
train, test, dicts = pickle.load(open("./dataset/atis.pkl"))

卻獲得了錯誤提示，提示信息如下：

Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl"))
TypeError: 'str' does not support the buffer interface

查詢錯誤信息後得知解決辦法鏈接，應該指明用二進制方式打開文件，於是代碼改爲：

import pickle
# Binary mode fixes the TypeError, but this still fails with
# UnicodeDecodeError: pickle decodes Python 2 byte strings as ASCII by default.
train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))

可是這時候錯誤變成了：

Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 0: ordinal not in range(128)

於是再次求助萬能的google，終於找到了解決辦法鏈接，我們需要告訴pickle：how to convert Python bytestring data to Python 3 strings，The default is to try and decode all string data as ASCII，所以代碼改爲：

import pickle
# Binary mode plus encoding='iso-8859-1' tells pickle how to decode the
# byte strings stored by Python 2.
# NOTE: only unpickle files from a trusted source - pickle.load can run
# arbitrary code.
with open("./dataset/atis.pkl", "rb") as f:
    train, test, dicts = pickle.load(f, encoding='iso-8859-1')
問題終於得到了解決。

ISO8859-1，通常叫做 Latin-1。Latin-1 包括了書寫所有西方歐洲語言不可缺少的附加字符。iso8859-1 和 ascii 編碼相似。但爲了方便表示各種各樣的語言，逐漸出現了很多標準編碼，重要的有如下幾個。
而 gb2312 是標準中文字符集。
UTF-8 是 UNICODE 的一種變長字符編碼，即 RFC 3629。簡單的說——大字符集。可以解決多種語言文本顯示問題，從而實現應用國際化和本地化。

https://zhidao.baidu.com/question/26613602.html
https://www.cnblogs.com/doudou-taste/p/7351278.html

python3使用pickle讀取文件提示TypeError或者UnicodeDecodeError的解決辦法

vue項目獲取富文本編輯器wangEditor內容導出爲word（html轉word格式並下載）

dotnet C# 創建 X11 應用時設置窗口背景顏色

Navicat安裝與激活教程

TDengine docker安裝方法

vue3組件通信與props

sapui5

Alpine Linux apk add DNS lookup error

部分JDK版本的發佈時間

工作中用到的腳本合集

合併代碼時Beyond Compare設置

GAN-based-HRRS-Sample-Generation-for-Image-Classification執行

tensorflow/stream_executor/cuda/cuda_dnn.cc:378] Loaded runtime CuDNN library: 7301--2019.5.12

Ubuntu16.04 RTX2080ti 安裝NVIDIA驅動+cuda9.0+cudnn7.3.0

torchnet安裝解決from torchnet.meter import ClassErrorMeter錯誤問題（cbam.pytorch執行問題）

Ubuntu18.04雙系統安裝+GPU+CUDA10+CUDNN7+ANACONDA3+PYTHON+PYCHARM

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結