python3使用pickle讀取文件提示TypeError或者UnicodeDecodeError的解決辦法

一、分割數據成train和test 代碼:


# Data segmentation: resize the original images to the classifier model's input size, then split the data set into a training set and a test set.
import sys
import os
import shutil
import csv
import subprocess
import random
import time
import itertools
from PIL import Image

# Input image tree and output directory used by the conversion.
imagesPath = 'data/EuroSAT-databack'
converted_path = 'gen'

# Alternative locations for the NUPW data set:
#imagesPath = '/home/hpc-126/remote-host/NUPW-45/NWPU-RESISC45'
#converted_path ='/home/hpc-126/remote-host/NUPW-45/train224x224'

train_path = ''
test_path = ''
imageWidth = 32    # output image width in pixels
imageHeight = 32   # output image height in pixels
split_ratio = 0.80  # ratio of train and test set size
datatype = 'euro'

# Class-directory name -> integer class id, chosen by ``datatype``.
labels = ''
if datatype == 'euro':
    labels = {
        'SeaLake': 9,
        'River': 8,
        'PermanentCrop': 6,
        'AnnualCrop': 0,
        'Pasture': 5,
        'Forest': 1,
        'HerbaceousVegetation': 2,
        'Highway': 3,
        'Residential': 7,
        'Industrial': 4,
    }
elif datatype == 'NUPW':
    # The ids are simply the positions of the class names in this sequence.
    labels = {
        class_name: class_id
        for class_id, class_name in enumerate((
            'airplane',
            'airport',
            'baseball_diamond',
            'basketball_court',
            'beach',
            'bridge',
            'chaparral',
            'church',
            'circular_farmland',
            'cloud',
            'commercial_area',
            'dense_residential',
            'desert',
            'forest',
            'freeway',
            'golf_course',
            'ground_track_field',
            'harbor',
            'industrial_area',
            'intersection',
            'island',
            'lake',
            'meadow',
            'medium_residential',
            'mobile_home_park',
            'mountain',
            'overpass',
            'palace',
            'parking_lot',
            'railway',
            'railway_station',
            'rectangular_farmland',
            'river',
            'roundabout',
            'runway',
            'sea_ice',
            'ship',
            'snowberg',
            'sparse_residential',
            'stadium',
            'storage_tank',
            'tennis_court',
            'terrace',
            'thermal_power_station',
            'wetland',
        ))
    }
else:
    print('please specify the data type : euro   NUPW')

def remove_dir(path):
    """Recursively delete the directory tree at ``path``.

    A ``path`` that does not exist is silently ignored; any other
    ``OSError`` raised by ``shutil.rmtree`` is propagated to the caller.
    """
    import errno  # local import: only needed to name the "no such file" code

    try:
        shutil.rmtree(path)
    except OSError as e:  # "except OSError, e" is a SyntaxError on Python 3
        if e.errno != errno.ENOENT:
            raise


def convert_images(path):
    """Resize the images under ``path`` and split them into train/test trees.

    Every directory below ``path`` found by ``os.walk`` is treated as a class
    directory whose base name is looked up in the module-level ``labels``
    mapping. Its files are shuffled, resized to ``imageWidth`` x
    ``imageHeight`` when their size differs, and saved under
    ``<converted_path>/train/<label>`` or ``<converted_path>/test/<label>``.
    The first ``int(len(files) * split_ratio)`` shuffled files go to the
    training tree and the remainder to the test tree.

    Args:
        path: root directory that contains the class directories.

    Returns:
        An empty list (nothing is collected; the return value is kept so
        existing callers keep working).

    Raises:
        KeyError: if a directory name is not a key of ``labels``.
        OSError: if an output directory already exists or cannot be created.
    """
    images = []
    train_path = os.path.join(converted_path, 'train')
    test_path = os.path.join(converted_path, 'test')
    os.mkdir(train_path)
    os.mkdir(test_path)
    target_size = (imageWidth, imageHeight)
    for root, dirs, files in os.walk(path):
        if root == path:
            # The root itself holds no class images, only class directories.
            continue
        category = os.path.basename(root)
        label = labels[category]
        label_train_dir = os.path.join(train_path, str(label))
        label_test_dir = os.path.join(test_path, str(label))
        os.mkdir(label_train_dir)
        os.mkdir(label_test_dir)
        random.shuffle(files)
        # Loop-invariant: number of shuffled files that go to the train set.
        train_count = int(len(files) * split_ratio)
        for count, name in enumerate(files):
            # TIFF inputs are re-encoded as JPEG. splitext also handles
            # ".tiff"/".TIF", which the old str.replace turned into ".jpgf"
            # or left untouched.
            stem, ext = os.path.splitext(name)
            if ext.lower() in ('.tif', '.tiff'):
                jpeg_name = stem + '.jpg'
            else:
                jpeg_name = name
            if count < train_count:
                target_dir = label_train_dir
            else:
                target_dir = label_test_dir
            # The context manager closes the source file even if resizing or
            # saving fails.
            with Image.open(os.path.join(root, name)) as im:
                if im.size != target_size:
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
                    # the same filter under its current name.
                    im = im.resize(target_size, Image.LANCZOS)
                im.save(os.path.join(target_dir, jpeg_name))
    return images

def main(argv):
    """Recreate the output directory from scratch and run the conversion.

    ``argv`` is accepted for the command-line entry point but is not read.
    """
    stale_output_present = os.path.exists(converted_path)
    if stale_output_present:
        remove_dir(converted_path)
    os.mkdir(converted_path)
    convert_images(imagesPath)


if __name__ == "__main__":
    main(sys.argv)
二、把圖片轉換成pickle文件 代碼:(python3下用pickle庫,python2下用cPickle庫;但用python3的pickle讀取python2生成的文件時會出錯,見第四節)

import PIL.Image as Image
from scipy.misc import imsave
import numpy as np
import random
import pickle
import os

#
def initPKL(imgSet_shuffle, train_or_test):
    """Pickle a sequence of samples as an ``(images, labels, names)`` tuple.

    Args:
        imgSet_shuffle: iterable of samples; items 0, 1 and 2 of each sample
            are read as the image, its label and its label name.
        train_or_test: ``'train'`` writes ``trainSet.pkl``; any other value
            writes ``testSet.pkl``.

    The three columns are converted to NumPy arrays and dumped as one 3-tuple
    into a file in the current working directory.
    """
    set_name = 'trainSet.pkl' if train_or_test == 'train' else 'testSet.pkl'

    imgSet = []
    labels = []
    label_names = []
    for sample in imgSet_shuffle:
        imgSet.append(sample[0])
        labels.append(sample[1])
        label_names.append(sample[2])

    data = (np.array(imgSet), np.array(labels), np.array(label_names))

    # "with" guarantees the file is closed even if pickle.dump raises.
    with open(set_name, 'wb') as output:
        pickle.dump(data, output)

def initArr(folders_path):
    """Load every image below ``folders_path`` as ``(image, label, name)``.

    ``folders_path`` must contain one sub-directory per class. Each image is
    read with PIL, converted to a NumPy array and rescaled from [0, 255] to
    [-1, 1]. The label is a one-hot list whose hot index is the position of
    the class directory in the sorted directory listing.

    Args:
        folders_path: directory that holds the class sub-directories.

    Returns:
        A list of ``(img_arr, one_hot_label, folder_name)`` tuples; all
        samples of a class share the same label list object.
    """
    def _folder_key(name):
        # Numeric directory names sort by value so that the one-hot index
        # equals the number; other names sort alphabetically after them.
        return (0, int(name), name) if name.isdigit() else (1, 0, name)

    imgSet = []
    # os.listdir returns entries in arbitrary order; sorting makes a class
    # get the same index in every tree (e.g. train and test).
    folders = sorted(os.listdir(folders_path), key=_folder_key)
    # Keep the historical 10-slot vector, but grow it instead of raising
    # IndexError when there are more than 10 classes.
    num_classes = max(10, len(folders))

    for i, folder in enumerate(folders):
        label = [0] * num_classes
        label[i] = 1
        folder_path = os.path.join(folders_path, folder)
        for file_name in os.listdir(folder_path):
            image_path = os.path.join(folder_path, file_name)
            with Image.open(image_path) as img:
                # The former "/ 127.5*2.0 -1.0" produced values in [-1, 3].
                img_arr = np.array(img) / 127.5 - 1.0
            print(image_path)
            imgSet.append((img_arr, label, folder))
    return imgSet

#
# Image trees to load; each holds one sub-directory per class.
train_folders_path = 'gen/train'
test_folders_path = 'gen/test/'

train_imgSet = initArr(train_folders_path)
test_imgSet = initArr(test_folders_path)

# Shuffle so that samples of one class are not stored contiguously.
random.shuffle(train_imgSet)
random.shuffle(test_imgSet)

# Keep the samples as plain lists. Each sample is an (array, list, str)
# tuple, and np.array() on such ragged data raises ValueError on
# NumPy >= 1.24; initPKL only iterates over the samples anyway.
train_set_shuffle = train_imgSet
test_set_shuffle = test_imgSet

# Write trainSet.pkl and testSet.pkl into the current directory.
initPKL(train_set_shuffle, 'train')
initPKL(test_set_shuffle, 'test')

# Read the training pickle back as a sanity check.
with open('./trainSet.pkl', 'rb') as f:
    x, y, z = pickle.load(f)

print(np.shape(x[3]), y[3], z[3])
 

三、Mgan中調用  代碼:

def main(_):
    """Load ``trainSet.pkl`` and reshape its images to ``(-1, 32, 32, 3)``.

    The single positional argument is ignored. This is an excerpt: nothing
    is returned and ``x_train`` is not used further here.
    """
    # trainSet.pkl replaces the cifar10_train.pkl that was loaded originally.
    # The context manager closes the file instead of leaking the handle.
    with open("trainSet.pkl", "rb") as f:
        tmp, label, lname = pickle.load(f)
    #print(tmp)
    # Original line, for a pickle that presumably stored the images under a
    # 'data' key:
    #x_train = tmp['data'].astype(np.float32).reshape([-1, 32, 32, 3]) / 127.5 - 1.
    # The pickle loaded here is a plain tuple, so tmp is used directly. The
    # "/ 127.5 - 1." scaling (a normalisation step) is left out, presumably
    # because the generating script already rescales the pixels; confirm
    # before changing.
    x_train = tmp.astype(np.float32).reshape([-1, 32, 32, 3])
 

四、在python3下讀取python2生成的pickle文件時會出錯。解決方法是改在python3下生成該文件,這樣用python3讀取就不會出錯;或者按下面的方法解決。

python的pickle模塊實現了基本的數據序列和反序列化。通過pickle模塊的序列化操作我們能夠將程序中運行的對象信息保存到文件中去,永久存儲;通過pickle模塊的反序列化操作,我們能夠從文件中創建上一次程序保存的對象。

python2通常使用的是cPickle模塊(pickle的C語言加速版),而在python3中cPickle已經被併入pickle模塊,不能再單獨導入,直接使用pickle模塊即可。

開發過程中,我曾經遇到一個奇怪的問題,在讀取一個文件時候,使用python2的如下方式:

import cPickle
train, test, dicts = cPickle.load(open("./dataset/atis.pkl"))

是可以正常讀取文件的。 
可是當換做python3的方式讀取文件時候,如下:

import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl"))

卻獲得了錯誤提示,提示信息如下:

Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl"))
TypeError: 'str' does not support the buffer interface

查詢錯誤信息後得知解決辦法:pickle文件是二進制數據,應該指明用二進制方式("rb")打開文件,於是代碼改爲:

import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))

可是這時候錯誤變成了:

Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 0: ordinal not in range(128)

於是再次求助萬能的google,終於找到了解決辦法:我們需要告訴pickle如何把python2的字節串(bytestring)數據轉換成python3的字符串。pickle默認會嘗試用ASCII解碼所有字符串數據,遇到非ASCII字節就會報錯,所以要通過encoding參數指定編碼,代碼改爲:

import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"), encoding='iso-8859-1')

問題終於得到了解決。

ISO8859-1,通常叫做 Latin-1。Latin-1 包括了書寫所有西方歐洲語言不可缺少的附加字符。iso8859-1 和 ascii 編碼相似。但爲了方便表示各種各樣的語言,逐漸出現了很多標準編碼,重要的有如下幾個。 
而 gb2312 是標準中文字符集。 
UTF-8 是 UNICODE 的一種變長字符編碼,即 RFC 3629。簡單的說——大字符集。可以解決多種語言文本顯示問題,從而實現應用國際化和本地化。

https://zhidao.baidu.com/question/26613602.html 
https://www.cnblogs.com/doudou-taste/p/7351278.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章