sklearn pandas
系統環境
CentOS Linux release 7.7.1908 (Core)
Linux version 3.10.0-1062.1.1.el7.x86_64 ([email protected]) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Fri Sep 13 22:55:44 UTC 2019
1.安裝pandas
在已經安裝的環境
# 激活該環境
conda activate tensorflow2_env
# 安裝pandas
conda install pandas
測試
>>> import pandas
>>> pandas.__version__
'1.0.3'
出現問題
# 1
ImportError: libSM.so.6: cannot open shared object file: No such file or directory
yum whatprovides libSM.so.6
yum install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false
# 2
ImportError: libXrender.so.1: cannot open shared object file: No such file or directory
yum whatprovides libXrender.so.1
yum install libXrender
# 3
ImportError: libXext.so.6: cannot open shared object file: No such file or directory
>>> exit()
yum install libXext
2. 安裝sklearn
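# 套件的正式名稱是 scikit-learn(只有 import 時才寫 sklearn),用 sklearn 這個名字 conda 會找不到
conda install scikit-learn
# 如果要用 pip,同樣使用正式名稱(PyPI 上的 sklearn 已被棄用)
pip install scikit-learn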
3. 配置離線fashion_mnist數據集
能連上google的時候是這樣
from tensorflow import keras
# Resolve the dataset module once, then unpack the loader's result into its
# training pair and test pair.
fashion_mnist = keras.datasets.fashion_mnist
train_pair, test_pair = fashion_mnist.load_data()
x_train_all, y_train_all = train_pair
x_test, y_test = test_pair
不能科學上網的,我們就下載數據集
http://www.obs.sixmillions.cn/fashion_mnist/t10k-images-idx3-ubyte.gz
http://www.obs.sixmillions.cn/fashion_mnist/t10k-labels-idx1-ubyte.gz
http://www.obs.sixmillions.cn/fashion_mnist/train-images-idx3-ubyte.gz
http://www.obs.sixmillions.cn/fashion_mnist/train-labels-idx1-ubyte.gz
我下載到了 /opt/packages/keras_datasets/fashion_mnist/ 文件夾
編輯當前python路徑裏面的 fashion_mnist.py 文件
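因爲我是用anaconda安裝的環境,所以我去anaconda環境中找這個文件。在已激活的環境裏可以用下面的命令查出它的實際路徑:
python -c "import inspect; from tensorflow import keras; print(inspect.getsourcefile(keras.datasets.fashion_mnist.load_data))"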
編輯這個文件
本來是這樣的
from __future__ import print_function
import gzip
import os
import numpy as np
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.util.tf_export import keras_export
@keras_export('keras.datasets.fashion_mnist.load_data')
def load_data():
    """Loads the Fashion-MNIST dataset.

    Obtains the four gzip archives named in `files` through `get_file`
    (passing `base + fname` as the origin) and decodes each archive into a
    NumPy `uint8` array.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

    License:
        The copyright for Fashion-MNIST is held by Zalando SE.
        Fashion-MNIST is licensed under the [MIT license](
        https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE).

    """
    # Passed to get_file as cache_subdir.
    dirname = os.path.join('datasets', 'fashion-mnist')
    base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
    # Order matters: the archives are addressed below as paths[0] .. paths[3].
    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]
    paths = []
    for fname in files:
        paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname))
    # Label archives: the first 8 bytes are skipped (presumably the IDX
    # header -- confirm against the format spec); the rest is one byte per label.
    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    # Image archives: the first 16 bytes are skipped, then the payload is
    # reshaped into one 28x28 array per label read above.
    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)
    return (x_train, y_train), (x_test, y_test)
將數據源那個地方換成我們的
from __future__ import print_function
import gzip
import os
import numpy as np
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.util.tf_export import keras_export
@keras_export('keras.datasets.fashion_mnist.load_data')
def load_data():
    """Loads the Fashion-MNIST dataset.

    Obtains the four gzip archives named in `files` through `get_file`
    (passing `base + fname` as the origin) and decodes each archive into a
    NumPy `uint8` array.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

    License:
        The copyright for Fashion-MNIST is held by Zalando SE.
        Fashion-MNIST is licensed under the [MIT license](
        https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE).

    """
    # Passed to get_file as cache_subdir.
    dirname = os.path.join('datasets', 'fashion-mnist')
    # This origin prefix is the data source that the offline setup replaces.
    base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
    # Order matters: the archives are addressed below as paths[0] .. paths[3].
    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]
    paths = []
    for fname in files:
        paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname))
    # Label archives: the first 8 bytes are skipped (presumably the IDX
    # header -- confirm against the format spec); the rest is one byte per label.
    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    # Image archives: the first 16 bytes are skipped, then the payload is
    # reshaped into one 28x28 array per label read above.
    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)
    return (x_train, y_train), (x_test, y_test)
編輯後
import gzip
import os
import numpy as np
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.util.tf_export import keras_export
@keras_export('keras.datasets.fashion_mnist.load_data')
def load_data(data_dir='/opt/packages/keras_datasets/fashion_mnist/'):
    """Loads the Fashion-MNIST dataset from local gzip archives.

    Offline variant of the loader: the four archives are read straight from
    `data_dir` instead of being fetched through `get_file` from
    https://storage.googleapis.com/tensorflow/tf-keras-datasets/.

    Args:
        data_dir: Directory that contains `train-labels-idx1-ubyte.gz`,
            `train-images-idx3-ubyte.gz`, `t10k-labels-idx1-ubyte.gz` and
            `t10k-images-idx3-ubyte.gz`. A trailing slash is optional.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

    Raises:
        FileNotFoundError: If one of the archives is missing from `data_dir`.

    License:
        The copyright for Fashion-MNIST is held by Zalando SE.
        Fashion-MNIST is licensed under the [MIT license](
        https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE).

    """
    # Order matters: the archives are addressed below as paths[0] .. paths[3].
    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]
    # os.path.join adds the separator only when it is missing, so the
    # directory works with or without a trailing slash (plain string
    # concatenation would produce a wrong file name without it).
    paths = [os.path.join(data_dir, fname) for fname in files]

    # Label archives: skip the first 8 bytes, the rest is one byte per label.
    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    # Image archives: skip the first 16 bytes, then view the payload as one
    # 28x28 array per label read above.
    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)
    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)
    return (x_train, y_train), (x_test, y_test)
注意路徑後面有個斜槓
4. 安裝matplotlib
conda install matplotlib
5. 測試
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
#import cv2
from tensorflow import keras
# Report the interpreter and library versions in use.
print(tf.__version__)
# print(cv2.__version__)  # enable together with the cv2 import
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

# The loader hands back the training and test splits separately.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training samples for validation; train on the rest.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

# Print the (features, labels) shapes of each split: validation, train, test.
for features, labels in ((x_valid, y_valid), (x_train, y_train), (x_test, y_test)):
    print(features.shape, labels.shape)
def show_single_image(img_arr):
    """Draw `img_arr` with the 'binary' colormap and display the figure."""
    plt.imshow(img_arr, cmap='binary')
    plt.show()


# Preview the first sample of the training split.
show_single_image(x_train[0])