查看數據 2020/5/27
=====================================================================================
1.1.查看數據維度,類型屬性;
1.2.簡單的數據統計分析;數據的相關性及分佈;
=====================================================================================
2.實例:
#說明:在pycharm中數據顯示不美觀;在spyder中打印顯示對齊得相當好
import csv,pandas as pd,numpy as np
# Console display settings for pandas output.
pd.set_option('display.width', 100)
# Use the fully qualified key: in newer pandas releases the bare pattern
# 'precision' also matches 'styler.format.precision' and raises OptionError.
pd.set_option('display.precision', 4)  # decimal places shown for floats
pd.set_option('display.max_columns', 1000)
# Treat ambiguous-width and East Asian wide characters as double width so
# printed DataFrame columns containing such text stay aligned.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
# The two helpers below (fillEmptyStr and print_df) align printed tables;
# the output may still look uneven in PyCharm.
def fillEmptyStr(str0, width=12, precision=4):
    """Trim or pad one table cell so that printed columns line up.

    Text without a '.' is forced to exactly ``width`` characters (cut or
    right-padded with spaces).  Text with a '.' keeps at most ``precision``
    characters after the first '.', and is right-padded with spaces when
    it has fewer; ``width`` is not applied in this case.  For numbers in
    scientific notation the exponent suffix is kept, so ``1.2345678e-05``
    becomes ``1.2345e-05`` rather than ``1.2345``.

    Args:
        str0: Cell text, already converted to ``str``.
        width: Target length for text that has no decimal point.
        precision: Number of characters kept after the decimal point.

    Returns:
        The adjusted string.
    """
    dot = str0.find('.')
    if dot < 0:
        # No decimal point: cut or pad to exactly `width` characters.
        return str0[:width].ljust(width)

    # Split off an exponent suffix, but only when the whole text really is a
    # float literal; labels such as "sepal.length" also contain an 'e'.
    mantissa, exponent = str0, ''
    exp_at = max(str0.rfind('e'), str0.rfind('E'))
    if exp_at > dot:
        try:
            float(str0)
        except ValueError:
            pass  # ordinary text, not scientific notation
        else:
            mantissa, exponent = str0[:exp_at], str0[exp_at:]

    frac_len = len(mantissa) - dot - 1
    if frac_len >= precision:
        return mantissa[:dot + 1 + precision] + exponent
    # Pad count is (precision - frac_len); the original had the operands
    # reversed, which made the count negative and the padding a no-op.
    return mantissa + exponent + ' ' * (precision - frac_len)
def print_df(df, width=12, precision=4):
    """Print a DataFrame as a left-aligned, fixed-width text table.

    The first line holds a blank corner cell followed by the column labels;
    every following line holds an index label followed by that row's values.
    Each cell is passed through ``fillEmptyStr`` and then left-justified to
    ``width`` characters.  Anything that is not a ``pd.DataFrame`` (for
    example a Series) is handed to ``print`` unchanged.

    NOTE: values are read from ``df.to_numpy()``, so a frame mixing integer
    and float columns is shown with one common dtype (integers appear as
    e.g. ``6.0``).

    Args:
        df: Object to print.
        width: Width of every cell, in characters.
        precision: Number of decimals kept by ``fillEmptyStr``.
    """
    if not isinstance(df, pd.DataFrame):
        print(df)
        return

    cell = '{:<' + str(width) + '}'

    def emit(value):
        # Write one formatted cell and stay on the current line.
        print(cell.format(fillEmptyStr(str(value), width, precision)), end='')

    values = df.to_numpy()

    # Header line: empty corner cell, then one cell per column label.
    print(cell.format(' ' * width), end='')
    for label in df.columns:
        emit(label)
    print()

    # Body: index label first, then the values of that row.
    for pos, row in enumerate(values):
        emit(df.index[pos])
        for item in row:
            emit(item)
        print()
=====================================================================================
# Load the dataset and print an overview of it.
filename = 'pima_data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pd.read_csv(filename, names=names)

# Data type of every column.
print('data.dtypes=')
print(data.dtypes)
# Number of rows and columns.
print('data.shape=', data.shape)

# Every remaining section is a title line followed by a table.  The lambdas
# defer each computation until right before its result is printed.
for _title, _compute in (
    ('data.class=', lambda: data.groupby('class').size()),  # row count per class
    ('data.skew=', lambda: data.skew()),  # skewness of each column
    ('data.corr=', lambda: data.corr(method='pearson')),  # Pearson correlations
    ('data.describe=', lambda: data.describe()),  # descriptive statistics
    ('data.head(4)=', lambda: data.head(4)),  # first four rows
):
    print(_title)
    print_df(_compute())
=====================================================================================
"""
data.dtypes=
preg int64
plas int64
pres int64
skin int64
test int64
mass float64
pedi float64
age int64
class int64
dtype: object
data.shape= (768, 9)
data.class=
class
0 500
1 268
dtype: int64
data.skew=
preg 0.9017
plas 0.1738
pres -1.8436
skin 0.1094
test 2.2723
mass -0.4290
pedi 1.9199
age 1.1296
class 0.6350
dtype: float64
data.corr=
preg plas pres skin test mass pedi age class
preg 1.0 0.1294 0.1412 -0.0816 -0.0735 0.0176 -0.0335 0.5443 0.2218
plas 0.1294 1.0 0.1525 0.0573 0.3313 0.2210 0.1373 0.2635 0.4665
pres 0.1412 0.1525 1.0 0.2073 0.0889 0.2818 0.0412 0.2395 0.0650
skin -0.0816 0.0573 0.2073 1.0 0.4367 0.3925 0.1839 -0.1139 0.0747
test -0.0735 0.3313 0.0889 0.4367 1.0 0.1978 0.1850 -0.0421 0.1305
mass 0.0176 0.2210 0.2818 0.3925 0.1978 1.0 0.1406 0.0362 0.2926
pedi -0.0335 0.1373 0.0412 0.1839 0.1850 0.1406 1.0 0.0335 0.1738
age 0.5443 0.2635 0.2395 -0.1139 -0.0421 0.0362 0.0335 1.0 0.2383
class 0.2218 0.4665 0.0650 0.0747 0.1305 0.2926 0.1738 0.2383 1.0
data.describe=
preg plas pres skin test mass pedi age class
count 768.0 768.0 768.0 768.0 768.0 768.0 768.0 768.0 768.0
mean 3.8450 120.8945 69.1054 20.5364 79.7994 31.9925 0.4718 33.2408 0.3489
std 3.3695 31.9726 19.3558 15.9522 115.2440 7.8841 0.3313 11.7602 0.4769
min 0.0 0.0 0.0 0.0 0.0 0.0 0.078 21.0 0.0
25% 1.0 99.0 62.0 0.0 0.0 27.3 0.2437 24.0 0.0
50% 3.0 117.0 72.0 23.0 30.5 32.0 0.3725 29.0 0.0
75% 6.0 140.25 80.0 32.0 127.25 36.6 0.6262 41.0 1.0
max 17.0 199.0 122.0 99.0 846.0 67.1 2.42 81.0 1.0
data.head(4)=
preg plas pres skin test mass pedi age class
0 6.0 148.0 72.0 35.0 0.0 33.6 0.627 50.0 1.0
1 1.0 85.0 66.0 29.0 0.0 26.6 0.3510 31.0 0.0
2 8.0 183.0 64.0 0.0 0.0 23.3 0.672 32.0 1.0
3 1.0 89.0 66.0 23.0 94.0 28.1 0.1669 21.0 0.0
"""
======================================================================================