sklearn-查看數據(第2講)

查看數據   2020/5/27
=====================================================================================
1.1.查看數據維度,類型屬性;
1.2.簡單的數據統計分析;數據的相關性及分佈;

=====================================================================================
2.實例:
#說明:在pycharm中打印的數據不美觀;在spyder中打印顯示則對齊得相當整齊

import csv,pandas as pd,numpy as np

# Configure how pandas renders DataFrames/Series when they are printed.
pd.set_option('display.width', 100)                 # display width in characters
# Use the fully qualified key: the bare 'precision' pattern is ambiguous in
# newer pandas releases (it also matches 'styler.format.precision') and
# raises OptionError there.
pd.set_option('display.precision', 4)               # decimal places shown for floats
pd.set_option('display.max_columns', 1000)

# Use Unicode East Asian Width rules when computing text width so that
# printed DataFrames containing wide/ambiguous characters stay aligned.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

#這兩個函數爲print對齊;在pycharm中仍不美觀
def fillEmptyStr(str0,width=12,precision=4):
    """Fit the text of one table cell into a fixed-width, left-aligned field.

    Args:
        str0: Text of the cell (already converted with ``str``).
        width: Target field width in characters.
        precision: Maximum number of characters kept after the first ``'.'``.

    Returns:
        * No ``'.'`` in ``str0``: the text cut to ``width`` characters and
          right-padded with spaces to exactly ``width``.
        * Otherwise: the text with everything beyond ``precision`` characters
          after the first ``'.'`` dropped (truncated, not rounded), then
          right-padded with spaces to at least ``width``.  A trailing
          scientific-notation exponent (``e-05``) is kept intact so the
          magnitude of the number is not lost by the truncation.
    """
    dot = str0.find('.')
    if dot < 0:
        return str0[:width].ljust(width)

    # Detach an exponent suffix only when the text really is a number in
    # scientific notation; labels such as 'test.csv' also contain an 'e'.
    mantissa, exponent = str0, ''
    head, sep, tail = str0.partition('e')
    if (sep and tail.lstrip('+-').isdigit()
            and head.replace('.', '', 1).lstrip('+-').isdigit()):
        mantissa, exponent = head, sep + tail

    # Slicing past the end is harmless, so short fractions pass through as-is.
    trimmed = mantissa[:dot + 1 + precision] + exponent
    # ljust never shortens: values wider than the field are left whole rather
    # than having digits chopped off.
    return trimmed.ljust(width)

def print_df(df,width=12,precision=4):
    """Print a DataFrame as a grid of left-aligned, fixed-width cells.

    Anything that is not a ``pd.DataFrame`` is handed to ``print`` unchanged.
    For a DataFrame, every column label, index label and value is converted
    with ``str``, passed through ``fillEmptyStr(text, width, precision)`` and
    left-aligned in a field of ``width`` characters.

    Args:
        df: Object to print.
        width: Field width of each cell.
        precision: Forwarded to ``fillEmptyStr``.
    """
    if not isinstance(df, pd.DataFrame):
        print(df)
        return

    cell = '{{:<{}}}'.format(width)
    matrix = df.to_numpy()

    def render(value):
        return cell.format(fillEmptyStr(str(value), width, precision))

    # Header line: a blank corner cell above the index column, then the
    # column labels.
    header = [cell.format(' ' * width)]
    header.extend(render(label) for label in df.columns)
    print(''.join(header))

    # One output line per row: the index label followed by the row's values.
    for label, values in zip(df.index, matrix):
        print(render(label) + ''.join(render(item) for item in values))
=====================================================================================
# Load the CSV, supplying the column names explicitly.
filename = 'pima_data.csv'
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
data = pd.read_csv(filename, names=names)

# Column dtypes, followed by the (rows, columns) shape.
print('data.dtypes=', data.dtypes, sep='\n')
print('data.shape=', data.shape)

# Number of rows for each value of the 'class' column.
class_counts = data.groupby('class').size()
print('data.class=')
print_df(class_counts)

# Skewness of every column.
skewness = data.skew()
print('data.skew=')
print_df(skewness)

# Pairwise Pearson correlation between columns.
correlations = data.corr(method='pearson')
print('data.corr=')
print_df(correlations)

# Descriptive statistics produced by DataFrame.describe().
summary = data.describe()
print('data.describe=')
print_df(summary)

# First four rows of the data.
first_rows = data.head(4)
print('data.head(4)=')
print_df(first_rows)
=====================================================================================
"""
data.dtypes=
preg       int64
plas       int64
pres       int64
skin       int64
test       int64
mass     float64
pedi     float64
age        int64
class      int64
dtype: object
data.shape= (768, 9)
data.class=
class
0    500
1    268
dtype: int64
data.skew=
preg     0.9017
plas     0.1738
pres    -1.8436
skin     0.1094
test     2.2723
mass    -0.4290
pedi     1.9199
age      1.1296
class    0.6350
dtype: float64
data.corr=
            preg        plas        pres        skin        test        mass        pedi        age         class       
preg        1.0         0.1294      0.1412      -0.0816     -0.0735     0.0176      -0.0335     0.5443      0.2218      
plas        0.1294      1.0         0.1525      0.0573      0.3313      0.2210      0.1373      0.2635      0.4665      
pres        0.1412      0.1525      1.0         0.2073      0.0889      0.2818      0.0412      0.2395      0.0650      
skin        -0.0816     0.0573      0.2073      1.0         0.4367      0.3925      0.1839      -0.1139     0.0747      
test        -0.0735     0.3313      0.0889      0.4367      1.0         0.1978      0.1850      -0.0421     0.1305      
mass        0.0176      0.2210      0.2818      0.3925      0.1978      1.0         0.1406      0.0362      0.2926      
pedi        -0.0335     0.1373      0.0412      0.1839      0.1850      0.1406      1.0         0.0335      0.1738      
age         0.5443      0.2635      0.2395      -0.1139     -0.0421     0.0362      0.0335      1.0         0.2383      
class       0.2218      0.4665      0.0650      0.0747      0.1305      0.2926      0.1738      0.2383      1.0         
data.describe=
            preg        plas        pres        skin        test        mass        pedi        age         class       
count       768.0       768.0       768.0       768.0       768.0       768.0       768.0       768.0       768.0       
mean        3.8450      120.8945    69.1054     20.5364     79.7994     31.9925     0.4718      33.2408     0.3489      
std         3.3695      31.9726     19.3558     15.9522     115.2440    7.8841      0.3313      11.7602     0.4769      
min         0.0         0.0         0.0         0.0         0.0         0.0         0.078       21.0        0.0         
25%         1.0         99.0        62.0        0.0         0.0         27.3        0.2437      24.0        0.0         
50%         3.0         117.0       72.0        23.0        30.5        32.0        0.3725      29.0        0.0         
75%         6.0         140.25      80.0        32.0        127.25      36.6        0.6262      41.0        1.0         
max         17.0        199.0       122.0       99.0        846.0       67.1        2.42        81.0        1.0         
data.head(4)=
            preg        plas        pres        skin        test        mass        pedi        age         class       
0           6.0         148.0       72.0        35.0        0.0         33.6        0.627       50.0        1.0         
1           1.0         85.0        66.0        29.0        0.0         26.6        0.3510      31.0        0.0         
2           8.0         183.0       64.0        0.0         0.0         23.3        0.672       32.0        1.0         
3           1.0         89.0        66.0        23.0        94.0        28.1        0.1669      21.0        0.0         
"""
======================================================================================

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章