數據挖掘工具pandas(七)邏輯運算

1,使用邏輯運算符進行篩選(pandas的布爾索引)
import pandas as pd
from sqlalchemy import create_engine

# Connection to the "yoyo" MySQL database on localhost via the PyMySQL driver.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
sql = """select * from book_management_signin;"""
df = pd.read_sql(sql, con=engine)

# describe() output for the loaded table.
print(df.describe())

# Boolean indexing: keep only the rows whose id is below 20.
id_below_20 = df['id'] < 20
print(df[id_below_20])

import numpy as np
import pandas as pd
import numpy as np
# Random matrix drawn from N(0, 1): rows are labelled as stocks, columns as days.
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))
stock_list = [f"股票{row}" for row in range(day_data.shape[0])]
date = [f"第{col}天" for col in range(day_data.shape[1])]
df = pd.DataFrame(data=day_data, index=stock_list, columns=date)

# Keep the rows whose day-0 value is positive, then list them from largest
# to smallest day-0 value.
day0_positive = df["第0天"] > 0
print(df[day0_positive].sort_values(by="第0天", ascending=False))
2,使用復合邏輯運算符進行篩選(pandas的布爾索引)
import pandas as pd
from sqlalchemy import create_engine

# Connection to the "yoyo" MySQL database on localhost via the PyMySQL driver.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
sql = """select * from role_info;"""
df = pd.read_sql(sql, con=engine)

print(df.dtypes)

# Two element-wise conditions combined with | (OR): each side must be a
# boolean Series, hence the separate named masks.
role_name_length = df['role_name'].str.len()
id_below_10 = df['id'] < 10

# Rows with a role_name longer than 2 characters OR an id below 10; show role_name only.
print(df[(role_name_length > 2) | id_below_10]['role_name'])

# Rows with a role_name shorter than 2 characters OR an id below 10; show id and role_name.
print(df[(role_name_length < 2) | id_below_10].loc[:, ("id", "role_name")])

import numpy as np
import pandas as pd
import numpy as np
# Random matrix drawn from N(0, 1): rows are labelled as stocks, columns as days.
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))
stock_list = [f"股票{row}" for row in range(day_data.shape[0])]
date = [f"第{col}天" for col in range(day_data.shape[1])]
df = pd.DataFrame(data=day_data, index=stock_list, columns=date)

# Boolean-index variant (with an extra sort), left disabled:
# print(df[(df["第0天"]>0) & (df["第1天"]<0)].sort_values(["第0天","第1天"],ascending=False))

# query() expresses the compound condition as a single string:
# day 0 positive AND day 1 negative.
day0_up_day1_down = "第0天>0 & 第1天<0"
print(df.query(day0_up_day1_down))
3,使用isin()運算符進行篩選
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so every run produces the same matrix.
np.random.seed(4)
# N(0, 1) samples rounded to one decimal so exact-value matching is meaningful.
day_data = np.random.normal(0, 1, (500, 507)).round(1)
stock_list = [f"股票{row}" for row in range(day_data.shape[0])]
date = [f"第{col}天" for col in range(day_data.shape[1])]
df = pd.DataFrame(data=day_data, index=stock_list, columns=date)

# isin() tests each day-0 value for membership in the given list; the
# resulting boolean mask selects the matching rows.
wanted_values = [0.1, 0.3]
print(df[df["第0天"].isin(wanted_values)])
4,數學運算
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so every run produces the same matrix.
np.random.seed(4)
day_data = np.random.normal(0, 1, (500, 507))
day_data = np.round(day_data, 1)
stock_list = ["股票" + str(i) for i in range(day_data.shape[0])]
date = ["第" + str(i) + "天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# Arithmetic - addition (disabled example): add 1 to every day-0 value.
# df["第0天"] = df["第0天"].add(1)

# Arithmetic - subtraction: store day 1 minus day 0 in a new "change" column.
# Series.sub(other) is the method form of the `-` operator.
df["change"] = df["第1天"].sub(df["第0天"])
print(df)
5,自定義運算函數
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so every run produces the same matrix.
np.random.seed(4)
# N(0, 1) samples rounded to one decimal place.
day_data = np.random.normal(0, 1, (500, 507)).round(1)
stock_list = [f"股票{row}" for row in range(day_data.shape[0])]
date = [f"第{col}天" for col in range(day_data.shape[1])]
df = pd.DataFrame(data=day_data, index=stock_list, columns=date)

# Selecting several columns at once (disabled example):
# print(df[["第0天","第1天"]])


def _value_range(values):
    """Return the largest entry of *values* minus its smallest entry."""
    return values.max() - values.min()


# apply() runs the function once per column (axis=0) or once per row (axis=1).
first_two_days = df[["第0天", "第1天"]]
print(first_two_days.apply(_value_range, axis=0))
print(first_two_days.apply(_value_range, axis=1))
def disting_type(strs):
    """Classify a logic-id string as type 1, 2 or 3.

    The text following the first "body0/" marker (up to the next marker, if
    any) is split into alternating digit / non-digit runs, and the second
    run is parsed as an integer.

    Args:
        strs: String containing "body0/" whose remainder begins with a
            non-digit run followed by a digit run.

    Returns:
        3 when the parsed number is at most 23; otherwise 1 when the
        remainder does not contain "tbl", and 2 when it does.

    Raises:
        IndexError: If "body0/" is absent, or the remainder has fewer than
            two runs.
        ValueError: If the second run is not numeric (for example when the
            remainder starts with a digit).
    """
    # Imported here because the surrounding script never imports itertools;
    # without this the call fails with NameError on `groupby`.
    from itertools import groupby

    remainder = strs.split("body0/")[1]
    # groupby over str.isdigit yields consecutive runs of digit / non-digit
    # characters; index 1 is the first digit run when the text starts with
    # a non-digit.
    runs = ["".join(run) for _, run in groupby(remainder, key=str.isdigit)]
    exist_num = runs[1]

    if int(exist_num) <= 23:
        return 3
    return 1 if "tbl" not in remainder else 2
        
# merged['level'] = merged['level'].apply(lambda x: -1 if x == 'nan' else x)
# Derive the "type" column by running disting_type over every logic_id value.
transfor["type"] = transfor["logic_id"].apply(disting_type)
def insert_level_data(content, sort, name_level_dicts, start_index, end_index):
    """Look up the level for *content* unless *sort* lies in the excluded window.

    Args:
        content: Key to look up in *name_level_dicts*.
        sort: Position value compared against the window bounds.
        name_level_dicts: Mapping from content to level.
        start_index: Exclusive lower bound of the excluded window.
        end_index: Inclusive upper bound of the excluded window.

    Returns:
        ``name_level_dicts[content]`` when *sort* is outside
        ``(start_index, end_index]`` and *content* is a key of the mapping;
        ``None`` in every other case.
    """
    inside_window = start_index < sort <= end_index
    # The mapping is only consulted for positions outside the window.
    if inside_window or content not in name_level_dicts:
        return None
    return name_level_dicts[content]

# Fill "level" row by row (axis=1) from each row's "content" and "sort" values.
# The columns are read with [] rather than attribute access so that a column
# name can never be shadowed by a Series attribute or method of the same name.
merged['level'] = merged.apply(
    lambda row: insert_level_data(
        row["content"], row["sort"], name_level_dicts, start_index, end_index
    ),
    axis=1,
)
 
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章