import pandas as pd
from sqlalchemy import create_engine
# Connect to the local MySQL database "yoyo" through the PyMySQL driver.
# NOTE(review): the credentials are hard-coded in the URL; acceptable for a
# local demo, but load them from configuration before sharing this script.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
sql = """select * from book_management_signin;"""
df = pd.read_sql(sql, engine)

# Summary statistics as produced by DataFrame.describe().
print(df.describe())
# Boolean-mask filter: keep only the rows whose id is below 20.
print(df[df['id'] < 20])
import numpy as np
import pandas as pd
import numpy as np
# Simulated data: a 500 x 507 matrix drawn from a standard normal
# distribution, one row per stock and one column per day.
day_data = np.random.normal(0, 1, (500, 507))
stock_list = [f"股票{i}" for i in range(day_data.shape[0])]
date = [f"第{i}天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# Keep the rows whose day-0 value is positive, sorted largest first.
print(df[df["第0天"] > 0].sort_values("第0天", ascending=False))
2,使用復合邏輯運算符進行篩選(pandas的布爾索引)
import pandas as pd
from sqlalchemy import create_engine
# Connect to the local MySQL database "yoyo" through the PyMySQL driver.
# NOTE(review): the credentials are hard-coded in the URL; acceptable for a
# local demo, but load them from configuration before sharing this script.
engine = create_engine('mysql+pymysql://root:root@localhost:3306/yoyo')
sql = """select * from role_info;"""
df = pd.read_sql(sql, engine)

print(df.dtypes)

# Compound boolean mask: each condition is parenthesised and the two are
# combined element-wise with `|` (or). Show only the role_name column.
print(df[(df['role_name'].str.len() > 2) | (df['id'] < 10)]['role_name'])

# Same pattern with a different length test; .loc picks the two columns to
# show (a list of labels is the documented way to select several columns).
print(df[(df['role_name'].str.len() < 2) | (df['id'] < 10)].loc[:, ["id", "role_name"]])
import numpy as np
import pandas as pd
import numpy as np
# Simulated data: a 500 x 507 matrix drawn from a standard normal
# distribution, one row per stock and one column per day.
day_data = np.random.normal(0, 1, (500, 507))
stock_list = [f"股票{i}" for i in range(day_data.shape[0])]
date = [f"第{i}天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# The same row filter written as a boolean mask (here additionally sorted):
# print(df[(df["第0天"] > 0) & (df["第1天"] < 0)].sort_values(["第0天", "第1天"], ascending=False))

# DataFrame.query expresses the filter more conveniently as a string.
print(df.query("第0天>0 & 第1天<0"))
3,使用isin()運算符進行篩選
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so the generated sample is reproducible from run to run.
np.random.seed(4)
day_data = np.random.normal(0, 1, (500, 507))
# Round to one decimal so exact-value membership tests can match.
day_data = np.round(day_data, 1)
stock_list = [f"股票{i}" for i in range(day_data.shape[0])]
date = [f"第{i}天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# isin() tests each value against the given list; the resulting boolean
# Series is used as a row mask.
print(df[df["第0天"].isin([0.1, 0.3])])
4,數學運算
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so the generated sample is reproducible from run to run.
np.random.seed(4)
day_data = np.random.normal(0, 1, (500, 507))
day_data = np.round(day_data, 1)
stock_list = [f"股票{i}" for i in range(day_data.shape[0])]
date = [f"第{i}天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# Arithmetic, addition (disabled alternative):
# df["第0天"] = df["第0天"].add(1)

# Arithmetic, subtraction: store day-1 minus day-0 in a new "change" column.
df["change"] = df["第1天"].sub(df["第0天"])
print(df)
5,自定義運算函數
import numpy as np
import pandas as pd
import numpy as np
# Fixed seed so the generated sample is reproducible from run to run.
np.random.seed(4)
day_data = np.random.normal(0, 1, (500, 507))
day_data = np.round(day_data, 1)
stock_list = [f"股票{i}" for i in range(day_data.shape[0])]
date = [f"第{i}天" for i in range(day_data.shape[1])]
df = pd.DataFrame(day_data, index=stock_list, columns=date)

# Selecting several columns at once (disabled alternative):
# print(df[["第0天", "第1天"]])

# apply() with axis=0 calls the function once per column: max - min of each
# of the two selected columns.
print(df[["第0天", "第1天"]].apply(lambda x: x.max() - x.min(), axis=0))
# With axis=1 the function is called once per row: max - min across the two
# selected columns for every stock.
print(df[["第0天", "第1天"]].apply(lambda x: x.max() - x.min(), axis=1))
def disting_type(strs):
    """Classify a logic-id string into one of the type codes 1, 2 or 3.

    The text following the first ``"body0/"`` separator (up to the next
    occurrence, if any) is split into alternating runs of digit and
    non-digit characters, and the second run is parsed as an integer.

    Args:
        strs: String containing ``"body0/"``; the part after it is expected
            to begin with a non-digit run followed by a digit run.

    Returns:
        3 if the parsed number is <= 23; otherwise 1 when the segment does
        not contain ``"tbl"``, else 2.

    Raises:
        IndexError: if ``"body0/"`` is absent or the segment has fewer than
            two runs.
        ValueError: if the second run is not numeric (i.e. the segment
            starts with a digit).
    """
    # Local import keeps the function self-contained; no file-level import
    # of groupby is visible in this part of the file.
    from itertools import groupby

    strs = strs.split("body0/")[1]
    # Consecutive characters are grouped by "is a digit"; index 1 is the
    # second run of the segment.
    exist_num = [''.join(g) for _, g in groupby(strs, key=lambda x: x.isdigit())][1]
    if int(exist_num) <= 23:
        return 3
    if strs.find("tbl") == -1:
        return 1
    return 2


# Disabled statement kept from the original source:
# merged['level'] = merged['level'].apply(lambda x: -1 if x == 'nan' else x)
# Fill the "type" column by running disting_type over every "logic_id" value.
transfor["type"] = transfor["logic_id"].apply(disting_type)