# Imports come first: the score list below uses np.nan, so numpy has to be
# in scope before the dictionary literal is evaluated.
import numpy as np
import pandas as pd

# Sample exam records, one list per column. np.nan marks a missing score.
exam_data = {
    'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily',
             'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
    'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
}
# Row labels, one per student, used as the DataFrame index.
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

df = pd.DataFrame(data=exam_data, index=labels)
print(df)
# 1. Select the rows that satisfy attempts <= 2 and score >= 15
# Keep only the rows where both conditions hold. A comparison against NaN is
# False, so rows with a missing score are never selected.
print(df[(df['score'] >= 15) & (df['attempts'] <= 2)])
# 2. Compute the total of attempts
# Total number of exam attempts over all students.
# Series.sum() does the accumulation; int() converts the NumPy integer it
# returns into a plain Python int, which is what the manual loop produced.
sum_of_attempts = int(df['attempts'].sum())
print(sum_of_attempts)  # 19 for the sample data
# 3. Compute the mean of score
# Method 1 (recommended): average only the scores that are present.
# Series.mean() skips NaN by default, so a missing score adds to neither the
# sum nor the count. If every score is missing the result is NaN rather than
# a ZeroDivisionError.
mean_of_score = df['score'].mean()
print(mean_of_score)  # 13.5625 for the sample data
# Method 2: replace missing values with 0, then average the non-zero scores.
# NOTE: df is rebound to the filled frame, so all code after this point sees
# 0 where a score was NaN.
# NOTE: a genuine score of 0 cannot be told apart from a filled-in one and is
# left out of the average as well.
df = df.fillna(0)  # NaN -> 0 in every column

nonzero_scores = df.loc[df['score'] != 0, 'score']
sum_of_score = nonzero_scores.sum()
j = len(nonzero_scores)  # how many scores went into the sum
mean_of_score = nonzero_scores.mean()  # NaN (not an error) when j == 0
print(sum_of_score)
print(j)
print(mean_of_score)
# 4. Sort by score
# Rows ordered from the highest score to the lowest. sort_values returns a new
# frame; df itself keeps its original row order.
print(df.sort_values(by='score', ascending=False))
# 5. Print the column names
# Column labels as a plain Python list (df.columns on its own prints as a
# pandas Index object rather than a list).
print(list(df.columns))
# 6. Add a row and delete a row
print(df)
# A one-row frame for the new student, labelled 'k'.
df1 = pd.DataFrame(
    {"attempts": 1, "name": "Suresh", "qualify": "yes", "score": 15.5},
    index=["k"],
)
# Append the row by concatenating along the rows; df itself is left unchanged.
gd = [df, df1]
result1 = pd.concat(gd)
print(result1)
# Remove the row again by its label. This is the same as drop('k', axis=0):
# axis=0 means rows, axis=1 means columns.
# A column can also be deleted with: del df['column_name']
result2 = result1.drop(index='k')
print(result2)
# 7. Convert a column's values to booleans
# Overwrite the 'qualify' column with booleans: True exactly where the current
# value equals the string 'yes', False everywhere else.
df['qualify'] = (df['qualify'] == 'yes')
print(df)
# 8. Modify a column value
# Rename the student called 'James' to 'Suresh'.
# The change is written straight into df with one .loc[row_mask, column]
# assignment. Modifying the frame returned by query() and copying whole rows
# back is chained assignment: it acts on a copy and triggers pandas'
# SettingWithCopyWarning.
james_rows = df['name'] == 'James'
df.loc[james_rows, 'name'] = 'Suresh'
# Kept so the name new_data still refers to the updated row(s), as before.
new_data = df.loc[james_rows]
print(df)