本文要將各個國家的恐怖襲擊次數(頻率)按照國家編號,逐條賦給原始數據中的113249條記錄。
實現該效果的代碼如下:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the raw incident table.
data = pd.read_excel('E:/E盤備份/C盤jupyter代碼備份/C2018/data/C2018mathmodel.xlsx')
columns = data.columns.tolist()

# Only the country code and the coordinates are used from here on.
df1 = data[['country', 'latitude', 'longitude']]
df1.isnull().any()  # per-column missing-value check; only displays output in a notebook cell

# A point needs BOTH coordinates to be plotted, so a record is dropped when
# either one is missing (the previous `|` mask kept records that had only one).
df2 = df1.dropna(subset=['latitude', 'longitude'])
df3 = df2.sort_values(by=df2.columns.tolist())

# Per-country groups and [country, number of records] pairs.
# Grouping by the plain column name keeps the group keys scalar.
zzhou = list(df3.groupby('country'))
color_energy = [[country, group.shape[0]] for country, group in zzhou]

# Renumber the rows 0..n-1 after filtering and sorting.
df3 = df3.reset_index(drop=True)

# One row per country: 'country' (the country code) and 'frequence' (how many
# records that country has), ordered by country code with a 0..n-1 index.
# Built with groupby().size() so the column names do not depend on how the
# installed pandas version labels the output of value_counts().reset_index().
prizedCount = df3.groupby('country').size().reset_index(name='frequence')
# Attach the per-country count to every record. No `on=` is given, so pd.merge
# joins on the columns the two frames have in common (expected to be 'country').
result=pd.merge(df3,prizedCount) # the frames share the key (country), so they can be merged
# NOTE: the two triple-quoted strings below are inert string expressions; nothing
# in them is executed. The first is a remark ("both of the following approaches
# are too slow"). The second preserves two abandoned nested-loop attempts at the
# same task as the merge above: one builds a list of rows (TTxinxi) to convert
# into a DataFrame, the other writes df3['frequence'][i] cell by cell.
# The indentation inside the string was lost, so it cannot be re-enabled as is.
"""如下兩種方法都太慢了"""
"""
#構造一個list,在轉換爲DataFrame
TTxinxi=[]
for i in range(len(df3)):
xinxi=[]
for j in range(len(prizedCount)):
if df3['country'][i]==prizedCount['country_No'][j]:
xinxi.append(df3['country'][i])
xinxi.append(df3['latitude'][i])
xinxi.append(df3['longitude'][i])
xinxi.append(prizedCount['country'][j])
else:
continue
TTxinxi.append(xinxi)
#另一種想法
for i in range(len(df3)):
for j in range(len(prizedCount)):
if df3['country'][i]==prizedCount['country_No'][j]:
df3['frequence'][i]=prizedCount['country'][j]
else:
continue
"""
import mpl_toolkits.basemap
import matplotlib.pyplot as plt
# Draw every record on a world map, coloured by its country's attack count.

# The title contains Chinese characters, so a CJK-capable font has to be
# selected before any text is created.
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.subplots(figsize=(20, 9))
basemap = mpl_toolkits.basemap.Basemap()
basemap.drawcoastlines()
basemap.drawcountries(linewidth=1.5)

# pyplot.get_cmap is used instead of plt.cm.get_cmap, which newer matplotlib
# releases no longer provide. ('gist_rainbow' is an alternative colormap.)
cm = plt.get_cmap('RdYlBu')

# Use the per-country frequency itself as the colour value (previously the row
# position was passed, which does not encode frequency), and scale the
# colormap to the largest frequency present instead of a hard-coded maximum.
frequency = result['frequence']
sc = plt.scatter(
    result['longitude'],
    result['latitude'],
    c=frequency,
    vmin=0,
    vmax=frequency.max(),
    s=7,
    cmap=cm,
)
plt.title('1998-2017世界恐怖襲擊案發地分佈圖')

# The colour scale is continuous, so the colorbar serves as the legend;
# plt.legend() had no labelled artists to show and only produced a warning.
plt.colorbar(sc)

# Save only after the title and colorbar exist, otherwise the file lacks them.
plt.savefig('D:/world.png')
plt.show()
#cm = plt.cm.get_cmap('RdYlBu')
#xy = range(20)
#z = xy
#sc = plt.scatter(xy, xy, c=z, vmin=0, vmax=20, s=35, cmap=cm)
#plt.colorbar(sc)
#plt.show()