6. 綜合練習
import numpy as np
import pandas as pd
# 不用print,直接顯示結果
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# 顯示所有列
pd.set_option('display.max_columns', 600)
一、2002 年-2018 年上海機動車牌照拍賣
(1) 哪一次拍賣的中標率首次小於 5%?
(考察點:創建列,索引)
(2) 按年統計拍賣最低價的下列統計量:最大值、均值、 0.75 分位數,要求顯示在同一張表上。
建議先完成第三問,再做第二問
(考察點:創建列,索引,分組,統計量,分列)
(3) 將第一列時間列拆分成兩個列,一列爲年份(格式爲 20××),另一列爲月份(英語縮寫),添加到列表作爲第一第二列,並將原表第一列刪除,其他列依次向後順延。
(考察點:拆分列)
(4) 現在將表格行索引設爲多級索引,外層爲年份,內層爲原表格第二至第五列的變量名,列索引爲月份。
(考察點:多級索引)
(5) 一般而言某個月最低價與上月最低價的差額,會與該月均值與上月均值的差額具有相同的正負號,哪些拍賣時間不具有這個特點?
(考察點:統計量,分組,合併,索引排序)
(6) 將某一個月牌照發行量與其前兩個月發行量均值的差額定義爲發行增益,最初的兩個月用 0 填充,求發行增益極值出現的時間。
包含極大值極小值
(考察點:統計量,索引,排序,差分)
# MVL = Motor Vehicle License
MVL = pd.read_csv('./2002年-2018年上海機動車拍照拍賣.csv')
MVL.head()
# MVL.info()
Date | Total number of license issued | lowest price | avg price | Total number of applicants | |
---|---|---|---|---|---|
0 | 2-Jan | 1400 | 13600 | 14735 | 3718 |
1 | 2-Feb | 1800 | 13100 | 14057 | 4590 |
2 | 2-Mar | 2000 | 14300 | 14662 | 5190 |
3 | 2-Apr | 2300 | 16000 | 16334 | 4806 |
4 | 2-May | 2350 | 17800 | 18357 | 4665 |
(1) 哪一次拍賣的中標率首次小於 5%?
(考察點:創建列,索引)
MVL[(MVL['Total number of license issued']/MVL['Total number of applicants'])<=0.05]
Total number of license issued | lowest price | avg price | Total number of applicants | ||
---|---|---|---|---|---|
年份 | 月份 | ||||
2001 | May | 7482 | 79000 | 79099 | 156007 |
Jun | 7441 | 80000 | 80020 | 172205 | |
Jul | 7531 | 83100 | 83171 | 166302 | |
Aug | 7454 | 82600 | 82642 | 166939 | |
Oct | 7763 | 85300 | 85424 | 170995 | |
Nov | 7514 | 84600 | 84703 | 169159 | |
Dec | 7698 | 84500 | 84572 | 179133 | |
Feb | 8363 | 83200 | 83244 | 196470 | |
Mar | 8310 | 83100 | 83148 | 221109 | |
Apr | 11829 | 85100 | 85127 | 256897 | |
May | 11598 | 85000 | 85058 | 277889 | |
Jun | 11546 | 84400 | 84483 | 275438 | |
Jul | 11475 | 87200 | 87235 | 240750 | |
Aug | 11549 | 86900 | 86946 | 251188 | |
Feb | 10157 | 88200 | 88240 | 251717 | |
Mar | 10356 | 87800 | 87916 | 262010 | |
Apr | 12196 | 89800 | 89850 | 252273 | |
May | 10316 | 90100 | 90209 | 270197 | |
Jun | 10312 | 89400 | 89532 | 244349 | |
Jul | 10325 | 92200 | 92250 | 269189 | |
Aug | 10558 | 91600 | 91629 | 256083 | |
Sep | 12413 | 91300 | 91415 | 250566 | |
Oct | 11388 | 93500 | 93540 | 244868 | |
Nov | 11002 | 93100 | 93130 | 226911 | |
Mar | 9855 | 88100 | 88176 | 217056 |
(2) 按年統計拍賣最低價的下列統計量:最大值、均值、 0.75 分位數,要求顯示在同一張表上。
建議先完成第三問,再做第二問
(考察點:創建列,索引,分組,統計量,分列)
# for name,group in MVL.groupby('年份'):
MVL.groupby('年份')['lowest price '].agg(['max','mean','quantile'])
max | mean | quantile | |
---|---|---|---|
年份 | |||
2001 | 93500 | 71640.740741 | 75750.0 |
2002 | 30800 | 20316.666667 | 19700.0 |
2003 | 38500 | 31983.333333 | 33600.0 |
2004 | 44200 | 29408.333333 | 28650.0 |
2005 | 37900 | 31908.333333 | 33000.0 |
2006 | 39900 | 37058.333333 | 37750.0 |
2007 | 53800 | 45691.666667 | 45850.0 |
2008 | 37300 | 29945.454545 | 32600.0 |
2009 | 36900 | 31333.333333 | 31050.0 |
(3) 將第一列時間列拆分成兩個列,一列爲年份(格式爲 20××),另一列爲月份(英語縮寫),添加到列表作爲第一第二列,並將原表第一列刪除,其他列依次向後順延。
(考察點:拆分列)
MVL['年份']=MVL['Date'].apply(lambda x:int(str(x)[:-4])+2000)
MVL['月份']=MVL['Date'].apply(lambda x:str(x)[-3:])
MVL.drop(columns='Date',inplace=True)
MVL=MVL[['年份','月份','Total number of license issued','lowest price ','avg price','Total number of applicants']]
# MVL.reorder_levels(['年份','月份','Total number of license issued','lowest price','avg price','Total number of applicants'],axis=0).head()
# MVL=MVL.set_index(['年份','月份'])
MVL.head()
年份 | 月份 | Total number of license issued | lowest price | avg price | Total number of applicants | |
---|---|---|---|---|---|---|
0 | 2002 | Jan | 1400 | 13600 | 14735 | 3718 |
1 | 2002 | Feb | 1800 | 13100 | 14057 | 4590 |
2 | 2002 | Mar | 2000 | 14300 | 14662 | 5190 |
3 | 2002 | Apr | 2300 | 16000 | 16334 | 4806 |
4 | 2002 | May | 2350 | 17800 | 18357 | 4665 |
(4) 現在將表格行索引設爲多級索引,外層爲年份,內層爲原表格第二至第五列的變量名,列索引爲月份。
(考察點:多級索引)
# MVL.set_index(['年份','Total number of license issued','lowest price ','avg price','Total number of applicants'])
# pd.crosstab(index=MVL['年份','Total number of license issued','lowest price ','avg price','Total number of applicants'],columns=MVL['月份'])
# MVL.pivot_table(index=['年份','Total number of license issued','lowest price ','avg price','Total number of applicants'],columns='月份',values=1).head()
MVL_m=MVL.melt(id_vars=['年份','月份'],value_vars=['Total number of license issued','lowest price ','avg price','Total number of applicants'])
MVL_m=MVL_m.set_index(['年份','variable'])
pd.pivot_table(MVL_m,index=['年份','variable'],columns='月份',values='value')
月份 | Apr | Aug | Dec | Feb | Jan | Jul | Jun | Mar | May | Nov | Oct | Sep | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
年份 | variable | ||||||||||||
2002 | Total number of applicants | 4806.0 | 4640.0 | 3525.0 | 4590.0 | 3718.0 | 3774.0 | 4502.0 | 5190.0 | 4665.0 | 4021.0 | 4661.0 | 4393.0 |
Total number of license issued | 2300.0 | 3000.0 | 3600.0 | 1800.0 | 1400.0 | 3000.0 | 2800.0 | 2000.0 | 2350.0 | 3200.0 | 3200.0 | 3200.0 | |
avg price | 16334.0 | 21601.0 | 27848.0 | 14057.0 | 14735.0 | 20904.0 | 20178.0 | 14662.0 | 18357.0 | 31721.0 | 27040.0 | 24040.0 | |
lowest price | 16000.0 | 21000.0 | 27800.0 | 13100.0 | 13600.0 | 19800.0 | 19600.0 | 14300.0 | 17800.0 | 30800.0 | 26400.0 | 23600.0 | |
2003 | Total number of applicants | 8794.0 | 9315.0 | 10491.0 | 12030.0 | 9442.0 | 11929.0 | 15507.0 | 11219.0 | 14634.0 | 9849.0 | 9383.0 | 8532.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2017 | lowest price | 89800.0 | 91600.0 | 92800.0 | 88200.0 | 87600.0 | 92200.0 | 89400.0 | 87800.0 | 90100.0 | 93100.0 | 93500.0 | 91300.0 |
2018 | Total number of applicants | 204980.0 | 192755.0 | 165442.0 | 220831.0 | 226316.0 | 202337.0 | 209672.0 | 217056.0 | 198627.0 | 177355.0 | 181861.0 | 189142.0 |
Total number of license issued | 11916.0 | 10402.0 | 12850.0 | 11098.0 | 12183.0 | 10395.0 | 10775.0 | 9855.0 | 10216.0 | 11766.0 | 10728.0 | 12712.0 | |
avg price | 87089.0 | 88365.0 | 87508.0 | 87660.0 | 87936.0 | 88380.0 | 87900.0 | 88176.0 | 89018.0 | 87374.0 | 88070.0 | 87410.0 | |
lowest price | 86900.0 | 88300.0 | 87400.0 | 87600.0 | 87900.0 | 88300.0 | 87800.0 | 88100.0 | 89000.0 | 87300.0 | 88000.0 | 87300.0 |
68 rows × 12 columns
(5) 一般而言某個月最低價與上月最低價的差額,會與該月均值與上月均值的差額具有相同的正負號,哪些拍賣時間不具有這個特點?
(考察點:統計量,分組,合併,索引排序)
MVL['LOW_Difference']=MVL[['lowest price ']].diff()
MVL['AVG_Difference']=MVL[['avg price']].diff()
MVL[MVL['LOW_Difference']*MVL['AVG_Difference']<0]
# df.diff(2)
年份 | 月份 | Total number of license issued | lowest price | avg price | Total number of applicants | LOW_Difference | AVG_Difference | |
---|---|---|---|---|---|---|---|---|
21 | 2003 | Oct | 4500 | 32800 | 34842 | 9383 | 4000.0 | -3886.0 |
22 | 2003 | Nov | 5042 | 33100 | 34284 | 9849 | 300.0 | -558.0 |
29 | 2004 | Jun | 6233 | 17800 | 21001 | 19233 | 7000.0 | -13225.0 |
36 | 2005 | Jan | 5500 | 28500 | 32520 | 6208 | -800.0 | 2238.0 |
37 | 2005 | Feb | 3800 | 31700 | 32425 | 8949 | 3200.0 | -95.0 |
44 | 2005 | Sep | 6700 | 26500 | 28927 | 10972 | 1500.0 | -6978.0 |
52 | 2006 | May | 4500 | 37700 | 38139 | 8301 | 200.0 | -187.0 |
56 | 2006 | Sep | 6500 | 37000 | 41601 | 7064 | -2900.0 | 1142.0 |
60 | 2007 | Jan | 6000 | 38500 | 40974 | 6587 | -1300.0 | 456.0 |
61 | 2007 | Feb | 3500 | 39100 | 40473 | 5056 | 600.0 | -501.0 |
71 | 2007 | Dec | 7500 | 50000 | 56042 | 10356 | -3800.0 | 1725.0 |
128 | 2012 | Oct | 9500 | 65200 | 66708 | 19921 | -500.0 | 283.0 |
(6) 將某一個月牌照發行量與其前兩個月發行量均值的差額定義爲發行增益,最初的兩個月用 0 填充,求發行增益極值出現的時間。
包含極大值極小值
(考察點:統計量,索引,排序,差分)
MVL.sort_index()
MVL['m1']=MVL['Total number of license issued'].diff()
MVL['m2']=MVL['Total number of license issued'].diff(2)
MVL['re']=(MVL['m1']+MVL['m2'])/2
MVL
MVL['re'].idxmax()
MVL.loc[72]
MVL[MVL['re']==MVL['re'].min()]
年份 | 月份 | Total number of license issued | lowest price | avg price | Total number of applicants | LOW_Difference | AVG_Difference | m1 | m2 | re | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2002 | Jan | 1400 | 13600 | 14735 | 3718 | NaN | NaN | NaN | NaN | NaN |
1 | 2002 | Feb | 1800 | 13100 | 14057 | 4590 | -500.0 | -678.0 | 400.0 | NaN | NaN |
2 | 2002 | Mar | 2000 | 14300 | 14662 | 5190 | 1200.0 | 605.0 | 200.0 | 600.0 | 400.0 |
3 | 2002 | Apr | 2300 | 16000 | 16334 | 4806 | 1700.0 | 1672.0 | 300.0 | 500.0 | 400.0 |
4 | 2002 | May | 2350 | 17800 | 18357 | 4665 | 1800.0 | 2023.0 | 50.0 | 350.0 | 200.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
198 | 2018 | Aug | 10402 | 88300 | 88365 | 192755 | 0.0 | -15.0 | 7.0 | -373.0 | -183.0 |
199 | 2018 | Sep | 12712 | 87300 | 87410 | 189142 | -1000.0 | -955.0 | 2310.0 | 2317.0 | 2313.5 |
200 | 2018 | Oct | 10728 | 88000 | 88070 | 181861 | 700.0 | 660.0 | -1984.0 | 326.0 | -829.0 |
201 | 2018 | Nov | 11766 | 87300 | 87374 | 177355 | -700.0 | -696.0 | 1038.0 | -946.0 | 46.0 |
202 | 2018 | Dec | 12850 | 87400 | 87508 | 165442 | 100.0 | 134.0 | 1084.0 | 2122.0 | 1603.0 |
203 rows × 11 columns
年份 | 月份 | Total number of license issued | lowest price | avg price | Total number of applicants | LOW_Difference | AVG_Difference | m1 | m2 | re | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2002 | Jan | 1400 | 13600 | 14735 | 3718 | NaN | NaN | NaN | NaN | NaN |
1 | 2002 | Feb | 1800 | 13100 | 14057 | 4590 | -500.0 | -678.0 | 400.0 | NaN | NaN |
2 | 2002 | Mar | 2000 | 14300 | 14662 | 5190 | 1200.0 | 605.0 | 200.0 | 600.0 | 400.0 |
3 | 2002 | Apr | 2300 | 16000 | 16334 | 4806 | 1700.0 | 1672.0 | 300.0 | 500.0 | 400.0 |
4 | 2002 | May | 2350 | 17800 | 18357 | 4665 | 1800.0 | 2023.0 | 50.0 | 350.0 | 200.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
198 | 2018 | Aug | 10402 | 88300 | 88365 | 192755 | 0.0 | -15.0 | 7.0 | -373.0 | -183.0 |
199 | 2018 | Sep | 12712 | 87300 | 87410 | 189142 | -1000.0 | -955.0 | 2310.0 | 2317.0 | 2313.5 |
200 | 2018 | Oct | 10728 | 88000 | 88070 | 181861 | 700.0 | 660.0 | -1984.0 | 326.0 | -829.0 |
201 | 2018 | Nov | 11766 | 87300 | 87374 | 177355 | -700.0 | -696.0 | 1038.0 | -946.0 | 46.0 |
202 | 2018 | Dec | 12850 | 87400 | 87508 | 165442 | 100.0 | 134.0 | 1084.0 | 2122.0 | 1603.0 |
203 rows × 11 columns
72
年份 2008
月份 Jan
Total number of license issued 16000
lowest price 8100
avg price 23370
Total number of applicants 20539
LOW_Difference -41900
AVG_Difference -32672
m1 8500
m2 8500
re 8500
Name: 72, dtype: object
年份 | 月份 | Total number of license issued | lowest price | avg price | Total number of applicants | LOW_Difference | AVG_Difference | m1 | m2 | re | |
---|---|---|---|---|---|---|---|---|---|---|---|
74 | 2008 | Apr | 9000 | 37300 | 37659 | 37072 | 6000.0 | 5490.0 | -300.0 | -7000.0 | -3650.0 |
二、2007 年-2019 年俄羅斯機場貨運航班運載量
(1) 求每年貨運航班總運量。
(考察點:統計量,分組)
(2) 每年記錄的機場都是相同的嗎?
(考察點:分組,查看類別值)
(3) 按年計算 2010 年-2015 年全年貨運量記錄爲 0 的機場航班比例。
(考察點:分組,統計量,篩選)
(4) 若某機場至少存在 5 年或以上滿足所有月運量記錄都爲 0,則將其所有年份的記錄信息從表中刪除,並返回處理後的表格
(考察點:數據刪除,分組,篩選,索引)
(5) 採用一種合理的方式將所有機場劃分爲東南西北四個分區,並給出 2017年-2019 年貨運總量最大的區域。
提示:最後一列 →_→
(考察點:分組)
(6) 在統計學中常常用秩代表排名,現在規定某個機場某年某個月的秩爲該機場該月在當年所有月份中貨運量的排名(例如 *** 機場 19 年 1 月運量在整個 19 年 12 個月中排名第一,則秩爲 1),那麼判斷某月運量情況的相對大小的秩方法爲將所有機場在該月的秩排名相加,並將這個量定義爲每一個月的秩綜合指數,請根據上述定義計算 2016 年 12 個月的秩綜合指數。
(考察點:分組,合併,排序)
# RAS = Russian airport shipping
RAS = pd.read_csv('./2007年-2019年俄羅斯貨運航班運載量.csv')
RAS.head()
RAS.info()
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abakan | 2019 | 44.70 | 66.21 | 72.7 | 75.82 | 100.34 | 78.38 | 63.88 | 73.06 | 66.74 | 75.44 | 110.5 | 89.8 | 917.57 | (Decimal('91.399735'), Decimal('53.751351')) |
1 | Aikhal | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('111.543324'), Decimal('65.957161')) |
2 | Loss | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('125.398355'), Decimal('58.602489')) |
3 | Amderma | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('61.577429'), Decimal('69.759076')) |
4 | Anadyr (Carbon) | 2019 | 81.63 | 143.01 | 260.9 | 304.36 | 122.00 | 106.87 | 84.99 | 130.00 | 102.00 | 118.00 | 94.0 | 199.0 | 1746.76 | (Decimal('177.738273'), Decimal('64.713433')) |
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3711 entries, 0 to 3710
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Airport name 3711 non-null object
1 Year 3711 non-null int64
2 January 3711 non-null float64
3 February 3711 non-null float64
4 March 3711 non-null float64
5 April 3711 non-null float64
6 May 3711 non-null float64
7 June 3711 non-null float64
8 July 3711 non-null float64
9 August 3711 non-null float64
10 September 3711 non-null float64
11 October 3711 non-null float64
12 November 3711 non-null float64
13 December 3711 non-null float64
14 Whole year 3711 non-null float64
15 Airport coordinates 3711 non-null object
dtypes: float64(13), int64(1), object(2)
memory usage: 464.0+ KB
(1) 求每年貨運航班總運量。
(考察點:統計量,分組)
RAS.groupby('Year')['Whole year'].sum()
Year
2007 659438.23
2008 664682.46
2009 560809.77
2010 693033.98
2011 818691.71
2012 846388.03
2013 792337.08
2014 729457.12
2015 630208.97
2016 679370.15
2017 773662.28
2018 767095.28
2019 764606.27
Name: Whole year, dtype: float64
(2) 每年記錄的機場都是相同的嗎?
(考察點:分組,查看類別值)
temp=pd.DataFrame()
for name ,group in RAS.groupby('Year'):
print(name,temp.equals(pd.DataFrame(group['Airport name'].value_counts())))
temp=pd.DataFrame(group['Airport name'].value_counts())
# display(pd.DataFrame(group['Airport name'].value_counts()))
# print(name )
# display(group['Airport name'].value_counts())
temp
2007 False
2008 True
2009 True
2010 True
2011 True
2012 True
2013 True
2014 True
2015 True
2016 True
2017 True
2018 False
2019 False
Airport name | |
---|---|
Usinsk | 2 |
Nyagan | 2 |
Vorkuta | 2 |
Ust-Tsilma | 2 |
Nerungri (Chulman) | 1 |
... | ... |
Keperveem | 1 |
Blagoveshchensk (Ignatevo) | 1 |
Nogliki | 1 |
Sovetskaya Gavan | 1 |
Мотыгино | 1 |
247 rows × 1 columns
(3) 按年計算 2010 年-2015 年全年貨運量記錄爲 0 的機場航班比例。
(考察點:分組,統計量,篩選)
RAS_10_15=RAS[RAS['Year'].isin([2010,2011,2012,2013,2014,2015])]
RAS_10_15.head()
for name ,group in RAS_10_15.groupby('Year'):
print(name,group[group['Whole year']==0].count()/group.count())
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1083 | Abakan | 2015 | 37.70 | 47.97 | 54.67 | 82.12 | 68.81 | 112.95 | 55.83 | 95.20 | 137.79 | 72.13 | 63.67 | 78.30 | 907.14 | (Decimal('91.399735'), Decimal('53.751351')) |
1084 | Aikhal | 2015 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | (Decimal('111.543324'), Decimal('65.957161')) |
1085 | Loss | 2015 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | (Decimal('125.398355'), Decimal('58.602489')) |
1086 | Amderma | 2015 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | (Decimal('61.577429'), Decimal('69.759076')) |
1087 | Anadyr | 2015 | 124.31 | 254.19 | 340.37 | 286.06 | 156.55 | 124.95 | 89.05 | 72.29 | 118.16 | 92.89 | 158.39 | 316.94 | 2134.15 | (Decimal('177.738273'), Decimal('64.713433')) |
2010 Airport name 0.767123
Year 0.767123
January 0.767123
February 0.767123
March 0.767123
April 0.767123
May 0.767123
June 0.767123
July 0.767123
August 0.767123
September 0.767123
October 0.767123
November 0.767123
December 0.767123
Whole year 0.767123
Airport coordinates 0.767123
dtype: float64
2011 Airport name 0.770548
Year 0.770548
January 0.770548
February 0.770548
March 0.770548
April 0.770548
May 0.770548
June 0.770548
July 0.770548
August 0.770548
September 0.770548
October 0.770548
November 0.770548
December 0.770548
Whole year 0.770548
Airport coordinates 0.770548
dtype: float64
2012 Airport name 0.770548
Year 0.770548
January 0.770548
February 0.770548
March 0.770548
April 0.770548
May 0.770548
June 0.770548
July 0.770548
August 0.770548
September 0.770548
October 0.770548
November 0.770548
December 0.770548
Whole year 0.770548
Airport coordinates 0.770548
dtype: float64
2013 Airport name 0.770548
Year 0.770548
January 0.770548
February 0.770548
March 0.770548
April 0.770548
May 0.770548
June 0.770548
July 0.770548
August 0.770548
September 0.770548
October 0.770548
November 0.770548
December 0.770548
Whole year 0.770548
Airport coordinates 0.770548
dtype: float64
2014 Airport name 0.770548
Year 0.770548
January 0.770548
February 0.770548
March 0.770548
April 0.770548
May 0.770548
June 0.770548
July 0.770548
August 0.770548
September 0.770548
October 0.770548
November 0.770548
December 0.770548
Whole year 0.770548
Airport coordinates 0.770548
dtype: float64
2015 Airport name 0.770548
Year 0.770548
January 0.770548
February 0.770548
March 0.770548
April 0.770548
May 0.770548
June 0.770548
July 0.770548
August 0.770548
September 0.770548
October 0.770548
November 0.770548
December 0.770548
Whole year 0.770548
Airport coordinates 0.770548
dtype: float64
(4) 若某機場至少存在 5 年或以上滿足所有月運量記錄都爲 0,則將其所有年份的記錄信息從表中刪除,並返回處理後的表格
(考察點:數據刪除,分組,篩選,索引)
RAS_0=RAS[RAS['Whole year']==0]
RAS_0.head()
RAS_0=RAS_0[['Airport name','Whole year','Year']]
temp=RAS_0.groupby('Airport name')['Year'].count()>=5
type(temp)
print('temp',temp)
RAS[~RAS['Airport name'].isin(temp.index)]#== False
print(RAS_0['Airport name'].isin(temp.index) )
# RAS_0.loc[temp.values]['Airport name']
# RAS_0.groupby('Airport name').head(1)#.count()
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | Aikhal | 2019 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | (Decimal('111.543324'), Decimal('65.957161')) |
2 | Loss | 2019 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | (Decimal('125.398355'), Decimal('58.602489')) |
3 | Amderma | 2019 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | (Decimal('61.577429'), Decimal('69.759076')) |
6 | Apatite (Khibiny) | 2019 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | (Decimal('33.581999'), Decimal('67.459641')) |
7 | Arkhangelsk (Vaskovo) | 2019 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | (Decimal('40.706789'), Decimal('64.592645')) |
pandas.core.series.Series
temp Airport name
Achinsk True
Aikhal True
Amderma True
Antypayuta True
Apatite (Khibiny) True
...
Лешуконское True
Мотыгино True
Нюрба True
Среднеколымск True
Таксимо True
Name: Year, Length: 230, dtype: bool
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abakan | 2019 | 44.70 | 66.21 | 72.70 | 75.82 | 100.34 | 78.38 | 63.88 | 73.06 | 66.74 | 75.44 | 110.50 | 89.80 | 917.57 | (Decimal('91.399735'), Decimal('53.751351')) |
4 | Anadyr (Carbon) | 2019 | 81.63 | 143.01 | 260.90 | 304.36 | 122.00 | 106.87 | 84.99 | 130.00 | 102.00 | 118.00 | 94.00 | 199.00 | 1746.76 | (Decimal('177.738273'), Decimal('64.713433')) |
5 | Anapa (Vitjazevo) | 2019 | 45.92 | 53.15 | 54.00 | 54.72 | 52.00 | 67.45 | 172.31 | 72.57 | 70.00 | 63.00 | 69.00 | 82.10 | 856.22 | (Decimal('37.341511'), Decimal('45.003748')) |
8 | Arkhangelsk (Talagy) | 2019 | 85.61 | 118.70 | 131.39 | 144.82 | 137.95 | 140.18 | 128.56 | 135.68 | 124.75 | 139.60 | 210.27 | 307.10 | 1804.61 | (Decimal('40.714892'), Decimal('64.596138')) |
9 | Astrakhan (Narimanovo) | 2019 | 51.75 | 61.08 | 65.60 | 71.84 | 71.38 | 63.95 | 164.86 | 79.46 | 85.21 | 87.23 | 79.06 | 99.16 | 980.58 | (Decimal('47.999896'), Decimal('46.287344')) |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3693 | Reads (tub) | 2007 | 55.96 | 80.09 | 85.90 | 154.54 | 162.71 | 107.51 | 80.14 | 138.71 | 133.19 | 188.97 | 228.84 | 184.00 | 1600.56 | (Decimal('113.306492'), Decimal('52.020464')) |
3705 | Yuzhno-(Khomutovo) | 2007 | 710.80 | 970.00 | 1330.30 | 1352.30 | 1324.40 | 1613.00 | 1450.70 | 1815.60 | 1902.30 | 1903.20 | 1666.10 | 1632.10 | 17670.80 | (Decimal('142.723677'), Decimal('46.886967')) |
3706 | Yakutsk | 2007 | 583.70 | 707.80 | 851.80 | 1018.00 | 950.80 | 900.00 | 1154.90 | 1137.84 | 1485.50 | 1382.50 | 1488.00 | 1916.60 | 13577.44 | (Decimal('129.750225'), Decimal('62.086594')) |
3708 | Yamburg | 2007 | 3.55 | 0.16 | 3.37 | 5.32 | 4.31 | 6.30 | 6.88 | 3.60 | 4.13 | 4.93 | 4.17 | 8.87 | 55.59 | (Decimal('75.097783'), Decimal('67.980026')) |
3709 | Yaroslavl (Tunoshna) | 2007 | 847.00 | 1482.90 | 1325.40 | 1235.97 | 629.00 | 838.00 | 1211.30 | 915.00 | 1249.60 | 1650.50 | 1822.60 | 2055.60 | 15262.87 | (Decimal('40.170054'), Decimal('57.56231')) |
795 rows × 16 columns
1 True
2 True
3 True
6 True
7 True
...
3702 True
3703 True
3704 True
3707 True
3710 True
Name: Airport name, Length: 2807, dtype: bool
(5) 採用一種合理的方式將所有機場劃分爲東南西北四個分區,並給出 2017年-2019 年貨運總量最大的區域。
提示:最後一列 →_→
(考察點:分組)
RAS['x']=RAS['Airport coordinates'].apply(lambda x:str(x).split(',')[0][10:-2] if str(x)!='Not found'else 'Not found')
RAS['y']=RAS['Airport coordinates'].apply(lambda x:str(x).split(',')[1][10:-3] if str(x)!='Not found'else 'Not found')
RAS=RAS[(RAS['x']!='Not found')&(RAS['x']!='Abakan')]
RAS.head()
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | x | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abakan | 2019 | 44.70 | 66.21 | 72.7 | 75.82 | 100.34 | 78.38 | 63.88 | 73.06 | 66.74 | 75.44 | 110.5 | 89.8 | 917.57 | (Decimal('91.399735'), Decimal('53.751351')) | 91.399735 | 53.751351 |
1 | Aikhal | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('111.543324'), Decimal('65.957161')) | 111.543324 | 65.957161 |
2 | Loss | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('125.398355'), Decimal('58.602489')) | 125.398355 | 58.602489 |
3 | Amderma | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('61.577429'), Decimal('69.759076')) | 61.577429 | 69.759076 |
4 | Anadyr (Carbon) | 2019 | 81.63 | 143.01 | 260.9 | 304.36 | 122.00 | 106.87 | 84.99 | 130.00 | 102.00 | 118.00 | 94.0 | 199.0 | 1746.76 | (Decimal('177.738273'), Decimal('64.713433')) | 177.738273 | 64.713433 |
RAS['x']=RAS['Airport coordinates'].apply(lambda x:str(x).split(',')[0][10:-2] if str(x)!='Not found'else 'Not found')
# RAS['x']=RAS['Airport coordinates'].apply(lambda x:print(str(x).split(',')) )
RAS['y']=RAS['Airport coordinates'].apply(lambda x:str(x).split(',')[1][10:-3] if str(x)!='Not found'else 'Not found')
RAS=RAS[(RAS['x']!='Not found')&(RAS['x']!='Abakan')]
RAS=RAS[RAS['y']!='Not found']#&(RAS['y']!='Abakan')
# RAS['X']=pd.cut(RAS['x'],bins=[RAS['x'].min(),RAS['x'].mean(),RAS['x'].max()])
# RAS['Y']=pd.cut(RAS['y'],bins=[RAS['y'].min(),RAS['y'].mean(),RAS['y'].max()])
# RAS['region']=RAS['X']+RAS['Y']
RAS.head()
# RAS[RAS['x']=='Abakan']
# RAS['x'].astype(np.int64).mean()
Airport name | Year | January | February | March | April | May | June | July | August | September | October | November | December | Whole year | Airport coordinates | x | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abakan | 2019 | 44.70 | 66.21 | 72.7 | 75.82 | 100.34 | 78.38 | 63.88 | 73.06 | 66.74 | 75.44 | 110.5 | 89.8 | 917.57 | (Decimal('91.399735'), Decimal('53.751351')) | 91.399735 | 53.751351 |
1 | Aikhal | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('111.543324'), Decimal('65.957161')) | 111.543324 | 65.957161 |
2 | Loss | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('125.398355'), Decimal('58.602489')) | 125.398355 | 58.602489 |
3 | Amderma | 2019 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.00 | (Decimal('61.577429'), Decimal('69.759076')) | 61.577429 | 69.759076 |
4 | Anadyr (Carbon) | 2019 | 81.63 | 143.01 | 260.9 | 304.36 | 122.00 | 106.87 | 84.99 | 130.00 | 102.00 | 118.00 | 94.0 | 199.0 | 1746.76 | (Decimal('177.738273'), Decimal('64.713433')) | 177.738273 | 64.713433 |
(6) 在統計學中常常用秩代表排名,現在規定某個機場某年某個月的秩爲該機場該月在當年所有月份中貨運量的排名(例如 *** 機場 19 年 1 月運量在整個 19 年 12 個月中排名第一,則秩爲 1),那麼判斷某月運量情況的相對大小的秩方法爲將所有機場在該月的秩排名相加,並將這個量定義爲每一個月的秩綜合指數,請根據上述定義計算 2016 年 12 個月的秩綜合指數。
(考察點:分組,合併,排序)
RAS_6=RAS[RAS.Year==2016]
RAS_6=RAS_6.melt(id_vars=['Airport name'],value_vars=['January','February','March','April','May','June','July','August','September','October','November','December'],value_name='month')
RAS_6=RAS_6.join(RAS_6.groupby('Airport name').rank(method='min'),rsuffix='_rank').sort_values(by=['Airport name','month'])
RAS_6
pd.pivot_table(RAS_6,columns='variable',values='month_rank',aggfunc='sum')#'value_rank'
Airport name | variable | month | month_rank | |
---|---|---|---|---|
0 | Abakan | January | 34.10 | 1.0 |
292 | Abakan | February | 45.41 | 2.0 |
584 | Abakan | March | 58.97 | 3.0 |
1752 | Abakan | July | 64.31 | 4.0 |
876 | Abakan | April | 72.71 | 5.0 |
... | ... | ... | ... | ... |
2265 | Таксимо | August | 0.00 | 1.0 |
2557 | Таксимо | September | 0.00 | 1.0 |
2849 | Таксимо | October | 0.00 | 1.0 |
3141 | Таксимо | November | 0.00 | 1.0 |
3433 | Таксимо | December | 0.00 | 1.0 |
3504 rows × 4 columns
variable | April | August | December | February | January | July | June | March | May | November | October | September |
---|---|---|---|---|---|---|---|---|---|---|---|---|
month_rank | 701.0 | 703.0 | 905.0 | 507.0 | 402.0 | 603.0 | 633.0 | 628.0 | 631.0 | 824.0 | 771.0 | 736.0 |
三、在美國的傳播
(1) 用 corr() 函數計算縣(每行都是一個縣)人口與表中最後一天記錄日期死亡數的相關係數。
(考察點:corr函數)
(2) 截止到 4 月 1 日,統計每個州零感染縣的比例。
(考察點:分組,篩選,創建列)
(3) 請找出最早出確證病例的三個縣。
(考察點:篩選,轉換,索引)
(4) 按州統計單日死亡增加數,並給出哪個州在哪一天確診數增加最大(這裏指的是在所有州和所有天兩個指標一起算,不是分別算)。
(考察點:分組,索引,差分,轉換,篩選)
(5) 現需對每個州編制確證與死亡表,第一列爲時間,並且起始時間爲該州開始出現死亡比例的那一天,第二列和第三列分別爲確證數和死亡數, 每個州需要保存爲一個單獨的 csv 文件,文件名爲“州名.csv”。
(考察點:分組,索引,轉換,循環,文件寫入輸出)
(6) 現需對 4 月 1 日至 4 月 10 日編制新增確證數與新增死亡數表,第一列爲州名,第二列和第三列分別爲新增確證數和新增死亡數,分別保存爲十個單獨的 csv 文件,文件名爲“日期.csv”。
(考察點:分組,索引,轉換,循環,文件寫入輸出)
# USCOV = COVID-19 in US
USCOV_diagnose = pd.read_csv('./美國確證數.csv')
USCOV_diagnose.head()
USCOV_diagnose.info()
USCOV_death = pd.read_csv('./美國死亡數.csv')
USCOV_death.head()
USCOV_death.info()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 2020/1/22 | 2020/1/23 | 2020/1/24 | 2020/1/25 | 2020/1/26 | 2020/1/27 | 2020/1/28 | 2020/1/29 | 2020/1/30 | 2020/1/31 | 2020/2/1 | 2020/2/2 | 2020/2/3 | 2020/2/4 | 2020/2/5 | 2020/2/6 | 2020/2/7 | 2020/2/8 | 2020/2/9 | 2020/2/10 | 2020/2/11 | 2020/2/12 | 2020/2/13 | 2020/2/14 | 2020/2/15 | 2020/2/16 | 2020/2/17 | 2020/2/18 | 2020/2/19 | 2020/2/20 | 2020/2/21 | 2020/2/22 | 2020/2/23 | 2020/2/24 | 2020/2/25 | 2020/2/26 | 2020/2/27 | 2020/2/28 | 2020/2/29 | 2020/3/1 | 2020/3/2 | 2020/3/3 | 2020/3/4 | 2020/3/5 | 2020/3/6 | 2020/3/7 | 2020/3/8 | 2020/3/9 | 2020/3/10 | 2020/3/11 | 2020/3/12 | 2020/3/13 | 2020/3/14 | 2020/3/15 | 2020/3/16 | 2020/3/17 | 2020/3/18 | 2020/3/19 | 2020/3/20 | 2020/3/21 | 2020/3/22 | 2020/3/23 | 2020/3/24 | 2020/3/25 | 2020/3/26 | 2020/3/27 | 2020/3/28 | 2020/3/29 | 2020/3/30 | 2020/3/31 | 2020/4/1 | 2020/4/2 | 2020/4/3 | 2020/4/4 | 2020/4/5 | 2020/4/6 | 2020/4/7 | 2020/4/8 | 2020/4/9 | 2020/4/10 | 2020/4/11 | 2020/4/12 | 2020/4/13 | 2020/4/14 | 2020/4/15 | 2020/4/16 | 2020/4/17 | 2020/4/18 | 2020/4/19 | 2020/4/20 | 2020/4/21 | 2020/4/22 | 2020/4/23 | 2020/4/24 | 2020/4/25 | 2020/4/26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 84001001 | US | USA | 840 | 1001 | Autauga | Alabama | US | 32.539527 | -86.644082 | Autauga, Alabama, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 6 | 6 | 6 | 6 | 6 | 7 | 8 | 10 | 12 | 12 | 12 | 12 | 12 | 12 | 15 | 17 | 19 | 19 | 19 | 23 | 24 | 26 | 26 | 25 | 26 | 28 | 30 | 32 | 33 | 36 | 36 | 37 |
1 | 84001003 | US | USA | 840 | 1003 | Baldwin | Alabama | US | 30.727750 | -87.722071 | Baldwin, Alabama, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 3 | 4 | 4 | 5 | 5 | 10 | 15 | 18 | 19 | 20 | 24 | 28 | 29 | 29 | 38 | 42 | 44 | 56 | 59 | 66 | 71 | 72 | 87 | 91 | 101 | 103 | 109 | 112 | 117 | 123 | 132 | 143 | 147 | 147 | 161 |
2 | 84001005 | US | USA | 840 | 1005 | Barbour | Alabama | US | 31.868263 | -85.387129 | Barbour, Alabama, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 2 | 2 | 3 | 3 | 4 | 9 | 9 | 10 | 10 | 11 | 12 | 14 | 15 | 18 | 20 | 22 | 28 | 29 | 30 | 32 | 32 | 33 |
3 | 84001007 | US | USA | 840 | 1007 | Bibb | Alabama | US | 32.996421 | -87.125115 | Bibb, Alabama, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 3 | 4 | 4 | 4 | 5 | 7 | 8 | 9 | 9 | 11 | 13 | 16 | 17 | 17 | 18 | 22 | 24 | 26 | 28 | 32 | 32 | 34 | 33 | 34 | 34 | 38 |
4 | 84001009 | US | USA | 840 | 1009 | Blount | Alabama | US | 33.982109 | -86.567906 | Blount, Alabama, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 4 | 5 | 5 | 5 | 5 | 5 | 6 | 9 | 10 | 10 | 10 | 10 | 10 | 11 | 12 | 12 | 13 | 14 | 16 | 17 | 18 | 20 | 20 | 21 | 22 | 26 | 29 | 31 | 31 | 31 | 34 |
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3142 entries, 0 to 3141
Columns: 107 entries, UID to 2020/4/26
dtypes: float64(2), int64(99), object(6)
memory usage: 2.6+ MB
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | Population | 2020/1/22 | 2020/1/23 | 2020/1/24 | 2020/1/25 | 2020/1/26 | 2020/1/27 | 2020/1/28 | 2020/1/29 | 2020/1/30 | 2020/1/31 | 2020/2/1 | 2020/2/2 | 2020/2/3 | 2020/2/4 | 2020/2/5 | 2020/2/6 | 2020/2/7 | 2020/2/8 | 2020/2/9 | 2020/2/10 | 2020/2/11 | 2020/2/12 | 2020/2/13 | 2020/2/14 | 2020/2/15 | 2020/2/16 | 2020/2/17 | 2020/2/18 | 2020/2/19 | 2020/2/20 | 2020/2/21 | 2020/2/22 | 2020/2/23 | 2020/2/24 | 2020/2/25 | 2020/2/26 | 2020/2/27 | 2020/2/28 | 2020/2/29 | 2020/3/1 | 2020/3/2 | 2020/3/3 | 2020/3/4 | 2020/3/5 | 2020/3/6 | 2020/3/7 | 2020/3/8 | 2020/3/9 | 2020/3/10 | 2020/3/11 | 2020/3/12 | 2020/3/13 | 2020/3/14 | 2020/3/15 | 2020/3/16 | 2020/3/17 | 2020/3/18 | 2020/3/19 | 2020/3/20 | 2020/3/21 | 2020/3/22 | 2020/3/23 | 2020/3/24 | 2020/3/25 | 2020/3/26 | 2020/3/27 | 2020/3/28 | 2020/3/29 | 2020/3/30 | 2020/3/31 | 2020/4/1 | 2020/4/2 | 2020/4/3 | 2020/4/4 | 2020/4/5 | 2020/4/6 | 2020/4/7 | 2020/4/8 | 2020/4/9 | 2020/4/10 | 2020/4/11 | 2020/4/12 | 2020/4/13 | 2020/4/14 | 2020/4/15 | 2020/4/16 | 2020/4/17 | 2020/4/18 | 2020/4/19 | 2020/4/20 | 2020/4/21 | 2020/4/22 | 2020/4/23 | 2020/4/24 | 2020/4/25 | 2020/4/26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 84001001 | US | USA | 840 | 1001 | Autauga | Alabama | US | 32.539527 | -86.644082 | Autauga, Alabama, US | 55869 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 1 | 1 | 2 | 2 | 2 | 2 | 2 |
1 | 84001003 | US | USA | 840 | 1003 | Baldwin | Alabama | US | 30.727750 | -87.722071 | Baldwin, Alabama, US | 223234 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
2 | 84001005 | US | USA | 840 | 1005 | Barbour | Alabama | US | 31.868263 | -85.387129 | Barbour, Alabama, US | 24686 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 84001007 | US | USA | 840 | 1007 | Bibb | Alabama | US | 32.996421 | -87.125115 | Bibb, Alabama, US | 22394 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 84001009 | US | USA | 840 | 1009 | Blount | Alabama | US | 33.982109 | -86.567906 | Blount, Alabama, US | 57826 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3142 entries, 0 to 3141
Columns: 108 entries, UID to 2020/4/26
dtypes: float64(2), int64(100), object(6)
memory usage: 2.6+ MB
(1) 用 corr() 函數計算縣(每行都是一個縣)人口與表中最後一天記錄日期死亡數的相關係數。
(考察點:corr函數)
- data.corr() #相關係數矩陣,即給出了任意兩個變量之間的相關係數
- data.corr()[u’A’] #只顯示“A”與其他間的相關係數
- data[u’A’].corr(data[u’B’]) #A與B兩者的相關係數
USCOV_death[u'Population'].corr(USCOV_death[u'2020/4/26'])
0.4038441973480701
(2) 截止到 4 月 1 日,統計每個州零感染縣的比例。
(考察點:分組,篩選,創建列)
# for name ,group in USCOV_diagnose.groupby('Province_State'):
# temp=[]
# # display( group.loc[:,'2020/1/22':'2020/4/1'])
# for i in group.loc[:,'2020/1/22':'2020/4/1']:
# display(i)
# # print(group[group['sum']==0].count()/group.count())
(USCOV_diagnose[USCOV_diagnose['2020/4/1']==0].groupby('Province_State')['UID'].count()/USCOV_diagnose.groupby('Province_State')['UID'].count()).fillna(0)
Province_State
Alabama 0.119403
Alaska 0.793103
Arizona 0.000000
Arkansas 0.293333
California 0.137931
Colorado 0.218750
Connecticut 0.000000
Delaware 0.000000
District of Columbia 0.000000
Florida 0.164179
Georgia 0.125786
Hawaii 0.200000
Idaho 0.386364
Illinois 0.480392
Indiana 0.108696
Iowa 0.404040
Kansas 0.609524
Kentucky 0.441667
Louisiana 0.062500
Maine 0.250000
Maryland 0.041667
Massachusetts 0.142857
Michigan 0.192771
Minnesota 0.367816
Mississippi 0.060976
Missouri 0.391304
Montana 0.625000
Nebraska 0.752688
Nevada 0.470588
New Hampshire 0.100000
New Jersey 0.000000
New Mexico 0.424242
New York 0.080645
North Carolina 0.180000
North Dakota 0.547170
Ohio 0.181818
Oklahoma 0.376623
Oregon 0.277778
Pennsylvania 0.104478
Rhode Island 0.000000
South Carolina 0.065217
South Dakota 0.560606
Tennessee 0.115789
Texas 0.452756
Utah 0.482759
Vermont 0.142857
Virginia 0.270677
Washington 0.128205
West Virginia 0.472727
Wisconsin 0.319444
Wyoming 0.347826
Name: UID, dtype: float64
(3) 請找出最早出確證病例的三個縣。
(考察點:篩選,轉換,索引)
USCOV_diagnose.sort_values(by=['2020/1/22','2020/1/23','2020/1/24','2020/1/25','2020/1/26'],ascending=False)
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 2020/1/22 | 2020/1/23 | 2020/1/24 | 2020/1/25 | 2020/1/26 | 2020/1/27 | 2020/1/28 | 2020/1/29 | 2020/1/30 | 2020/1/31 | 2020/2/1 | 2020/2/2 | 2020/2/3 | 2020/2/4 | 2020/2/5 | 2020/2/6 | 2020/2/7 | 2020/2/8 | 2020/2/9 | 2020/2/10 | 2020/2/11 | 2020/2/12 | 2020/2/13 | 2020/2/14 | 2020/2/15 | 2020/2/16 | 2020/2/17 | 2020/2/18 | 2020/2/19 | 2020/2/20 | 2020/2/21 | 2020/2/22 | 2020/2/23 | 2020/2/24 | 2020/2/25 | 2020/2/26 | 2020/2/27 | 2020/2/28 | 2020/2/29 | 2020/3/1 | 2020/3/2 | 2020/3/3 | 2020/3/4 | 2020/3/5 | 2020/3/6 | 2020/3/7 | 2020/3/8 | 2020/3/9 | 2020/3/10 | 2020/3/11 | 2020/3/12 | 2020/3/13 | 2020/3/14 | 2020/3/15 | 2020/3/16 | 2020/3/17 | 2020/3/18 | 2020/3/19 | 2020/3/20 | 2020/3/21 | 2020/3/22 | 2020/3/23 | 2020/3/24 | 2020/3/25 | 2020/3/26 | 2020/3/27 | 2020/3/28 | 2020/3/29 | 2020/3/30 | 2020/3/31 | 2020/4/1 | 2020/4/2 | 2020/4/3 | 2020/4/4 | 2020/4/5 | 2020/4/6 | 2020/4/7 | 2020/4/8 | 2020/4/9 | 2020/4/10 | 2020/4/11 | 2020/4/12 | 2020/4/13 | 2020/4/14 | 2020/4/15 | 2020/4/16 | 2020/4/17 | 2020/4/18 | 2020/4/19 | 2020/4/20 | 2020/4/21 | 2020/4/22 | 2020/4/23 | 2020/4/24 | 2020/4/25 | 2020/4/26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2969 | 84053033 | US | USA | 840 | 53033 | King | Washington | US | 47.491379 | -121.834613 | King, Washington, US | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 6 | 9 | 14 | 21 | 31 | 51 | 58 | 71 | 83 | 83 | 116 | 190 | 270 | 328 | 387 | 387 | 488 | 569 | 562 | 693 | 793 | 934 | 1040 | 1170 | 1170 | 1359 | 1577 | 1577 | 2077 | 2159 | 2161 | 2330 | 2330 | 2656 | 2787 | 2898 | 3167 | 3331 | 3486 | 3688 | 3886 | 4117 | 4262 | 4426 | 4426 | 4549 | 4620 | 4697 | 4902 | 4902 | 5174 | 5174 | 5293 | 5379 | 5532 | 5637 | 5739 | 5863 |
610 | 84017031 | US | USA | 840 | 17031 | Cook | Illinois | US | 41.841448 | -87.816588 | Cook, Illinois, US | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 3 | 4 | 4 | 4 | 5 | 5 | 6 | 7 | 7 | 11 | 22 | 27 | 40 | 50 | 50 | 62 | 107 | 178 | 278 | 278 | 548 | 805 | 922 | 1194 | 1418 | 1418 | 2239 | 2613 | 3445 | 3727 | 4496 | 5152 | 5575 | 6111 | 7439 | 8034 | 8728 | 9509 | 10520 | 11415 | 12472 | 13417 | 14585 | 15474 | 16323 | 17306 | 18087 | 19391 | 20395 | 21272 | 22101 | 23181 | 24546 | 25811 | 27616 | 29058 | 30574 |
103 | 84004013 | US | USA | 840 | 4013 | Maricopa | Arizona | US | 33.348359 | -112.491815 | Maricopa, Arizona, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 4 | 4 | 8 | 9 | 11 | 22 | 34 | 49 | 81 | 139 | 199 | 251 | 299 | 399 | 454 | 545 | 690 | 788 | 871 | 961 | 1049 | 1171 | 1326 | 1433 | 1495 | 1559 | 1689 | 1741 | 1891 | 1960 | 2020 | 2056 | 2146 | 2264 | 2404 | 2491 | 2589 | 2636 | 2738 | 2846 | 2970 | 3116 | 3234 | 3359 |
204 | 84006037 | US | USA | 840 | 6037 | Los Angeles | California | US | 34.308284 | -118.228241 | Los Angeles, California, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 7 | 11 | 13 | 14 | 14 | 14 | 20 | 27 | 32 | 40 | 53 | 53 | 94 | 144 | 190 | 231 | 292 | 292 | 407 | 536 | 662 | 812 | 1229 | 1465 | 1465 | 1829 | 2474 | 3019 | 3518 | 4045 | 4566 | 4605 | 5955 | 6377 | 6936 | 7559 | 7955 | 8443 | 8453 | 8894 | 9433 | 10047 | 10517 | 10854 | 11400 | 12021 | 12341 | 13823 | 15153 | 16447 | 17537 | 18545 | 19133 | 19567 |
215 | 84006059 | US | USA | 840 | 6059 | Orange | California | US | 33.701475 | -117.764600 | Orange, California, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 3 | 3 | 3 | 3 | 3 | 4 | 5 | 5 | 6 | 9 | 14 | 14 | 17 | 22 | 29 | 53 | 65 | 78 | 95 | 125 | 152 | 187 | 256 | 321 | 403 | 431 | 464 | 502 | 606 | 656 | 711 | 786 | 834 | 882 | 931 | 1016 | 1079 | 1138 | 1221 | 1277 | 1283 | 1299 | 1376 | 1425 | 1501 | 1556 | 1636 | 1676 | 1691 | 1753 | 1827 | 1845 | 1969 | 2074 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3137 | 84056037 | US | USA | 840 | 56037 | Sweetwater | Wyoming | US | 41.659439 | -108.882788 | Sweetwater, Wyoming, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 3 | 4 | 5 | 5 | 5 | 6 | 6 | 6 | 7 | 7 | 9 | 9 | 10 | 10 | 10 | 10 | 10 | 10 | 16 | 16 | 16 | 16 | 16 | 16 |
3138 | 84056039 | US | USA | 840 | 56039 | Teton | Wyoming | US | 43.935225 | -110.589080 | Teton, Wyoming, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 2 | 2 | 2 | 2 | 5 | 7 | 10 | 13 | 14 | 16 | 20 | 26 | 29 | 32 | 36 | 39 | 40 | 41 | 44 | 45 | 50 | 53 | 56 | 56 | 57 | 58 | 59 | 61 | 62 | 62 | 62 | 92 | 93 | 93 | 95 | 95 | 95 |
3139 | 84056041 | US | USA | 840 | 56041 | Uinta | Wyoming | US | 41.287818 | -110.547578 | Uinta, Wyoming, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 2 | 2 | 3 | 3 | 3 | 3 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 6 | 6 | 6 | 6 | 7 | 7 | 7 | 7 | 7 | 7 |
3140 | 84056043 | US | USA | 840 | 56043 | Washakie | Wyoming | US | 43.904516 | -107.680187 | Washakie, Wyoming, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 4 | 4 | 4 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 6 | 5 | 5 | 5 | 5 | 8 | 8 | 8 | 8 | 8 | 8 |
3141 | 84056045 | US | USA | 840 | 56045 | Weston | Wyoming | US | 43.839612 | -104.567488 | Weston, Wyoming, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3142 rows × 107 columns
for col in USCOV_diagnose.loc[:,'2020/1/22':'2020/4/26'].columns:
if USCOV_diagnose[USCOV_diagnose[col] != 0][col].count() >= 3:
USCOV_diagnose[USCOV_diagnose[col] != 0]
break
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 2020/1/22 | 2020/1/23 | 2020/1/24 | 2020/1/25 | 2020/1/26 | 2020/1/27 | 2020/1/28 | 2020/1/29 | 2020/1/30 | 2020/1/31 | 2020/2/1 | 2020/2/2 | 2020/2/3 | 2020/2/4 | 2020/2/5 | 2020/2/6 | 2020/2/7 | 2020/2/8 | 2020/2/9 | 2020/2/10 | 2020/2/11 | 2020/2/12 | 2020/2/13 | 2020/2/14 | 2020/2/15 | 2020/2/16 | 2020/2/17 | 2020/2/18 | 2020/2/19 | 2020/2/20 | 2020/2/21 | 2020/2/22 | 2020/2/23 | 2020/2/24 | 2020/2/25 | 2020/2/26 | 2020/2/27 | 2020/2/28 | 2020/2/29 | 2020/3/1 | 2020/3/2 | 2020/3/3 | 2020/3/4 | 2020/3/5 | 2020/3/6 | 2020/3/7 | 2020/3/8 | 2020/3/9 | 2020/3/10 | 2020/3/11 | 2020/3/12 | 2020/3/13 | 2020/3/14 | 2020/3/15 | 2020/3/16 | 2020/3/17 | 2020/3/18 | 2020/3/19 | 2020/3/20 | 2020/3/21 | 2020/3/22 | 2020/3/23 | 2020/3/24 | 2020/3/25 | 2020/3/26 | 2020/3/27 | 2020/3/28 | 2020/3/29 | 2020/3/30 | 2020/3/31 | 2020/4/1 | 2020/4/2 | 2020/4/3 | 2020/4/4 | 2020/4/5 | 2020/4/6 | 2020/4/7 | 2020/4/8 | 2020/4/9 | 2020/4/10 | 2020/4/11 | 2020/4/12 | 2020/4/13 | 2020/4/14 | 2020/4/15 | 2020/4/16 | 2020/4/17 | 2020/4/18 | 2020/4/19 | 2020/4/20 | 2020/4/21 | 2020/4/22 | 2020/4/23 | 2020/4/24 | 2020/4/25 | 2020/4/26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
103 | 84004013 | US | USA | 840 | 4013 | Maricopa | Arizona | US | 33.348359 | -112.491815 | Maricopa, Arizona, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 4 | 4 | 8 | 9 | 11 | 22 | 34 | 49 | 81 | 139 | 199 | 251 | 299 | 399 | 454 | 545 | 690 | 788 | 871 | 961 | 1049 | 1171 | 1326 | 1433 | 1495 | 1559 | 1689 | 1741 | 1891 | 1960 | 2020 | 2056 | 2146 | 2264 | 2404 | 2491 | 2589 | 2636 | 2738 | 2846 | 2970 | 3116 | 3234 | 3359 |
204 | 84006037 | US | USA | 840 | 6037 | Los Angeles | California | US | 34.308284 | -118.228241 | Los Angeles, California, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 7 | 11 | 13 | 14 | 14 | 14 | 20 | 27 | 32 | 40 | 53 | 53 | 94 | 144 | 190 | 231 | 292 | 292 | 407 | 536 | 662 | 812 | 1229 | 1465 | 1465 | 1829 | 2474 | 3019 | 3518 | 4045 | 4566 | 4605 | 5955 | 6377 | 6936 | 7559 | 7955 | 8443 | 8453 | 8894 | 9433 | 10047 | 10517 | 10854 | 11400 | 12021 | 12341 | 13823 | 15153 | 16447 | 17537 | 18545 | 19133 | 19567 |
215 | 84006059 | US | USA | 840 | 6059 | Orange | California | US | 33.701475 | -117.764600 | Orange, California, US | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 3 | 3 | 3 | 3 | 3 | 4 | 5 | 5 | 6 | 9 | 14 | 14 | 17 | 22 | 29 | 53 | 65 | 78 | 95 | 125 | 152 | 187 | 256 | 321 | 403 | 431 | 464 | 502 | 606 | 656 | 711 | 786 | 834 | 882 | 931 | 1016 | 1079 | 1138 | 1221 | 1277 | 1283 | 1299 | 1376 | 1425 | 1501 | 1556 | 1636 | 1676 | 1691 | 1753 | 1827 | 1845 | 1969 | 2074 |
610 | 84017031 | US | USA | 840 | 17031 | Cook | Illinois | US | 41.841448 | -87.816588 | Cook, Illinois, US | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 3 | 4 | 4 | 4 | 5 | 5 | 6 | 7 | 7 | 11 | 22 | 27 | 40 | 50 | 50 | 62 | 107 | 178 | 278 | 278 | 548 | 805 | 922 | 1194 | 1418 | 1418 | 2239 | 2613 | 3445 | 3727 | 4496 | 5152 | 5575 | 6111 | 7439 | 8034 | 8728 | 9509 | 10520 | 11415 | 12472 | 13417 | 14585 | 15474 | 16323 | 17306 | 18087 | 19391 | 20395 | 21272 | 22101 | 23181 | 24546 | 25811 | 27616 | 29058 | 30574 |
2969 | 84053033 | US | USA | 840 | 53033 | King | Washington | US | 47.491379 | -121.834613 | King, Washington, US | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 6 | 9 | 14 | 21 | 31 | 51 | 58 | 71 | 83 | 83 | 116 | 190 | 270 | 328 | 387 | 387 | 488 | 569 | 562 | 693 | 793 | 934 | 1040 | 1170 | 1170 | 1359 | 1577 | 1577 | 2077 | 2159 | 2161 | 2330 | 2330 | 2656 | 2787 | 2898 | 3167 | 3331 | 3486 | 3688 | 3886 | 4117 | 4262 | 4426 | 4426 | 4549 | 4620 | 4697 | 4902 | 4902 | 5174 | 5174 | 5293 | 5379 | 5532 | 5637 | 5739 | 5863 |
(4) 按州統計單日死亡增加數,並給出哪個州在哪一天確診數增加最大(這裏指的是在所有州和所有天兩個指標一起算,不是分別算)。
(考察點:分組,索引,差分,轉換,篩選)
USCOV_death.groupby('Province_State')[:,'2020/1/22':'2020/4/1']