數據挖掘:Pandas時間模塊管理!

datetime

import numpy as np
import pandas as pd
import datetime
# datetime.date

# Today's date as a datetime.date, obtained through the date.today() class method.
t = datetime.date.today()
print(t, type(t))
print("")

# str() turns the date into its "YYYY-MM-DD" text form. Print the string
# itself (not the date object) so the value shown matches the type shown.
t_str = str(t)
print(t_str, type(t_str))
2019-05-26 <class 'datetime.date'>

2019-05-26 <class 'str'>
# datetime.datetime

now = datetime.datetime.now()
print(now, type(now))
2019-05-26 16:18:17.612845 <class 'datetime.datetime'>
# datetime.timedelta  時間差 

# Adding a timedelta to a datetime yields a datetime shifted by that span.
t1 = datetime.datetime(year=2017, month=10, day=1)
print(t1, "", sep="\n")

# timedelta accepts days, seconds, microseconds, milliseconds, minutes,
# hours and weeks; only the day count is supplied here.
tx = datetime.timedelta(days=100)
print(tx, "", sep="\n")

t2 = tx + t1
print(t2)
2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00

pd.Timestamp 與 datetime.datetime 作用相同,只不過 Timestamp 是 pandas 模塊裏的類型

import numpy as np
import pandas as pd
import datetime
t = datetime.datetime.today()
print(t)
print("")

ts1 = pd.Timestamp(t)
print(ts1)
print("")

ts2 = pd.Timestamp("20171021")
print(ts2)
2020-06-01 14:28:08.656056

2020-06-01 14:28:08.656056

2017-10-21 00:00:00

pd.to_datetime 多個時間數據轉換成時間戳索引

time_list = ["20171019", "20181020", "20191021"]

t = pd.to_datetime(time_list)
print(t, type(t))
DatetimeIndex(['2017-10-19', '2018-10-20', '2019-10-21'], dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'>

如果時間序列裏包含非時間的數據,可用 errors 參數控制處理方式:errors="ignore"(忽略異常,原樣返回輸入,不會轉換成時間戳索引);errors="coerce"(把無法解析的異常值改爲NaT)

# Input mixes parseable date strings with one entry ("bbbb") that is not a date.
time_list1 = ["20171019", "20181020", "bbbb", "20191021"]

# errors="ignore": per the output recorded below, the input comes back
# unconverted as a plain object-dtype Index rather than a DatetimeIndex.
# NOTE(review): errors="ignore" is reported as deprecated in recent pandas
# releases - confirm against the installed version before relying on it.
t1= pd.to_datetime(time_list1, errors="ignore")
print(t1, type(t1))
print("")

# errors="coerce": per the output recorded below, the unparseable entry
# becomes NaT and the result is a DatetimeIndex.
t2 = pd.to_datetime(time_list1, errors="coerce")
print(t2)
Index(['20171019', '20181020', 'bbbb', '20191021'], dtype='object') <class 'pandas.core.indexes.base.Index'>

DatetimeIndex(['2017-10-19', '2018-10-20', 'NaT', '2019-10-21'], dtype='datetime64[ns]', freq=None)

pd.DatetimeIndex() 直接生成時間戳序列

rng = pd.DatetimeIndex(["20160910", "11/06/2017", "20180821", "26/05/2019"])
print(rng)
print(type(rng))
print("")

print(rng[0], type(rng[0]))
DatetimeIndex(['2016-09-10', '2017-11-06', '2018-08-21', '2019-05-26'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

2016-09-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>
st = pd.Series(np.random.rand(4), index=rng)  # 把時間戳索引當成index
print(st)
2016-09-10    0.835586
2017-11-06    0.223044
2018-08-21    0.950717
2019-05-26    0.013370
dtype: float64

pd.date_range() 生成日期範圍

"""
pd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)

該函數主要用於生成一個固定頻率的時間索引,在調用構造方法時,必須指定start、end、periods中的兩個參數值,否則報錯。

start: 開始日期

end: 結束日期

periods:固定時期,取值爲整數或None

freq:日期偏移量,取值爲string或DateOffset,默認爲'D'

normalize:若參數爲True表示將start、end參數值規範化(normalize)到午夜時間戳 0:00:00  默認爲False

name:生成時間索引對象的名稱,取值爲string或None

closed:控制區間端點是否包含在返回結果中。closed=None(默認)時兩端都包含;closed='left'表示左閉右開(包含start、不包含end);closed='right'表示左開右閉(不包含start、包含end)

"""
"\npd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)\n\n該函數主要用於生成一個固定頻率的時間索引,在調用構造方法時,必須指定start、end、periods中的兩個參數值,否則報錯。\n\nstart: 開始日期\n\nend: 結束日期\n\nperiods:固定時期,取值爲整數或None\n\nfreq:日期偏移量,取值爲string或DateOffset,默認爲'D'\n\nnormalize:若參數爲True表示將start、end參數值正則化到午夜時間戳 0:00:00  默認爲False\n\nname:生成時間索引對象的名稱,取值爲string或None\n\nclosed:可以理解成在closed=None情況下返回的結果中,若closed=‘left’表示在返回的結果基礎上,再取左開右閉的結果,若closed='right'表示在返回的結果基礎上,再取左閉右開的結果\n\n"

start end

t_index1 = pd.date_range(start="20181018", end="20191021", name="t_index1")
print(t_index1)
DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
               '2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
               '2018-10-26', '2018-10-27',
               ...
               '2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
               '2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
               '2019-10-20', '2019-10-21'],
              dtype='datetime64[ns]', name='t_index1', length=369, freq='D')

periods

t_index2 = pd.date_range(start="20181018", periods=10, name="t_index2")
print(t_index2)
DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
               '2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
               '2018-10-26', '2018-10-27'],
              dtype='datetime64[ns]', name='t_index2', freq='D')
t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)
DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
               '2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
               '2018-10-17', '2018-10-18'],
              dtype='datetime64[ns]', name='t_index3', freq='D')

name normalize

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4")
print(t_index4)
print("\n")

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4", normalize=True)
print(t_index4)
DatetimeIndex(['2019-11-09 16:30:00', '2019-11-10 16:30:00',
               '2019-11-11 16:30:00', '2019-11-12 16:30:00',
               '2019-11-13 16:30:00', '2019-11-14 16:30:00',
               '2019-11-15 16:30:00', '2019-11-16 16:30:00',
               '2019-11-17 16:30:00', '2019-11-18 16:30:00'],
              dtype='datetime64[ns]', name='t_index4', freq='D')
DatetimeIndex(['2019-11-09', '2019-11-10', '2019-11-11', '2019-11-12',
               '2019-11-13', '2019-11-14', '2019-11-15', '2019-11-16',
               '2019-11-17', '2019-11-18'],
              dtype='datetime64[ns]', name='t_index4', freq='D')

closed

# Default behaviour: both the start and the end date are part of the result.
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5")
print(t_index5)
print("\n")

# NOTE: the ``closed`` keyword of date_range was renamed to ``inclusive`` in
# pandas 1.4 and removed in pandas 2.0, so ``inclusive`` is used here to keep
# the example runnable on current pandas. On pandas < 1.4 pass ``closed=``
# with the same "left"/"right" values instead.
# "left": keep the start, drop the end (left-closed, right-open).
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", inclusive="left")
print(t_index5)
print("\n")

# "right": drop the start, keep the end (left-open, right-closed).
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", inclusive="right")
print(t_index5)
print("\n")
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
               '2019-09-18'],
              dtype='datetime64[ns]', name='t_index5', freq='D')
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
              dtype='datetime64[ns]', name='t_index5', freq='D')
DatetimeIndex(['2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
               '2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18'],
              dtype='datetime64[ns]', name='t_index5', freq='D')

pd.bdate_range() 默認頻率爲工作日

t_index6 = pd.bdate_range(start="20191001", end="20191007", name="t_index6")
print(t_index6)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', name='t_index6', freq='B')

pd.date_range 轉換成list 元素爲時間戳Timestamp

t_index7_list= pd.date_range(start="20191001", end="20191007", name="t_index7_list")
print(t_index7_list)
print("\n")

t_index7_list= list(pd.date_range(start="20191001", end="20191007", name="t_index7_list"))
print(t_index7_list)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', name='t_index7_list', freq='D')
[Timestamp('2019-10-01 00:00:00', freq='D'), Timestamp('2019-10-02 00:00:00', freq='D'), Timestamp('2019-10-03 00:00:00', freq='D'), Timestamp('2019-10-04 00:00:00', freq='D'), Timestamp('2019-10-05 00:00:00', freq='D'), Timestamp('2019-10-06 00:00:00', freq='D'), Timestamp('2019-10-07 00:00:00', freq='D')]

freq 日期偏移量

# 默認freq = 'D' 每日

pd.date_range("10/1/2019", "2019/10/7")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', freq='D')
# 'B' 每工作日

pd.date_range("10/01/2019", "10/07/2019", freq = "B")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', freq='B')
# H 每小時

# Hourly steps. The lowercase alias "h" is used because the uppercase "H"
# spelling is deprecated since pandas 2.2.
pd.date_range("10/01/2019  12:00:00", "10/02/2019 12:00:00", freq="h")
DatetimeIndex(['2019-10-01 12:00:00', '2019-10-01 13:00:00',
               '2019-10-01 14:00:00', '2019-10-01 15:00:00',
               '2019-10-01 16:00:00', '2019-10-01 17:00:00',
               '2019-10-01 18:00:00', '2019-10-01 19:00:00',
               '2019-10-01 20:00:00', '2019-10-01 21:00:00',
               '2019-10-01 22:00:00', '2019-10-01 23:00:00',
               '2019-10-02 00:00:00', '2019-10-02 01:00:00',
               '2019-10-02 02:00:00', '2019-10-02 03:00:00',
               '2019-10-02 04:00:00', '2019-10-02 05:00:00',
               '2019-10-02 06:00:00', '2019-10-02 07:00:00',
               '2019-10-02 08:00:00', '2019-10-02 09:00:00',
               '2019-10-02 10:00:00', '2019-10-02 11:00:00',
               '2019-10-02 12:00:00'],
              dtype='datetime64[ns]', freq='H')
 # T/MIN 每分

# One-minute steps. "min" is used because the single-letter "T" alias is
# deprecated since pandas 2.2.
pd.date_range("10/01/2019 12:10:00", "10/01/2019 12:30:00", freq="min")
DatetimeIndex(['2019-10-01 12:10:00', '2019-10-01 12:11:00',
               '2019-10-01 12:12:00', '2019-10-01 12:13:00',
               '2019-10-01 12:14:00', '2019-10-01 12:15:00',
               '2019-10-01 12:16:00', '2019-10-01 12:17:00',
               '2019-10-01 12:18:00', '2019-10-01 12:19:00',
               '2019-10-01 12:20:00', '2019-10-01 12:21:00',
               '2019-10-01 12:22:00', '2019-10-01 12:23:00',
               '2019-10-01 12:24:00', '2019-10-01 12:25:00',
               '2019-10-01 12:26:00', '2019-10-01 12:27:00',
               '2019-10-01 12:28:00', '2019-10-01 12:29:00',
               '2019-10-01 12:30:00'],
              dtype='datetime64[ns]', freq='T')
# S 每秒

# One-second steps. The lowercase alias "s" is used because the uppercase "S"
# spelling is deprecated since pandas 2.2.
pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="s")
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 00:00:01',
               '2019-10-01 00:00:02', '2019-10-01 00:00:03',
               '2019-10-01 00:00:04', '2019-10-01 00:00:05',
               '2019-10-01 00:00:06', '2019-10-01 00:00:07',
               '2019-10-01 00:00:08', '2019-10-01 00:00:09',
               '2019-10-01 00:00:10', '2019-10-01 00:00:11',
               '2019-10-01 00:00:12', '2019-10-01 00:00:13',
               '2019-10-01 00:00:14', '2019-10-01 00:00:15',
               '2019-10-01 00:00:16', '2019-10-01 00:00:17',
               '2019-10-01 00:00:18', '2019-10-01 00:00:19',
               '2019-10-01 00:00:20', '2019-10-01 00:00:21',
               '2019-10-01 00:00:22', '2019-10-01 00:00:23',
               '2019-10-01 00:00:24', '2019-10-01 00:00:25',
               '2019-10-01 00:00:26', '2019-10-01 00:00:27',
               '2019-10-01 00:00:28', '2019-10-01 00:00:29',
               '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', freq='S')
# L 每毫秒 (千分之一秒)

# One-millisecond steps. "ms" is used because the single-letter "L" alias is
# deprecated since pandas 2.2.
pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="ms")
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.001000',
               '2019-10-01 00:00:00.002000', '2019-10-01 00:00:00.003000',
               '2019-10-01 00:00:00.004000', '2019-10-01 00:00:00.005000',
               '2019-10-01 00:00:00.006000', '2019-10-01 00:00:00.007000',
               '2019-10-01 00:00:00.008000', '2019-10-01 00:00:00.009000',
               ...
               '2019-10-01 00:00:29.991000', '2019-10-01 00:00:29.992000',
               '2019-10-01 00:00:29.993000', '2019-10-01 00:00:29.994000',
               '2019-10-01 00:00:29.995000', '2019-10-01 00:00:29.996000',
               '2019-10-01 00:00:29.997000', '2019-10-01 00:00:29.998000',
               '2019-10-01 00:00:29.999000',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30001, freq='L')
# U 每微秒 (百萬分之一秒)

# One-microsecond steps (a millionth of a second). "us" is used because the
# single-letter "U" alias is deprecated since pandas 2.2.
# Note that 30 seconds at this resolution produces 30,000,001 timestamps.
pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="us")
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')

星期幾縮寫 – MON/TUE/WED/THU/FRI/SAT/SUN

# "W-MON"指定從星期一開始算起 間隔是每週

pd.date_range("2019/10/1", "2019/11/1", freq = "W-MON")  
DatetimeIndex(['2019-10-07', '2019-10-14', '2019-10-21', '2019-10-28'], dtype='datetime64[ns]', freq='W-MON')
# "WOM-2MON" 指定每月的第2個星期一 間隔是月

pd.date_range("2019/10/1", "2020/10/1", freq = "WOM-2MON") 
DatetimeIndex(['2019-10-14', '2019-11-11', '2019-12-09', '2020-01-13',
               '2020-02-10', '2020-03-09', '2020-04-13', '2020-05-11',
               '2020-06-08', '2020-07-13', '2020-08-10', '2020-09-14'],
              dtype='datetime64[ns]', freq='WOM-2MON')
# M -- 每月最後一個日曆日

pd.date_range("2019", "2020", freq = "M") 
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')

月份

"""
一月     Jan.     January

二月     Feb.     February

三月     Mar.     March

四月     Apr.     April

五月     May      May

六月     Jun.     June

七月     Jul.     July

八月     Aug.     August

九月     Sept.    September

十月     Oct.     October

十一月   Nov.     November

十二月   Dec.     December
"""
# Q 每個季度末最後一月的最後一個日曆日

print(pd.date_range("2019", "2020", freq="Q-JAN")) 

print(pd.date_range("2019", "2020", freq="Q-FEB"))

print(pd.date_range("2019", "2020", freq="Q-MAR"))
print("")

# 所以Q-月只有三種情況  1-4-7-10, 2-5-8-11, 3-6-9-12
print(pd.date_range("2019", "2020", freq="Q-APR"))
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-JAN')
DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-31', '2019-11-30'], dtype='datetime64[ns]', freq='Q-FEB')
DatetimeIndex(['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'], dtype='datetime64[ns]', freq='Q-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-APR')
# A -- 每年指定月份的最後一個日曆日

print(pd.date_range("2019", "2021", freq="A-JAN"))
print(pd.date_range("2019", "2021", freq="A-FEB"))
print(pd.date_range("2019", "2021", freq="A-DEC"))
DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='A-JAN')
DatetimeIndex(['2019-02-28', '2020-02-29'], dtype='datetime64[ns]', freq='A-FEB')
DatetimeIndex(['2019-12-31', '2020-12-31'], dtype='datetime64[ns]', freq='A-DEC')
# BM - 每月最後一個工作日

print(pd.date_range("2019", "2020", freq="BM"))
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29', '2019-04-30',
               '2019-05-31', '2019-06-28', '2019-07-31', '2019-08-30',
               '2019-09-30', '2019-10-31', '2019-11-29', '2019-12-31'],
              dtype='datetime64[ns]', freq='BM')
# BQ - 每個季度末最後一月的最後一個工作日

print(pd.date_range("2019", "2021", freq="BQ-JAN"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-FEB"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-MAR"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-APR"))
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-JAN')

DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-30', '2019-11-29',
               '2020-02-28', '2020-05-29', '2020-08-31', '2020-11-30'],
              dtype='datetime64[ns]', freq='BQ-FEB')

DatetimeIndex(['2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
               '2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='BQ-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-APR')
# BA -- 每年指定月份的最後一個工作日

print(pd.date_range("2019", "2021", freq="BA-JAN"))
print(pd.date_range("2019", "2023", freq="BA-FEB"))
print(pd.date_range("2019", "2021", freq="BA-MAR"))
DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='BA-JAN')
DatetimeIndex(['2019-02-28', '2020-02-28', '2021-02-26', '2022-02-28'], dtype='datetime64[ns]', freq='BA-FEB')
DatetimeIndex(['2019-03-29', '2020-03-31'], dtype='datetime64[ns]', freq='BA-MAR')
# MS -- 每月第一個日曆日

pd.date_range("2019", "2020", freq="MS")
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='MS')
# QS - 每個季度第一個月的第一個日曆日

print(pd.date_range("2019", "2020", freq="QS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="QS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="QS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="QS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='QS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'], dtype='datetime64[ns]', freq='QS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-APR')
# AS -- 每年指定月份的第一個日曆日

print(pd.date_range("2019", "2021", freq="AS-JAN"))
print(pd.date_range("2019", "2021", freq="AS-FEB"))
print(pd.date_range("2019", "2021", freq="AS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='AS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-01'], dtype='datetime64[ns]', freq='AS-FEB')
DatetimeIndex(['2019-12-01', '2020-12-01'], dtype='datetime64[ns]', freq='AS-DEC')
# BMS -- 每月第一個工作日

print(pd.date_range("2019", "2021", freq="BMS"))
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-03', '2019-07-01', '2019-08-01',
               '2019-09-02', '2019-10-01', '2019-11-01', '2019-12-02',
               '2020-01-01', '2020-02-03', '2020-03-02', '2020-04-01',
               '2020-05-01', '2020-06-01', '2020-07-01', '2020-08-03',
               '2020-09-01', '2020-10-01', '2020-11-02', '2020-12-01',
               '2021-01-01'],
              dtype='datetime64[ns]', freq='BMS')
# BQS - 每個季度第一個月的第一個工作日

print(pd.date_range("2019", "2020", freq="BQS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='BQS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'], dtype='datetime64[ns]', freq='BQS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-APR')
# BAS -- 每年指定月份的第一個工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')

複合頻率

# 7D 間隔是7天

pd.date_range("2019/10/1", "2019/12/1", freq="7D")
DatetimeIndex(['2019-10-01', '2019-10-08', '2019-10-15', '2019-10-22',
               '2019-10-29', '2019-11-05', '2019-11-12', '2019-11-19',
               '2019-11-26'],
              dtype='datetime64[ns]', freq='7D')
# 2h30min 間隔是2小時30分鐘

pd.date_range("2019/10/1 00:00:00", "2019/10/1 12:00:00", freq="2h30min")
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 02:30:00',
               '2019-10-01 05:00:00', '2019-10-01 07:30:00',
               '2019-10-01 10:00:00'],
              dtype='datetime64[ns]', freq='150T')
# 2M 每間隔2個月的最後一個日曆日

pd.date_range("2019", "2021", freq="2M")  
DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
               '2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
               '2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
              dtype='datetime64[ns]', freq='2M')

asfreq 時間頻率轉換

# Daily series of four random values used to demonstrate frequency conversion.
ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# Convert the daily index to a 4-hour index; timestamps that did not exist in
# the daily series have no data and show up as NaN. The lowercase "4h"
# spelling is used because the uppercase "H" alias is deprecated since
# pandas 2.2.
print(ts.asfreq("4h"))
print("\n")

# method selects how the new slots are filled: "ffill" carries the previous
# observation forward, "bfill" pulls the next observation backward.
print(ts.asfreq("4h", method="ffill"))
print("\n")

print(ts.asfreq("4h", method="bfill"))
2019-01-01    0.610403
2019-01-02    0.416557
2019-01-03    0.821631
2019-01-04    0.699457
Freq: D, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00         NaN
2019-01-01 08:00:00         NaN
2019-01-01 12:00:00         NaN
2019-01-01 16:00:00         NaN
2019-01-01 20:00:00         NaN
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00         NaN
2019-01-02 08:00:00         NaN
2019-01-02 12:00:00         NaN
2019-01-02 16:00:00         NaN
2019-01-02 20:00:00         NaN
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00         NaN
2019-01-03 08:00:00         NaN
2019-01-03 12:00:00         NaN
2019-01-03 16:00:00         NaN
2019-01-03 20:00:00         NaN
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.610403
2019-01-01 08:00:00    0.610403
2019-01-01 12:00:00    0.610403
2019-01-01 16:00:00    0.610403
2019-01-01 20:00:00    0.610403
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.416557
2019-01-02 08:00:00    0.416557
2019-01-02 12:00:00    0.416557
2019-01-02 16:00:00    0.416557
2019-01-02 20:00:00    0.416557
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.821631
2019-01-03 08:00:00    0.821631
2019-01-03 12:00:00    0.821631
2019-01-03 16:00:00    0.821631
2019-01-03 20:00:00    0.821631
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.416557
2019-01-01 08:00:00    0.416557
2019-01-01 12:00:00    0.416557
2019-01-01 16:00:00    0.416557
2019-01-01 20:00:00    0.416557
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.821631
2019-01-02 08:00:00    0.821631
2019-01-02 12:00:00    0.821631
2019-01-02 16:00:00    0.821631
2019-01-02 20:00:00    0.821631
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.699457
2019-01-03 08:00:00    0.699457
2019-01-03 12:00:00    0.699457
2019-01-03 16:00:00    0.699457
2019-01-03 20:00:00    0.699457
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64

超前/滯後數據 shift(正數): 數值後移–滯後 ,shift(負數): 數值前移–超前

# Daily series of four random values used to demonstrate shifting.
ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# Positive shift: values move one row later (lag); the first row becomes NaN.
print(ts.shift(1))
print("\n")

# Negative shift: values move two rows earlier (lead); the last two rows become NaN.
print(ts.shift(-2))
print("\n")

# Ratio of each value to the value at the previous timestamp. This is a ratio,
# not a percentage change; subtract 1 from it to get the relative change.

per = ts/ts.shift(1)
print(per)
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    0.197884
2019-01-03    0.403093
2019-01-04    0.208341
Freq: D, dtype: float64
2019-01-01    0.208341
2019-01-02    0.330873
2019-01-03         NaN
2019-01-04         NaN
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    2.037017
2019-01-03    0.516855
2019-01-04    1.588134
Freq: D, dtype: float64

shift(freq) 加上freq參數 對時間戳進行位移 而不是對數值進行位移

print(ts)
print("\n")

# With freq given, shift moves the index labels and leaves the values in place.
print(ts.shift(2, freq="D"))  # index moved forward by two days
print("\n")

# "min" replaces the single-letter "T" minute alias, which is deprecated since
# pandas 2.2.
print(ts.shift(2, freq="min"))  # index moved forward by two minutes
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-03    0.197884
2019-01-04    0.403093
2019-01-05    0.208341
2019-01-06    0.330873
Freq: D, dtype: float64
2019-01-01 00:02:00    0.197884
2019-01-02 00:02:00    0.403093
2019-01-03 00:02:00    0.208341
2019-01-04 00:02:00    0.330873
Freq: D, dtype: float64
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章