数据挖掘:Pandas时间模块管理!

datetime

import numpy as np
import pandas as pd
import datetime
# datetime.date

t = datetime.date.today()  # datetime 模块 date 类 today() 类方法 
print(t, type(t))
print("")

t_str = str(t)
print(t_str, type(t_str))
2019-05-26 <class 'datetime.date'>

2019-05-26 <class 'str'>
# datetime.datetime

now = datetime.datetime.now()
print(now, type(now))
2019-05-26 16:18:17.612845 <class 'datetime.datetime'>
# datetime.timedelta  时间差 

t1 = datetime.datetime(2017,10,1)
print(t1)
print("")

tx = datetime.timedelta(100)  # timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
print(tx)
print("")

t2 = t1 + tx
print(t2)
2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00

pd.Timestamp 与 datetime.datetime 作用相同,只不过 Timestamp 是 pandas 提供的时间戳类型

import numpy as np
import pandas as pd
import datetime
t = datetime.datetime.today()
print(t)
print("")

ts1 = pd.Timestamp(t)
print(ts1)
print("")

ts2 = pd.Timestamp("20171021")
print(ts2)
2020-06-01 14:28:08.656056

2020-06-01 14:28:08.656056

2017-10-21 00:00:00

pd.to_datetime 多个时间数据转换成时间戳索引

time_list = ["20171019", "20181020", "20191021"]

t = pd.to_datetime(time_list)
print(t, type(t))
DatetimeIndex(['2017-10-19', '2018-10-20', '2019-10-21'], dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'>

如果时间序列里包含非时间的数据 ignore(忽略异常 但不会转换成时间戳索引) coerce(把异常值改为NaT)

time_list1 = ["20171019", "20181020", "bbbb", "20191021"]

t1= pd.to_datetime(time_list1, errors="ignore")
print(t1, type(t1))
print("")

t2 = pd.to_datetime(time_list1, errors="coerce")
print(t2)
Index(['20171019', '20181020', 'bbbb', '20191021'], dtype='object') <class 'pandas.core.indexes.base.Index'>

DatetimeIndex(['2017-10-19', '2018-10-20', 'NaT', '2019-10-21'], dtype='datetime64[ns]', freq=None)

pd.DatetimeIndex() 直接生成时间戳序列

rng = pd.DatetimeIndex(["20160910", "11/06/2017", "20180821", "26/05/2019"])
print(rng)
print(type(rng))
print("")

print(rng[0], type(rng[0]))
DatetimeIndex(['2016-09-10', '2017-11-06', '2018-08-21', '2019-05-26'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

2016-09-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>
st = pd.Series(np.random.rand(4), index=rng)  # 把时间戳索引当成index
print(st)
2016-09-10    0.835586
2017-11-06    0.223044
2018-08-21    0.950717
2019-05-26    0.013370
dtype: float64

pd.date_range() 生成日期范围

"""
pd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)

该函数主要用于生成一个固定频率的时间索引,在调用构造方法时,必须指定start、end、periods中的两个参数值,否则报错。

start: 开始日期

end: 结束日期

periods:固定时期,取值为整数或None

freq:日期偏移量,取值为string或DateOffset,默认为'D'

normalize:若参数为True表示将start、end参数值规范化到午夜时间戳 0:00:00  默认为False

name:生成时间索引对象的名称,取值为string或None

closed:控制区间端点是否包含在结果中,closed=None(默认)时两端都包含,closed='left'表示左闭右开(包含start、不包含end),closed='right'表示左开右闭(不包含start、包含end)

"""
"\npd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)\n\n该函数主要用于生成一个固定频率的时间索引,在调用构造方法时,必须指定start、end、periods中的两个参数值,否则报错。\n\nstart: 开始日期\n\nend: 结束日期\n\nperiods:固定时期,取值为整数或None\n\nfreq:日期偏移量,取值为string或DateOffset,默认为'D'\n\nnormalize:若参数为True表示将start、end参数值规范化到午夜时间戳 0:00:00  默认为False\n\nname:生成时间索引对象的名称,取值为string或None\n\nclosed:控制区间端点是否包含在结果中,closed=None(默认)时两端都包含,closed='left'表示左闭右开(包含start、不包含end),closed='right'表示左开右闭(不包含start、包含end)\n\n"

start end

t_index1 = pd.date_range(start="20181018", end="20191021", name="t_index1")
print(t_index1)
DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
               '2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
               '2018-10-26', '2018-10-27',
               ...
               '2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
               '2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
               '2019-10-20', '2019-10-21'],
              dtype='datetime64[ns]', name='t_index1', length=369, freq='D')

periods

t_index2 = pd.date_range(start="20181018", periods=10, name="t_index2")
print(t_index2)
DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
               '2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
               '2018-10-26', '2018-10-27'],
              dtype='datetime64[ns]', name='t_index2', freq='D')
t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)
DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
               '2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
               '2018-10-17', '2018-10-18'],
              dtype='datetime64[ns]', name='t_index3', freq='D')

name normalize

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4")
print(t_index4)
print("\n")

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4", normalize=True)
print(t_index4)
DatetimeIndex(['2019-11-09 16:30:00', '2019-11-10 16:30:00',
               '2019-11-11 16:30:00', '2019-11-12 16:30:00',
               '2019-11-13 16:30:00', '2019-11-14 16:30:00',
               '2019-11-15 16:30:00', '2019-11-16 16:30:00',
               '2019-11-17 16:30:00', '2019-11-18 16:30:00'],
              dtype='datetime64[ns]', name='t_index4', freq='D')
DatetimeIndex(['2019-11-09', '2019-11-10', '2019-11-11', '2019-11-12',
               '2019-11-13', '2019-11-14', '2019-11-15', '2019-11-16',
               '2019-11-17', '2019-11-18'],
              dtype='datetime64[ns]', name='t_index4', freq='D')

closed

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5")
print(t_index5)
print("\n")

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", closed="left")  # 左闭右开
print(t_index5)
print("\n")

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", closed="right")  # 左开右闭
print(t_index5)
print("\n")
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
               '2019-09-18'],
              dtype='datetime64[ns]', name='t_index5', freq='D')
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
              dtype='datetime64[ns]', name='t_index5', freq='D')
DatetimeIndex(['2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
               '2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18'],
              dtype='datetime64[ns]', name='t_index5', freq='D')

pd.bdate_range() 默认频率为工作日

t_index6 = pd.bdate_range(start="20191001", end="20191007", name="t_index6")
print(t_index6)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', name='t_index6', freq='B')

pd.date_range 转换成list 元素为时间戳Timestamp

t_index7_list= pd.date_range(start="20191001", end="20191007", name="t_index7_list")
print(t_index7_list)
print("\n")

t_index7_list= list(pd.date_range(start="20191001", end="20191007", name="t_index7_list"))
print(t_index7_list)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', name='t_index7_list', freq='D')
[Timestamp('2019-10-01 00:00:00', freq='D'), Timestamp('2019-10-02 00:00:00', freq='D'), Timestamp('2019-10-03 00:00:00', freq='D'), Timestamp('2019-10-04 00:00:00', freq='D'), Timestamp('2019-10-05 00:00:00', freq='D'), Timestamp('2019-10-06 00:00:00', freq='D'), Timestamp('2019-10-07 00:00:00', freq='D')]

freq 日期偏移量

# 默认freq = 'D' 每日

pd.date_range("10/1/2019", "2019/10/7")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', freq='D')
# 'B' 每工作日

pd.date_range("10/01/2019", "10/07/2019", freq = "B")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', freq='B')
# H 每小时

pd.date_range("10/01/2019  12:00:00", "10/02/2019 12:00:00", freq = "H")  
DatetimeIndex(['2019-10-01 12:00:00', '2019-10-01 13:00:00',
               '2019-10-01 14:00:00', '2019-10-01 15:00:00',
               '2019-10-01 16:00:00', '2019-10-01 17:00:00',
               '2019-10-01 18:00:00', '2019-10-01 19:00:00',
               '2019-10-01 20:00:00', '2019-10-01 21:00:00',
               '2019-10-01 22:00:00', '2019-10-01 23:00:00',
               '2019-10-02 00:00:00', '2019-10-02 01:00:00',
               '2019-10-02 02:00:00', '2019-10-02 03:00:00',
               '2019-10-02 04:00:00', '2019-10-02 05:00:00',
               '2019-10-02 06:00:00', '2019-10-02 07:00:00',
               '2019-10-02 08:00:00', '2019-10-02 09:00:00',
               '2019-10-02 10:00:00', '2019-10-02 11:00:00',
               '2019-10-02 12:00:00'],
              dtype='datetime64[ns]', freq='H')
 # T/MIN 每分

pd.date_range("10/01/2019 12:10:00" , "10/01/2019 12:30:00", freq = "T") 
DatetimeIndex(['2019-10-01 12:10:00', '2019-10-01 12:11:00',
               '2019-10-01 12:12:00', '2019-10-01 12:13:00',
               '2019-10-01 12:14:00', '2019-10-01 12:15:00',
               '2019-10-01 12:16:00', '2019-10-01 12:17:00',
               '2019-10-01 12:18:00', '2019-10-01 12:19:00',
               '2019-10-01 12:20:00', '2019-10-01 12:21:00',
               '2019-10-01 12:22:00', '2019-10-01 12:23:00',
               '2019-10-01 12:24:00', '2019-10-01 12:25:00',
               '2019-10-01 12:26:00', '2019-10-01 12:27:00',
               '2019-10-01 12:28:00', '2019-10-01 12:29:00',
               '2019-10-01 12:30:00'],
              dtype='datetime64[ns]', freq='T')
# S 每秒

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "S")  
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 00:00:01',
               '2019-10-01 00:00:02', '2019-10-01 00:00:03',
               '2019-10-01 00:00:04', '2019-10-01 00:00:05',
               '2019-10-01 00:00:06', '2019-10-01 00:00:07',
               '2019-10-01 00:00:08', '2019-10-01 00:00:09',
               '2019-10-01 00:00:10', '2019-10-01 00:00:11',
               '2019-10-01 00:00:12', '2019-10-01 00:00:13',
               '2019-10-01 00:00:14', '2019-10-01 00:00:15',
               '2019-10-01 00:00:16', '2019-10-01 00:00:17',
               '2019-10-01 00:00:18', '2019-10-01 00:00:19',
               '2019-10-01 00:00:20', '2019-10-01 00:00:21',
               '2019-10-01 00:00:22', '2019-10-01 00:00:23',
               '2019-10-01 00:00:24', '2019-10-01 00:00:25',
               '2019-10-01 00:00:26', '2019-10-01 00:00:27',
               '2019-10-01 00:00:28', '2019-10-01 00:00:29',
               '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', freq='S')
# L 每毫秒 (千分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "L")  
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.001000',
               '2019-10-01 00:00:00.002000', '2019-10-01 00:00:00.003000',
               '2019-10-01 00:00:00.004000', '2019-10-01 00:00:00.005000',
               '2019-10-01 00:00:00.006000', '2019-10-01 00:00:00.007000',
               '2019-10-01 00:00:00.008000', '2019-10-01 00:00:00.009000',
               ...
               '2019-10-01 00:00:29.991000', '2019-10-01 00:00:29.992000',
               '2019-10-01 00:00:29.993000', '2019-10-01 00:00:29.994000',
               '2019-10-01 00:00:29.995000', '2019-10-01 00:00:29.996000',
               '2019-10-01 00:00:29.997000', '2019-10-01 00:00:29.998000',
               '2019-10-01 00:00:29.999000',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30001, freq='L')
# U 每微秒 (百万分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "U") # U 每微秒 (百万分之一秒)
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')

星期几缩写 – MON/TUE/WED/THU/FRI/SAT/SUN

# "W-MON"指定从星期一开始算起 间隔是每周

pd.date_range("2019/10/1", "2019/11/1", freq = "W-MON")  
DatetimeIndex(['2019-10-07', '2019-10-14', '2019-10-21', '2019-10-28'], dtype='datetime64[ns]', freq='W-MON')
# "WOM-2MON" 指定每月的第2个星期一 间隔是月

pd.date_range("2019/10/1", "2020/10/1", freq = "WOM-2MON") 
DatetimeIndex(['2019-10-14', '2019-11-11', '2019-12-09', '2020-01-13',
               '2020-02-10', '2020-03-09', '2020-04-13', '2020-05-11',
               '2020-06-08', '2020-07-13', '2020-08-10', '2020-09-14'],
              dtype='datetime64[ns]', freq='WOM-2MON')
# M -- 每月最后一个日历日

pd.date_range("2019", "2020", freq = "M") 
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')

月份

"""
一月     Jan.     January

二月     Feb.     February

三月     Mar.     March

四月     Apr.     April

五月     May.     May

六月     Jun.     June

七月     Jul.     July

八月     Aug.     August

九月     Sept.    September

十月     Oct.     October

十一月   Nov.     November

十二月   Dec.     December
"""
# Q 每个季度末最后一月的最后一个日历日

print(pd.date_range("2019", "2020", freq="Q-JAN")) 

print(pd.date_range("2019", "2020", freq="Q-FEB"))

print(pd.date_range("2019", "2020", freq="Q-MAR"))
print("")

# 所以Q-月只有三种情况  1-4-7-10, 2-5-8-11, 3-6-9-12
print(pd.date_range("2019", "2020", freq="Q-APR"))
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-JAN')
DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-31', '2019-11-30'], dtype='datetime64[ns]', freq='Q-FEB')
DatetimeIndex(['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'], dtype='datetime64[ns]', freq='Q-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-APR')
# A -- 每年指定月份的最后一个日历日

print(pd.date_range("2019", "2021", freq="A-JAN"))
print(pd.date_range("2019", "2021", freq="A-FEB"))
print(pd.date_range("2019", "2021", freq="A-DEC"))
DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='A-JAN')
DatetimeIndex(['2019-02-28', '2020-02-29'], dtype='datetime64[ns]', freq='A-FEB')
DatetimeIndex(['2019-12-31', '2020-12-31'], dtype='datetime64[ns]', freq='A-DEC')
# BM - 每月最后一个工作日

print(pd.date_range("2019", "2020", freq="BM"))
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29', '2019-04-30',
               '2019-05-31', '2019-06-28', '2019-07-31', '2019-08-30',
               '2019-09-30', '2019-10-31', '2019-11-29', '2019-12-31'],
              dtype='datetime64[ns]', freq='BM')
# BQ - 每个季度末最后一月的最后一个工作日

print(pd.date_range("2019", "2021", freq="BQ-JAN"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-FEB"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-MAR"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-APR"))
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-JAN')

DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-30', '2019-11-29',
               '2020-02-28', '2020-05-29', '2020-08-31', '2020-11-30'],
              dtype='datetime64[ns]', freq='BQ-FEB')

DatetimeIndex(['2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
               '2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='BQ-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-APR')
# BA -- 每年指定月份的最后一个工作日

print(pd.date_range("2019", "2021", freq="BA-JAN"))
print(pd.date_range("2019", "2023", freq="BA-FEB"))
print(pd.date_range("2019", "2021", freq="BA-MAR"))
DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='BA-JAN')
DatetimeIndex(['2019-02-28', '2020-02-28', '2021-02-26', '2022-02-28'], dtype='datetime64[ns]', freq='BA-FEB')
DatetimeIndex(['2019-03-29', '2020-03-31'], dtype='datetime64[ns]', freq='BA-MAR')
# MS -- 每月第一个日历日

pd.date_range("2019", "2020", freq="MS")
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='MS')
# QS - 每个季度第一个月的第一个日历日 (QS-月份 指定季度的起始月)

print(pd.date_range("2019", "2020", freq="QS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="QS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="QS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="QS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='QS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'], dtype='datetime64[ns]', freq='QS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-APR')
# AS -- 每年指定月份的第一个日历日

print(pd.date_range("2019", "2021", freq="AS-JAN"))
print(pd.date_range("2019", "2021", freq="AS-FEB"))
print(pd.date_range("2019", "2021", freq="AS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='AS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-01'], dtype='datetime64[ns]', freq='AS-FEB')
DatetimeIndex(['2019-12-01', '2020-12-01'], dtype='datetime64[ns]', freq='AS-DEC')
# BMS -- 每月第一个工作日

print(pd.date_range("2019", "2021", freq="BMS"))
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-03', '2019-07-01', '2019-08-01',
               '2019-09-02', '2019-10-01', '2019-11-01', '2019-12-02',
               '2020-01-01', '2020-02-03', '2020-03-02', '2020-04-01',
               '2020-05-01', '2020-06-01', '2020-07-01', '2020-08-03',
               '2020-09-01', '2020-10-01', '2020-11-02', '2020-12-01',
               '2021-01-01'],
              dtype='datetime64[ns]', freq='BMS')
# BQS - 每个季度第一个月的第一个工作日 (BQS-月份 指定季度的起始月)

print(pd.date_range("2019", "2020", freq="BQS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='BQS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'], dtype='datetime64[ns]', freq='BQS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-APR')
# BAS -- 每年指定月份的第一个工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')

复合频率

# 7D 间隔是7天

pd.date_range("2019/10/1", "2019/12/1", freq="7D")
DatetimeIndex(['2019-10-01', '2019-10-08', '2019-10-15', '2019-10-22',
               '2019-10-29', '2019-11-05', '2019-11-12', '2019-11-19',
               '2019-11-26'],
              dtype='datetime64[ns]', freq='7D')
# 2h30min 间隔是2小时30分钟

pd.date_range("2019/10/1 00:00:00", "2019/10/1 12:00:00", freq="2h30min")
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 02:30:00',
               '2019-10-01 05:00:00', '2019-10-01 07:30:00',
               '2019-10-01 10:00:00'],
              dtype='datetime64[ns]', freq='150T')
# 2M 每间隔2个月的最后一个日历日

pd.date_range("2019", "2021", freq="2M")  
DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
               '2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
               '2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
              dtype='datetime64[ns]', freq='2M')

asfreq 时间频率转换

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# 这里是把D改为4H
print(ts.asfreq("4H"))
print("\n")

# method 插值模式 ffill 用之前值填充 bfill 用之后值填充
print(ts.asfreq("4H", method="ffill"))  
print("\n")

print(ts.asfreq("4H", method="bfill"))
2019-01-01    0.610403
2019-01-02    0.416557
2019-01-03    0.821631
2019-01-04    0.699457
Freq: D, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00         NaN
2019-01-01 08:00:00         NaN
2019-01-01 12:00:00         NaN
2019-01-01 16:00:00         NaN
2019-01-01 20:00:00         NaN
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00         NaN
2019-01-02 08:00:00         NaN
2019-01-02 12:00:00         NaN
2019-01-02 16:00:00         NaN
2019-01-02 20:00:00         NaN
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00         NaN
2019-01-03 08:00:00         NaN
2019-01-03 12:00:00         NaN
2019-01-03 16:00:00         NaN
2019-01-03 20:00:00         NaN
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.610403
2019-01-01 08:00:00    0.610403
2019-01-01 12:00:00    0.610403
2019-01-01 16:00:00    0.610403
2019-01-01 20:00:00    0.610403
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.416557
2019-01-02 08:00:00    0.416557
2019-01-02 12:00:00    0.416557
2019-01-02 16:00:00    0.416557
2019-01-02 20:00:00    0.416557
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.821631
2019-01-03 08:00:00    0.821631
2019-01-03 12:00:00    0.821631
2019-01-03 16:00:00    0.821631
2019-01-03 20:00:00    0.821631
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.416557
2019-01-01 08:00:00    0.416557
2019-01-01 12:00:00    0.416557
2019-01-01 16:00:00    0.416557
2019-01-01 20:00:00    0.416557
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.821631
2019-01-02 08:00:00    0.821631
2019-01-02 12:00:00    0.821631
2019-01-02 16:00:00    0.821631
2019-01-02 20:00:00    0.821631
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.699457
2019-01-03 08:00:00    0.699457
2019-01-03 12:00:00    0.699457
2019-01-03 16:00:00    0.699457
2019-01-03 20:00:00    0.699457
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64

超前/滞后数据 shift(正数): 数值后移–滞后 ,shift(负数): 数值前移–超前

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

print(ts.shift(1))
print("\n")

print(ts.shift(-2))
print("\n")

# 计算变化比率 该时间戳的值除以上一个时间戳的值 (若要得到变化百分比 需再减1 即 ts/ts.shift(1) - 1)

per = ts/ts.shift(1)
print(per)
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    0.197884
2019-01-03    0.403093
2019-01-04    0.208341
Freq: D, dtype: float64
2019-01-01    0.208341
2019-01-02    0.330873
2019-01-03         NaN
2019-01-04         NaN
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    2.037017
2019-01-03    0.516855
2019-01-04    1.588134
Freq: D, dtype: float64

shift(freq) 加上freq参数 对时间戳进行位移 而不是对数值进行位移

print(ts)
print("\n")

print(ts.shift(2, freq="D"))  # 按天
print("\n")

print(ts.shift(2, freq="T"))  # 按分钟
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-03    0.197884
2019-01-04    0.403093
2019-01-05    0.208341
2019-01-06    0.330873
Freq: D, dtype: float64
2019-01-01 00:02:00    0.197884
2019-01-02 00:02:00    0.403093
2019-01-03 00:02:00    0.208341
2019-01-04 00:02:00    0.330873
Freq: D, dtype: float64
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章