# -*- coding: utf-8 -*-
'''
# Created on 八月-23-19 11:21
# test2.py
# @author: zhugelaoliu
# @DESC: zhugelaoliu
'''
"""
有個超大的目錄,其中都是想要處理的日誌文件
"""
import os
import fnmatch
import gzip
import bz2
import re
def gen_find(filepath, top):
    """Yield the path of every file under ``top`` whose name matches a pattern.

    Args:
        filepath: Shell-style wildcard (``fnmatch`` syntax) that is matched
            against each bare file name, e.g. ``"access-log*"``.
        top: Root directory of the tree to walk.

    Yields:
        ``os.path.join(directory, name)`` for each matching file, in the
        order produced by ``os.walk``.
    """
    for path, _dirlist, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepath):
            yield os.path.join(path, name)


def gen_opener(filenames):
    """Open a sequence of file names one at a time, yielding a file object.

    Names ending in ``.gz`` are opened with ``gzip.open``, names ending in
    ``.bz2`` with ``bz2.open`` and everything else with the builtin ``open``;
    all of them in text mode (``'rt'``).

    Args:
        filenames: Iterable of path strings.

    Yields:
        The open file object for the current name. It is closed as soon as
        the consumer advances to the next file.
    """
    for filename in filenames:
        if filename.endswith('.gz'):
            opener = gzip.open
        elif filename.endswith('.bz2'):
            opener = bz2.open
        else:
            opener = open
        # ``with`` also closes the file when the generator is closed early or
        # an exception is thrown in at the yield; a bare ``f.close()`` after
        # the yield would be skipped in those cases and leak the handle.
        with opener(filename, 'rt') as f:
            yield f


def gen_concatenate(iterators):
    """Chain a sequence of iterables together into a single flat sequence.

    Args:
        iterators: Iterable whose items are themselves iterable.

    Yields:
        Every item of the first inner iterable, then every item of the
        second, and so on. Each inner iterable is only started once the
        previous one is exhausted.
    """
    for it in iterators:
        yield from it
def gen_grep(pattern, lines):
    """Yield the lines that contain a match for a regular expression.

    Args:
        pattern: Regular expression passed to ``re.compile``.
        lines: Iterable of strings to search.

    Yields:
        Each item of ``lines``, unchanged and in order, for which
        ``re.search`` finds a match anywhere in the string.
    """
    # Compile once up front so the loop only pays for the search itself.
    pat = re.compile(pattern)
    for line in lines:
        if pat.search(line):
            yield line
# 擴展
# 使用場景擴展:
# 解析、讀取實時的數據源、定期輪詢等。
# 重點理解 gen_concatenate 函數中的 yield from it,這是一個子生成器語句。
# 扁平化處理嵌套型的序列(推薦使用 yield from 關鍵字)
# ``Iterable`` lives in ``collections.abc``; the bare ``collections`` alias
# was removed in Python 3.10.
from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Flatten an arbitrarily nested iterable into a single stream of leaves.

    Args:
        items: Iterable that may contain further iterables at any depth.
        ignore_types: Type or tuple of types that are treated as leaves even
            though they are iterable. Defaults to ``(str, bytes)`` so text is
            yielded whole rather than split into characters.

    Yields:
        Every non-iterable element, and every element that is an instance of
        ``ignore_types``, in depth-first, left-to-right order.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # Pass ignore_types down so a caller's custom leaf types are
            # honoured at every nesting level, not just the top one.
            yield from flatten(x, ignore_types)
        else:
            yield x
# Demo: flatten a nested list and print each leaf value on its own line.
# The original called an undefined name ``f``; the function is ``flatten``.
items = [1, 2, [11, 22, [111, 222, [1111, 2222]]]]
for x in flatten(items):
    print(x)