Flume自帶的用於本地文件存儲的Sink即RollingFileSink,其主要的官網參數如下
其中sink.rollInterval表示每隔多久另起一個文件,比如數據以每小時(sink.rollInterval=3600)寫一個文件,那麼flume就會自啓動起開始每小時生成一個新文件,而且文件的名稱是以時間戳的方式命名的,非常不直觀友好,比如在2019-12-01 00:00:00啓動,那第一個文件就是flume-1575129600000(flume-倒是可以通過sink.pathManager.prefix來自定義),這樣不能一眼看出文件屬於哪個時間段,比較常用的可以爲flume-2019120100之類的方式,針對這個問題對RollingFileSink進行小小的升級,以實現可以自定義文件命名格式
升級後支持的特性
支持以yyyyMMddHHmmss(年月日時分秒)這樣的時間格式來配置文件名稱
支持同一個rollInterval內重啓多次,文件名不會衝突
新增參數
sink.rollInterval=3600 --原有參數不變,但是意義略有不同,原先rollInterval是自程序啓動起文件切換的時間間隔,現在表示文件存儲的時間單位間隔(單位:秒)。如果想要每十分鐘爲一個間隔(即如flume-2019120110 flume-2019120120 flume-2019120130。。。),則rollInterval=600
sink.file.name.timeFormat=yyyyMMddHH --文件的命名方式,可以精確到秒,配合rollInterval,
sink.fileMonitor=20 --檢查文件名稱是否需要切換的時間間隔,單位爲秒(原來由rollInterval兼任這個作用),例如每20s看一下當前時間所屬的時間段,如果時間段發生變化就需要創建新文件
工程準備
首先自定義一個Flume-sink工程,將原有的RollingFileSink和PathManager拷貝過來,在pom中引入下面三個依賴
<!-- The three Apache Flume 1.7.0 artifacts the custom sink project is built against. -->
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-sdk</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-configuration</artifactId>
<version>1.7.0</version>
</dependency>
代碼更新如下
RollingFileSink 類,變動部分均加上了中文解釋,爲節省篇幅,部分較多未動的代碼有所省略,已經有所註釋,請注意
public class RollingFileSink extends AbstractSink implements Configurable {
private static final Logger logger = LoggerFactory
.getLogger(RollingFileSink.class);
//........省略了類中原有變量,主要的變量定義如下
private int fileMonitor;
private String fileNameTimeFormat;
private PathManager pathController;
private long rollInterval;
//新加了一個全局變量,當前文件名稱
private volatile String currentFile;
public RollingFileSink() {
pathController = new PathManager();
shouldRotate = false;
}
public void configure(Context context) {
String directory = context.getString("sink.directory");
String rollInterval = context.getString("sink.rollInterval");
//.......此處有省略其他參數讀取
/**
* 新增兩個參數,文件名前綴和時間格式,如果不配置時間時間則默認是時間戳的方式
*/
filePrefix=context.getString("sink.file.prefix","flume-");
fileNameTimeFormat=context.getString("sink.file.name.timeFormat","");
if (rollInterval == null) {
this.rollInterval = defaultRollInterval;
} else {
this.rollInterval = Long.parseLong(rollInterval);
}
batchSize = context.getInteger("sink.batchSize", defaultBatchSize);
fileMonitor=context.getInteger("sink.fileMonitor",10);
this.directory = new File(directory);
if (sinkCounter == null) {
sinkCounter = new SinkCounter(getName());
}
}
//啓動函數
public void start() {
logger.info("Starting {}...", this);
sinkCounter.start();
super.start();
pathController.setBaseDirectory(directory);
pathController.setFileNameFormat(fileNameTimeFormat);
pathController.setFilePrefix(filePrefix);
pathController.setRollInterval((int) rollInterval/60);
int fileRoll=fileMonitor;
if (rollInterval > 0) {
rollService = Executors.newScheduledThreadPool(
1,
new ThreadFactoryBuilder().setNameFormat(
"rollingFileSink-roller-" +
Thread.currentThread().getId() + "-%d").build());
//這裏跟原先不同,原先這裏參數直接是rollInterval,現在換成了專門的fileMonitor
rollService.scheduleAtFixedRate(new Runnable() {
public void run() {
File file=pathController.getCurrentFile();
//在這裏獲取當前文件名稱
currentFile=file.getName();
shouldRotate = true;
}
}, fileRoll, fileRoll, TimeUnit.SECONDS);
} else {
logger.info("RollInterval is not valid, file rolling will not happen.");
}
logger.info("RollingFileSink {} started.", getName());
}
//數據處理函數
public Sink.Status process() throws EventDeliveryException {
//獲取當前時間實際應該是那個最新的文件名
String newestFileName=pathController.latestFile();
//如果當前文件名不是當前時間段最新的文件名,則表示需要切換文件了,關閉上一個在寫的文件
if (shouldRotate&&(!currentFile.equals(newestFileName))) {
if (outputStream != null) {
logger.info("Closing file {}", pathController.getCurrentFile());
try {
serializer.flush();
serializer.beforeClose();
outputStream.close();
sinkCounter.incrementConnectionClosedCount();
shouldRotate = false;
} catch (IOException e) {
sinkCounter.incrementConnectionFailedCount();
throw new EventDeliveryException("Unable to rotate file "
+ pathController.getCurrentFile() + " while delivering event", e);
} finally {
serializer = null;
outputStream = null;
}
pathController.rotate();
}
}
//打開新的文件
if (outputStream == null) {
File currentFile = pathController.getCurrentFile();
logger.info("Opening output stream for file {}", currentFile);
try {
outputStream = new BufferedOutputStream(
new FileOutputStream(currentFile));
serializer = EventSerializerFactory.getInstance(
serializerType, serializerContext, outputStream);
serializer.afterCreate();
sinkCounter.incrementConnectionCreatedCount();
} catch (IOException e) {
sinkCounter.incrementConnectionFailedCount();
throw new EventDeliveryException("Failed to open file "
+ pathController.getCurrentFile() + " while delivering event", e);
}
}
Channel channel = getChannel();
Transaction transaction = channel.getTransaction();
Event event = null;
Sink.Status result = Sink.Status.READY;
String content;
try {
/ .........此處代碼跟原先一樣,爲節省篇幅,此處省略
} catch (Exception ex) {
transaction.rollback();
throw new EventDeliveryException("Failed to process transaction", ex);
} finally {
transaction.close();
}
retrn result;
}
@Override
public void stop() {
//...stop()函數不變,此處代碼跟原先一樣,爲節省篇幅,此處省略
}
//...省略其他set方法
}
PathManager 類
/**
 * Chooses the file the sink writes to.
 *
 * <p>When a time format is set, files are named
 * {@code <prefix><formatted time><minute bucket>}; otherwise they are named
 * {@code <prefix><epoch millis>-<index>}.
 */
public class PathManager {

  private static final Logger logger = LoggerFactory.getLogger(PathManager.class);

  /** How many random suffixes are tried before falling back to a timestamp suffix. */
  private static final int MAX_BACKUP_NAME_ATTEMPTS = 1000;

  private File baseDirectory;
  private AtomicInteger fileIndex;
  private File currentFile;
  private String filePrefix = "";
  private String fileNameTimeFormat = "";
  // Width of one file's time window, in minutes
  private int rollInterval;

  public PathManager() {
    fileIndex = new AtomicInteger();
  }

  /**
   * Sets the width of one file's time window.
   *
   * @param rollInterval window width in minutes; values below 1 are treated as 1
   *     when a file name is built
   */
  public void setRollInterval(int rollInterval) {
    this.rollInterval = rollInterval;
  }

  /**
   * Picks the next file to write and remembers it as the current file.
   *
   * <p>If a file with the chosen name already exists (typically because the
   * service was restarted inside the same time window), the existing file is
   * first renamed with a numeric suffix so that it is not overwritten.
   *
   * @return the file to write to
   */
  public File nextFile() {
    String fileName;
    if (!fileNameTimeFormat.equals("")) {
      fileName = timeBasedName();
    } else {
      fileName = filePrefix + getSeriesTimestamp() + "-" + fileIndex.incrementAndGet();
    }

    currentFile = new File(baseDirectory, fileName);
    if (currentFile.exists()) {
      File backup = unusedBackupFile(fileName);
      // renameTo reports failure only through its return value
      if (!currentFile.renameTo(backup)) {
        logger.warn("Could not rename existing file {} to {}", currentFile, backup);
      }
    }
    return currentFile;
  }

  /**
   * Builds the time-based file name that applies at the moment of the call.
   *
   * @return prefix, formatted time and minute bucket concatenated
   */
  public String latestFile() {
    return timeBasedName();
  }

  /**
   * Returns the current file, choosing one via {@link #nextFile()} if none is set.
   *
   * @return the current file
   */
  public File getCurrentFile() {
    if (currentFile == null) {
      return nextFile();
    }
    return currentFile;
  }

  /** Forgets the current file so that the next lookup chooses a new one. */
  public void rotate() {
    currentFile = null;
  }

  public File getBaseDirectory() {
    return baseDirectory;
  }

  public void setBaseDirectory(File baseDirectory) {
    this.baseDirectory = baseDirectory;
  }

  public void setFilePrefix(String prefix) {
    filePrefix = prefix;
  }

  public void setFileNameFormat(String patten) {
    fileNameTimeFormat = patten;
  }

  public AtomicInteger getFileIndex() {
    return fileIndex;
  }

  /**
   * Returns a random number as text.
   *
   * @return a value between 1 and 1000 inclusive
   */
  public String getRandom() {
    return String.valueOf((int) (1 + Math.random() * (1000)));
  }

  private long getSeriesTimestamp() {
    return System.currentTimeMillis();
  }

  /**
   * Builds {@code <prefix><formatted time><minute bucket>} from one clock reading.
   *
   * <p>Buckets start at minute 0 of each hour and are rollInterval minutes wide,
   * e.g. with rollInterval=15 the bucket part is 00, 15, 30 or 45.
   */
  private String timeBasedName() {
    // One instant feeds both the formatted part and the minute bucket, so the
    // two cannot end up on different sides of an hour boundary.
    Date now = new Date();
    String formattedTime = new SimpleDateFormat(fileNameTimeFormat).format(now);

    Calendar calendar = Calendar.getInstance();
    calendar.setTime(now);

    // Guard against an unset or zero interval, which would divide by zero.
    int windowMinutes = Math.max(1, rollInterval);
    int bucketStart = (calendar.get(Calendar.MINUTE) / windowMinutes) * windowMinutes;
    String bucket = bucketStart < 10 ? "0" + bucketStart : String.valueOf(bucketStart);

    return filePrefix + formattedTime + bucket;
  }

  /** Finds a backup name in the base directory that is not taken yet. */
  private File unusedBackupFile(String fileName) {
    for (int attempt = 0; attempt < MAX_BACKUP_NAME_ATTEMPTS; attempt++) {
      File candidate = new File(baseDirectory, fileName + "-" + getRandom());
      if (!candidate.exists()) {
        return candidate;
      }
    }
    // Every random suffix tried was taken; fall back to a timestamp suffix.
    return new File(baseDirectory, fileName + "-" + System.currentTimeMillis());
  }
}