agent.sources = source
agent.channels = channel
agent.sinks = sink

agent.sources.source.type = avro
agent.sources.source.bind = 192.168.0.100
agent.sources.source.port = 44444
agent.sources.source.channels = channel

agent.sinks.sink.type = org.apache.flume.sink.FileSink
agent.sinks.sink.file.path = /data/log/%{dayStr}
agent.sinks.sink.file.filePrefix = log-%{hourStr}%{minStr}-
agent.sinks.sink.file.txnEventMax = 10000
agent.sinks.sink.file.maxOpenFiles = 5
agent.sinks.sink.channel = channel

agent.channels.channel.type = memory
agent.channels.channel.capacity = 100000
agent.channels.channel.transactionCapacity = 100000
agent.channels.channel.keep-alive = 60
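The sink's file.path and file.filePrefix interpolate %{dayStr}, %{hourStr}, and %{minStr} from each event's headers, so the client sending to the Avro source is responsible for supplying them. A minimal sketch of such a client using Flume's stock RpcClient API; the host and port come from the configuration above, while the header date formats are an assumption for illustration:

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.flume.Event;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

public class HeaderedAvroClient {
    public static void main(String[] args) throws Exception {
        // Connect to the Avro source defined in the agent configuration.
        RpcClient client = RpcClientFactory.getDefaultInstance("192.168.0.100", 44444);
        try {
            Date now = new Date();
            Map<String, String> headers = new HashMap<String, String>();
            // These headers feed the sink's %{dayStr}, %{hourStr}, and
            // %{minStr} placeholders; the formats are illustrative.
            headers.put("dayStr", new SimpleDateFormat("yyyy-MM-dd").format(now));
            headers.put("hourStr", new SimpleDateFormat("HH").format(now));
            headers.put("minStr", new SimpleDateFormat("mm").format(now));
            Event event = EventBuilder.withBody("a log line".getBytes("UTF-8"), headers);
            client.append(event); // delivered into the agent's memory channel
        } finally {
            client.close();
        }
    }
}

The implementation of the custom FileSink referenced by agent.sinks.sink.type follows.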
package org.apache.flume.sink;
import java.io.IOException;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.formatter.output.BucketPath;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.serialization.EventSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
public class FileSink extends AbstractSink implements Configurable {

    private static final Logger logger = LoggerFactory.getLogger(FileSink.class);

    private static final String defaultFileName = "FlumeData";
    private static final int defaultMaxOpenFiles = 50;

    private String path;
    /** Maximum number of events taken from the channel in one transaction. */
    private long txnEventMax;
    private FileWriterLinkedHashMap sfWriters;
    private String serializerType;
    private Context serializerContext;
    private boolean needRounding = false;
    private int roundUnit = Calendar.SECOND;
    private int roundValue = 1;
    private SinkCounter sinkCounter;
    private int maxOpenFiles;
    private ScheduledExecutorService timedRollerPool;
    private long rollInterval;

    @Override
    public void configure(Context context) {
        String directory = Preconditions.checkNotNull(
                context.getString("file.path"), "file.path is required");
        String fileName = context.getString("file.filePrefix", defaultFileName);
        this.path = directory + "/" + fileName;
        maxOpenFiles = context.getInteger("file.maxOpenFiles",
                defaultMaxOpenFiles);
        serializerType = context.getString("sink.serializer", "TEXT");
        serializerContext = new Context(
                context.getSubProperties(EventSerializer.CTX_PREFIX));
        txnEventMax = context.getLong("file.txnEventMax", 1L);
        if (sinkCounter == null) {
            sinkCounter = new SinkCounter(getName());
        }
        rollInterval = context.getLong("file.rollInterval", 30L);
        String rollerName = "hdfs-" + getName() + "-roll-timer-%d";
        timedRollerPool = Executors.newScheduledThreadPool(maxOpenFiles,
                new ThreadFactoryBuilder().setNameFormat(rollerName).build());
    }

    @Override
    public Status process() throws EventDeliveryException {
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        List<BucketFileWriter> writers = Lists.newArrayList();
        transaction.begin();
        try {
            Event event = null;
            int txnEventCount = 0;
            for (txnEventCount = 0; txnEventCount < txnEventMax; txnEventCount++) {
                event = channel.take();
                if (event == null) {
                    break;
                }
                // reconstruct the path name by substituting placeholders
                String realPath = BucketPath.escapeString(path,
                        event.getHeaders(), needRounding, roundUnit, roundValue);
                BucketFileWriter bucketFileWriter = sfWriters.get(realPath);
                // we haven't seen this file yet, so open it and cache the handle
                if (bucketFileWriter == null) {
                    bucketFileWriter = new BucketFileWriter();
                    bucketFileWriter.open(realPath, serializerType,
                            serializerContext, rollInterval, timedRollerPool,
                            sfWriters);
                    sfWriters.put(realPath, bucketFileWriter);
                }
                // track the buckets getting written in this transaction
                if (!writers.contains(bucketFileWriter)) {
                    writers.add(bucketFileWriter);
                }
                // write the data to the file
                bucketFileWriter.append(event);
            }
            if (txnEventCount == 0) {
                sinkCounter.incrementBatchEmptyCount();
            } else if (txnEventCount == txnEventMax) {
                sinkCounter.incrementBatchCompleteCount();
            } else {
                sinkCounter.incrementBatchUnderflowCount();
            }
            // flush all pending buckets before committing the transaction
            for (BucketFileWriter bucketFileWriter : writers) {
                if (!bucketFileWriter.isBatchComplete()) {
                    flush(bucketFileWriter);
                }
            }
            transaction.commit();
            if (txnEventCount > 0) {
                sinkCounter.addToEventDrainSuccessCount(txnEventCount);
            }
            if (event == null) {
                return Status.BACKOFF;
            }
            return Status.READY;
        } catch (IOException eIO) {
            transaction.rollback();
            logger.warn("File IO error", eIO);
            return Status.BACKOFF;
        } catch (Throwable th) {
            transaction.rollback();
            logger.error("process failed", th);
            if (th instanceof Error) {
                throw (Error) th;
            } else {
                throw new EventDeliveryException(th);
            }
        } finally {
            transaction.close();
        }
    }

    private void flush(BucketFileWriter bucketFileWriter) throws IOException {
        bucketFileWriter.flush();
    }

    @Override
    public synchronized void start() {
        super.start();
        this.sfWriters = new FileWriterLinkedHashMap(maxOpenFiles);
        sinkCounter.start();
    }
}
package org.apache.flume.sink;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.concurrent.Callable;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.serialization.EventSerializer;
import org.apache.flume.serialization.EventSerializerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BucketFileWriter {

    private static final Logger logger = LoggerFactory
            .getLogger(BucketFileWriter.class);

    private static final String IN_USE_EXT = ".tmp";

    /** Suffix that makes each file name unique; seeded with the creation time. */
    private final AtomicLong fileExtensionCounter;

    private OutputStream outputStream;
    private EventSerializer serializer;
    private String filePath;

    public BucketFileWriter() {
        fileExtensionCounter = new AtomicLong(System.currentTimeMillis());
    }

    public void open(final String filePath, String serializerType,
            Context serializerContext, final long rollInterval,
            final ScheduledExecutorService timedRollerPool,
            final FileWriterLinkedHashMap sfWriters) throws IOException {
        this.filePath = filePath;
        File file = new File(filePath + fileExtensionCounter + IN_USE_EXT);
        file.getParentFile().mkdirs();
        outputStream = new BufferedOutputStream(new FileOutputStream(file));
        logger.info("filename = " + file.getAbsolutePath());
        serializer = EventSerializerFactory.getInstance(serializerType,
                serializerContext, outputStream);
        serializer.afterCreate();
        if (rollInterval > 0) {
            Callable<Void> action = new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    logger.debug(
                            "Rolling file ({}): Roll scheduled after {} sec elapsed.",
                            filePath + fileExtensionCounter + IN_USE_EXT,
                            rollInterval);
                    if (sfWriters.containsKey(filePath)) {
                        sfWriters.remove(filePath);
                    }
                    close();
                    return null;
                }
            };
            timedRollerPool.schedule(action, rollInterval, TimeUnit.SECONDS);
        }
    }

    public void append(Event event) throws IOException {
        serializer.write(event);
    }

    public boolean isBatchComplete() {
        return true;
    }

    public void flush() throws IOException {
        serializer.flush();
        outputStream.flush();
    }

    /**
     * Rename the bucket file from its .tmp name to the permanent location.
     */
    private void renameBucket() {
        File srcPath = new File(filePath + fileExtensionCounter + IN_USE_EXT);
        File dstPath = new File(filePath + fileExtensionCounter);
        if (srcPath.exists()) {
            srcPath.renameTo(dstPath);
            logger.info("Renaming " + srcPath + " to " + dstPath);
        }
    }

    /**
     * Close the file handle and rename the temp file to its permanent name.
     * Safe to call multiple times.
     */
    public synchronized void close() throws IOException, InterruptedException {
        if (outputStream != null) {
            outputStream.flush();
            outputStream.close();
        }
        renameBucket();
    }
}
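For reference, the "TEXT" serializer the sink configures by default resolves, in Flume 1.x, to a body-text serializer that writes the raw event body followed by a newline. A minimal standalone sketch of the same factory call BucketFileWriter.open() makes; the output path here is illustrative:

import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.flume.Context;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.serialization.EventSerializer;
import org.apache.flume.serialization.EventSerializerFactory;

public class TextSerializerDemo {
    public static void main(String[] args) throws Exception {
        OutputStream out = new FileOutputStream("/tmp/serializer-demo.log");
        try {
            // "TEXT" is the same serializer type FileSink passes through
            // to BucketFileWriter; an empty Context uses its defaults.
            EventSerializer s = EventSerializerFactory.getInstance(
                    "TEXT", new Context(), out);
            s.afterCreate();
            s.write(EventBuilder.withBody("hello".getBytes("UTF-8")));
            s.flush();
            s.beforeClose();
        } finally {
            out.close();
        }
    }
}

Finally, the access-ordered map that caps the number of open file handles: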
package org.apache.flume.sink;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FileWriterLinkedHashMap extends
        LinkedHashMap<String, BucketFileWriter> {

    private static final Logger logger = LoggerFactory
            .getLogger(FileWriterLinkedHashMap.class);

    private static final long serialVersionUID = -7860596835613215998L;

    private final int maxOpenFiles;

    public FileWriterLinkedHashMap(int maxOpenFiles) {
        // stock initial capacity and load factor; access-ordered for LRU
        super(16, 0.75f, true);
        this.maxOpenFiles = maxOpenFiles;
    }

    @Override
    protected boolean removeEldestEntry(Entry<String, BucketFileWriter> eldest) {
        if (size() > maxOpenFiles) {
            // If we have more than maxOpenFiles open, close the eldest
            // writer and return true so the map evicts it.
            try {
                eldest.getValue().close();
            } catch (IOException e) {
                logger.warn(eldest.getKey(), e);
            } catch (InterruptedException e) {
                logger.warn(eldest.getKey(), e);
                Thread.currentThread().interrupt();
            }
            return true;
        }
        return false;
    }
}
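The super(16, 0.75f, true) call is what makes this eviction least-recently-used rather than insertion-ordered: the third argument enables access ordering, so every get() moves an entry to the tail and removeEldestEntry() always inspects the least-recently-used writer. A plain-JDK sketch of that behavior, with illustrative names and no Flume dependencies:

import java.util.LinkedHashMap;
import java.util.Map;

public class LruDemo {
    public static void main(String[] args) {
        final int maxOpen = 2; // stand-in for maxOpenFiles
        Map<String, String> lru = new LinkedHashMap<String, String>(16, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<String, String> eldest) {
                return size() > maxOpen; // evict once the cap is exceeded
            }
        };
        lru.put("/data/log/a", "writerA");
        lru.put("/data/log/b", "writerB");
        lru.get("/data/log/a");            // touch "a", so "b" becomes eldest
        lru.put("/data/log/c", "writerC"); // evicts "b", not "a"
        System.out.println(lru.keySet()); // prints [/data/log/a, /data/log/c]
    }
}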