Compressing Flink's HDFS output into gzip format

The idea is to wrap the FSDataOutputStream that Flink's Writer opens in a Hadoop CompressionOutputStream built from a configurable codec, so part files land on HDFS already compressed. Because CompressionOutputStream exposes no getPos(), a small wrapper tracks the write position manually. Three classes are involved:

CompressionOutputStreamWrapper.class

import org.apache.hadoop.io.compress.CompressionOutputStream;

import java.io.Serializable;

public class CompressionOutputStreamWrapper implements Serializable {
    // Created in open() and held in a transient field of the writer;
    // CompressionOutputStream itself is not Serializable, so keep it transient.
    private transient CompressionOutputStream compressionOutputStream;
    // Number of uncompressed bytes written so far (the codec stream has no getPos()).
    private long pos;

    public CompressionOutputStreamWrapper() {
    }

    public CompressionOutputStreamWrapper(CompressionOutputStream compressionOutputStream, long pos) {
        this.compressionOutputStream = compressionOutputStream;
        this.pos = pos;
    }

    public CompressionOutputStream getCompressionOutputStream() {
        return compressionOutputStream;
    }

    public void setCompressionOutputStream(CompressionOutputStream compressionOutputStream) {
        this.compressionOutputStream = compressionOutputStream;
    }

    public long getPos() {
        return pos;
    }

    public void setPos(long pos) {
        this.pos = pos;
    }

    @Override
    public String toString() {
        return "CompressionOutputStreamWrapper{" +
                "compressionOutputStream=" + compressionOutputStream +
                ", pos=" + pos +
                '}';
    }
}
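Why the wrapper exists: unlike FSDataOutputStream, Hadoop's CompressionOutputStream has no getPos() method, so the number of uncompressed bytes written is tracked alongside the stream and reported back through Flink's Writer.getPos() contract.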

MyStreamWriterBase.class

import org.apache.flink.streaming.connectors.fs.Writer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;

public abstract class MyStreamWriterBase<T> implements Writer<T> {

    private static final long serialVersionUID = 2L;

    /**
     * The {@code FSDataOutputStream} for the current part file.
     */
    private transient FSDataOutputStream outStream;

    /** Compressed view of {@code outStream}, created from the configured codec. */
    private transient CompressionOutputStream compressionOutputStream;

    /** Holds the codec stream together with the uncompressed write position. */
    private transient CompressionOutputStreamWrapper compressionOutputStreamWrapper;

    private boolean syncOnFlush;

    /** Fully-qualified class name of the Hadoop codec, e.g. {@code org.apache.hadoop.io.compress.GzipCodec}. */
    private String compressionCodec;

    public MyStreamWriterBase() {
    }

    public MyStreamWriterBase(String compressionCodec) {
        this.compressionCodec = compressionCodec;
    }

    protected MyStreamWriterBase(MyStreamWriterBase<T> other) {
        this.syncOnFlush = other.syncOnFlush;
        this.compressionCodec = other.compressionCodec;
    }

    /**
     * Controls whether to sync {@link FSDataOutputStream} on flush.
     */
    public void setSyncOnFlush(boolean syncOnFlush) {
        this.syncOnFlush = syncOnFlush;
    }

    /**
     * Opens the part file and layers the configured compression codec on top of it.
     */
    @Override
    public void open(FileSystem fs, Path path) throws IOException {

        if (outStream != null) {
            throw new IllegalStateException("Writer has already been opened");
        }

        outStream = fs.create(path, false);

        Configuration conf = fs.getConf();

        // Resolve and instantiate the codec; fail fast instead of swallowing the
        // error and hitting an NPE in ReflectionUtils.newInstance below.
        Class<?> codecClass;
        try {
            codecClass = Class.forName(compressionCodec);
        } catch (ClassNotFoundException e) {
            throw new IOException("Compression codec class not found: " + compressionCodec, e);
        }

        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        compressionOutputStream = codec.createOutputStream(outStream);

        compressionOutputStreamWrapper = new CompressionOutputStreamWrapper();
        compressionOutputStreamWrapper.setCompressionOutputStream(compressionOutputStream);
        compressionOutputStreamWrapper.setPos(0);
    }

    @Override
    public long flush() throws IOException {
        if (outStream == null) {
            throw new IllegalStateException("Writer is not open");
        }
        // Push buffered data through the codec into the underlying stream, then
        // flush (or sync) that stream to HDFS, mirroring Flink's StreamWriterBase.
        compressionOutputStream.flush();
        if (syncOnFlush) {
            outStream.hsync();
        } else {
            outStream.hflush();
        }

        return compressionOutputStreamWrapper.getPos();
    }

    @Override
    public long getPos() throws IOException {
        if (outStream == null) {
            throw new IllegalStateException("Writer is not open");
        }

        return compressionOutputStreamWrapper.getPos();
    }

    @Override
    public void close() throws IOException {
        // Close the codec stream first so the compression trailer (e.g. the gzip
        // footer) is written before the underlying HDFS stream is closed.
        if (compressionOutputStream != null) {
            flush();
            compressionOutputStream.close();
            compressionOutputStream = null;
        }

        if (outStream != null) {
            outStream.close();
            outStream = null;
        }
    }

    public boolean isSyncOnFlush() {
        return syncOnFlush;
    }


    protected CompressionOutputStream getCompressionStream() {
        if (compressionOutputStream == null) {
            throw new IllegalStateException("Output stream has not been opened");
        }
        return compressionOutputStream;
    }

    public CompressionOutputStreamWrapper getCompressionOutputStreamWrapper() {
        return compressionOutputStreamWrapper;
    }

    public void setCompressionOutputStreamWrapper(CompressionOutputStreamWrapper compressionOutputStreamWrapper) {
        this.compressionOutputStreamWrapper = compressionOutputStreamWrapper;
    }
}

MyStringWriter.class

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionOutputStream;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;

public class MyStringWriter<T> extends MyStreamWriterBase<T> {
    private static final long serialVersionUID = 1L;

    private String charsetName;

    private transient Charset charset;

    /**
     * Creates a new {@code MyStringWriter} that uses the {@code "UTF-8"} charset to
     * convert strings to bytes. Note that this constructor sets no compression codec;
     * use {@link #MyStringWriter(String, String)} for compressed output.
     */
    public MyStringWriter() {
        this("UTF-8");
    }

    /**
     * Creates a new {@code MyStringWriter} with the given Hadoop compression codec
     * (fully-qualified class name, e.g. {@code org.apache.hadoop.io.compress.GzipCodec})
     * and charset. A blank charset name falls back to UTF-8.
     */
    public MyStringWriter(String compressionCodec, String charsetName) {
        super(compressionCodec);
        if(StringUtils.isBlank(charsetName)) {
            this.charsetName = "UTF-8";
        } else {
            this.charsetName = charsetName;
        }
    }

    /**
     * Creates a new {@code MyStringWriter} that uses the given charset to convert
     * strings to bytes. As with the no-arg constructor, no compression codec is set.
     *
     * @param charsetName Name of the charset to be used, must be valid input for {@code Charset.forName(charsetName)}
     */
    public MyStringWriter(String charsetName) {
        this.charsetName = charsetName;
    }

    protected MyStringWriter(MyStringWriter<T> other) {
        super(other);
        this.charsetName = other.charsetName;
    }

    @Override
    public void open(FileSystem fs, Path path) throws IOException {
        super.open(fs, path);

        try {
            this.charset = Charset.forName(charsetName);
        } catch (IllegalCharsetNameException e) {
            throw new IOException("The charset " + charsetName + " is not valid.", e);
        } catch (UnsupportedCharsetException e) {
            throw new IOException("The charset " + charsetName + " is not supported.", e);
        }
    }

    @Override
    public void write(T element) throws IOException {
        // BaseRow is a project-specific record type whose getResult() returns the
        // line to write; for plain strings, element.toString() would do instead.
        BaseRow baseRow = (BaseRow) element;

        CompressionOutputStreamWrapper compressionOutputStreamWrapper = getCompressionOutputStreamWrapper();
        CompressionOutputStream outputStream = compressionOutputStreamWrapper.getCompressionOutputStream();

        byte[] bytes = baseRow.getResult().getBytes(charset);
        outputStream.write(bytes);
        outputStream.write('\n');

        // Advance the tracked (uncompressed) position: payload plus the newline.
        compressionOutputStreamWrapper.setPos(compressionOutputStreamWrapper.getPos() + bytes.length + 1);
    }

    @Override
    public MyStringWriter<T> duplicate() {
        return new MyStringWriter<>(this);
    }

    String getCharsetName() {
        return charsetName;
    }
}
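
A minimal wiring sketch, assuming a DataStream<BaseRow> named stream (BaseRow being the project-specific type cast in write()) and a placeholder HDFS path; the codec can be any Hadoop CompressionCodec on the classpath:

import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;

// Write gzip-compressed part files; roll a new file at ~128 MB of uncompressed data.
BucketingSink<BaseRow> sink = new BucketingSink<>("hdfs:///tmp/flink-output");
sink.setWriter(new MyStringWriter<>("org.apache.hadoop.io.compress.GzipCodec", "UTF-8"));
sink.setBatchSize(1024 * 1024 * 128);

stream.addSink(sink);

Note that BucketingSink rolls files based on Writer.getPos(), which this writer reports as the uncompressed byte count, so the compressed files actually landing on HDFS will be smaller than the configured batch size.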