HDFS file read/write utility class demo

Demo: reading a file from HDFS

package com.utils;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads a file from HDFS.
 *
 * @author chichuduxing
 * @date 2016-09-19 14:19:14
 */
public class HdfsReader {
	/**
	 * Logger
	 */
	protected static final Logger logger = LoggerFactory.getLogger(HdfsReader.class);

	/**
	 * HDFS file system handle
	 */
	private FileSystem _fs = null;

	/**
	 * Line reader
	 */
	private LineReader _lineReader = null;

	/**
	 * HDFS input stream
	 */
	private FSDataInputStream _fsInputStream = null;

	/**
	 * Constructor
	 *
	 * @param fs
	 *            HDFS file system handle
	 */
	public HdfsReader(FileSystem fs) {
		this._fs = fs;
	}

	/**
	 * Initializes the reader.
	 *
	 * @param file
	 *            path of the file to read (e.g. /tmp/readdemo.txt)
	 * @return whether initialization succeeded
	 */
	public boolean Init(String file) {
		if (null == file || file.isEmpty()) {
			logger.error("file name is null");
			return false;
		}
		try {
			Path file_path = new Path(file);

			if (!_fs.exists(file_path)) {
				logger.error(file + " does not exist!");
				return false;
			}

			// open the input stream
			this._fsInputStream = this._fs.open(file_path);

			this._lineReader = new LineReader(_fsInputStream, _fs.getConf());

			return true;
		} catch (Exception e) {
			logger.error("create line reader failed --" + e.getMessage(), e);
			return false;
		}
	}

	/**
	 * Reads lines from the file in batches.
	 *
	 * @param dataList
	 *            list that receives the lines read
	 * @param lineCont
	 *            maximum number of lines to read per call
	 * @return whether the read succeeded
	 */
	public boolean next(List<String> dataList, int lineCont) {
		if (null == this._lineReader || null == dataList) {
			return false;
		}

		Text line = new Text();
		while (dataList.size() < lineCont) {
			try {
				// end of file reached: close the stream and stop
				if (this._lineReader.readLine(line) <= 0) {
					CloseFileStream();
					break;
				}
			} catch (Exception e) {
				logger.error("read file failed --" + e.getMessage(), e);
				CloseFileStream();
				return false;
			}
			dataList.add(line.toString());
		}
		logger.info("get data count: " + dataList.size());

		return true;
	}

	/**
	 * Closes the file stream and the line reader.
	 */
	private void CloseFileStream() {
		try {
			if (this._fsInputStream != null) {
				_fsInputStream.close();
			}
			if (this._lineReader != null) {
				_lineReader.close();
			}
		} catch (IOException e) {
			logger.error("CloseFileStream() failed --" + e.getMessage(), e);
		} finally {
			_fsInputStream = null;
			_lineReader = null;
		}
	}
}
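
A minimal usage sketch for the reader above. The fs.defaultFS address, the file path and the batch size of 100 lines are placeholder values, not part of the original demo:

package com.utils;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class HdfsReaderDemo {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// assumed NameNode address; replace with your cluster's fs.defaultFS
		conf.set("fs.defaultFS", "hdfs://namenode:8020");
		FileSystem fs = FileSystem.get(conf);

		HdfsReader reader = new HdfsReader(fs);
		if (reader.Init("/tmp/readdemo.txt")) {
			List<String> lines = new ArrayList<String>();
			// read in batches of 100 lines; next() returns false once the reader has been closed
			while (reader.next(lines, 100) && !lines.isEmpty()) {
				for (String line : lines) {
					System.out.println(line);
				}
				lines.clear();
			}
		}
		fs.close();
	}
}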


Demo: writing a file to HDFS

package com.utils;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Writes a file to HDFS.
 *
 * @author chichuduxing
 * @date 2016-09-19 11:19:14
 */
public class HdfsWriter {
	/**
	 * Logger
	 */
	private static final Logger logger = LoggerFactory.getLogger(HdfsWriter.class);

	/**
	 * HDFS output file path
	 */
	private final String _hdfsOutDirectory;

	/**
	 * Temporary file path
	 */
	private Path _tmpFilePath;

	/**
	 * HDFS file system handle
	 */
	private FileSystem _fs = null;

	/**
	 * Output stream handle
	 */
	private OutputStream _outputStream = null;

	/**
	 * Whether any data has been written
	 */
	private boolean _ifWriteData = false;

	/**
	 * @param fs
	 *            HDFS file system handle
	 * @param outPath
	 *            output file path (e.g. /tmp/output/test.txt)
	 */
	public HdfsWriter(FileSystem fs, String outPath) {
		this._hdfsOutDirectory = outPath;
		this._fs = fs;
	}

	/**
	 * Initializes the HDFS output file and stream.
	 *
	 * @return whether initialization succeeded
	 */
	public boolean init() throws Exception {
		logger.info("HdfsWriter.init() start...");
		try {
			// create the temp file and open the output stream (removes any leftover temp file)
			InitOutStream();
		} catch (Exception e) {
			logger.error("HdfsWriter.init() failed.", e);

			return false;
		}
		logger.info("HdfsWriter.init() done...");
		return true;
	}

	/**
	 * Initializes the output stream by creating a temporary file on HDFS.
	 */
	private void InitOutStream() throws Exception {
		try {
			// build the temporary file path
			String tmp_file_name = this._hdfsOutDirectory + ".tmp";
			this._tmpFilePath = new Path(tmp_file_name);

			if (this._fs.exists(this._tmpFilePath)) {
				this._fs.delete(this._tmpFilePath, true);
			}

			if (!this._fs.createNewFile(this._tmpFilePath)) {
				throw new Exception("create tmp hdfs file failed. --" + this._tmpFilePath);
			}
			logger.info("create hdfs tmp file success: " + tmp_file_name);

			// open an output stream by appending to the temp file (requires a FileSystem that supports append)
			this._outputStream = this._fs.append(this._tmpFilePath);
		} catch (Exception e) {
			this._outputStream = null;
			throw new Exception("HdfsWriter.InitOutStream() failed.", e);
		}
	}

	/**
	 * Writes data to HDFS.
	 *
	 * @param data
	 *            the data to write
	 */
	public void WriteData(String data) {
		if (null == data || data.isEmpty() || null == this._outputStream)
			return;

		// convert the String to bytes using an explicit charset
		byte[] bcp_bytes = data.getBytes(StandardCharsets.UTF_8);

		// write the bytes to HDFS
		if (0 < bcp_bytes.length) {
			try {
				this._outputStream.write(bcp_bytes);
				this._outputStream.flush();

				logger.info("upload bcp data success. --" + this._tmpFilePath.toString());

				this._ifWriteData = true;
			} catch (Exception ex) {
				logger.error("##hdfs write error##: " + ex.getMessage(), ex);

				// close the stream, which also moves the temp file if data was written
				CloseFileStream();
				try {
					// re-open the output stream
					InitOutStream();
				} catch (Exception e) {
					logger.error(e.getMessage());
				}
			}
		}
	}

	/**
	 * Closes the output stream and moves the temporary file into place.
	 */
	public void CloseFileStream() {
		try {
			// close the file stream
			if (null != this._outputStream) {
				logger.info("close the hdfs file stream.");
				this._outputStream.close();
				this._outputStream = null;
			}
		} catch (Exception e) {
			logger.error("close the hdfs file stream failed,file is:" + this._tmpFilePath, e);
		} finally {
			this._outputStream = null;
		}

		// move the temp file to its final path
		RenameFile();
	}

	/**
	 * Renames the temporary file to the final output path.
	 */
	private void RenameFile() {
		// skip if no data was written, to avoid leaving an empty file
		if (!this._ifWriteData)
			return;

		// the final output path
		Path res_path = new Path(this._hdfsOutDirectory);

		try {
			if (this._fs.exists(res_path)) {
				this._fs.delete(res_path, true);
			}
			if (!this._fs.rename(this._tmpFilePath, res_path)) {
				logger.error("rename [" + this._tmpFilePath + "] to [" + res_path + "] failed.");
				return;
			}
			logger.info("rename [" + this._tmpFilePath + "] to [" + res_path + "] ok.");
		} catch (IOException e) {
			logger.error("rename the hdfs file failed, file is: " + this._tmpFilePath, e);
		}
	}
}
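
A minimal usage sketch for the writer above. The fs.defaultFS address, the output path and the sample lines are placeholders; the sketch also assumes the cluster permits append(), which InitOutStream() relies on:

package com.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class HdfsWriterDemo {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// assumed NameNode address; replace with your cluster's fs.defaultFS
		conf.set("fs.defaultFS", "hdfs://namenode:8020");
		FileSystem fs = FileSystem.get(conf);

		// write two lines to /tmp/output/test.txt via a temporary .tmp file
		HdfsWriter writer = new HdfsWriter(fs, "/tmp/output/test.txt");
		if (writer.init()) {
			writer.WriteData("hello hdfs\n");
			writer.WriteData("goodbye hdfs\n");
		}
		// closing the stream renames the .tmp file to the final output path
		writer.CloseFileStream();
		fs.close();
	}
}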


