Spring Boot Integration with Hadoop, Part 1: File Operations on HDFS

1. HDFS operations involve the following key classes:

Configuration: encapsulates the configuration of the client or the server.

FileSystem: an instance of this class represents a file system, and its methods are used to operate on files. Obtain an instance through FileSystem's static get method, e.g. FileSystem hdfs = FileSystem.get(conf);

FSDataInputStream: the HDFS input stream, obtained from FileSystem's open method.

FSDataOutputStream: the HDFS output stream, obtained from FileSystem's create method.
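The following minimal sketch ties these four classes together. It assumes a NameNode at hdfs://localhost:9000 and the HDFS user linhaiy (both taken from the application.properties shown below); the file path /tmp/hello.txt is arbitrary:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsQuickStart {
	public static void main(String[] args) throws Exception {
		// Configuration: where the cluster lives
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://localhost:9000");
		// FileSystem: the entry point for all file operations (with an explicit user)
		FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf, "linhaiy");

		Path file = new Path("/tmp/hello.txt");
		// FSDataOutputStream: returned by create(), used for writing
		try (FSDataOutputStream out = fs.create(file, true)) {
			out.writeUTF("hello hdfs");
		}
		// FSDataInputStream: returned by open(), used for reading
		try (FSDataInputStream in = fs.open(file)) {
			System.out.println(in.readUTF());
		}
		fs.close();
	}
}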

2. Dependency configuration

pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.hdfs</groupId>
	<artifactId>HadoopTest</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>HadoopTest</name>
	<url>http://maven.apache.org</url>

	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>2.0.0.RELEASE</version>
		<relativePath />
	</parent>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
		<java.version>1.8</java.version>
	</properties>

	<dependencies>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>cn.bestwu</groupId>
			<artifactId>ik-analyzers</artifactId>
			<version>5.1.0</version>
		</dependency>
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<scope>test</scope>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<plugin>
				<groupId>org.springframework.boot</groupId>
				<artifactId>spring-boot-maven-plugin</artifactId>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>
application.properties:

# Tomcat max worker threads (the default is 200)
server.tomcat.max-threads=1000

# Tomcat port
server.port=8900
# Session timeout in seconds
server.servlet.session.timeout=60

spring.application.name=hadoop
spring.servlet.multipart.max-file-size=50MB
spring.servlet.multipart.max-request-size=50MB

hdfs.path=hdfs://localhost:9000
hdfs.username=linhaiy
logging.config=classpath:logback.xml

3. Developing the HDFS file-operation API

HdfsConfig.java:

package com.hadoop.config;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
 * HDFS configuration class
 * @author linhaiy
 * @date 2019.05.18
 */
@Configuration
public class HdfsConfig {
    @Value("${hdfs.path}")
    private String path;

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }
}
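HdfsConfig simply exposes the hdfs.path property as a bean. Any component can then inject it instead of repeating @Value; a hypothetical consumer (the class name is made up) would look like:

package com.hadoop.config;

import org.springframework.stereotype.Component;

@Component
public class HdfsPathConsumer {

	private final HdfsConfig hdfsConfig;

	// Constructor injection; Spring supplies the HdfsConfig bean
	public HdfsPathConsumer(HdfsConfig hdfsConfig) {
		this.hdfsConfig = hdfsConfig;
	}

	public String describe() {
		// e.g. "hdfs://localhost:9000" from application.properties
		return "HDFS root: " + hdfsConfig.getPath();
	}
}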

User.java:

package com.hadoop.hdfs.entity;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * User entity class
 * @author linhaiy
 * @date 2019.05.18
 */
public class User implements Writable {

	private String username;
	private Integer age;
	private String address;

	public User() {
	}

	public User(String username, Integer age, String address) {
		super();
		this.username = username;
		this.age = age;
		this.address = address;
	}

	@Override
	public void write(DataOutput output) throws IOException {
		// Serialize the object; writeUTF must be used so that readFields
		// below can read the values back with readUTF
		output.writeUTF(username);
		output.writeInt(age);
		output.writeUTF(address);
	}

	@Override
	public void readFields(DataInput input) throws IOException {
		// Deserialize the object back into memory
		username = input.readUTF();
		age = input.readInt();
		address = input.readUTF();
	}

	public String getUsername() {
		return username;
	}

	public void setUsername(String username) {
		this.username = username;
	}

	public Integer getAge() {
		return age;
	}

	public void setAge(Integer age) {
		this.age = age;
	}

	public String getAddress() {
		return address;
	}

	public void setAddress(String address) {
		this.address = address;
	}

	@Override
	public String toString() {
		return "User [username=" + username + ", age=" + age + ", address=" + address + "]";
	}

}
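To see the write/readFields pair round-trip, here is a small standalone check (the values are made up): it serializes a User to a byte buffer through DataOutput, then reads it back through DataInput, which is exactly what Hadoop does when shipping a Writable:

package com.hadoop.hdfs.entity;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class UserWritableDemo {
	public static void main(String[] args) throws Exception {
		User original = new User("linhaiy", 25, "Shenzhen");

		// Serialize: Hadoop calls write(DataOutput) under the hood
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		original.write(new DataOutputStream(buffer));

		// Deserialize: Hadoop calls readFields(DataInput) on a fresh instance
		User copy = new User();
		copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

		System.out.println(copy); // User [username=linhaiy, age=25, address=Shenzhen]
	}
}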
HdfsService.java:

package com.hadoop.hdfs.service;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;

import com.hadoop.util.JsonUtil;

@Component
public class HdfsService {

	@Value("${hdfs.path}")
	private String path;
	@Value("${hdfs.username}")
	private String username;

	private static String hdfsPath;
	private static String hdfsName;
	private static final int bufferSize = 1024 * 1024 * 64;

	/**
	 * Build the HDFS client configuration
	 * @return
	 */
	private static Configuration getConfiguration() {
		Configuration configuration = new Configuration();
		configuration.set("fs.defaultFS", hdfsPath);
		return configuration;
	}

	/**
	 * Get an HDFS FileSystem instance
	 * @return
	 * @throws Exception
	 */
	public static FileSystem getFileSystem() throws Exception {
		// The HDFS client operates under a user identity. By default the client
		// API reads it from a JVM parameter, e.g. -DHADOOP_USER_NAME=hadoop;
		// it can also be passed in explicitly when constructing the FileSystem, as here.
		FileSystem fileSystem = FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
		return fileSystem;
	}

	/**
	 * Create a directory on HDFS
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static boolean mkdir(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return false;
		}
		if (existFile(path)) {
			return true;
		}
		FileSystem fs = getFileSystem();
		// Target path
		Path srcPath = new Path(path);
		boolean isOk = fs.mkdirs(srcPath);
		fs.close();
		return isOk;
	}

	/**
	 * Check whether a file or directory exists on HDFS
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static boolean existFile(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return false;
		}
		FileSystem fs = getFileSystem();
		Path srcPath = new Path(path);
		boolean isExists = fs.exists(srcPath);
		fs.close();
		return isExists;
	}

	/**
	 * Read directory information under an HDFS path
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static List<Map<String, Object>> readPathInfo(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}
		FileSystem fs = getFileSystem();
		// Target path
		Path newPath = new Path(path);
		FileStatus[] statusList = fs.listStatus(newPath);
		List<Map<String, Object>> list = new ArrayList<>();
		if (null != statusList && statusList.length > 0) {
			for (FileStatus fileStatus : statusList) {
				Map<String, Object> map = new HashMap<>();
				map.put("filePath", fileStatus.getPath());
				map.put("fileStatus", fileStatus.toString());
				list.add(map);
			}
			return list;
		} else {
			return null;
		}
	}

	/**
	 * Create a file on HDFS from an uploaded file
	 * @param path
	 * @param file
	 * @throws Exception
	 */
	public static void createFile(String path, MultipartFile file) throws Exception {
		if (StringUtils.isEmpty(path) || null == file.getBytes()) {
			return;
		}
		String fileName = file.getOriginalFilename();
		FileSystem fs = getFileSystem();
		// The file is written under the given directory, with the original file name appended
		Path newPath = new Path(path + "/" + fileName);
		// Open an output stream
		FSDataOutputStream outputStream = fs.create(newPath);
		outputStream.write(file.getBytes());
		outputStream.close();
		fs.close();
	}

	/**
	 * Read the content of an HDFS file as a string
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static String readFile(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}
		FileSystem fs = getFileSystem();
		// Target path
		Path srcPath = new Path(path);
		FSDataInputStream inputStream = null;
		try {
			inputStream = fs.open(srcPath);
			// Decode as UTF-8 to avoid garbled non-ASCII (e.g. Chinese) text
			BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
			String lineTxt = "";
			StringBuilder sb = new StringBuilder();
			while ((lineTxt = reader.readLine()) != null) {
				sb.append(lineTxt);
			}
			return sb.toString();
		} finally {
			if (inputStream != null) {
				inputStream.close();
			}
			fs.close();
		}
	}

	/**
	 * List the files under an HDFS path
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static List<Map<String, String>> listFile(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}

		FileSystem fs = getFileSystem();
		// Target path
		Path srcPath = new Path(path);
		// Recursively list all files
		RemoteIterator<LocatedFileStatus> filesList = fs.listFiles(srcPath, true);
		List<Map<String, String>> returnList = new ArrayList<>();
		while (filesList.hasNext()) {
			LocatedFileStatus next = filesList.next();
			String fileName = next.getPath().getName();
			Path filePath = next.getPath();
			Map<String, String> map = new HashMap<>();
			map.put("fileName", fileName);
			map.put("filePath", filePath.toString());
			returnList.add(map);
		}
		fs.close();
		return returnList;
	}

	/**
	 * Rename a file on HDFS
	 * @param oldName
	 * @param newName
	 * @return
	 * @throws Exception
	 */
	public static boolean renameFile(String oldName, String newName) throws Exception {
		if (StringUtils.isEmpty(oldName) || StringUtils.isEmpty(newName)) {
			return false;
		}
		FileSystem fs = getFileSystem();
		// Original path
		Path oldPath = new Path(oldName);
		// New path
		Path newPath = new Path(newName);
		boolean isOk = fs.rename(oldPath, newPath);
		fs.close();
		return isOk;
	}

	/**
	 * Delete a file on HDFS
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static boolean deleteFile(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return false;
		}
		if (!existFile(path)) {
			return false;
		}
		FileSystem fs = getFileSystem();
		Path srcPath = new Path(path);
		// true = delete recursively
		boolean isOk = fs.delete(srcPath, true);
		fs.close();
		return isOk;
	}

	/**
	 * Upload a local file to HDFS
	 * @param path
	 * @param uploadPath
	 * @throws Exception
	 */
	public static void uploadFile(String path, String uploadPath) throws Exception {
		if (StringUtils.isEmpty(path) || StringUtils.isEmpty(uploadPath)) {
			return;
		}
		FileSystem fs = getFileSystem();
		// Local source path
		Path clientPath = new Path(path);
		// HDFS target path
		Path serverPath = new Path(uploadPath);

		// Copy via the FileSystem API; the first parameter controls whether the
		// local source is deleted (true) or kept (false, the default)
		fs.copyFromLocalFile(false, clientPath, serverPath);
		fs.close();
	}

	/**
	 * Download a file from HDFS to the local file system
	 * @param path
	 * @param downloadPath
	 * @throws Exception
	 */
	public static void downloadFile(String path, String downloadPath) throws Exception {
		if (StringUtils.isEmpty(path) || StringUtils.isEmpty(downloadPath)) {
			return;
		}
		FileSystem fs = getFileSystem();
		// HDFS source path
		Path clientPath = new Path(path);
		// Local target path
		Path serverPath = new Path(downloadPath);

		// Copy via the FileSystem API; the first parameter controls whether the
		// HDFS source is deleted (true) or kept (false, the default)
		fs.copyToLocalFile(false, clientPath, serverPath);
		fs.close();
	}

	/**
	 * Copy a file within HDFS
	 * @param sourcePath
	 * @param targetPath
	 * @throws Exception
	 */
	public static void copyFile(String sourcePath, String targetPath) throws Exception {
		if (StringUtils.isEmpty(sourcePath) || StringUtils.isEmpty(targetPath)) {
			return;
		}
		FileSystem fs = getFileSystem();
		// Source path
		Path oldPath = new Path(sourcePath);
		// Target path
		Path newPath = new Path(targetPath);

		FSDataInputStream inputStream = null;
		FSDataOutputStream outputStream = null;
		try {
			inputStream = fs.open(oldPath);
			outputStream = fs.create(newPath);

			IOUtils.copyBytes(inputStream, outputStream, bufferSize, false);
		} finally {
			if (inputStream != null) {
				inputStream.close();
			}
			if (outputStream != null) {
				outputStream.close();
			}
			fs.close();
		}
	}

	/**
	 * Open a file on HDFS and return its content as a byte array
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static byte[] openFileToBytes(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}
		FileSystem fs = getFileSystem();
		// Target path
		Path srcPath = new Path(path);
		try (FSDataInputStream inputStream = fs.open(srcPath)) {
			return IOUtils.readFullyToByteArray(inputStream);
		} finally {
			fs.close();
		}
	}

	/**
	 * Open a JSON file on HDFS and map it to a Java object
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static <T> T openFileToObject(String path, Class<T> clazz) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}
		String jsonStr = readFile(path);
		return JsonUtil.fromObject(jsonStr, clazz);
	}

	/**
	 * Get the block locations of a file in the HDFS cluster
	 * @param path
	 * @return
	 * @throws Exception
	 */
	public static BlockLocation[] getFileBlockLocations(String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return null;
		}
		if (!existFile(path)) {
			return null;
		}
		FileSystem fs = getFileSystem();
		// Target path
		Path srcPath = new Path(path);
		FileStatus fileStatus = fs.getFileStatus(srcPath);
		BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
		fs.close();
		return blockLocations;
	}

	@PostConstruct
	public void init() {
		// Expose the injected property values to the static methods above
		hdfsPath = this.path;
		hdfsName = this.username;
	}

	public static String getHdfsPath() {
		return hdfsPath;
	}

	public String getUsername() {
		return username;
	}
}
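A quick way to exercise the service is a CommandLineRunner, which runs after the Spring context is up (so the @PostConstruct initialization above has already populated the static fields). The runner below is a hypothetical smoke test; the local and HDFS paths are made up, and it requires a running NameNode:

package com.hadoop.hdfs.service;

import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;

@Component
public class HdfsSmokeRunner implements CommandLineRunner {

	@Override
	public void run(String... args) throws Exception {
		HdfsService.mkdir("/demo");
		System.out.println("exists: " + HdfsService.existFile("/demo"));
		// Copy a local file into /demo (it keeps its original name), then read it back
		HdfsService.uploadFile("/tmp/local.txt", "/demo");
		System.out.println(HdfsService.readFile("/demo/local.txt"));
		// Rename, then clean up
		HdfsService.renameFile("/demo/local.txt", "/demo/renamed.txt");
		HdfsService.deleteFile("/demo/renamed.txt");
	}
}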
HdfsAction.java:

package com.hadoop.hdfs.controller;

import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.BlockLocation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import com.hadoop.hdfs.entity.User;
import com.hadoop.hdfs.service.HdfsService;
import com.hadoop.util.Result;

@RestController
@RequestMapping("/hadoop/hdfs")
public class HdfsAction {

	private static final Logger LOGGER = LoggerFactory.getLogger(HdfsAction.class);

	/**
	 * Create a directory
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/mkdir")
	public Result mkdir(@RequestParam("path") String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			LOGGER.debug("Request parameter is empty");
			return new Result(Result.FAILURE, "Request parameter is empty");
		}
		// Create the directory
		boolean isOk = HdfsService.mkdir(path);
		if (isOk) {
			LOGGER.debug("Directory created successfully");
			return new Result(Result.SUCCESS, "Directory created successfully");
		} else {
			LOGGER.debug("Failed to create directory");
			return new Result(Result.FAILURE, "Failed to create directory");
		}
	}

	/**
	 * Read HDFS directory information
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/readPathInfo")
	public Result readPathInfo(@RequestParam("path") String path) throws Exception {
		List<Map<String, Object>> list = HdfsService.readPathInfo(path);
		return new Result(Result.SUCCESS, "HDFS directory information read successfully", list);
	}

	/**
	 * Get the block locations of an HDFS file in the cluster
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/getFileBlockLocations")
	public Result getFileBlockLocations(@RequestParam("path") String path) throws Exception {
		BlockLocation[] blockLocations = HdfsService.getFileBlockLocations(path);
		return new Result(Result.SUCCESS, "Got the block locations of the HDFS file", blockLocations);
	}

	/**
	 * Create a file
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/createFile")
	public Result createFile(@RequestParam("path") String path, @RequestParam("file") MultipartFile file)
			throws Exception {
		if (StringUtils.isEmpty(path) || null == file.getBytes()) {
			return new Result(Result.FAILURE, "Request parameter is empty");
		}
		HdfsService.createFile(path, file);
		return new Result(Result.SUCCESS, "File created successfully");
	}

	/**
	 * Read HDFS file content
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/readFile")
	public Result readFile(@RequestParam("path") String path) throws Exception {
		String content = HdfsService.readFile(path);
		return new Result(Result.SUCCESS, "Read HDFS file content", content);
	}

	/**
	 * Read an HDFS file as a byte array
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/openFileToBytes")
	public Result openFileToBytes(@RequestParam("path") String path) throws Exception {
		byte[] bytes = HdfsService.openFileToBytes(path);
		return new Result(Result.SUCCESS, "Read HDFS file as a byte array", bytes);
	}

	/**
	 * Read an HDFS file and convert it into a User object
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/openFileToUser")
	public Result openFileToUser(@RequestParam("path") String path) throws Exception {
		User user = HdfsService.openFileToObject(path, User.class);
		return new Result(Result.SUCCESS, "Read HDFS file into a User object", user);
	}

	/**
	 * List files
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/listFile")
	public Result listFile(@RequestParam("path") String path) throws Exception {
		if (StringUtils.isEmpty(path)) {
			return new Result(Result.FAILURE, "Request parameter is empty");
		}
		List<Map<String, String>> returnList = HdfsService.listFile(path);
		return new Result(Result.SUCCESS, "File list read successfully", returnList);
	}

	/**
	 * Rename a file
	 * @param oldName
	 * @param newName
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/renameFile")
	public Result renameFile(@RequestParam("oldName") String oldName, @RequestParam("newName") String newName)
			throws Exception {
		if (StringUtils.isEmpty(oldName) || StringUtils.isEmpty(newName)) {
			return new Result(Result.FAILURE, "Request parameter is empty");
		}
		boolean isOk = HdfsService.renameFile(oldName, newName);
		if (isOk) {
			return new Result(Result.SUCCESS, "File renamed successfully");
		} else {
			return new Result(Result.FAILURE, "Failed to rename file");
		}
	}

	/**
	 * Delete a file
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/deleteFile")
	public Result deleteFile(@RequestParam("path") String path) throws Exception {
		boolean isOk = HdfsService.deleteFile(path);
		if (isOk) {
			return new Result(Result.SUCCESS, "delete file success");
		} else {
			return new Result(Result.FAILURE, "delete file fail");
		}
	}

	/**
	 * Upload a file
	 * @param path
	 * @param uploadPath
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/uploadFile")
	public Result uploadFile(@RequestParam("path") String path, @RequestParam("uploadPath") String uploadPath)
			throws Exception {
		HdfsService.uploadFile(path, uploadPath);
		return new Result(Result.SUCCESS, "upload file success");
	}

	/**
	 * Download a file
	 * @param path
	 * @param downloadPath
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/downloadFile")
	public Result downloadFile(@RequestParam("path") String path, @RequestParam("downloadPath") String downloadPath)
			throws Exception {
		HdfsService.downloadFile(path, downloadPath);
		return new Result(Result.SUCCESS, "download file success");
	}

	/**
	 * Copy a file within HDFS
	 * @param sourcePath
	 * @param targetPath
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/copyFile")
	public Result copyFile(@RequestParam("sourcePath") String sourcePath, @RequestParam("targetPath") String targetPath)
			throws Exception {
		HdfsService.copyFile(sourcePath, targetPath);
		return new Result(Result.SUCCESS, "copy file success");
	}

	/**
	 * Check whether a file already exists
	 * @param path
	 * @return
	 * @throws Exception
	 */
	@PostMapping("/existFile")
	public Result existFile(@RequestParam("path") String path) throws Exception {
		boolean isExist = HdfsService.existFile(path);
		return new Result(Result.SUCCESS, "file isExist: " + isExist);
	}
}
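With the application running on port 8900 (from application.properties), the endpoints accept standard form parameters. A hypothetical client-side check using Spring's RestTemplate (the class name and the /demo path are made up):

package com.hadoop;

import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.RestTemplate;

public class HdfsApiClientDemo {
	public static void main(String[] args) {
		RestTemplate rest = new RestTemplate();
		// Form parameters are bound to the controller's @RequestParam arguments
		MultiValueMap<String, String> params = new LinkedMultiValueMap<>();
		params.add("path", "/demo");
		// Returns the Result object serialized as JSON
		String response = rest.postForObject("http://localhost:8900/hadoop/hdfs/mkdir", params, String.class);
		System.out.println(response);
	}
}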

4. Test result screenshots

(Screenshots of the test results are omitted here.)
