I. Hadoop prerequisites:
Hadoop 3.0 + CDH 6.1
This assumes the cluster is already set up and running in a Linux environment;
how to install Hadoop itself is out of scope here.
My local environment:
Win10 + IDEA 2019.3 + JDK 8
II. Preparing the environment:
1. Get the fs.defaultFS URI from core-site.xml.
This value will later be configured in application.properties.
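For example (the host name and port below are placeholders; take the real value from your cluster's core-site.xml):
<property>
<name>fs.defaultFS</name>
<value>hdfs://cdh-master:8020</value>
</property>
and the matching entry in application.properties, which the configuration class below reads via ${hdfs.path}:
hdfs.path=hdfs://cdh-master:8020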
2. Download winutils
https://gitee.com/bochangguan/winutils/tree/master/hadoop-3.0.0/bin
Pick the directory matching your Hadoop version, then configure the environment variables:
HADOOP_HOME points to your hadoop directory (the one containing bin)
PATH gets %HADOOP_HOME%\bin appended
Note: without winutils, running the client fails with:
java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
After configuring, remember to restart IDEA so it can pick up the new environment variables.
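As an alternative sketch (my own suggestion, not required if the environment variables work): Hadoop also reads the hadoop.home.dir system property, so you can point it at the winutils directory from code before any FileSystem is created. The path below is a placeholder:
// Placeholder path; point it at the directory that contains bin\winutils.exe
System.setProperty("hadoop.home.dir", "D:/winutils/hadoop-3.0.0");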
3. Add mappings for the Linux servers to your hosts file (if name resolution is an issue)
Without this, the cluster host names cannot be resolved from the local machine and you get:
java.lang.IllegalArgumentException: java.net.UnknownHostException: xxx.xxx.com
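For example, a line in C:\Windows\System32\drivers\etc\hosts might look like this (the IP and host names are placeholders for your own NameNode/cluster nodes):
192.168.197.130 cdh-master.example.com cdh-master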
4. Optional: copy hadoop.dll to C:\Windows\System32
I did not need this step in my setup; try it only if you run into problems.
III. Project setup
1. pom:
<!-- hadoop-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.0.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.0.0</version>
</dependency>
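Note that the utility class below also uses fastjson (JSON parsing), commons-io (IOUtils), commons-lang3 (StringUtils) and Spring's MultipartFile. If they are not already pulled in by your parent pom, add them; the versions here are only examples:
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.62</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>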
2. Configuration class
import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class HdfsConfig {
//hdfs.path here is the fs.defaultFS URI mentioned above
@Value("${hdfs.path}")
private String defaultHdfsUri;
@Bean
public HdfsUtil getHdfsUtil(){
org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
//optional: explicitly set the FileSystem implementation used for file IO (read/write)
//conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
conf.set("fs.defaultFS",defaultHdfsUri);
return new HdfsUtil(conf,defaultHdfsUri);
}
}
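A note on this configuration (my observation, not part of the original setup): FileSystem.get(conf) returns a cached instance shared per URI and user, while the utility class below closes the FileSystem after every operation. If you hit "Filesystem closed" errors under concurrent access, one common workaround is to disable the cache so each getFileSystem() call returns a fresh instance:
//optional: disable the FileSystem cache so every FileSystem.get(conf) returns a new instance
conf.set("fs.hdfs.impl.disable.cache", "true");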
3. Utility class:
import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Basic operations on HDFS
 *
 * @author adminstrator
 * @since 1.0.0
 */
public class HdfsUtil {
private Logger logger = LoggerFactory.getLogger(HdfsUtil.class);
private Configuration conf = null;
/**
 * Default HDFS URI, e.g. hdfs://192.168.197.130:9000
 */
private String defaultHdfsUri;
//The client identity can be set like this before creating the FileSystem:
//System.setProperty("HADOOP_USER_NAME", "appuser");
//FileSystem fs = FileSystem.get(conf);
//or by passing the user directly:
//FileSystem fs = FileSystem.get(new URI("hdfs://master:9000"), conf, "root");
public HdfsUtil(Configuration conf, String defaultHdfsUri) {
this.conf = conf;
this.defaultHdfsUri = defaultHdfsUri;
}
/**
 * Get the HDFS file system
 *
 * @return org.apache.hadoop.fs.FileSystem
 */
private FileSystem getFileSystem() throws IOException {
return FileSystem.get(conf);
}
/**
 * Create a directory on HDFS
 *
 * @param path directory path relative to the HDFS root, e.g. /testDir
 * @return boolean whether the directory was created successfully
 * @author adminstrator
 * @since 1.0.0
 */
public boolean mkdir(String path) {
//if the directory already exists, return immediately
if (checkExists(path)) {
return true;
} else {
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
//full HDFS path of the directory
String hdfsPath = generateHdfsPath(path);
//create the directory
return fileSystem.mkdirs(new Path(hdfsPath));
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to create HDFS directory, path:{0}", path), e);
return false;
} finally {
close(fileSystem);
}
}
}
/**
 * Upload a file to HDFS
 *
 * @param srcFile local file path, e.g. D:/test.txt
 * @param dstPath target directory path relative to the HDFS root, e.g. /testDir
 * @author adminstrator
 * @since 1.0.0
 */
public void uploadFileToHdfs(String srcFile, String dstPath) {
this.uploadFileToHdfs(false, true, srcFile, dstPath);
}
/**
 * Upload a file to HDFS
 *
 * @param delSrc    whether to delete the local source file
 * @param overwrite whether to overwrite an existing file on HDFS
 * @param srcFile   local file path, e.g. D:/test.txt
 * @param dstPath   target directory path relative to the HDFS root, e.g. /testDir
 * @author adminstrator
 * @since 1.0.0
 */
public void uploadFileToHdfs(boolean delSrc, boolean overwrite, String srcFile, String dstPath) {
//source file path
Path localSrcPath = new Path(srcFile);
//target path
Path hdfsDstPath = new Path(generateHdfsPath(dstPath));
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
fileSystem.copyFromLocalFile(delSrc, overwrite, localSrcPath, hdfsDstPath);
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to upload file to HDFS, srcFile:{0}, dstPath:{1}", srcFile, dstPath), e);
} finally {
close(fileSystem);
}
}
/**
 * Check whether a file or directory exists on HDFS
 *
 * @param path path relative to the HDFS root, e.g. /testDir or /testDir/a.txt
 * @return boolean
 * @author adminstrator
 * @since 1.0.0
 */
public boolean checkExists(String path) {
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
//full HDFS path
String hdfsPath = generateHdfsPath(path);
//check existence
return fileSystem.exists(new Path(hdfsPath));
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to check whether the file or directory exists on HDFS, path:{0}", path), e);
return false;
} finally {
close(fileSystem);
}
}
/**
 * Create a file on HDFS from an uploaded MultipartFile
 * @param path target directory path, e.g. /testDir
 * @param file uploaded file
 * @throws Exception
 */
public void createFile(String path, MultipartFile file) throws Exception {
if (StringUtils.isEmpty(path) || null == file || file.isEmpty()) {
return;
}
String fileName = file.getOriginalFilename();
FileSystem fs = getFileSystem();
// the original file name is appended to the target directory
Path newPath = new Path(path + "/" + fileName);
// open an output stream
FSDataOutputStream outputStream = fs.create(newPath);
outputStream.write(file.getBytes());
outputStream.close();
fs.close();
}
/**
 * List all files and directories directly under a given HDFS path (non-recursive)
 *
 * @param path directory path relative to the HDFS root, e.g. /testDir
 * @return java.util.List<java.util.Map<java.lang.String, java.lang.Object>>
 * @author adminstrator
 * @since 1.0.0
 */
public List<Map<String, Object>> listFiles(String path, PathFilter pathFilter) {
//result list
List<Map<String, Object>> result = new ArrayList<>();
//only proceed if the directory exists
if (checkExists(path)) {
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
//full HDFS path
String hdfsPath = generateHdfsPath(path);
FileStatus[] statuses;
//filter by the given PathFilter if one is provided
if (pathFilter != null) {
statuses = fileSystem.listStatus(new Path(hdfsPath), pathFilter);
} else {
statuses = fileSystem.listStatus(new Path(hdfsPath));
}
if (statuses != null) {
for (FileStatus status : statuses) {
//attributes of each file
Map<String, Object> fileMap = new HashMap<>(2);
fileMap.put("path", status.getPath().toString());
fileMap.put("isDir", status.isDirectory());
fileMap.put("fileStatus", status.toString());
result.add(fileMap);
}
}
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to list files under the HDFS path, path:{0}", path), e);
} finally {
close(fileSystem);
}
}
return result;
}
/**
 * Read the content of an HDFS file as a String
 * @param path file path relative to the HDFS root, e.g. /testDir/a.txt
 * @return file content
 * @throws Exception
 */
public String readFile(String path) throws Exception {
if (StringUtils.isEmpty(path)) {
return null;
}
if (!checkExists(path)) {
return null;
}
FileSystem fs = getFileSystem();
// source path
Path srcPath = new Path(path);
FSDataInputStream inputStream = null;
try {
inputStream = fs.open(srcPath);
// specify UTF-8 explicitly to avoid garbled Chinese characters
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, Charset.forName("UTF-8")));
String lineTxt;
StringBuilder sb = new StringBuilder();
while ((lineTxt = reader.readLine()) != null) {
sb.append(lineTxt);
}
return sb.toString();
} finally {
if (inputStream != null) {
inputStream.close();
}
fs.close();
}
}
/**
 * Download a file from HDFS to the local file system
 *
 * @param srcFile file path relative to the HDFS root, e.g. /testDir/a.txt
 * @param dstFile local target path (missing local directories are created automatically), e.g. D:/test.txt
 * @author adminstrator
 * @since 1.0.0
 */
public void downloadFileFromHdfs(String srcFile, String dstFile) {
//HDFS file path
Path hdfsSrcPath = new Path(generateHdfsPath(srcFile));
//local target path
Path localDstPath = new Path(dstFile);
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
fileSystem.copyToLocalFile(hdfsSrcPath, localDstPath);
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to download file from HDFS, srcFile:{0}, dstFile:{1}", srcFile, dstFile), e);
} finally {
close(fileSystem);
}
}
/**
 * Open a file on HDFS and return an InputStream
 *
 * @param path file path relative to the HDFS root, e.g. /testDir/c.txt
 * @return FSDataInputStream
 * @author adminstrator
 * @since 1.0.0
 */
public FSDataInputStream open(String path) {
//HDFS file path
Path hdfsPath = new Path(generateHdfsPath(path));
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
return fileSystem.open(hdfsPath);
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to open the file on HDFS, path:{0}", path), e);
}
return null;
}
/**
 * Open a file on HDFS and return its content as a byte array, convenient for web downloads
 * <p>new ResponseEntity<byte[]>(bytes, headers, HttpStatus.CREATED);</p>
 * <p>or: new ResponseEntity<byte[]>(FileUtils.readFileToByteArray(templateFile), headers, HttpStatus.CREATED);</p>
 *
 * @param path file path relative to the HDFS root, e.g. /testDir/b.txt
 * @return byte[]
 * @author adminstrator
 * @since 1.0.0
 */
public byte[] openWithBytes(String path) {
//HDFS file path
Path hdfsPath = new Path(generateHdfsPath(path));
FileSystem fileSystem = null;
FSDataInputStream inputStream = null;
try {
fileSystem = getFileSystem();
inputStream = fileSystem.open(hdfsPath);
return IOUtils.toByteArray(inputStream);
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to open the file on HDFS, path:{0}", path), e);
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
// ignore
}
}
}
return null;
}
/**
 * Open a file on HDFS and return its content as a String
 *
 * @param path file path relative to the HDFS root, e.g. /testDir/b.txt
 * @return String
 * @author adminstrator
 * @since 1.0.0
 */
public String openWithString(String path) {
//HDFS file path
Path hdfsPath = new Path(generateHdfsPath(path));
FileSystem fileSystem = null;
FSDataInputStream inputStream = null;
try {
fileSystem = getFileSystem();
inputStream = fileSystem.open(hdfsPath);
return IOUtils.toString(inputStream, Charset.forName("UTF-8"));
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to open the file on HDFS, path:{0}", path), e);
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
// ignore
}
}
}
return null;
}
/**
 * Open a file on HDFS and convert it to a Java object (the file content must be a JSON string)
 *
 * @param path file path relative to the HDFS root, e.g. /testDir/c.txt
 * @return T
 * @author adminstrator
 * @since 1.0.0
 */
public <T extends Object> T openWithObject(String path, Class<T> clazz) {
//1. read the file as a JSON string
String jsonStr = this.openWithString(path);
//2. use com.alibaba.fastjson.JSON to convert the JSON string into a Java object
return JSON.parseObject(jsonStr, clazz);
}
/**
 * Rename a file or directory
 *
 * @param srcFile HDFS path before renaming, relative to the root, e.g. /testDir/b.txt
 * @param dstFile HDFS path after renaming, relative to the root, e.g. /testDir/b_new.txt
 * @author adminstrator
 * @since 1.0.0
 */
public boolean rename(String srcFile, String dstFile) {
//HDFS file path before renaming
Path srcFilePath = new Path(generateHdfsPath(srcFile));
//HDFS file path after renaming
Path dstFilePath = new Path(dstFile);
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
return fileSystem.rename(srcFilePath, dstFilePath);
} catch (IOException e) {
logger.error(MessageFormat.format("Rename failed, srcFile:{0}, dstFile:{1}", srcFile, dstFile), e);
} finally {
close(fileSystem);
}
return false;
}
/**
 * Delete a file or directory on HDFS
 *
 * @param path path relative to the HDFS root, e.g. /testDir/c.txt
 * @return boolean
 * @author adminstrator
 * @since 1.0.0
 */
public boolean delete(String path) {
//HDFS file path
Path hdfsPath = new Path(generateHdfsPath(path));
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
return fileSystem.delete(hdfsPath, true);
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to delete the HDFS file or directory, path:{0}", path), e);
} finally {
close(fileSystem);
}
return false;
}
/**
 * Get the block locations of a file in the HDFS cluster
 *
 * @param path file path relative to the HDFS root, e.g. /testDir/a.txt
 * @return org.apache.hadoop.fs.BlockLocation[]
 * @author adminstrator
 * @since 1.0.0
 */
public BlockLocation[] getFileBlockLocations(String path) {
//HDFS file path
Path hdfsPath = new Path(generateHdfsPath(path));
FileSystem fileSystem = null;
try {
fileSystem = getFileSystem();
FileStatus fileStatus = fileSystem.getFileStatus(hdfsPath);
return fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
} catch (IOException e) {
logger.error(MessageFormat.format("Failed to get the block locations of the file, path:{0}", path), e);
} finally {
close(fileSystem);
}
return null;
}
/**
 * Convert a relative path to a full HDFS path
 *
 * @param dstPath relative path, e.g. /data
 * @return java.lang.String
 * @author adminstrator
 * @since 1.0.0
 */
private String generateHdfsPath(String dstPath) {
String hdfsPath = defaultHdfsUri;
if (dstPath.startsWith("/")) {
hdfsPath += dstPath;
} else {
hdfsPath = hdfsPath + "/" + dstPath;
}
return hdfsPath;
}
/**
 * Close the FileSystem quietly
 */
private void close(FileSystem fileSystem) {
if (fileSystem != null) {
try {
fileSystem.close();
} catch (IOException e) {
logger.error(e.getMessage());
}
}
}
/**
 * Copy a file within HDFS
 * @param sourcePath source file path, e.g. /testDir/a.txt
 * @param targetPath target file path, e.g. /testDir/a_copy.txt
 * @throws Exception
 */
public void copyFile(String sourcePath, String targetPath) throws Exception {
if (StringUtils.isEmpty(sourcePath) || StringUtils.isEmpty(targetPath)) {
return;
}
FileSystem fs = getFileSystem();
// source file path
Path oldPath = new Path(sourcePath);
// target path
Path newPath = new Path(targetPath);
FSDataInputStream inputStream = null;
FSDataOutputStream outputStream = null;
try {
inputStream = fs.open(oldPath);
outputStream = fs.create(newPath);
//Files.copy(inputStream,outputStream);
IOUtils.copy(inputStream, outputStream);
} finally {
if (inputStream != null) {
inputStream.close();
}
if (outputStream != null) {
outputStream.close();
}
fs.close();
}
}
}
4. Test class
In practice you may run into logging conflicts on the classpath; remember to exclude the conflicting binding (see the example exclusion right below).
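A sketch of one common fix (assuming your Spring Boot starter already provides an slf4j binding such as logback): exclude the log4j binding that the Hadoop artifacts pull in, for example on hadoop-client:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.0.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>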
import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;
/**
 * Tests for the basic HDFS operations
 *
 * @author adminstrator
 * @since 1.0.0
 */
@RunWith(SpringJUnit4ClassRunner.class)
@SpringBootTest
@WebAppConfiguration
public class TestHdfs {
@Autowired
private HdfsUtil hdfsService;
@Test
public void testExist(){
boolean isExist = hdfsService.checkExists("/use");
System.out.println(isExist);
}
/**
 * Test creating HDFS directories
 */
@Test
public void testMkdir(){
boolean result1 = hdfsService.mkdir("/testDir");
System.out.println("创建结果:" + result1);
boolean result2 = hdfsService.mkdir("/testDir/subDir");
System.out.println("创建结果:" + result2);
}
/**
 * Test uploading files
 */
@Test
public void testUploadFile(){
//upload three test files
hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/a.txt","/testDir");
hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/b.txt","/testDir");
hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/c.txt","/testDir/subDir");
}
/**
 * Test listing the files under a directory
 */
@Test
public void testListFiles(){
List<Map<String,Object>> result = hdfsService.listFiles("/testDir",null);
result.forEach(fileMap -> {
fileMap.forEach((key,value) -> {
System.out.println(key + "--" + value);
});
System.out.println();
});
}
/**
 * Test downloading a file
 */
@Test
public void testDownloadFile(){
hdfsService.downloadFileFromHdfs("/testDir/a.txt","C:/Users/yanglei/Desktop/test111.txt");
}
/**
 * Test opening a file on HDFS
 */
@Test
public void testOpen() throws IOException {
FSDataInputStream inputStream = hdfsService.open("/testDir/a.txt");
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
String line = null;
while((line = reader.readLine())!=null){
System.out.println(line);
}
reader.close();
}
/**
 * Test opening a file on HDFS and converting it to a Java object
 */
@Test
public void testOpenWithObject() throws IOException {
//SysUserEntity user = hdfsService.openWithObject("/testDir/b.txt", SysUserEntity.class);
//System.out.println(user);
}
/**
 * Test renaming
 */
@Test
public void testRename(){
hdfsService.rename("/testDir/b.txt","/testDir/b_new.txt");
//list again
testListFiles();
}
/**
 * Test deleting a file
 */
@Test
public void testDelete(){
hdfsService.delete("/testDir/b_new.txt");
//list again
testListFiles();
}
/**
 * Test getting a file's block locations in the HDFS cluster
 */
@Test
public void testGetFileBlockLocations() throws IOException {
BlockLocation[] locations = hdfsService.getFileBlockLocations("/testDir/a.txt");
if(locations != null && locations.length > 0){
for(BlockLocation location : locations){
System.out.println(location.getHosts()[0]);
}
}
}
}
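For completeness, here is a minimal sketch of exposing the utility through a web endpoint for file download, following the ResponseEntity hint in the openWithBytes javadoc. The controller class name and the /hdfs/download mapping are made up for illustration:
import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
@RestController
public class HdfsFileController {
    @Autowired
    private HdfsUtil hdfsUtil;
    /**
     * Download an HDFS file through the browser, e.g. GET /hdfs/download?path=/testDir/a.txt
     */
    @GetMapping("/hdfs/download")
    public ResponseEntity<byte[]> download(@RequestParam("path") String path) {
        // read the whole file into memory via the utility class
        byte[] bytes = hdfsUtil.openWithBytes(path);
        if (bytes == null) {
            return ResponseEntity.notFound().build();
        }
        // return it as an attachment named after the last path segment
        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
        headers.setContentDispositionFormData("attachment", path.substring(path.lastIndexOf('/') + 1));
        return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
    }
}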
Reference:
https://www.jianshu.com/p/274dce05c0fe