springboot遠程連接HDFS-hadoop3.0

一.hadoop前置環境:
hadoop3.0+CDH6.1
這裏默認已經在linux環境配置好;
如何安裝hadoop不在本次範圍內;

我的環境:
win10 + IDEA2019.3+jdk8

二.準備環境:
1.獲取fs.defaultFS鏈接 在core-site.xml裏面
這個等下配置在application.properties裏面
在這裏插入圖片描述
在這裏插入圖片描述
2.下載winutils
https://gitee.com/bochangguan/winutils/tree/master/hadoop-3.0.0/bin
找到你對應的hadoop版本下載,然後配置環境變量
HADOOP_HOME 指向你的hadoop目錄(即包含bin\winutils.exe的那一層目錄)
PATH添加%HADOOP_HOME%\bin
在這裏插入圖片描述
在這裏插入圖片描述
注意:如果不下載這個winutils,運行會報錯:
java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
配置完,記得重啓IDEA纔可以讀取到環境變量;

3.在hosts文件裏面添加 linux服務器的映射(如果存在解析問題的話)
在這裏插入圖片描述
這個不添加的話,會出現linux的域名本地機器無法解析的情況
java.lang.IllegalArgumentException: java.net.UnknownHostException: xxx.xxx.com

4.非必須;將hadoop.dll複製到C:\Windows\System32下
這一個步驟我配置過程是不需要的,如果你出現問題可以配置一下;

三.項目搭建
1.pom:

 <!-- Hadoop client libraries, pinned to 3.0.0 to match the Hadoop 3.0 cluster used in this article.
      slf4j-log4j12 is excluded from hadoop-common; presumably this avoids a clash with the
      SLF4J binding the application already ships (confirm against your own logging setup). -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.0.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.0.0</version>
        </dependency>

2.配置類


import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that publishes a ready-to-use {@link HdfsUtil} bean.
 */
@Configuration
public class HdfsConfig {

    /**
     * HDFS entry point read from the {@code hdfs.path} property; it is the same value as
     * {@code fs.defaultFS} in the cluster's core-site.xml.
     */
    @Value("${hdfs.path}")
    private String defaultHdfsUri;

    /**
     * Creates the HDFS helper with a Hadoop configuration whose {@code fs.defaultFS}
     * is set to the configured URI.
     *
     * @return the helper bean used for all HDFS reads and writes
     */
    @Bean
    public HdfsUtil getHbaseService(){
        final org.apache.hadoop.conf.Configuration hadoopConf =
                new org.apache.hadoop.conf.Configuration();
        // Optional, left disabled as in the original: pin the hdfs:// implementation explicitly.
        // hadoopConf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        hadoopConf.set("fs.defaultFS", defaultHdfsUri);
        return new HdfsUtil(hadoopConf, defaultHdfsUri);
    }
}

在這裏插入圖片描述

3.工具類:

import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Convenience wrapper around the Hadoop {@link FileSystem} API for everyday HDFS operations
 * (mkdir, upload, download, list, read, rename, delete, copy).
 *
 * <p>Unless a method says otherwise, path arguments are relative to the default HDFS URI given
 * to the constructor, for example {@code /testDir/a.txt}.
 *
 * <p>Every operation uses the instance returned by {@link FileSystem#get(Configuration)}. Hadoop
 * caches that instance and hands the same object to every caller, so this class deliberately
 * never closes it: closing it after each call would invalidate streams returned by
 * {@link #open(String)} and break calls running at the same time on other threads.
 *
 * <p>To act as a specific HDFS user, either call
 * {@code System.setProperty("HADOOP_USER_NAME", "appuser")} before the first HDFS access, or
 * obtain the file system with {@code FileSystem.get(new URI("hdfs://master:9000"), conf, "root")}.
 *
 * @author adminstrator
 * @since 1.0.0
 */
public class HdfsUtil {

    private static final Logger logger = LoggerFactory.getLogger(HdfsUtil.class);

    /** Charset used whenever file content is decoded to text. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Hadoop client configuration used to look up the file system. */
    private final Configuration conf;

    /**
     * Default HDFS URI that relative paths are appended to, e.g. hdfs://192.168.197.130:9000
     */
    private final String defaultHdfsUri;

    /**
     * @param conf           Hadoop client configuration
     * @param defaultHdfsUri HDFS URI prefix used to turn relative paths into full HDFS paths
     */
    public HdfsUtil(Configuration conf, String defaultHdfsUri) {
        this.conf = conf;
        this.defaultHdfsUri = defaultHdfsUri;
    }

    /**
     * Returns the file system for the configured default URI.
     *
     * <p>The returned object is shared (see the class comment); callers must not close it.
     * NOTE(review): this relies on Hadoop's FileSystem cache being enabled, which is the default.
     *
     * @return org.apache.hadoop.fs.FileSystem
     */
    private FileSystem getFileSystem() throws IOException {
        return FileSystem.get(conf);
    }

    /**
     * Creates a directory on HDFS.
     *
     * @param path relative HDFS directory path, e.g. /testDir
     * @return true if the path already exists or was created; false if creation failed
     * @author adminstrator
     * @since 1.0.0
     */
    public boolean mkdir(String path) {
        // An already existing path counts as success.
        if (checkExists(path)) {
            return true;
        }
        try {
            return getFileSystem().mkdirs(new Path(generateHdfsPath(path)));
        } catch (IOException e) {
            logger.error(MessageFormat.format("創建HDFS目錄失敗,path:{0}", path), e);
            return false;
        }
    }


    /**
     * Uploads a local file to HDFS, keeping the local file and overwriting any existing target.
     *
     * @param srcFile local file path, e.g. D:/test.txt
     * @param dstPath relative HDFS directory path, e.g. /testDir
     * @author adminstrator
     * @since 1.0.0
     */
    public void uploadFileToHdfs(String srcFile, String dstPath) {
        this.uploadFileToHdfs(false, true, srcFile, dstPath);
    }

    /**
     * Uploads a local file to HDFS. Failures are logged, not thrown.
     *
     * @param delSrc    whether to delete the local file afterwards
     * @param overwrite whether to overwrite an existing file on HDFS
     * @param srcFile   local file path, e.g. D:/test.txt
     * @param dstPath   relative HDFS directory path, e.g. /testDir
     * @author adminstrator
     * @since 1.0.0
     */
    public void uploadFileToHdfs(boolean delSrc, boolean overwrite, String srcFile, String dstPath) {
        Path localSrcPath = new Path(srcFile);
        Path hdfsDstPath = new Path(generateHdfsPath(dstPath));
        try {
            getFileSystem().copyFromLocalFile(delSrc, overwrite, localSrcPath, hdfsDstPath);
        } catch (IOException e) {
            logger.error(MessageFormat.format("上傳文件至HDFS失敗,srcFile:{0},dstPath:{1}", srcFile, dstPath), e);
        }
    }

    /**
     * Checks whether a file or directory exists on HDFS.
     *
     * @param path relative HDFS path, e.g. /testDir or /testDir/a.txt
     * @return true if the path exists; false if it does not or the check failed
     * @author adminstrator
     * @since 1.0.0
     */
    public boolean checkExists(String path) {
        try {
            return getFileSystem().exists(new Path(generateHdfsPath(path)));
        } catch (IOException e) {
            logger.error(MessageFormat.format("'判斷文件或者目錄是否在HDFS上面存在'失敗,path:{0}", path), e);
            return false;
        }
    }

    /**
     * Writes an uploaded file into an HDFS directory under its original file name.
     *
     * <p>Does nothing when {@code path} is empty or {@code file} is null. Only the last segment
     * of the client-supplied file name is used, so the upload cannot escape {@code path}.
     *
     * @param path target HDFS directory; passed to Hadoop as given (not prefixed with the default URI)
     * @param file uploaded file whose bytes are written
     * @throws IllegalArgumentException if the upload carries no usable file name
     * @throws Exception                if reading the upload or writing to HDFS fails
     */
    public  void createFile(String path, MultipartFile file) throws Exception {
        if (StringUtils.isEmpty(path) || file == null) {
            return;
        }
        String fileName = baseName(file.getOriginalFilename());
        if (StringUtils.isEmpty(fileName) || ".".equals(fileName) || "..".equals(fileName)) {
            throw new IllegalArgumentException("Uploaded file has no usable file name");
        }
        // Read the upload before touching HDFS so a failed read does not leave an empty file behind.
        byte[] content = file.getBytes();
        Path newPath = new Path(path + "/" + fileName);
        try (FSDataOutputStream outputStream = getFileSystem().create(newPath)) {
            outputStream.write(content);
        }
    }

    /**
     * Strips any directory part (separated by '/' or '\') from a client-supplied file name.
     */
    private static String baseName(String fileName) {
        if (fileName == null) {
            return null;
        }
        int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        return fileName.substring(lastSeparator + 1);
    }


    /**
     * Lists the files and directories directly under an HDFS path (sub-directories are not descended into).
     *
     * <p>Each entry is a map with the keys {@code path}, {@code isDir} and {@code fileStatus}.
     *
     * @param path       relative HDFS directory path, e.g. /testDir
     * @param pathFilter optional filter applied to the listing; null lists everything
     * @return the entries found; empty if the path does not exist or listing failed
     * @author adminstrator
     * @since 1.0.0
     */
    public List<Map<String, Object>> listFiles(String path, PathFilter pathFilter) {
        List<Map<String, Object>> result = new ArrayList<>();
        if (!checkExists(path)) {
            return result;
        }

        try {
            FileSystem fileSystem = getFileSystem();
            Path hdfsPath = new Path(generateHdfsPath(path));

            FileStatus[] statuses = pathFilter != null
                    ? fileSystem.listStatus(hdfsPath, pathFilter)
                    : fileSystem.listStatus(hdfsPath);

            if (statuses != null) {
                for (FileStatus status : statuses) {
                    Map<String, Object> fileMap = new HashMap<>(4);
                    fileMap.put("path", status.getPath().toString());
                    fileMap.put("isDir", status.isDirectory());
                    fileMap.put("fileStatus", status.toString());
                    result.add(fileMap);
                }
            }
        } catch (IOException e) {
            logger.error(MessageFormat.format("獲取HDFS上面的某個路徑下面的所有文件失敗,path:{0}", path), e);
        }

        return result;
    }

    /**
     * Reads an HDFS text file as UTF-8 and returns its lines concatenated.
     *
     * <p>Line terminators are not preserved; use {@link #openWithString(String)} for the exact content.
     *
     * @param path relative HDFS file path
     * @return the concatenated lines, or null if {@code path} is empty or does not exist
     * @throws Exception if the file cannot be opened or read
     */
    public  String readFile(String path) throws Exception {
        if (StringUtils.isEmpty(path) || !checkExists(path)) {
            return null;
        }
        // Resolve the path the same way checkExists did, so the file checked is the file read.
        Path srcPath = new Path(generateHdfsPath(path));
        // Decode explicitly as UTF-8 so non-ASCII text does not depend on the platform charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(getFileSystem().open(srcPath), UTF_8))) {
            StringBuilder sb = new StringBuilder();
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                sb.append(lineTxt);
            }
            return sb.toString();
        }
    }


    /**
     * Downloads a file from HDFS to the local file system. Failures are logged, not thrown.
     *
     * @param srcFile relative HDFS file path, e.g. /testDir/a.txt
     * @param dstFile local target path, e.g. D:/test.txt
     * @author adminstrator
     * @since 1.0.0
     */
    public void downloadFileFromHdfs(String srcFile, String dstFile) {
        Path hdfsSrcPath = new Path(generateHdfsPath(srcFile));
        Path localDstPath = new Path(dstFile);
        try {
            getFileSystem().copyToLocalFile(hdfsSrcPath, localDstPath);
        } catch (IOException e) {
            logger.error(MessageFormat.format("從HDFS下載文件至本地失敗,srcFile:{0},dstFile:{1}", srcFile, dstFile), e);
        }
    }

    /**
     * Opens a file on HDFS for reading. The caller owns the returned stream and must close it.
     *
     * @param path relative HDFS file path, e.g. /testDir/c.txt
     * @return the open stream, or null if the file could not be opened
     * @author adminstrator
     * @since 1.0.0
     */
    public FSDataInputStream open(String path) {
        Path hdfsPath = new Path(generateHdfsPath(path));
        try {
            return getFileSystem().open(hdfsPath);
        } catch (IOException e) {
            logger.error(MessageFormat.format("打開HDFS上面的文件失敗,path:{0}", path), e);
            return null;
        }
    }

    /**
     * Reads a whole HDFS file into a byte array, e.g. to serve it as a web download.
     * <p>new ResponseEntity&lt;byte[]&gt;(bytes, headers, HttpStatus.CREATED);</p>
     *
     * @param path relative HDFS file path, e.g. /testDir/b.txt
     * @return the file content, or null if it could not be read
     * @author adminstrator
     * @since 1.0.0
     */
    public byte[] openWithBytes(String path) {
        Path hdfsPath = new Path(generateHdfsPath(path));

        FSDataInputStream inputStream = null;
        try {
            inputStream = getFileSystem().open(hdfsPath);
            return IOUtils.toByteArray(inputStream);
        } catch (IOException e) {
            logger.error(MessageFormat.format("打開HDFS上面的文件失敗,path:{0}", path), e);
            return null;
        } finally {
            closeQuietly(inputStream, path);
        }
    }

    /**
     * Reads a whole HDFS file as a UTF-8 string.
     *
     * @param path relative HDFS file path, e.g. /testDir/b.txt
     * @return the file content, or null if it could not be read
     * @author adminstrator
     * @since 1.0.0
     */
    public String openWithString(String path) {
        Path hdfsPath = new Path(generateHdfsPath(path));

        FSDataInputStream inputStream = null;
        try {
            inputStream = getFileSystem().open(hdfsPath);
            return IOUtils.toString(inputStream, UTF_8);
        } catch (IOException e) {
            logger.error(MessageFormat.format("打開HDFS上面的文件失敗,path:{0}", path), e);
            return null;
        } finally {
            closeQuietly(inputStream, path);
        }
    }

    /**
     * Reads an HDFS file containing JSON and converts it into a Java object.
     *
     * @param path  relative HDFS file path, e.g. /testDir/c.txt
     * @param clazz target type to parse the JSON into
     * @return the parsed object, or null if the file could not be read
     * @author adminstrator
     * @since 1.0.0
     */
    public <T> T openWithObject(String path, Class<T> clazz) {
        String jsonStr = this.openWithString(path);
        if (jsonStr == null) {
            return null;
        }
        return JSON.parseObject(jsonStr, clazz);
    }

    /**
     * Renames (moves) a file or directory on HDFS.
     *
     * @param srcFile relative HDFS path before renaming, e.g. /testDir/b.txt
     * @param dstFile relative HDFS path after renaming, e.g. /testDir/b_new.txt
     * @return true if the rename succeeded; false otherwise
     * @author adminstrator
     * @since 1.0.0
     */
    public boolean rename(String srcFile, String dstFile) {
        Path srcFilePath = new Path(generateHdfsPath(srcFile));
        // Resolve the target like the source; a bare relative name would otherwise be
        // interpreted by Hadoop against the user's working directory instead of the root.
        Path dstFilePath = new Path(generateHdfsPath(dstFile));
        try {
            return getFileSystem().rename(srcFilePath, dstFilePath);
        } catch (IOException e) {
            logger.error(MessageFormat.format("重命名失敗,srcFile:{0},dstFile:{1}", srcFile, dstFile), e);
            return false;
        }
    }

    /**
     * Deletes a file or directory on HDFS; directories are deleted recursively.
     *
     * @param path relative HDFS path, e.g. /testDir/c.txt
     * @return true if the delete succeeded; false otherwise
     * @author adminstrator
     * @since 1.0.0
     */
    public boolean delete(String path) {
        Path hdfsPath = new Path(generateHdfsPath(path));
        try {
            return getFileSystem().delete(hdfsPath, true);
        } catch (IOException e) {
            logger.error(MessageFormat.format("刪除HDFS文件或目錄失敗,path:{0}", path), e);
            return false;
        }
    }

    /**
     * Looks up where the blocks of a file are stored in the HDFS cluster.
     *
     * @param path relative HDFS file path, e.g. /testDir/a.txt
     * @return the block locations covering the whole file, or null if the lookup failed
     * @author adminstrator
     * @since 1.0.0
     */
    public BlockLocation[] getFileBlockLocations(String path) {
        Path hdfsPath = new Path(generateHdfsPath(path));
        try {
            FileSystem fileSystem = getFileSystem();
            FileStatus fileStatus = fileSystem.getFileStatus(hdfsPath);
            return fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        } catch (IOException e) {
            logger.error(MessageFormat.format("獲取某個文件在HDFS集羣的位置失敗,path:{0}", path), e);
            return null;
        }
    }


    /**
     * Turns a relative path into a full HDFS path by prefixing the default HDFS URI.
     *
     * @param dstPath relative path, e.g. /data (a missing leading slash is added)
     * @return java.lang.String
     * @author adminstrator
     * @since 1.0.0
     */
    private String generateHdfsPath(String dstPath) {
        if (dstPath.startsWith("/")) {
            return defaultHdfsUri + dstPath;
        }
        return defaultHdfsUri + "/" + dstPath;
    }

    /**
     * Closes a read stream, logging instead of propagating a failure so that data which was
     * already read successfully is still returned to the caller.
     */
    private static void closeQuietly(FSDataInputStream stream, String path) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            logger.warn(MessageFormat.format("Failed to close HDFS input stream, path:{0}", path), e);
        }
    }

    /**
     * Copies a file within HDFS. Does nothing when either path is empty.
     *
     * @param sourcePath HDFS path of the file to copy; passed to Hadoop as given
     * @param targetPath HDFS path of the copy; passed to Hadoop as given
     * @throws Exception if the source cannot be opened or the copy fails
     */
    public  void copyFile(String sourcePath, String targetPath) throws Exception {
        if (StringUtils.isEmpty(sourcePath) || StringUtils.isEmpty(targetPath)) {
            return;
        }
        FileSystem fs = getFileSystem();
        // try-with-resources closes whichever streams were actually opened, so a failure in
        // open/create surfaces as itself instead of as a NullPointerException from cleanup.
        try (FSDataInputStream inputStream = fs.open(new Path(sourcePath));
             FSDataOutputStream outputStream = fs.create(new Path(targetPath))) {
            IOUtils.copy(inputStream, outputStream);
        }
    }

}

4.測試類
在實際的過程,可能會有一些日誌衝突,記得排除


import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;

/**
 * 測試HDFS的基本操作
 *
 * @author adminstrator
 * @since 1.0.0
 */
@RunWith(SpringJUnit4ClassRunner.class)
@SpringBootTest
@WebAppConfiguration
public class TestHdfs {

    @Autowired
    private HdfsUtil hdfsService;


    @Test
    public void testExist(){
        boolean isExist = hdfsService.checkExists("/use");
        System.out.println(isExist);
    }

    /**
     * 測試創建HDFS目錄
     */
    @Test
    public void testMkdir(){
        boolean result1 = hdfsService.mkdir("/testDir");
        System.out.println("創建結果:" + result1);

        boolean result2 = hdfsService.mkdir("/testDir/subDir");
        System.out.println("創建結果:" + result2);
    }

    /**
     * 測試上傳文件
     */
    @Test
    public void testUploadFile(){
        //測試上傳三個文件
        hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/a.txt","/testDir");
        hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/b.txt","/testDir");

        hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/c.txt","/testDir/subDir");
    }

    /**
     * 測試列出某個目錄下面的文件
     */
    @Test
    public void testListFiles(){
        List<Map<String,Object>> result = hdfsService.listFiles("/testDir",null);

        result.forEach(fileMap -> {
            fileMap.forEach((key,value) -> {
                System.out.println(key + "--" + value);
            });
            System.out.println();
        });
    }

    /**
     * 測試下載文件
     */
    @Test
    public void testDownloadFile(){
        hdfsService.downloadFileFromHdfs("/testDir/a.txt","C:/Users/yanglei/Desktop/test111.txt");
    }

    /**
     * 測試打開HDFS上面的文件
     */
    @Test
    public void testOpen() throws IOException {
        FSDataInputStream inputStream = hdfsService.open("/testDir/a.txt");

        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        String line = null;
        while((line = reader.readLine())!=null){
            System.out.println(line);
        }

        reader.close();
    }

    /**
     * 測試打開HDFS上面的文件,並轉化爲Java對象
     */
    @Test
    public void testOpenWithObject() throws IOException {
        //SysUserEntity user = hdfsService.openWithObject("/testDir/b.txt", SysUserEntity.class);
        //System.out.println(user);
    }

    /**
     * 測試重命名
     */
    @Test
    public void testRename(){
        hdfsService.rename("/testDir/b.txt","/testDir/b_new.txt");

        //再次遍歷
        testListFiles();
    }

    /**
     * 測試刪除文件
     */
    @Test
    public void testDelete(){
        hdfsService.delete("/testDir/b_new.txt");

        //再次遍歷
        testListFiles();
    }

    /**
     * 測試獲取某個文件在HDFS集羣的位置
     */
    @Test
    public void testGetFileBlockLocations() throws IOException {
        BlockLocation[] locations = hdfsService.getFileBlockLocations("/testDir/a.txt");

        if(locations != null && locations.length > 0){
            for(BlockLocation location : locations){
                System.out.println(location.getHosts()[0]);
            }
        }
    }
}

參考:
https://www.jianshu.com/p/274dce05c0fe

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章