Scala 字節流讀取 HDFS 文件(遞歸子目錄),同時解決 NameNode (NN) standby 切換問題

package processor

import java.io._
import java.util.concurrent.{Executors, ExecutorService}

import Utils.{HDFSUtil, OperaFunc, MysqlUtil}
import org.apache.hadoop.fs.{Path, FSDataInputStream}

import scala.collection.mutable.ListBuffer

/**
  * Loads the text content of every path returned by `HDFSUtil.listChildren` for a given
  * HDFS location and collects it, line by line, into in-memory buffers.
  *
  * Created by Victory.John on 2019/1/10.
  */
object MainAccess {

  import java.nio.charset.StandardCharsets

  import scala.util.control.NonFatal

  // The members below stay public `var`s so that existing external readers/writers keep compiling.

  // Order records.
  var orderBodys: ListBuffer[String] = new ListBuffer[String]
  // Position records.
  var positionBodys: ListBuffer[String] = new ListBuffer[String]
  // Riding-track records.
  var ridingBodys: ListBuffer[String] = new ListBuffer[String]
  // NOTE(review): presumably bike records; nothing in this file populates it.
  var bikeBodys: ListBuffer[String] = new ListBuffer[String]
  // Path used by initData(); expected to be assigned by the caller beforehand.
  var orderPath = ""
  // Text of all files read so far during the current getData() run.
  var sb = new StringBuffer()
  // Lines of `sb`, split on '\n'. Starts empty (not null) so reading its length is always safe.
  var strs: Array[String] = Array.empty[String]

  /**
    * Program entry point.
    *
    * @param args args(0) is the HDFS path whose files are loaded into [[orderBodys]]
    */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case Some(path) =>
        getData(path, orderBodys)
      case None =>
        // Previously this failed with an ArrayIndexOutOfBoundsException; fail with a usable message.
        System.err.println("Usage: MainAccess <hdfs-path>")
        sys.exit(1)
    }
  }

  /**
    * Step 1: initialise the FileSystem instance, read every listed file and append the
    * resulting lines to `orders`.
    *
    * The shared [[sb]] / [[strs]] buffers are reset at the start of each call, so repeated
    * calls do not re-append content loaded by an earlier call. Files that fail to read are
    * reported by [[getFSReadLine]] and skipped.
    *
    * @param path   HDFS location handed to `HDFSUtil.listChildren`
    * @param orders buffer that receives every line that was read
    */
  def getData(path: String, orders: ListBuffer[String]): Unit = {
    val holder = new ListBuffer[String]
    HDFSUtil.getFSInstance()
    try {
      // Start from a clean slate: these buffers are shared across calls.
      sb.setLength(0)
      strs = Array.empty[String]

      val listPaths = HDFSUtil.listChildren(path, holder)
      for (child <- listPaths) {
        println(("path", child))
        getFSReadLine(child, orders)
      }
      println(("strs length", strs.length))

      // Append to the buffer the caller asked for, not unconditionally to orderBodys.
      orders ++= strs
      println(("orders length", orders.size))
    } finally {
      // Release the FileSystem even when listing or reading throws.
      HDFSUtil.close()
    }
  }

  /**
    * Step 2: read one HDFS file as UTF-8 text, append it to [[sb]] and refresh [[strs]].
    *
    * Decoding goes through an `InputStreamReader`, so a multi-byte character that straddles
    * a read-buffer boundary is decoded correctly. On failure the error is printed, and
    * [[sb]] / [[strs]] are left exactly as they were before the call.
    *
    * @param path  full HDFS path of the file to read
    * @param order currently unused; retained so the signature stays compatible. Callers
    *              obtain the lines from [[strs]] (see [[getData]]).
    */
  def getFSReadLine(path: String, order: ListBuffer[String]): Unit = {
    try {
      // Open the HDFS input stream.
      val inputStream: FSDataInputStream = HDFSUtil.fs.open(new Path(path))
      try {
        val reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)
        val text = new java.lang.StringBuilder()
        val chunk = new Array[Char](4096)
        var charsRead = reader.read(chunk)
        while (charsRead >= 0) {
          text.append(chunk, 0, charsRead)
          charsRead = reader.read(chunk)
        }
        // Only publish the content once the whole file has been read successfully.
        appendFileText(text.toString)
        strs = splitLines(sb.toString)
      } finally {
        // Always close the stream; a failure here is reported by the handler below.
        inputStream.close()
      }
    } catch {
      case NonFatal(ex) =>
        println("sorceerror=" + ex.getMessage)
        ex.printStackTrace()
    }
  }

  /** Appends one file's text to [[sb]], making sure it starts on a fresh line. */
  private def appendFileText(text: String): Unit = {
    // Without this, a file lacking a trailing newline would fuse its last line with the
    // first line of the next file.
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) != '\n') {
      sb.append('\n')
    }
    sb.append(text)
  }

  /** Splits text on '\n'; empty text yields no lines (a plain `"".split` would yield one empty line). */
  private def splitLines(text: String): Array[String] =
    if (text.isEmpty) Array.empty[String] else text.split("\n")

  /**
    * Loads the order data found under [[orderPath]] into [[orderBodys]].
    */
  def initData(): Unit = {
    println(s"數據初始化>>>>>>>>>$orderPath")
    getData(orderPath, orderBodys)
    println(s"order類型完成>>>>>>:${orderBodys.size}")
    // TODO(review): loading into positionBodys and ridingBodys was commented out in the
    // original; re-enable with getData(<path>, <buffer>) once their source paths are defined.
  }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章