HDFS file operations and Hive table operations in Scala

file:///home/text1.txt reads from the local filesystem

hdfs://clusterA/direct1/text1.txt reads from the cluster
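
A minimal sketch of the two path schemes in use (assuming an existing SparkSession named spark; the paths are placeholders):

// read from the local filesystem
val localLines = spark.sparkContext.textFile("file:///home/text1.txt")
// read from the HDFS cluster
val hdfsLines = spark.sparkContext.textFile("hdfs://clusterA/direct1/text1.txt")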

import java.io.OutputStreamWriter

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.hive.HiveContext

 

 

/**
  * Read an HDFS file
  *
  * @param aPath path of the file to read, e.g. hdfs://clusterA/direct1/text1.txt
  * @return the file contents as an Array[String], or an empty array if the path does not exist
  */
def readHdfsFile(aPath: String): Array[String] = {
    val sc = SparkSession.builder().enableHiveSupport().getOrCreate()
    val pathArr = aPath.split("//")
    val uri = pathArr.head + "//" + pathArr(1).split("/").head
    val path = new Path(aPath)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(
        new java.net.URI(uri),
        new org.apache.hadoop.conf.Configuration())
    if (hdfs.exists(path))
        sc.sparkContext.textFile(aPath).collect()
    else
        Array.empty[String]
}

 


/**
  * Delete an HDFS directory
  *
  * @param aPath path to delete, e.g. hdfs://clusterA/direct1
  * @return
  */
def deleteHdfsPath(aPath: String) = {
    val pathArr = aPath.split("//")
    val uri = pathArr.head + "//" + pathArr(1).split("/").head
    val path = new Path(aPath)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(
        new java.net.URI(uri),
        new org.apache.hadoop.conf.Configuration())
    if (hdfs.exists(path))
        hdfs.delete(path, true)
}
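
A quick usage sketch (hypothetical path); the second argument true passed to hdfs.delete makes the delete recursive, so the whole directory tree under the path is removed:

// delete an old output directory before rewriting it (hypothetical path)
deleteHdfsPath("hdfs://clusterA/direct1/tmp_output")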

/**
  * Get the modification time of a file
  *
  * @param aPath path of the file, e.g. hdfs://clusterA/direct1/file.txt
  * @return the modification time as a String (millisecond timestamp), or an empty String if the path does not exist
  */
def getMdfTime(aPath: String): String = {
    val pathArr = aPath.split("//")
    val uri = pathArr.head + "//" + pathArr(1).split("/").head
    val path = new Path(aPath)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(
        new java.net.URI(uri),
        new org.apache.hadoop.conf.Configuration())
    if (hdfs.exists(path)) {
        val fileSt = hdfs.getFileStatus(path) // path is already a Path, no need to wrap it again
        fileSt.getModificationTime.toString   // millisecond epoch timestamp (Long)
    } else ""
}
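
The returned value is an epoch timestamp in milliseconds; a minimal sketch of turning it into a readable date (the format pattern and path are just examples, and it assumes the file exists so the returned string is non-empty):

import java.text.SimpleDateFormat
import java.util.Date

val mdfTime = getMdfTime("hdfs://clusterA/direct1/file.txt").toLong
val readable = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(mdfTime))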

 

/**
  * Write an HDFS file with Chinese-character support - overwrite mode
  *
  * @param aPath   path of the file to write, e.g. hdfs://clusterA/direct1/file.txt
  * @param content lines of text to write
  * @return
  */
def writeOverwrite(aPath: String, content: Iterator[String], hdfs: FileSystem) = {
    // use UTF-8 explicitly so Chinese characters are written correctly
    val path = new Path(aPath)
    val out = new OutputStreamWriter(hdfs.create(path, true), "UTF-8") // true: overwrite; false: fail if the path already exists
    content.foreach(str => out.write(str + "\n"))
    out.flush()
    out.close()
}
/**
  * Write an HDFS file with Chinese-character support - append mode
  *
  * @param filename path of the file to write, e.g. hdfs://clusterA/direct1/file.txt
  * @param content  lines of text to write
  * @return
  */
def writeAppend(filename: String, content: Iterator[String], hdfs: FileSystem) = {
    // use UTF-8 explicitly so Chinese characters are written correctly
    val path = new Path(filename)
    var fileOutputStream: FSDataOutputStream = null
    try {
        if (hdfs.exists(path)) {
            fileOutputStream = hdfs.append(path)
        } else {
            fileOutputStream = hdfs.create(path)
        }
        content.foreach(x => fileOutputStream.write((x + "\n").getBytes("UTF-8")))
    } finally {
        if (fileOutputStream != null) fileOutputStream.close()
    }
}
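
Both write functions take the FileSystem handle as a parameter; a minimal sketch of obtaining it and calling them (the cluster URI and paths are placeholders):

val hdfs = FileSystem.get(
    new java.net.URI("hdfs://clusterA"),
    new org.apache.hadoop.conf.Configuration())

// overwrite the file with two lines, then append one more line
writeOverwrite("hdfs://clusterA/direct1/file.txt", Iterator("line one", "line two"), hdfs)
writeAppend("hdfs://clusterA/direct1/file.txt", Iterator("appended line"), hdfs)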

 


/**
  * Save a DataFrame into the given table partition
  *
  * @param sqlContext    HiveContext
  * @param tableNme      target table name
  * @param hdfsPath      HDFS root path of the table
  * @param DF            DataFrame to save
  * @param day           partition value pt_d
  * @param hour          partition value pt_h
  * @param numPartitions number of output files
  * @return
  */
def saveDF(sqlContext: HiveContext, tableNme: String, hdfsPath: String, DF: DataFrame, day: String, hour: String, numPartitions: Int): Unit = {

    // Method 1: write ORC files to the partition directory, then register the partition
    val savePath = hdfsPath + "/pt_d=" + day + "/pt_h=" + hour
    val sqlcode = "alter table " + tableNme + " add if not exists partition (pt_d='" + day + "', pt_h='" + hour + "')"
    // overwrite mode
    deleteHdfsPath(savePath) // delete the files under the path first
    DF.repartition(numPartitions).write.format("orc").save(savePath)
    // append mode: DF.repartition(numPartitions).write.mode("append").format("orc").save(savePath)
    sqlContext.sql("use biads")
    sqlContext.sql(sqlcode)

    // Method 2: register a temp view and insert via SQL
    val sc = SparkSession.builder().enableHiveSupport().getOrCreate()
    DF.createOrReplaceTempView("tmp_view")
    sc.sql(s"INSERT OVERWRITE TABLE biads.${tableNme} PARTITION (pt_d='${day}', pt_h='${hour}') select * from tmp_view") // change OVERWRITE to INTO to append instead of overwrite
}
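
A usage sketch, assuming the target table biads.some_table already exists and is partitioned by (pt_d, pt_h); the table name, warehouse path, source query, and partition values are placeholders:

val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
val sqlContext = new HiveContext(spark.sparkContext) // HiveContext is deprecated in Spark 2.x; SparkSession.sql works as well
val df = spark.sql("select * from biads.some_source_table")

saveDF(sqlContext, "some_table", "hdfs://clusterA/user/hive/warehouse/biads.db/some_table",
       df, "20200101", "12", numPartitions = 10)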


 
