SparkSQL: Saving a DataFrame as a CSV File


ReadShipMMSITwo

package com.xtd.file

import org.apache.spark.sql.{SaveMode, SparkSession}

object ReadShipMMSITwo {

  def main(args: Array[String]): Unit = {

    // SparkSession running locally on all cores
    val spark = SparkSession
      .builder()
      .appName("ReadShipMMSITwo")
      .master("local[*]")
      .getOrCreate()

    // input CSV path (could also be taken from the program arguments)
    val dir = "file:///D:/Hadoop/ship/EEship.csv"
    //    val dir = args(1)

    // read the CSV with a header row and let Spark infer the column types
    val df = spark.read.option("header", "true").option("inferSchema", "true").csv(dir)
    df.printSchema()
    df.show()

    // register the DataFrame as a temporary view so it can be queried with SQL
    df.createOrReplaceTempView("route")
    // select the MMSI column, sorted
    val MMSIDF = spark.sql("SELECT ship_mobile_nineyard FROM route ORDER BY ship_mobile_nineyard")
    println(MMSIDF.count())

    // coalesce(1): collapse to a single partition so only one part file is written
    MMSIDF.coalesce(1).write.mode(SaveMode.Append).csv("file:///D:/Hadoop/ship/file")
    // option("header", "true"): write the column name as the first line of the output
    MMSIDF.coalesce(1).write.option("header", "true").csv("file:///D:/Hadoop/ship/csv")

    spark.stop()
  }
}
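
Spark writes each CSV output as a directory of part-*.csv files rather than a single file with the given name, so a quick way to check the result is to read the written directory back and compare counts. Below is a minimal verification sketch, assuming the same local paths and the ship_mobile_nineyard column from the example above; VerifyCsvOutput is a hypothetical helper object, not part of the original program.

package com.xtd.file

import org.apache.spark.sql.SparkSession

// Hypothetical verification step: read back the directory written by ReadShipMMSITwo
// and confirm the row count and column.
object VerifyCsvOutput {

  def main(args: Array[String]): Unit = {

    val spark = SparkSession
      .builder()
      .appName("VerifyCsvOutput")
      .master("local[*]")
      .getOrCreate()

    // reading the directory picks up every part-*.csv file inside it
    val written = spark.read
      .option("header", "true") // the second write above included a header line
      .csv("file:///D:/Hadoop/ship/csv")

    println(written.count())                       // should match MMSIDF.count() above
    written.select("ship_mobile_nineyard").show(5) // column name taken from the example schema

    spark.stop()
  }
}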

Result files
