ReadShipMMSITwo
package com.xtd.file
import java.io.File
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SaveMode, SparkSession}
/**
 * Spark job that reads a ship CSV file, selects the `ship_mobile_nineyard` column sorted in
 * ascending order, prints the row count, and writes the result out as CSV twice: once
 * without a header (appending to the target directory) and once with a header row.
 */
object ReadShipMMSITwo {

  /**
   * Entry point of the job.
   *
   * @param args command-line arguments; not read at the moment because the input path is
   *             hard-coded below
   */
  def main(args: Array[String]): Unit = {
    // Local-mode session using every available core.
    val spark = SparkSession
      .builder()
      .appName("ReadShipMMSITwo ")
      .master("local[*]")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    // Always release the session, even when reading, querying or writing fails.
    try {
      // Input CSV location.
      val dir = "file:///D:/Hadoop/ship/EEship.csv"
      // val dir = args(1)

      // The first line of the file is the header; column types are inferred from the data.
      val df = spark.read
        .option("header", "true")
        .option("inferSchema", "true")
        .csv(dir)
      df.printSchema()
      df.show()

      // Register the DataFrame as a temporary view so it can be queried with SQL.
      df.createOrReplaceTempView("route")

      // The original query had a dangling WHERE with no predicate ("WHERE ORDER BY"),
      // which is not valid SQL; the intent is a plain sorted projection.
      val mmsiDf = spark.sql(
        "SELECT ship_mobile_nineyard FROM route ORDER BY ship_mobile_nineyard")
      println(mmsiDf.count())

      // coalesce(1) collapses the result into a single partition, i.e. one output part file.
      // Headerless output, appended to whatever already exists in the target directory.
      mmsiDf.coalesce(1).write.mode(SaveMode.Append).csv("file:///D:\\Hadoop\\ship\\file")

      // Same data with a header row; no explicit save mode is set for this write.
      mmsiDf.coalesce(1).write.option("header", "true").csv("file:///D:\\Hadoop\\ship\\csv")
    } finally {
      spark.stop()
    }
  }
}
Results File