WordCount in Scala

The program below reads a text file, splits each line into words, maps each word to a (word, 1) pair, sums the counts per word with reduceByKey, and prints the results on the driver.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // Run locally on all available cores and name the application "WordCount"
    val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")

    val sc = new SparkContext(config)

    // Read the input file as an RDD of lines
    val lines: RDD[String] = sc.textFile("in/word.txt")
    // Split each line on spaces to get individual words
    val words: RDD[String] = lines.flatMap(x => x.split(" "))
    // Pair each word with an initial count of 1
    val wordToOne: RDD[(String, Int)] = words.map(x => (x, 1))
    // Sum the counts for each distinct word
    val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey((x, y) => x + y)

    // Collect the results back to the driver and print one (word, count) tuple per line
    val result: Array[(String, Int)] = wordToSum.collect()
    for (word <- result) println(word)
    // Equivalent one-liner:
    // lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).collect().foreach(println)

    sc.stop()
  }
}
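
To compile and run this locally, the project only needs Spark core on the classpath. The following is a minimal build.sbt sketch; the Scala and Spark version numbers are assumptions and should be adjusted to match your environment.

// build.sbt -- minimal sketch; versions are assumptions, adjust to your environment
name := "word-count"

scalaVersion := "2.12.18" // Spark 3.x is built against Scala 2.12 / 2.13

libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.1"

With an in/word.txt file containing lines of space-separated words, running the program (for example via sbt run) prints one (word, count) tuple per distinct word; the exact counts depend on the input.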