Basic usage of Spark on YARN

/opt/spark/bin/spark-shell --master yarn --driver-memory 10g --driver-cores 4 --executor-memory 12g --executor-cores 4 --num-executors 200
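These flags request a 10g / 4-core driver and 200 executors with 12g of memory and 4 cores each. Once the shell is up, the SparkContext is available as sc; a quick sanity check (a minimal sketch with output omitted; the exact string returned by sc.master differs between deploy modes and Spark versions) confirms the session landed on YARN with the requested resources:

scala> sc.master                                  // a yarn-based master string
scala> sc.getConf.get("spark.executor.memory")    // should echo "12g"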

scala> import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapred.TextOutputFormat

scala> import org.apache.hadoop.io.Text
import org.apache.hadoop.io.Text

scala> import org.apache.hadoop.io.IntWritable
import org.apache.hadoop.io.IntWritable

scala> val counts = sc.textFile("hdfs:///books*").flatMap(line => line.split("\n")).map(w => { val x = w.split(","); (x(0) + x(1), 1) }).reduceByKey(_ + _)
counts: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[16] at reduceByKey at <console>:27
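The map step splits each comma-separated line and concatenates the first two fields into the grouping key; reduceByKey then sums the 1s per key. (Note that textFile already yields one record per line, so the flatMap over "\n" is effectively a no-op.) A minimal sketch of what happens to a single record, using a hypothetical sample value since the layout of the books* files is not shown:

scala> val w = "2015-07-01,lxw1234.com,extra"   // hypothetical input line
scala> val x = w.split(",")
scala> (x(0) + x(1), 1)                         // ("2015-07-01lxw1234.com", 1), the pair fed to reduceByKey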

scala> counts.saveAsHadoopFile("/tmp/111lxw1234.com/", classOf[Text], classOf[IntWritable], classOf[TextOutputFormat[Text, IntWritable]])
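The explicit Text/IntWritable classes route the output through the old-API mapred TextOutputFormat, which is what the imports above are for. If that level of Hadoop OutputFormat control is not needed, the same pairs can be written with the simpler built-in (a sketch; the output path here is a hypothetical variant of the one above):

scala> counts.saveAsTextFile("/tmp/111lxw1234.com-text/")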