Source code:
package com.ydj.sql
import org.apache.spark.sql.SparkSession
import scala.collection.mutable.ArrayBuffer
/**
 * @Author: yuandejin
 * @Date: 2019-09-04 13:43
 * @Description: Convert a Scala Map into a Spark DataFrame.
 */
object MapToDataFrame {
  def main(args: Array[String]): Unit = {
    // 1. Build the SparkSession
    val sparkSession = SparkSession
      .builder()
      .appName("MapToDataFrame")
      .master("local")
      .getOrCreate()
    // 2. Implicit conversions needed to turn an RDD into a DataFrame
    import sparkSession.implicits._
    // 3. A plain Scala Map
    val map = Map("aa" -> "aaa", "bb" -> "bbb")
    // 4. All keys of the map
    val mk = map.keys
    // 5. Create an RDD that holds the map
    val rdd = sparkSession.sparkContext.parallelize(Seq(map))
    // 6. Look up the value of every key, build a new RDD, and convert it to a DataFrame
    val frame = rdd.map { x =>
      val bb = new ArrayBuffer[String]()
      for (k <- mk) {
        bb += x(k)
      }
      bb
    }.map(x => (x(0), x(1))).toDF("k1", "k2")
    // 7. Print the result
    frame.show()
  }
}
Run result:
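Assuming the map's keys iterate in insertion order (aa, then bb, which holds for small immutable Scala Maps), frame.show() should print:

+---+---+
| k1| k2|
+---+---+
|aaa|bbb|
+---+---+

As a side note, because the Map already lives on the driver, the detour through an RDD is not strictly necessary here. A minimal sketch, assuming the same SparkSession and sparkSession.implicits._ import as above (the names oneRow and perEntry are illustrative, not from the original post):

// One row, one column per map value: the same shape as frame above
val oneRow = Seq((map("aa"), map("bb"))).toDF("k1", "k2")
oneRow.show()

// Or one row per map entry, laid out as key/value columns
val perEntry = map.toSeq.toDF("key", "value")
perEntry.show()

The RDD version in the listing still matters when the maps arrive inside a distributed dataset rather than as a single driver-side value; the sketch only covers the driver-local case.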