/**
 * Demo entry point: reads a Kudu table via the Spark datasource, then reads a
 * Hive table, normalizes its schema with `fmt_df`, and upserts it into Kudu.
 */
def main(args: Array[String]): Unit = {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("SparkKudu")
    .enableHiveSupport()
    .getOrCreate()
  // Comma-separated Kudu master addresses (7051 is the default master RPC port).
  val kuduMaster = "mmcsm2.mm.cn:7051,mmcsm1.mm.cn:7051,mmcscm.mm.cn:7051"
  val kuduTableName = "test.test_spark"
  val kuduOptions: Map[String, String] = Map(
    "kudu.master" -> kuduMaster,
    "kudu.table" -> kuduTableName
  )

  // Spark reads from Kudu through the datasource API.
  val customerReadDF = spark.sqlContext.read.options(kuduOptions).kudu
  customerReadDF.show()

  // Spark reads from Hive and writes into Kudu.
  val df = spark.table("test.test_spark_hive")
  // val pks = Seq("primary", "key").map(_.toLowerCase)
  val kuduContext = new KuduContext(kuduMaster, spark.sparkContext)
  val res = fmt_df(df)
  // Upsert (insert-or-update) the normalized rows into the Kudu table.
  kuduContext.upsertRows(res, kuduTableName)

  import org.apache.kudu.client.KuduClient
  // Admin client for DDL (alterTable) operations; 10-minute admin timeout.
  val client = new KuduClient.KuduClientBuilder("mmcsm2.mm.cn,mmcsm1.mm.cn,mmcscm.mm.cn").defaultAdminOperationTimeoutMs(600000).build
  try {
    // Add a non-nullable column:
    // client.alterTable(kuduTableName, new AlterTableOptions().addColumn("device_id", Type.STRING, ""));
    // Add a nullable column:
    // client.alterTable(tableName, new AlterTableOptions().addNullableColumn("site_id", Type.INT64));
  } finally {
    // KuduClient owns sockets and background threads — always release it.
    client.close()
  }

  // Alternative write path, via the datasource API:
  //res.write.options(Map("kudu.master" -> kuduMaster , "kudu.table" -> kuduTableName))
  // .mode("append").format("org.apache.kudu.spark.kudu").save

  spark.stop()
}
/**
 * Casts every column of `df` to the Kudu-compatible schema produced by
 * [[fmt_schema]] (names lowercased; date/decimal/timestamp columns become
 * strings). No primary-key columns are forced non-nullable here (empty `pk`).
 *
 * @param df source DataFrame (e.g. read from Hive)
 * @return a DataFrame whose columns are cast to the normalized schema
 */
def fmt_df(df: DataFrame) = {
  val targetSchema = fmt_schema(df.schema, Seq())
  // Immutable fold replaces the original `var` + foreach mutation: each step
  // re-types one column; column resolution is case-insensitive in Spark, so
  // the lowercased names still match the originals.
  targetSchema.foldLeft(df) { (acc, field) =>
    acc.withColumn(field.name, col(field.name).cast(field.dataType))
  }
}
/**
 * Normalizes a schema for writing to Kudu:
 *  - column names are lowercased;
 *  - `DateType`, `DecimalType`, and `TimestampType` are mapped to
 *    `StringType` (types this pipeline stores as strings in Kudu);
 *  - columns named in `pk` are forced non-nullable (Kudu primary-key
 *    columns must not be nullable).
 *
 * @param sch source schema
 * @param pk  primary-key column names, matched case-insensitively
 * @return the normalized schema
 */
def fmt_schema(sch: StructType, pk: Seq[String]) = {
  // Set lookup replaces the original per-field linear scan over `pk`.
  val pkNames = pk.map(_.toLowerCase).toSet
  val fields = sch.map { field =>
    val name = field.name.toLowerCase
    val dataType = field.dataType match {
      case DateType | TimestampType => StringType
      case _: DecimalType           => StringType
      case other                    => other
    }
    // Primary-key columns are always non-nullable; others keep their flag.
    val nullable = field.nullable && !pkNames.contains(name)
    StructField(name, dataType, nullable, field.metadata)
  }
  StructType(fields)
}
// Reference: blog post "Spark reading/writing Kudu" (scraped page footer removed).