Spark 讀寫 Kudu

 /**
   * Demonstrates reading from and writing to Kudu with Spark.
   *
   * Builds a local, Hive-enabled SparkSession, loads the Kudu table `test.test_spark`
   * and prints a sample with `show()`, then reads the Hive table `test.test_spark_hive`,
   * normalises its column types with `fmt_df`, and upserts the rows into the same Kudu
   * table. The session is stopped when the job finishes or fails.
   *
   * @param args command-line arguments; not used
   */
 def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("SparkKudu")
      .enableHiveSupport()
      .getOrCreate()
    try {
      val kuduMaster = "mmcsm2.mm.cn:7051,mmcsm1.mm.cn:7051,mmcscm.mm.cn:7051"
      val kuduTableName = "test.test_spark"
      val kuduOptions: Map[String, String] = Map(
        "kudu.master" -> kuduMaster,
        "kudu.table" -> kuduTableName
      )

      /**
        * Spark reads from Kudu.
        */
      val customerReadDF = spark.sqlContext.read.options(kuduOptions).kudu
      customerReadDF.show()

      /**
        * Spark reads from Hive and writes to Kudu.
        */
      val df = spark.table("test.test_spark_hive")
      // val pks = Seq("primary", "key").map(_.toLowerCase)
      val kuduContext = new KuduContext(kuduMaster, spark.sparkContext)
      val res = fmt_df(df)
      // Write the rows (insert-or-update keyed on the Kudu table's primary key).
      kuduContext.upsertRows(res, kuduTableName)

      // Schema changes need a KuduClient. Build one only when it is actually used and
      // always close it afterwards; an unused, unclosed client leaks its resources.
      //
      // import org.apache.kudu.client.KuduClient
      // val client = new KuduClient.KuduClientBuilder(kuduMaster)
      //   .defaultAdminOperationTimeoutMs(600000).build
      // try {
      //   // Add a non-nullable column (requires a default value)
      //   client.alterTable(kuduTableName, new AlterTableOptions().addColumn("device_id", Type.STRING, ""))
      //   // Add a nullable column
      //   client.alterTable(kuduTableName, new AlterTableOptions().addNullableColumn("site_id", Type.INT64))
      // } finally client.close()

      // Alternative way to write (DataFrame writer API):
      // res.write.options(Map("kudu.master" -> kuduMaster, "kudu.table" -> kuduTableName))
      //   .mode("append").format("org.apache.kudu.spark.kudu").save
    } finally {
      // Stop the session even if the read or the upsert throws.
      spark.stop()
    }
  }

  /**
    * Casts every column of `df` to the type chosen by `fmt_schema` and renames it to the
    * lower-cased name that `fmt_schema` produces.
    *
    * `fmt_schema` is called with an empty primary-key list, so it only affects column
    * names and data types here.
    *
    * @param df the DataFrame to normalise
    * @return a DataFrame with one `withColumn` cast applied per field of the formatted schema
    */
  def fmt_df(df: DataFrame): DataFrame =
    fmt_schema(df.schema, Seq()).fields.foldLeft(df) { (acc, field) =>
      // withColumn replaces the column whose name matches `field.name`, applying the cast.
      acc.withColumn(field.name, col(field.name).cast(field.dataType))
    }
  /**
    * Derives a new schema from `sch` with lower-cased field names and with date, decimal
    * and timestamp fields mapped to `StringType`.
    *
    * Fields whose lower-cased name matches one of `pk` (compared case-insensitively) are
    * marked non-nullable; every other field keeps its original nullability. Field
    * metadata is carried over unchanged.
    *
    * @param sch the source schema
    * @param pk  primary-key column names; matching fields become non-nullable
    * @return the formatted schema, with fields in the same order as `sch`
    */
  def fmt_schema(sch: StructType, pk: Seq[String]): StructType = {
    // Lower-case the key names once instead of once per field.
    val pkLower = pk.map(_.toLowerCase).toSet
    val fields = sch.fields.map { field =>
      val lowerName = field.name.toLowerCase
      val targetType = field.dataType match {
        case DateType | TimestampType => StringType
        case _: DecimalType           => StringType
        case other                    => other
      }
      field.copy(
        name = lowerName,
        dataType = targetType,
        nullable = field.nullable && !pkLower.contains(lowerName)
      )
    }
    StructType(fields)
  }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章