利用UDF對dataframe列數據進行修改

/*
import org.apache.spark.sql.functions._
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
*/

/*
https://stackoverflow.com/questions/34614239/how-to-apply-a-function-to-a-column-of-a-spark-dataframe
https://jaceklaskowski.gitbooks.io/mastering-spark-sql/spark-sql-Column.html
https://www.jianshu.com/p/833b72adb2b6
*/

import org.apache.spark.sql.functions.udf

val df = Seq((1, "jeden"), (2, "dwa"), (3, "jerry"), (0,"tom")).toDF("number", "polish")

scala> df.show
+------+------+
|number|polish|
+------+------+
|     1| jeden|
|     2|   dwa|
|     3| jerry|
|     0|   tom|
+------+------+

val label_class = udf((x:Int) => if(x>0) 1 else 0)

scala> df.withColumn("number", label_class($"number")).show
+------+------+
|number|polish|
+------+------+
|     1| jeden|
|     1|   dwa|
|     1| jerry|
|     0|   tom|
+------+------+

scala> val data = df.withColumn("number", label_class($"number"))
data: org.apache.spark.sql.DataFrame = [number: int, polish: string]

scala> data
res3: org.apache.spark.sql.DataFrame = [number: int, polish: string]

scala> data.show
+------+------+
|number|polish|
+------+------+
|     1| jeden|
|     1|   dwa|
|     1| jerry|
|     0|   tom|
+------+------+
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章