// Program purpose: load vertex and edge data, build the edge RDD and vertex RDD, and construct the graph.
import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
// 字符串轉碼,解決亂碼問題
import java.nio.charset.StandardCharsets
import scala.io.Source
import collection.mutable.ArrayBuffer
object SimpleApp {

  /** Reads a UTF-8 text file, applies `parse` to every line, and returns the
    * results. Always closes the underlying source (the original code leaked
    * both file handles by never calling `close()`).
    *
    * @param path  absolute path of the tab-separated input file
    * @param parse converts one raw line into the desired record type
    */
  private def parseFile[A](path: String)(parse: String => A): Seq[A] = {
    // UTF-8 is specified explicitly to avoid mojibake with non-ASCII attributes.
    val source = Source.fromFile(path, "UTF-8")
    try source.getLines().map(parse).toVector
    finally source.close()
  }

  /** Entry point: loads vertices and edges from TSV files, builds a GraphX
    * graph, and prints its vertices and triplets.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(conf)

    // Vertex lines have the shape "<vertexId>\t<attribute>".
    val vertexArr = parseFile("/vagrant/dev_program/spark_test/graphx/python_process_guijing/zygj_vertice_attr.txt") { line =>
      val pp = line.split("\t")
      (pp(0).toLong, pp(1))
    }
    println(vertexArr.length)

    // Edge lines have the shape "<srcId>\t<dstId>\t<attribute>".
    val edgeArr = parseFile("/vagrant/dev_program/spark_test/graphx/python_process_guijing/zygj_edge_attr.txt") { line =>
      val ee = line.split("\t")
      Edge(ee(0).toLong, ee(1).toLong, ee(2))
    }

    // Build the vertex RDD and the edge RDD.
    val users: RDD[(VertexId, String)] = sc.parallelize(vertexArr)
    val relationships: RDD[Edge[String]] = sc.parallelize(edgeArr)

    // Supply a default vertex attribute so that edges referencing an id absent
    // from `users` get this placeholder instead of null. The original comment
    // stated this intent but never actually passed the default.
    val graph = Graph(users, relationships, "Missing")

    // Dump the graph contents: all vertices, then all triplets.
    graph.vertices.collect().foreach(println(_))
    graph.triplets
      .map(triplet => triplet.srcAttr + "----->" + triplet.dstAttr + " attr:" + triplet.attr)
      .collect()
      .foreach(println(_))
  }
}