import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexRDD}
import org.apache.spark.{SparkConf, SparkContext}
// Method 1: detect convergence by joining the new vertex set with the previous one
/**
 * Method 1: repeat `aggregateMessages` until a join against the previous
 * round shows that no vertex attribute changed.
 *
 * Every round sets each vertex to the maximum of `source attribute + 1` over
 * its incoming edges (0 when it has none). Starting from all-zero attributes
 * on an acyclic graph this settles on the number of edges in the longest chain
 * ending at each vertex. On a graph with a directed cycle the values keep
 * growing and the recursion does not terminate.
 */
object aggregate_demo2 {

  /**
   * Sends `source attribute + 1` to the destination vertex of the edge.
   *
   * The type arguments of `EdgeContext[Int, String, Int]` are: the vertex
   * attribute (the running distance), the edge attribute (the relationship
   * name) and the message type.
   */
  def sendMsg(ec: EdgeContext[Int, String, Int]): Unit = {
    ec.sendToDst(ec.srcAttr + 1)
  }

  /** Combines two messages addressed to the same vertex by keeping the larger one. */
  def mergeMsg(a: Int, b: Int): Int = {
    math.max(a, b)
  }

  /**
   * Recursively propagates distances along edges until a fixed point is reached.
   *
   * @param g graph whose vertex attributes hold the current distances
   * @param i round counter; it is incremented on every recursive call but is
   *          not consulted by the termination test of this variant
   * @return the first graph for which one more round changes no vertex attribute
   */
  @scala.annotation.tailrec
  def sumEdgeCount(g: Graph[Int, String], i: Int = 1): Graph[Int, String] = {
    // One round of message passing; only vertices that received a message appear here.
    val verts = g.aggregateMessages[Int](sendMsg, mergeMsg)
    // Demo output: the merged message of every vertex that received one this round.
    verts.collect().foreach(println(_))

    // Build the next graph on top of g so that every vertex of g is kept;
    // vertices that received nothing this round get 0. The edges are untouched.
    val g2 = g.outerJoinVertices(verts)((_, _, received) => received.getOrElse(0))

    // Compare each vertex with its previous value. Testing for "any difference"
    // (rather than summing signed differences) cannot be fooled by increases and
    // decreases cancelling out, and is well defined for an empty vertex set.
    val changed = !g2.vertices
      .join(g.vertices)
      .filter { case (_, (next, prev)) => next != prev }
      .isEmpty()

    // Stop once nothing moved; otherwise run another round on the new graph.
    if (changed)
      sumEdgeCount(g2, i + 1)
    else
      g
  }

  /** Builds a five-vertex sample graph, runs [[sumEdgeCount]] and prints the resulting vertices. */
  def main(args: Array[String]): Unit = {
    // Set up a local Spark runtime.
    val conf = new SparkConf().setAppName("SimpleGraphX").setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      sc.setLogLevel("WARN")

      // Vertices: (id, name).
      val myVertices = sc.parallelize(Array(
        (1L, "Suan"),
        (2L, "David"),
        (3L, "Judy"),
        (4L, "Mike"),
        (5L, "Lisa")
      ))

      // Edges: source id, destination id, relationship name.
      val myEdges = sc.parallelize(Array(
        Edge(1L, 2L, "friends"),
        Edge(2L, 3L, "friends"),
        Edge(3L, 4L, "colleagues"),
        Edge(3L, 5L, "friends"),
        Edge(4L, 5L, "colleagues")
      ))

      val myGraph = Graph(myVertices, myEdges)

      // Replace every vertex attribute with 0 as the starting distance.
      val initGraph = myGraph.mapVertices((_, _) => 0)

      // In this sample vertex 1 is the only vertex without incoming edges, so the
      // result is each vertex's farthest distance from vertex 1.
      sumEdgeCount(initGraph).vertices.collect().foreach(println(_))
    } finally {
      // Always release the Spark context, even if the job fails.
      sc.stop()
    }
  }
}
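// Method 2: compare the largest vertex attribute with the recursion depth.
// NOTE: this object has the same name as the one in method 1; keep only one of them in a compiled file.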
/**
 * Method 2: repeat `aggregateMessages` and stop as soon as the largest vertex
 * attribute no longer keeps up with the round number.
 *
 * Every round sets each vertex to the maximum of `source attribute + 1` over
 * its incoming edges (0 when it has none). On a graph with a directed cycle
 * the maximum grows every round and the recursion does not terminate.
 */
object aggregate_demo2 {

  /** Sends `source attribute + 1` to the destination vertex of the edge. */
  def sendMsg(ec: EdgeContext[Int, String, Int]): Unit = {
    ec.sendToDst(ec.srcAttr + 1)
  }

  /** Combines two messages addressed to the same vertex by keeping the larger one. */
  def mergeMsg(a: Int, b: Int): Int = {
    math.max(a, b)
  }

  /**
   * Recursively propagates distances along edges, using the round number as
   * the termination test.
   *
   * The test relies on the caller starting with all-zero vertex attributes and
   * `i = 1`: the largest attribute can then grow by at most one per round, so
   * it equals the round number exactly while values are still growing. Once
   * the largest attribute is smaller than `i`, the input graph is returned.
   *
   * @param g graph whose vertex attributes hold the current distances
   * @param i 1-based number of the current round
   * @return the graph from the last round in which the maximum still grew
   */
  @scala.annotation.tailrec
  def sumEdgeCount(g: Graph[Int, String], i: Int = 1): Graph[Int, String] = {
    // One round of message passing; only vertices that received a message appear here.
    val verts = g.aggregateMessages[Int](sendMsg, mergeMsg)

    // Build the next graph on top of g so that every vertex of g is kept;
    // vertices that received nothing this round get 0. The edges are untouched.
    val g2 = g.outerJoinVertices(verts)((_, _, received) => received.getOrElse(0))

    // Largest attribute after this round. fold (unlike max) is defined for an
    // empty vertex set, where it yields Int.MinValue and ends the recursion.
    val check = g2.vertices
      .map { case (_, attr) => attr }
      .fold(Int.MinValue)((a, b) => math.max(a, b))

    if (check >= i)
      sumEdgeCount(g2, i + 1)
    else
      g
  }

  /** Builds a five-vertex sample graph, runs [[sumEdgeCount]] and prints the resulting vertices. */
  def main(args: Array[String]): Unit = {
    // Set up a local Spark runtime.
    val conf = new SparkConf().setAppName("SimpleGraphX").setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      sc.setLogLevel("WARN")

      // Vertices: (id, name).
      val myVertices = sc.parallelize(Array(
        (1L, "Suan"),
        (2L, "David"),
        (3L, "Judy"),
        (4L, "Mike"),
        (5L, "Lisa")
      ))

      // Edges: source id, destination id, relationship name.
      val myEdges = sc.parallelize(Array(
        Edge(1L, 2L, "friends"),
        Edge(2L, 3L, "friends"),
        Edge(3L, 4L, "colleagues"),
        Edge(3L, 5L, "friends"),
        Edge(4L, 5L, "colleagues")
      ))

      val myGraph = Graph(myVertices, myEdges)

      // Replace every vertex attribute with 0 as the starting distance.
      val initGraph = myGraph.mapVertices((_, _) => 0)

      // In this sample vertex 1 is the only vertex without incoming edges, so the
      // result is each vertex's farthest distance from vertex 1.
      sumEdgeCount(initGraph, i = 1).vertices.collect().foreach(println(_))
    } finally {
      // Always release the Spark context, even if the job fails.
      sc.stop()
    }
  }
}