spark-GraphX 图的聚合操作 aggregateMessages（计算图中各节点到根节点的最远距离）

原創

2020-04-20 08:36

import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexRDD}
import org.apache.spark.{SparkConf, SparkContext}

方法一：join

/**
 * Approach 1: repeatedly propagate "distance + 1" along every edge with
 * `aggregateMessages` and stop once a round no longer changes any vertex.
 *
 * Vertex attributes are `Int` distances, edge attributes are `String`
 * relationship names, and messages are `Int` candidate distances.
 */
object aggregate_demo2 {

  import scala.annotation.tailrec

  /**
   * Send function: for each edge, sends the source vertex's attribute plus one
   * to the destination vertex.
   *
   * @param ec edge context typed as [vertex attribute, edge attribute, message]
   */
  def sendMsg(ec: EdgeContext[Int, String, Int]): Unit = {
    ec.sendToDst(ec.srcAttr + 1)
  }

  /**
   * Merge function: when a vertex receives several messages, keeps the largest.
   *
   * @param a one received value
   * @param b another received value
   * @return the larger of `a` and `b`
   */
  def mergeMsg(a: Int, b: Int): Int = {
    math.max(a, b)
  }

  /**
   * Runs one propagation round and recurses until the vertex attributes stop
   * changing.
   *
   * Does not terminate on a graph containing a directed cycle, because the
   * values on the cycle keep growing every round.
   *
   * @param g graph whose vertex attributes hold the current distances
   * @param i round counter; not used by the termination test in this variant and
   *          kept only so the signature stays compatible with existing callers
   * @return the first graph for which another round changes no vertex attribute
   */
  @tailrec
  def sumEdgeCount(g: Graph[Int, String], i: Int = 1): Graph[Int, String] = {
    // One round of message passing; only vertices that received a message appear here.
    val verts = g.aggregateMessages[Int](sendMsg, mergeMsg)
    // Per-round trace of the values computed in this round.
    verts.collect.foreach(println(_))

    // Rebuild a graph from the new vertex values and the (never changing) edges.
    // NOTE(review): vertices referenced by an edge but absent from `verts` get
    // Graph.apply's default attribute (expected to be 0 for Int) - confirm against
    // the Spark version in use.
    val g2 = Graph(verts, g.edges)

    // Converged only when no vertex differs between the two rounds. Comparing
    // per vertex (instead of summing signed differences) cannot be fooled by
    // increases and decreases cancelling out, and isEmpty() is safe on an empty
    // RDD where reduce() would throw.
    val changed = !g2.vertices
      .join(g.vertices)
      .filter { case (_, (updated, previous)) => updated != previous }
      .isEmpty()

    if (changed)
      sumEdgeCount(g2, i + 1)
    else
      g
  }

  /**
   * Builds a five-vertex sample graph, initialises every vertex to 0 and prints
   * the converged (vertexId, distance) pairs.
   */
  def main(args: Array[String]): Unit = {

    // Local Spark runtime for the demo.
    val conf = new SparkConf().setAppName("SimpleGraphX").setMaster("local[2]")
    val sc = new SparkContext(conf)

    try {
      sc.setLogLevel("WARN")

      // Vertices: (id, name).
      val myVertices = sc.parallelize(Array(
        (1L, "Suan"),
        (2L, "David"),
        (3L, "Judy"),
        (4L, "Mike"),
        (5L, "Lisa")
      ))

      // Directed edges labelled with the relationship between the two people.
      val myEdges = sc.parallelize(Array(
        Edge(1L, 2L, "friends"),
        Edge(2L, 3L, "friends"),
        Edge(3L, 4L, "colleagues"),
        Edge(3L, 5L, "friends"),
        Edge(4L, 5L, "colleagues")
      ))

      val myGraph = Graph(myVertices, myEdges)

      // Replace the name attribute of every vertex with the initial distance 0.
      val initGraph = myGraph.mapVertices((_, _) => 0)

      // Iterate to convergence and print each vertex with its farthest distance
      // from the start of the chain (vertex 1 in this sample).
      sumEdgeCount(initGraph).vertices.collect.foreach(println(_))
    } finally {
      // Always release the SparkContext, even if a job fails.
      sc.stop()
    }
  }
}

方法二：递归次数计数（用当前最大顶点属性值与递归次数比较来判断终止）

/**
 * Approach 2: same propagation as the join-based variant, but termination is
 * decided by comparing the largest vertex value with the round number instead
 * of joining two vertex sets.
 *
 * Vertex attributes are `Int` distances, edge attributes are `String`
 * relationship names, and messages are `Int` candidate distances.
 */
object aggregate_demo2 {

  import scala.annotation.tailrec

  /**
   * Send function: for each edge, sends the source vertex's attribute plus one
   * to the destination vertex.
   *
   * @param ec edge context typed as [vertex attribute, edge attribute, message]
   */
  def sendMsg(ec: EdgeContext[Int, String, Int]): Unit = {
    ec.sendToDst(ec.srcAttr + 1)
  }

  /**
   * Merge function: when a vertex receives several messages, keeps the largest.
   *
   * @param a one received value
   * @param b another received value
   * @return the larger of `a` and `b`
   */
  def mergeMsg(a: Int, b: Int): Int = {
    math.max(a, b)
  }

  /**
   * Runs one propagation round and recurses while the largest vertex value
   * still keeps up with the round number.
   *
   * The test relies on the caller starting from all-zero vertex attributes
   * with `i = 1`: as long as the longest chain is still growing, the maximum
   * value after round `i` equals `i`; once it falls behind, the previous graph
   * is returned. Does not terminate on a graph containing a directed cycle.
   *
   * @param g graph whose vertex attributes hold the current distances
   * @param i 1-based number of the current round
   * @return the graph from the last round in which the maximum still grew
   */
  @tailrec
  def sumEdgeCount(g: Graph[Int, String], i: Int = 1): Graph[Int, String] = {
    // One round of message passing; only vertices that received a message appear here.
    val verts = g.aggregateMessages[Int](sendMsg, mergeMsg)

    // Rebuild a graph from the new vertex values and the (never changing) edges.
    val g2 = Graph(verts, g.edges)

    // Largest distance seen this round. fold with Int.MinValue equals max() on
    // non-empty data but, unlike max(), does not throw when the graph has no
    // vertices left (e.g. no edges); the comparison below then ends the recursion.
    val check = g2.vertices.map(_._2).fold(Int.MinValue)(math.max(_, _))

    if (check >= i)
      sumEdgeCount(g2, i + 1)
    else
      g
  }

  /**
   * Builds a five-vertex sample graph, initialises every vertex to 0 and prints
   * the resulting (vertexId, distance) pairs.
   */
  def main(args: Array[String]): Unit = {

    // Local Spark runtime for the demo.
    val conf = new SparkConf().setAppName("SimpleGraphX").setMaster("local[2]")
    val sc = new SparkContext(conf)

    try {
      sc.setLogLevel("WARN")

      // Vertices: (id, name).
      val myVertices = sc.parallelize(Array(
        (1L, "Suan"),
        (2L, "David"),
        (3L, "Judy"),
        (4L, "Mike"),
        (5L, "Lisa")
      ))

      // Directed edges labelled with the relationship between the two people.
      val myEdges = sc.parallelize(Array(
        Edge(1L, 2L, "friends"),
        Edge(2L, 3L, "friends"),
        Edge(3L, 4L, "colleagues"),
        Edge(3L, 5L, "friends"),
        Edge(4L, 5L, "colleagues")
      ))

      val myGraph = Graph(myVertices, myEdges)

      // Replace the name attribute of every vertex with the initial distance 0.
      val initGraph = myGraph.mapVertices((_, _) => 0)

      // Iterate and print each vertex with its farthest distance from the start
      // of the chain (vertex 1 in this sample).
      sumEdgeCount(initGraph, i = 1).vertices.collect.foreach(println(_))
    } finally {
      // Always release the SparkContext, even if a job fails.
      sc.stop()
    }
  }

}

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

spark-GraphX 图像的聚合操作aggregate （计算图中各节点到根节点的最远距离）

Flume 將本地文件通過 idea 進行wordcount

idea 報錯 org.springframework.beans.factory.UnsatisfiedDependencyException:

idea mvn install報錯Failed to execute goal org.apache.maven.plugins:maven-surefire-plugin:2.12.4:test

idea開發spark環境搭建以及第一個wordcount程序

spark-GraphX 圖像的聚合操作aggregate （計算圖中各節點到根節點的最遠距離）

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結