Spark Graphx計算有向環

在工作過程中有使用到Spark Graphx做一些圖的計算,開發環境如下:
開發工具:IDEA
JDK:1.7.0_80
Maven:3.3.9

對於圖計算,其實有很多技術框架可以選擇,例如Python的NetworkX,Spark的GraphX,阿里也有開源框架。此處我們使用GraphX作爲計算框架;由於只是個人的簡單案例,就以單機環境實現,並且使用簡單的輸入數據。

在GraphX中,類似的操作通常可以用聚合方法或者Pregel來實現,詳情可以參考官方文檔(Spark GraphX),其中有類似的案例。話不多說,上代碼:

package com.pnlorf.graphx.pnlorf.graphx.circle

import org.apache.spark.graphx._
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

/**
 * Finds directed cycles in a small hard-coded sample graph with GraphX Pregel.
 *
 * Every vertex accumulates the edge paths that end at it. After the Pregel run,
 * the paths whose first edge starts where the last edge ends are reported as
 * cycles, one representative per distinct cycle.
 *
 * date: 2019/10/12
 * package: com.pnlorf.graphx.pnlorf
 */
object CircleGraph {

  /**
   * Builds a grouping key that is the same for every rotation of one directed
   * cycle and different for any other cycle.
   *
   * The edge list is rotated so that it starts at the edge with the smallest
   * `fm` and is then rendered edge by edge. Unlike a sorted bag of vertex names,
   * this keeps two cycles that visit the same vertices in opposite directions
   * apart.
   *
   * @param cycle non-empty edge list whose last edge leads back to the first edge's source
   * @return the rotation-independent key
   */
  private def cycleKey(cycle: ArrayBuffer[EdgeInfo]): String = {
    val start = cycle.indices.minBy(i => cycle(i).fm)
    val (beforeStart, fromStart) = cycle.splitAt(start)
    (fromStart ++ beforeStart).map(edge => s"${edge.fm}->${edge.to}").mkString("_")
  }

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setMaster("local").setAppName("Graphx_Circle")

    val sc: SparkContext = new SparkContext(conf)

    // Stop the context even when the job fails, so the local Spark runtime is released.
    try {
      val myVertices = sc.parallelize(Array(
        (1L, "A"),
        (2L, "B"),
        (3L, "C"),
        (4L, "D"),
        (5L, "E"),
        (6L, "F"),
        (7L, "G"),
        (8L, "H"),
        (9L, "I")
      ))

      val myEdges = sc.parallelize(Array(
        Edge(1L, 2L, new EdgeInfo("1", "2")),
        Edge(2L, 3L, new EdgeInfo("2", "3")),
        Edge(3L, 4L, new EdgeInfo("3", "4")),
        Edge(4L, 5L, new EdgeInfo("4", "5")),
        Edge(5L, 1L, new EdgeInfo("5", "1")),
        Edge(5L, 3L, new EdgeInfo("5", "3")),
        Edge(6L, 7L, new EdgeInfo("6", "7")),
        Edge(7L, 6L, new EdgeInfo("7", "6")),
        Edge(7L, 8L, new EdgeInfo("7", "8")),
        Edge(8L, 7L, new EdgeInfo("8", "7")),
        Edge(1L, 9L, new EdgeInfo("1", "9")),
        Edge(9L, 1L, new EdgeInfo("9", "1"))
      ))

      // Edge-count threshold: only cycles with strictly MORE edges than this are printed,
      // so with the value 2 the two-edge cycles (e.g. 6 -> 7 -> 6) are not reported.
      val minSize = 2
      // Maximum edge count: passed to Pregel as the iteration limit, which bounds path growth.
      val maxSize = 5

      val myGraph = Graph(myVertices, myEdges)

      // Message type: a batch of paths, each path being an ordered list of edges.
      type A = ArrayBuffer[ArrayBuffer[EdgeInfo]]

      // Vertex attribute: every path collected so far that ends at the vertex.
      type VD = ArrayBuffer[ArrayBuffer[EdgeInfo]]

      type ED = EdgeInfo

      // The original vertex labels are not needed; every vertex starts with no paths.
      val graph = myGraph.mapVertices((_, _) => new ArrayBuffer[ArrayBuffer[EdgeInfo]]())

      /**
       * Initial message delivered to every vertex.
       *
       * @return an empty batch of paths
       */
      def initialMsg(): A = {
        new ArrayBuffer[ArrayBuffer[EdgeInfo]]()
      }

      /**
       * Vertex program: merges the received paths into the vertex attribute.
       *
       * @param vid vertex id
       * @param vd  paths already stored on the vertex
       * @param msg paths received in this superstep
       * @return a new buffer holding the stored paths followed by the received ones
       */
      def vprog(vid: Long, vd: VD, msg: A): VD = {
        val merged = new ArrayBuffer[ArrayBuffer[EdgeInfo]]()
        merged.appendAll(vd)
        merged.appendAll(msg)
        merged
      }

      /**
       * Extends the source vertex's paths by the current edge and sends them to
       * the destination vertex.
       *
       * A source without any path sends the edge on its own. Otherwise every
       * stored path is extended, except paths that already contain an edge
       * ending at this edge's target.
       *
       * @param edgeTriplet the edge together with its endpoint attributes
       * @return a single message addressed to the destination vertex
       */
      def sendMsg(edgeTriplet: EdgeTriplet[VD, ED]): Iterator[(Long, A)] = {
        val edge = edgeTriplet.attr
        val outgoing: A =
          if (edgeTriplet.srcAttr.isEmpty) {
            ArrayBuffer[ArrayBuffer[EdgeInfo]](ArrayBuffer[EdgeInfo](edge))
          } else {
            edgeTriplet.srcAttr
              .filter(path => !path.exists(_.to == edge.to))
              .map(path => path :+ edge)
          }

        Iterator((edgeTriplet.dstId, outgoing))
      }

      /** Combines two message batches addressed to the same vertex. */
      def mergeMsg(a1: A, a2: A): A = {
        a1 ++ a2
      }

      val result = Pregel(graph, initialMsg(), maxSize, EdgeDirection.Out)(vprog, sendMsg, mergeMsg).cache()

      result.vertices
        // Keep only closed paths: the first edge starts where the last edge ends.
        .flatMap { case (_, paths) =>
          paths.filter(path => path.head.fm.equalsIgnoreCase(path.last.to))
        }
        // The same cycle is found once per vertex on it (as a rotation); group the rotations.
        .map(cycle => (cycleKey(cycle), cycle))
        .groupByKey()
        .map(_._2.head)
        .filter(cycle => cycle.size > minSize)
        .collect()
        .foreach(cycle => {
          println("==========================")
          cycle.foreach(edge => {
            println(edge.toString)
          })
          println("*************************")
        })

      result.unpersist(blocking = false)
    } finally {
      sc.stop()
    }
  }

}

另一個自定義的類:

package com.pnlorf.graphx.pnlorf.graphx.circle

/**
 * Edge attribute that records the labels of an edge's two endpoints.
 *
 * Two instances are equal when both `fm` and `to` are equal. The fields are
 * mutable, so an instance should not be modified while it is used as a key in
 * a hash-based collection.
 *
 * date: 2019/12/9
 * package: com.pnlorf.graphx.pnlorf
 */
class EdgeInfo extends Serializable {

  /** Label of the vertex the edge starts from. */
  var fm: String = ""

  /** Label of the vertex the edge points to. */
  var to: String = ""

  /**
   * Creates an edge attribute with both endpoint labels set.
   *
   * @param fm label of the source vertex
   * @param to label of the target vertex
   */
  def this(fm: String, to: String) = {
    this()
    this.fm = fm
    this.to = to
  }

  /** Lets subclasses opt out of equality with plain `EdgeInfo` instances. */
  def canEqual(other: Any): Boolean = other.isInstanceOf[EdgeInfo]

  override def equals(other: Any): Boolean = other match {
    case that: EdgeInfo =>
      (that canEqual this) &&
        fm == that.fm &&
        to == that.to
    case _ => false
  }

  /**
   * Hash consistent with `equals`: `31 * hash(fm) + hash(to)`.
   *
   * `##` is used instead of `hashCode()` because it yields 0 for null, so a
   * null field no longer throws here while `equals` accepts it. For non-null
   * strings the value is unchanged.
   */
  override def hashCode(): Int = 31 * fm.## + to.##

  override def toString: String = s"EdgeInfo(fm=$fm, to=$to, hashCode=$hashCode)"
}

以上代碼可直接執行,結果輸出如下:

==========================
EdgeInfo(fm=4, to=5, hashCode=1665)
EdgeInfo(fm=5, to=1, hashCode=1692)
EdgeInfo(fm=1, to=2, hashCode=1569)
EdgeInfo(fm=2, to=3, hashCode=1601)
EdgeInfo(fm=3, to=4, hashCode=1633)
*************************
==========================
EdgeInfo(fm=4, to=5, hashCode=1665)
EdgeInfo(fm=5, to=3, hashCode=1694)
EdgeInfo(fm=3, to=4, hashCode=1633)
*************************

由於是寫個簡單樣例,可能考慮的不夠周全,有問題還希望大家多多包涵,有問題大家一起討論!

謝謝!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章