Spark MLlib 學習

Spark GraphX - 用戶關係

import org.apache.spark.graphx.{VertexRDD, VertexId, Graph, Edge}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * GraphX walkthrough on a tiny, hard-coded "user relationship" graph.
 *
 * Vertices carry a `(name, age)` pair and edges carry an `Int` attribute.
 * `main` builds the graph on a local single-threaded Spark context and prints
 * the result of several basic operations: vertex/edge/triplet filters,
 * degree maxima, `mapVertices` / `mapEdges`, `subgraph`, and a pair of
 * `outerJoinVertices` calls that attach in/out degrees to every vertex.
 */
object UserRelationship {

  /**
   * Vertex attribute used by the degree-join step.
   *
   * Declared at object level rather than inside `main` so that instances
   * shipped through Spark do not depend on a method-local class.
   *
   * @param name   user name
   * @param age    user age
   * @param inDeg  number of incoming edges (0 when the vertex has none)
   * @param outDeg number of outgoing edges (0 when the vertex has none)
   */
  private final case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

  /**
   * Entry point: builds the sample graph and prints each demonstration.
   *
   * @param args command-line arguments (not read)
   */
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf()
      .setMaster("local[1]")
      .setAppName(" user relationship ")

    val sc = new SparkContext(conf)

    // Always release the context, even if one of the demonstrations throws.
    try {
      // (id, (name, age))
      val vertexArray = Array(
        (1L, ("Alice", 28)),
        (2L, ("Bob", 27)),
        (3L, ("Charlie", 65)),
        (4L, ("David", 42)),
        (5L, ("Ed", 55)),
        (6L, ("Fran", 50)))

      // Edge(srcId, dstId, attr)
      val edgeArray = Array(
        Edge(2L, 1L, 7),
        Edge(2L, 4L, 2),
        Edge(3L, 2L, 4),
        Edge(3L, 6L, 3),
        Edge(4L, 1L, 1),
        Edge(5L, 2L, 2),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 3))

      // Each Edge has a srcId and a dstId that refer to vertex ids in
      // vertexArray; both arrays are turned into RDDs to build the graph.
      val vertexRDD: RDD[(Long, (String, Int))] = sc.parallelize(vertexArray)
      val edgeRDD: RDD[Edge[Int]] = sc.parallelize(edgeArray)

      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // Vertices whose age is greater than 30.
      println("find whose age > 30")
      graph.vertices
        .filter { case (_, (_, age)) => age > 30 }
        .collect()
        .foreach { case (_, (name, age)) => println(s"$name is $age") }

      // Edges whose attribute is greater than 5.
      // Every expression needs the ${...} form; a bare "$e.srcId" would
      // interpolate the whole Edge and then append the literal ".srcId".
      println(" find the attribute > 5 ")
      graph.edges
        .filter(e => e.attr > 5)
        .collect()
        .foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

      // Triplets (edge plus both endpoint attributes) with attribute > 5.
      println("edge attr >5")
      graph.triplets
        .filter(t => t.attr > 5)
        .collect()
        .foreach(t => println(s"${t.srcAttr._1} likes ${t.dstAttr._1}"))

      // Degree operations: find the largest out-degree, in-degree and degree.
      // On a tie the second argument wins, as the comparison is strict.
      def maxDegree(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
        if (a._2 > b._2) a else b

      println("the max outDegrees and inDegress Degress is :")
      println("max of outDegrees:" + graph.outDegrees.reduce(maxDegree) +
        "max of inDegrees:" + graph.inDegrees.reduce(maxDegree) +
        "max of Degrees:" + graph.degrees.reduce(maxDegree))

      // Vertex transformation: age + 10.
      // The function passed to mapVertices returns only the NEW ATTRIBUTE;
      // the vertex id is kept by GraphX and must not be part of the result.
      graph
        .mapVertices { case (_, (name, age)) => (name, age + 10) }
        .vertices
        .collect()
        .foreach { case (_, (name, age)) => println(s"$name is $age") }

      // Edge transformation: attribute * 2.
      graph
        .mapEdges(e => e.attr * 2)
        .edges
        .collect()
        .foreach(e => println(s"${e.srcId} to ${e.dstId} attr ${e.attr}"))

      // Sub-graph restricted by vertex age.
      // Note: the predicate is inclusive (>= 30) while the printed heading
      // says "> 30"; no sample vertex has age exactly 30, so the output is
      // the same either way.
      println("頂點年紀>30的子圖:")
      val subGraph = graph.subgraph(vpred = (id, vd) => vd._2 >= 30)

      println("子圖所有頂點:")
      subGraph.vertices
        .collect()
        .foreach { case (_, (name, age)) => println(s"$name is $age") }

      println("subGraph edges:")
      subGraph.edges
        .collect()
        .foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

      println("collection:")

      // Build a new graph whose vertex attribute type is User, converted
      // from the (name, age) attribute of `graph`; degrees start at 0.
      val initialUserGraph: Graph[User, Int] = graph.mapVertices {
        case (_, (name, age)) => User(name, age, 0, 0)
      }

      // Fill in the degree information. A vertex that is absent from the
      // degree RDD yields None in the join and therefore gets degree 0.
      val userGraph = initialUserGraph
        .outerJoinVertices(initialUserGraph.inDegrees) {
          case (_, u, inDegOpt) => u.copy(inDeg = inDegOpt.getOrElse(0))
        }
        .outerJoinVertices(initialUserGraph.outDegrees) {
          case (_, u, outDegOpt) => u.copy(outDeg = outDegOpt.getOrElse(0))
        }

      // Graph transformations are lazy: without an action the joins above
      // never run, so print the joined attributes under the heading.
      userGraph.vertices
        .collect()
        .foreach { case (_, u) =>
          println(s"${u.name} inDeg: ${u.inDeg} outDeg: ${u.outDeg}")
        }
    } finally {
      sc.stop()
    }

  }

}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章