同步圖運算框架GraphLite實例之PageRank算法

1.PageRank算法介紹

PageRank,網頁排名,又稱網頁級別、Google左側排名或佩奇排名,是一種由搜索引擎根據網頁之間相互的超鏈接計算的技術,而作爲網頁排名的要素之一,以Google公司創辦人拉里·佩奇(Larry Page)之姓來命名。Google用它來體現網頁的相關性和重要性,在搜索引擎優化操作中是經常被用來評估網頁優化的成效因素之一。Google的創始人拉里·佩奇和謝爾蓋·布林於1998年在斯坦福大學發明了這項技術。
PageRank通過網絡浩瀚的超鏈接關係來確定一個頁面的等級。Google把從A頁面到B頁面的鏈接解釋爲A頁面給B頁面投票,Google根據投票來源(甚至來源的來源,即鏈接到A頁面的頁面)和投票目標的等級來決定新的等級。簡單的說,一個高等級的頁面可以使其他低等級頁面的等級提升。

2.PageRank算法原理

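(原文此處爲三張公式圖片,未能保留;以下按下文實現所用的公式重建。)

$$PR(v) = (1 - d) + d \sum_{u \in B_v} \frac{PR(u)}{L(u)}$$

其中 $B_v$ 是所有鏈接到頁面 $v$ 的頁面集合,$L(u)$ 是頁面 $u$ 的出鏈數量,$d$ 是阻尼係數。下文的實現取 $d = 0.85$,即 $PR(v) = 0.15 + 0.85 \sum_{u \in B_v} PR(u)/L(u)$,所有頂點的初始值爲 1。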

3.GraphLite圖運算系統的PageRank算法實現

/**
 * @file PageRankVertex.cc
 * This file implements the PageRank algorithm using graphlite API.
 */
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "GraphLite.h"

// Token-pasting helper: VERTEX_CLASS_NAME(Foo) expands to PageRankVertexFoo,
// and VERTEX_CLASS_NAME() expands to PageRankVertex.
#define VERTEX_CLASS_NAME(name) PageRankVertex##name

// Convergence threshold: compute() halts a vertex once the global aggregator
// value drops below this.
#define EPS 1e-6


//class PageRankVertexInputFormatter: public InputFormatter
/**
 * Input formatter for the PageRank example.
 *
 * Reports the vertex/edge counts and the per-value byte sizes, and builds the
 * graph from the edge lines through addVertex()/addEdge().
 */
class VERTEX_CLASS_NAME(InputFormatter): public InputFormatter {
public:
    /**
     * Parses the vertex count from m_ptotal_vertex_line into m_total_vertex.
     * @return the stored vertex count (0 when the line cannot be parsed).
     */
    int64_t getVertexNum() {
        unsigned long long n = 0;  // stays 0 when sscanf() matches nothing
        sscanf(m_ptotal_vertex_line, "%llu", &n);
        printf("at class PageRankVertexInputFormatter:    m_total_vertex=   %llu \n", n);
        m_total_vertex = n;
        return m_total_vertex;
    }

    /**
     * Parses the edge count from m_ptotal_edge_line into m_total_edge.
     * @return the stored edge count (0 when the line cannot be parsed).
     */
    int64_t getEdgeNum() {
        unsigned long long n = 0;  // stays 0 when sscanf() matches nothing
        sscanf(m_ptotal_edge_line, "%llu", &n);
        m_total_edge = n;
        printf("at class PageRankVertexInputFormatter:   m_total_edge=   %llu \n", n);
        return m_total_edge;
    }

    /** @return byte size of a vertex value (a double holding the rank). */
    int getVertexValueSize() {
        m_n_value_size = sizeof(double);
        return m_n_value_size;
    }

    /** @return byte size of an edge value (a double weight). */
    int getEdgeValueSize() {
        m_e_value_size = sizeof(double);
        return m_e_value_size;
    }

    /** @return byte size of a message value (a double rank share). */
    int getMessageValueSize() {
        m_m_value_size = sizeof(double);
        return m_m_value_size;
    }

    /**
     * Reads m_total_edge edge lines of the form "<from> <to>" and registers
     * every edge (weight 0) plus one vertex per run of equal source ids, with
     * initial rank 1 and the run length as out-degree.
     *
     * A vertex is emitted each time the source id changes, so the input is
     * presumably grouped by source vertex -- TODO confirm against the data
     * files. Lines that are missing or do not contain two ids are reported on
     * stderr and skipped instead of being used with stale values.
     */
    void loadGraph() {
        unsigned long long last_vertex = 0;
        unsigned long long from = 0;
        unsigned long long to = 0;
        double weight = 0;   // Note: modify this if an edge weight is to be read

        double value = 1;    // initial PageRank
        int outdegree = 0;   // out-degree of the current source vertex
        bool have_vertex = false;  // true once at least one valid edge was seen

        printf("Excute loadGraph()  ,  m_total_edge=  %lld\n",
               static_cast<long long>(m_total_edge));

        for (int64_t i = 0; i < m_total_edge; ++i) {
            // Next edge line, read from the current file offset.
            const char* line = getEdgeLine();
            if (line == NULL || sscanf(line, "%llu %llu", &from, &to) != 2) {
                fprintf(stderr, "loadGraph(): skipping malformed edge line %lld\n",
                        static_cast<long long>(i));
                continue;
            }

            if (have_vertex && last_vertex != from) {
                // Source changed: the previous vertex is complete, flush it.
                addVertex(last_vertex, &value, outdegree);
                outdegree = 0;
            }
            last_vertex = from;
            have_vertex = true;
            ++outdegree;
            addEdge(from, to, &weight);
        }

        // Flush the last source vertex; nothing to flush for an empty edge list.
        if (have_vertex) {
            addVertex(last_vertex, &value, outdegree);
        }
    }
};

/**
 * Output formatter for the PageRank example: writes one "<vertex id>: <rank>"
 * line per result entry.
 */
class VERTEX_CLASS_NAME(OutputFormatter): public OutputFormatter {
public:
    /** Iterates over all results and emits each one through writeNextResLine(). */
    void writeResult() {
        int64_t vid;
        double value;
        char s[1024];

        for (ResultIterator r_iter; ! r_iter.done(); r_iter.next() ) {
            r_iter.getIdValue(vid, &value);
            // The cast matches the signed %lld conversion; snprintf bounds the write.
            int n = snprintf(s, sizeof(s), "%lld: %f\n", static_cast<long long>(vid), value);
            if (n < 0) {
                continue;  // formatting error: nothing valid to write
            }
            if (n >= static_cast<int>(sizeof(s))) {
                n = static_cast<int>(sizeof(s)) - 1;  // output was truncated
            }
            writeNextResLine(s, n);
        }
    }
};

// Sum aggregator over doubles: m_local collects values passed to accumulate(),
// m_global collects values passed to merge().
class VERTEX_CLASS_NAME(Aggregator): public Aggregator<double> {
public:
    // Resets both the local and the global sum to zero.
    void init() {
        m_local = 0;
        m_global = 0;
    }

    // Address of the global sum.
    void* getGlobal() {
        return &m_global;
    }

    // Overwrites the global sum with the double that p points to.
    void setGlobal(const void* p) {
        const double* src = static_cast<const double*>(p);
        m_global = *src;
    }

    // Address of the local sum.
    void* getLocal() {
        return &m_local;
    }

    // Adds the double that p points to onto the global sum and logs the result.
    void merge(const void* p) {
        const double* part = static_cast<const double*>(p);
        m_global += *part;
        printf("excute merge()  on PageRankAggregator class, m_global= %lf\n", m_global);
    }

    // Adds the double that p points to onto the local sum and logs the result.
    void accumulate(const void* p) {
        const double* contribution = static_cast<const double*>(p);
        m_local += *contribution;
        printf("excute accumulate()  on PageRankAggregator class, m_local= %lf\n", m_local);
    }
};

// PageRank vertex program; vertex, edge and message values are all doubles.
class VERTEX_CLASS_NAME(): public Vertex <double, double, double> {
public:
    // One superstep for this vertex:
    //   - superstep 0: broadcast the initial rank 1.0 split over the out-edges;
    //   - later supersteps: rank = 0.15 + 0.85 * (sum of incoming messages),
    //     feed |old - new| into aggregator 0, store the rank, broadcast its share;
    //   - from superstep 2 on: halt without sending when the global value of
    //     aggregator 0 is below EPS.
    void compute(MessageIterator* pmsgs) {
        printf("Excute compute(),  MessageIterrator *pmsgs, pmsgs.size=  %d\n ",pmsgs->m_vector_size);

        // Superstep 0 keeps this default; the stored vertex value is not
        // rewritten there (loadGraph() in this file already initialises it to 1).
        double rank = 1.0;

        if (getSuperstep() == 0) {
            printf("getSuperstep()==0     val=%lf\n",getValue());
        } else {
            if (getSuperstep() >= 2) {
                // Global value of aggregator 0: the summed rank changes.
                const double total_delta = *static_cast<const double*>(getAggrGlobal(0));
                if (total_delta < EPS) {
                    printf("at compute() on PageRankVertex class, global_val==%lf\n",total_delta);
                    voteToHalt();
                    return;
                }
            }

            // Sum the rank shares received from in-neighbours.
            double incoming = 0;
            while (! pmsgs->done()) {
                incoming += pmsgs->getValue();
                pmsgs->next();
            }
            rank = 0.15 + 0.85 * incoming;

            // Report how far this vertex moved before overwriting its value.
            double delta = fabs(getValue() - rank);
            accumulateAggr(0, &delta);
            * mutableValue() = rank;
        }

        // NOTE(review): a vertex with no out-edges makes this a division by
        // zero (inf/NaN in floating point); presumably nothing is sent in that
        // case, but confirm against sendMessageToAllNeighbors().
        const int64_t out_degree = getOutEdgeIterator().size();
        sendMessageToAllNeighbors(rank / out_degree);
    }
};

// Job description for the PageRank run: host layout, input/output paths and
// the single aggregator used for convergence detection.
class VERTEX_CLASS_NAME(Graph): public Graph {
public:
    VERTEX_CLASS_NAME(Aggregator)* aggregator;  // allocated in init(), released in term()

public:
    // Configures the job.
    //   argv[0]: PageRankVertex.so
    //   argv[1]: <input path>
    //   argv[2]: <output path>
    // Prints a usage line and exits with status 1 when fewer than three
    // arguments are given.
    void init(int argc, char* argv[]) {
        // Five hosts on localhost (the original note describes them as one
        // master plus four workers), on ports 1411, 1421, ..., 1451.
        const int host_count = 5;
        const int first_port = 1411;
        const int port_step = 10;

        setNumHosts(host_count);
        for (int host = 0; host < host_count; ++host) {
            setHost(host, "localhost", first_port + host * port_step);
        }

        const bool has_paths = (argc >= 3);
        if (! has_paths) {
           printf ("Usage: %s <input path> <output path>\n", argv[0]);
           exit(1);
        }

        m_pin_path = argv[1];
        m_pout_path = argv[2];

        // One aggregator, registered at index 0.
        aggregator = new VERTEX_CLASS_NAME(Aggregator)[1];
        regNumAggr(1);
        regAggr(0, &aggregator[0]);
    }

    // Releases the aggregator array allocated by init().
    void term() {
        delete[] aggregator;
    }
};

/* STOP: do not change the code below. */
// Factory with C linkage: allocates the PageRank graph object and attaches a
// new input formatter, output formatter and vertex object to it.
// Presumably looked up by name when the framework loads the shared object --
// confirm against the loader.
// Returns the new graph; destroy_graph() in this file releases it.
extern "C" Graph* create_graph() {
    Graph* pgraph = new VERTEX_CLASS_NAME(Graph);

    pgraph->m_pin_formatter = new VERTEX_CLASS_NAME(InputFormatter);
    pgraph->m_pout_formatter = new VERTEX_CLASS_NAME(OutputFormatter);
    pgraph->m_pver_base = new VERTEX_CLASS_NAME();

    return pgraph;
}

// Counterpart of create_graph(): deletes the vertex object, the output
// formatter, the input formatter and finally the graph itself. Each pointer is
// cast to its concrete PageRank type before being deleted.
extern "C" void destroy_graph(Graph* pobject) {
    delete ( VERTEX_CLASS_NAME()* )(pobject->m_pver_base);
    delete ( VERTEX_CLASS_NAME(OutputFormatter)* )(pobject->m_pout_formatter);
    delete ( VERTEX_CLASS_NAME(InputFormatter)* )(pobject->m_pin_formatter);
    delete ( VERTEX_CLASS_NAME(Graph)* )pobject;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章