// Java實現PageRank實例

package PageRank;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;

/**
 * Iterative PageRank over a directed graph stored as adjacency lists.
 *
 * <p>Every node starts with rank 1.0, so the ranks sum to the number of nodes
 * rather than to 1. Each iteration hands a fraction {@code AFA} of a node's
 * rank to its successors and adds a constant {@code 1 - AFA} to every node.
 * Nodes without real outgoing links spread their rank evenly over all nodes.
 */
public class myPageRank {
	/** Number of vertices allocated for the graph read from {@link #FILE}. */
	public static final int N = 4847571;
	/** Damping factor. */
	private static final double AFA = 0.85;
	/** Convergence threshold on the Euclidean distance between successive rank vectors. */
	private static final double DELTA = 0.1;
	/** Maximum number of iterations. */
	private static final int MAX_TIMES = 20;
	/** Number of rows in the table returned by {@link #getBiggestPr(double[])}. */
	private static final int TOP = 10;

	/** Edge-list input file: one "source target" pair per line. */
	private static final String FILE = "D:/soc-LiveJournal1.txt";
	/** Output file: one rank per line, in node-id order. */
	private static final String OUT = "D://result.txt";

	/**
	 * Loads the graph from {@link #FILE} and runs PageRank on it.
	 *
	 * @param args unused
	 * @throws IOException declared for compatibility with {@link #pagerank(List)}
	 */
	public static void main(String[] args) throws IOException {
		List<List<Integer>> graph = file_to_matrix();
		pagerank(graph);
	}

	/**
	 * Reads {@link #FILE} into an adjacency list with {@link #N} entries.
	 *
	 * <p>Each data line holds two whitespace-separated integers, "source target".
	 * Blank lines and lines starting with {@code #} are skipped (presumably the
	 * comment header of the edge-list file; they would otherwise abort the load
	 * with a {@link NumberFormatException}).
	 *
	 * <p>Loading is best-effort: on an I/O error or a malformed line the stack
	 * trace is printed and the edges read so far are returned.
	 *
	 * @return a list of {@code N} successor lists, indexed by source node id
	 */
	public static List<List<Integer>> file_to_matrix() {
		List<List<Integer>> graph = new ArrayList<>(N);
		for (int i = 0; i < N; i++) {
			graph.add(new ArrayList<Integer>());
		}
		int num = 0; // lines read, including skipped ones
		long start = System.currentTimeMillis();
		try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(FILE)), 65536)) {
			String line;
			while ((line = br.readLine()) != null) {
				num++;
				line = line.trim();
				if (line.isEmpty() || line.charAt(0) == '#') {
					continue;
				}
				// "\\s+" so that repeated separators do not yield empty tokens
				String[] str = line.split("\\s+");
				graph.get(Integer.parseInt(str[0])).add(Integer.parseInt(str[1]));
			}
			long now = System.currentTimeMillis() - start;
			System.out.println("文件讀取結束,共 " + num + "行!" + " 用時 " + now + "ms");
		} catch (IOException | RuntimeException e) {
			// best-effort: report and return whatever was loaded
			e.printStackTrace();
		}
		return graph;
	}

	/**
	 * Runs PageRank on {@code graph}, prints the ten highest-ranked nodes and
	 * writes all ranks to {@link #OUT}.
	 *
	 * <p>Iteration stops after {@link #MAX_TIMES} rounds or as soon as the
	 * distance between two successive rank vectors drops below {@link #DELTA}.
	 * A failure while writing the output file is reported on stderr and does
	 * not propagate.
	 *
	 * @param graph adjacency list; {@code graph.get(k)} holds the targets of node {@code k}
	 * @throws IOException never thrown by the current body; kept for interface compatibility
	 */
	public static void pagerank(List<List<Integer>> graph) throws IOException {
		final int n = graph.size();
		double[] Prnew = new double[n];
		for (int i = 0; i < n; i++) {
			Prnew[i] = 1.0; // initial rank of every node
		}

		long start = System.currentTimeMillis();
		for (int i = 1; i <= MAX_TIMES; i++) {
			double[] Pr = Prnew; // ranks before this iteration
			Prnew = get_Prnew(graph, Pr);
			double delta = get_DELTA(Prnew, Pr);
			long now = System.currentTimeMillis() - start;
			System.out.println("第" + i + "次迭代完成,DELTA = " + delta + ", 用時 " + now + "ms");
			if (delta < DELTA) {
				break;
			}
		}

		System.out.println("\n開始計算前十節點...");
		double[][] big = getBiggestPr(Prnew);
		for (int j = 0; j < big.length; j++) {
			System.out.println("第" + (j + 1) + "大節點, node: " + (int) big[j][0] + " pr: " + big[j][1]);
		}

		// try-with-resources closes the writer even when a write fails
		try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(OUT)))) {
			double sum = 0;
			for (int j = 0; j < Prnew.length; j++) {
				sum += Prnew[j];
				bw.write(String.valueOf(Prnew[j]));
				bw.newLine();
			}
			bw.flush(); // surface pending write errors before reporting success
			System.out.println("sum=" + sum);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Computes one PageRank iteration in a single pass over the adjacency lists.
	 *
	 * <p>For every node {@code i} the result is
	 * {@code (1 - AFA) + sum over sources k of contribution(k, i)} where
	 * <ul>
	 * <li>a source with no outgoing edge, or whose only edge is a self-loop,
	 * contributes {@code AFA * Pr[k] / n} to every node;</li>
	 * <li>any other source contributes {@code AFA * Pr[k] / outSize} to each
	 * distinct target it lists. Duplicate edges are counted once, and
	 * {@code outSize} is the raw list size including duplicates.</li>
	 * </ul>
	 * Targets outside {@code [0, n)} receive nothing. Because contributions are
	 * pushed from sources instead of searched per target, results may differ
	 * from a per-target summation in the last floating-point digits.
	 *
	 * @param list adjacency list with at least {@code Pr.length} entries
	 * @param Pr   current rank vector; its length {@code n} is the node count
	 * @return a new rank vector of length {@code n}
	 */
	public static double[] get_Prnew(List<List<Integer>> list, double[] Pr) {
		final int n = Pr.length;
		double[] Prnew = new double[n];
		// stamp[t] == k + 1 means source k has already credited target t
		int[] stamp = new int[n];
		double danglingShare = 0.0;

		for (int k = 0; k < n; k++) {
			List<Integer> targets = list.get(k);
			int outSize = targets.size();
			boolean dangling = outSize == 0 || (outSize == 1 && targets.get(0) == k);
			if (dangling) {
				danglingShare += AFA * (Pr[k] * (1.0 / n));
				continue;
			}
			double share = Pr[k] * (AFA * (1.0 / outSize));
			for (int target : targets) {
				if (target < 0 || target >= n) {
					continue;
				}
				if (stamp[target] != k + 1) {
					stamp[target] = k + 1;
					Prnew[target] += share;
				}
			}
		}

		double base = danglingShare + (1 - AFA);
		for (int i = 0; i < n; i++) {
			Prnew[i] += base;
		}
		return Prnew;
	}

	/**
	 * Returns the Euclidean distance between two rank vectors.
	 *
	 * @param Prnew rank vector after an iteration; its length bounds the loop
	 * @param Pr    rank vector before the iteration; must be at least as long as {@code Prnew}
	 * @return {@code sqrt(sum((Prnew[i] - Pr[i])^2))}
	 */
	public static double get_DELTA(double[] Prnew, double[] Pr) {
		double temp = 0;
		for (int i = 0; i < Prnew.length; i++) {
			double diff = Prnew[i] - Pr[i];
			temp += diff * diff;
		}
		return Math.sqrt(temp);
	}

	/**
	 * Finds the ten largest ranks.
	 *
	 * <p>Row {@code r} of the result is {@code {nodeIndex, rank}} for the
	 * {@code r}-th largest rank, in descending order; among equal ranks the
	 * lower index comes first. Rows that are never filled stay
	 * {@code {0, 0.0}}; ranks that are not strictly positive are never entered.
	 *
	 * @param Pr rank vector
	 * @return a 10x2 table of (node index, rank) pairs
	 */
	public static double[][] getBiggestPr(double[] Pr) {
		double[][] biggestPr = new double[TOP][2];
		for (int i = 0; i < Pr.length; i++) {
			if (Pr[i] > biggestPr[TOP - 1][1]) {
				// Shift smaller rows down by plain copies; swapping doubles with
				// add/subtract arithmetic would round and corrupt the ranks.
				int pos = TOP - 1;
				while (pos > 0 && Pr[i] > biggestPr[pos - 1][1]) {
					biggestPr[pos][0] = biggestPr[pos - 1][0];
					biggestPr[pos][1] = biggestPr[pos - 1][1];
					pos--;
				}
				biggestPr[pos][0] = i;
				biggestPr[pos][1] = Pr[i];
			}
		}
		return biggestPr;
	}
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章