Graph和init
在pagerank.cpp中對圖進行初始化:
// Heap-allocate the graph; Empty as the edge-data type means edges carry no payload.
Graph<Empty> * graph = new Graph<Empty>();
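具體實現在graph.hpp中,構造函數設置threads和sockets:threads爲系統配置的CPU數(numa_num_configured_cpus),sockets爲系統配置的NUMA節點數(numa_num_configured_nodes),threads_per_socket表示每個NUMA節點(socket)上分到的線程數;注意分區數partitions並不在這裏設置,而是在init()中由MPI_Comm_size得到(每個MPI進程對應一個分區):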
Graph() {
threads = numa_num_configured_cpus();
sockets = numa_num_configured_nodes();
threads_per_socket = threads / sockets;
init();
}
init()函數設置分區數
void init() {
edge_data_size = std::is_same<EdgeData, Empty>::value ? 0 : sizeof(EdgeData);
unit_size = sizeof(VertexId) + edge_data_size;
edge_unit_size = sizeof(VertexId) + unit_size;
assert( numa_available() != -1 );
assert( sizeof(unsigned long) == 8 ); // assume unsigned long is 64-bit
char nodestring[sockets*2+1];
nodestring[0] = '0';
for (int s_i=1;s_i<sockets;s_i++) {
nodestring[s_i*2-1] = ',';
nodestring[s_i*2] = '0'+s_i;
}
struct bitmask * nodemask = numa_parse_nodestring(nodestring);
numa_set_interleave_mask(nodemask);
omp_set_dynamic(0);
omp_set_num_threads(threads);
thread_state = new ThreadState * [threads];
local_send_buffer_limit = 16;
local_send_buffer = new MessageBuffer * [threads];
for (int t_i=0;t_i<threads;t_i++) {
thread_state[t_i] = (ThreadState*)numa_alloc_onnode( sizeof(ThreadState), get_socket_id(t_i));
local_send_buffer[t_i] = (MessageBuffer*)numa_alloc_onnode( sizeof(MessageBuffer), get_socket_id(t_i));
local_send_buffer[t_i]->init(get_socket_id(t_i));
}
#pragma omp parallel for
for (int t_i=0;t_i<threads;t_i++) {
int s_i = get_socket_id(t_i);
assert(numa_run_on_node(s_i)==0);
#ifdef PRINT_DEBUG_MESSAGES
printf("thread-%d bound to socket-%d\n", t_i, s_i);
#endif
}
#ifdef PRINT_DEBUG_MESSAGES
printf("threads=%d*%d\n", sockets, threads_per_socket);
printf("interleave on %s\n", nodestring);
#endif
MPI_Comm_rank(MPI_COMM_WORLD, &partition_id);
MPI_Comm_size(MPI_COMM_WORLD, &partitions);
send_buffer = new MessageBuffer ** [partitions];
recv_buffer = new MessageBuffer ** [partitions];
for (int i=0;i<partitions;i++) {
send_buffer[i] = new MessageBuffer * [sockets];
recv_buffer[i] = new MessageBuffer * [sockets];
for (int s_i=0;s_i<sockets;s_i++) {
send_buffer[i][s_i] = (MessageBuffer*)numa_alloc_onnode( sizeof(MessageBuffer), s_i);
send_buffer[i][s_i]->init(s_i);
recv_buffer[i][s_i] = (MessageBuffer*)numa_alloc_onnode( sizeof(MessageBuffer), s_i);
recv_buffer[i][s_i]->init(s_i);
}
}
alpha = 8 * (partitions - 1);
MPI_Barrier(MPI_COMM_WORLD);
}
2.收到的邊數是收到的byte數除以每條邊的大小:
// Number of edge records in the received payload: the received byte count
// divided by the size of one edge record (edge_unit_size).
// NOTE(review): assumes recv_bytes is an exact multiple of edge_unit_size;
// any remainder is dropped by the division — confirm against the sender.
int recv_edges = recv_bytes / edge_unit_size;
3.點dst所在的分區號爲dst_part,
dst_part分區中點src的出邊加一
點dst的入度加一
點src的出度加一
// Atomic increments (GCC __sync builtin); the returned old values are unused.
// Count one more outgoing edge of src whose destination lies in partition dst_part.
__sync_fetch_and_add(&outgoing_adj_index[dst_part][src], 1);
// One more incoming edge for dst.
__sync_fetch_and_add(&in_degree[dst], 1);
// One more outgoing edge for src.
__sync_fetch_and_add(&out_degree[src], 1);
4.alpha值爲(分區個數減一)乘以8,即 alpha = 8 * (partitions - 1)