Parallel program running in dual-core environment

Recently I have been studying parallel programming. Parallelized programs can achieve better performance on multicore machines. For example, a program that creates two threads to do a job can reduce the execution time by nearly 50% on a dual-core machine compared with a single-core machine. Without being manually divided into two threads, a program can also achieve the same performance using OpenMP directives. The code is below, compiled with the Intel C++ Compiler. The Visual C++ Compiler doesn't support OpenMP automatic optimization.

#include <stdio.h>
#include <windows.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <omp.h>
using namespace std;

// Number of elements each experiment sums over.
#define N 1000000

// Shared workload: the input array plus a slot for the combined total.
struct Workload
{
 long data[N];   // filled with 0..N-1 by init_Workload
 _int64 sum;     // 64-bit combined result (MSVC-specific _int64)
};

// Single global workload shared by all worker threads.
// Written once by init_Workload, then read-only inside the workers.
struct Workload * work;

void init_Workload(struct Workload * work)
{
 for(int i = 0; i < N; ++i)
  work->data[i] = i;
}

// Thread entry: interleaved ("well balanced") split of the global workload.
// n (an integer smuggled through LPVOID) selects the stride phase:
//   0 -> sum even indices, anything else -> sum odd indices.
// The partial sum is returned as the thread exit code. NOTE: exit codes
// are DWORDs, so the partial sum is truncated to 32 bits — for N = 1000000
// each half is ~2.5e11 and overflows; callers must treat the result as
// a demo value, not an exact sum.
DWORD WINAPI CalSum(LPVOID n)
{
 int i;
 DWORD sum = 0;
 int id = (int)(INT_PTR)n;  // INT_PTR avoids pointer truncation on Win64
 if(id == 0)
  for(i = 0; i < N; i += 2)
   sum += work->data[i];
 else
  for(i = 1; i < N; i += 2)
   sum += work->data[i];
 // Was "/n" (a literal slash-n) and passed the raw LPVOID to %d.
 printf("Work%d has finished.\n", id);
 return sum;
}

// Thread entry: contiguous ("not well balanced") split of the global workload.
// n selects the half: 0 -> indices [0, N/2), anything else -> [N/2, N).
// BUG FIX: the original used bound2 = N/2 + 1 for the upper half, which
// skipped element data[N/2] entirely, so the combined sum was short by
// N/2 (500000). Both halves now tile [0, N) exactly.
// As with CalSum, the DWORD return truncates the partial sum to 32 bits.
DWORD WINAPI CalSum2(LPVOID n)
{
 int i;
 int mid = N / 2;           // first half is [0, mid), second is [mid, N)
 DWORD sum = 0;
 int id = (int)(INT_PTR)n;  // INT_PTR avoids pointer truncation on Win64
 if(id == 0)
  for(i = 0; i < mid; ++i)
   sum += work->data[i];
 else
  for(i = mid; i < N; ++i)
   sum += work->data[i];
 // Was "/n" (a literal slash-n) and passed the raw LPVOID to %d.
 printf("Work%d has finished.\n", id);
 return sum;
}

// Read the CPU timestamp counter via RDTSC.
// NOTE(review): there is no explicit return statement; this relies on the
// MSVC/ICC x86 convention that a 64-bit return value is expected in
// EDX:EAX — exactly where RDTSC deposits the counter. The compiler will
// emit warning C4035 ("no return value"), and this only works for 32-bit
// x86 builds with MSVC-style inline asm; x64 MSVC rejects _asm entirely
// (use the __rdtsc() intrinsic there) — TODO confirm target toolchain.
inline unsigned _int64 get_cycles()
{
 _asm RDTSC
}

int main()
{
 //long n = N;
 //printf("%ld/n%d,%d",n,sizeof(long),sizeof(_int64));
 int i, n = 2;
 unsigned _int64 start, end;
 HANDLE hWorkers[2];
 DWORD dwWorkerID[2];
 DWORD dwExitCodes[2];
 work = (struct Workload *)malloc(sizeof(struct Workload));
 assert(work);
 start = get_cycles();
 init_Workload(work);
 end = get_cycles();
 cout<<"The init costs "<<end<<" - "<<start<<" = "<<end - start<<" cycles."<<endl;
 //start = get_cycles();

 cout<<endl;

 cout<<"Well balanced win32 multithreading..."<<endl;
 for(i = 0; i < n; ++i)
 {
  hWorkers[i] = CreateThread(NULL,0,CalSum,(LPVOID)i,0,&dwWorkerID[i]);
  if(hWorkers[i])
   printf("Worker%d starts to work./n",i);
  else
   printf("Worker%d has problem working./n",i);
 }
 start = get_cycles();
 WaitForMultipleObjects(2,hWorkers,TRUE,INFINITE);
 end = get_cycles();
 GetExitCodeThread(hWorkers[0], &dwExitCodes[0]);
 GetExitCodeThread(hWorkers[1], &dwExitCodes[1]);
 work->sum = dwExitCodes[0] + dwExitCodes[1];
 printf("The parallel result is: sum = %ld, ",work->sum);
 cout<<"it costs "<<end<<" - "<<start<<" = "<<end - start<<" cycles."<<endl;
 CloseHandle(hWorkers[0]);
 CloseHandle(hWorkers[1]);

 cout<<endl;

 cout<<"Not well balanced win32 multithreading..."<<endl;
 for(i = 0; i < n; ++i)
 {
  hWorkers[i] = CreateThread(NULL,0,CalSum2,(LPVOID)i,0,&dwWorkerID[i]);
  if(hWorkers[i])
   printf("Worker%d starts to work./n",i);
  else
   printf("Worker%d has problem working./n",i);
 }
 start = get_cycles();
 WaitForMultipleObjects(2,hWorkers,TRUE,INFINITE);
 end = get_cycles();
 GetExitCodeThread(hWorkers[0], &dwExitCodes[0]);
 GetExitCodeThread(hWorkers[1], &dwExitCodes[1]);
 work->sum = dwExitCodes[0] + dwExitCodes[1];
 printf("The parallel result is: sum = %ld, ",work->sum);
 cout<<"it costs "<<end<<" - "<<start<<" = "<<end - start<<" cycles."<<endl;
 CloseHandle(hWorkers[0]);
 CloseHandle(hWorkers[1]);

 cout<<endl;

 cout<<"Sequential...(single thread)"<<endl;
 _int64 sum = 0;
 start = get_cycles();
 for(i = 0; i < N; ++i)
  sum += work->data[i];
 end = get_cycles();
 printf("The sequent result is:  sum = %ld, ",sum);
 cout<<"it costs "<<end<<" - "<<start<<" = "<<end - start<<" cycles."<<endl;

 cout<<endl;

 /**/
 cout<<"OpenMP multithreading..."<<endl;
 sum = 0;
 start = get_cycles();
#pragma omp parallel for private(i) firstprivate(work) reduction(+:sum) num_threads(2)
 for(i = 0; i < N; ++i)
  sum += work->data[i];
 end = get_cycles();
 printf("The openmp result is:   sum = %ld, ",sum);
 cout<<"it costs "<<end<<" - "<<start<<" = "<<end - start<<" cycles."<<endl;
 /**/

 cout<<endl;

 return EXIT_SUCCESS;
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章