前言
因爲某些原因,我需要使用四種編程語言實現一個算法,這裏選擇了排序算法中比較經典的歸併排序。
每一個程序都是從一個數據文件中讀取數據,然後執行歸併排序,將結果寫入文件中。計算並輸出執行歸併排序部分代碼的執行時間。
最後在結果中,我進行了四種語言的執行效率對比。
具體實現
C++
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
using namespace std;
/*
 * Merge two adjacent, already-sorted runs of Array into one sorted run.
 *
 *   Array : buffer that holds both runs
 *   left  : index of the first element of the left run
 *   mid   : index of the last element of the left run
 *           (the right run starts at mid + 1)
 *   right : index of the last element of the right run
 *
 * On return Array[left..right] is in ascending order. A scratch buffer of
 * (right - left + 1) ints is allocated for the duration of the call.
 */
void Merge(int* Array, int left, int mid, int right)
{
    int len = right - left + 1;
    int* tmp_arr = new int[len];
    int i = 0;
    int p1 = left, p2 = mid + 1;

    while (p1 <= mid && p2 <= right)
    {
        // "<=" takes the left-run element on ties, which keeps the merge stable.
        if (Array[p1] <= Array[p2])
            tmp_arr[i++] = Array[p1++];
        else
            tmp_arr[i++] = Array[p2++];
    }

    // At most one of the two runs still has elements; copy the leftovers.
    while (p1 <= mid)
    {
        tmp_arr[i++] = Array[p1++];
    }
    while (p2 <= right)
    {
        tmp_arr[i++] = Array[p2++];
    }

    for (int j = 0; j < len; j++)
    {
        Array[left + j] = tmp_arr[j];
    }

    // Release the scratch buffer; Merge runs once per recursion node, so
    // keeping it alive would leak O(n log n) ints over a full sort.
    delete[] tmp_arr;
}
/*
 * Sort Array[left..right] (both bounds inclusive) in ascending order using
 * top-down merge sort.
 *
 *   Array : buffer to sort in place
 *   left  : index of the first element of the range
 *   right : index of the last element of the range
 *
 * A range with left >= right has at most one element and is left untouched.
 */
void MergeSort(int* Array, int left, int right)
{
    if (left < right)
    {
        // Written as left + (right - left) / 2 so the midpoint cannot
        // overflow int when left + right would exceed INT_MAX.
        int mid = left + (right - left) / 2;
        MergeSort(Array, left, mid);
        MergeSort(Array, mid + 1, right);
        Merge(Array, left, mid, right);
    }
}
/*
 * Count the integers stored in fp.
 *
 *   fp : stream opened for reading; it is rewound to the start first
 *
 * Returns the number of leading whitespace-separated integers that could be
 * parsed. Counting stops at end of file or at the first token that is not an
 * integer.
 */
int getDataNum(FILE* fp)
{
    int count = 0, t;
    fseek(fp, 0, SEEK_SET);
    // Compare against 1 (one item converted) rather than EOF: on a
    // non-numeric token fscanf returns 0 without consuming input, so a
    // "!= EOF" loop would spin forever.
    while (fscanf(fp, "%d", &t) == 1)
    {
        count++;
    }
    return count;
}
/*
 * Read up to len integers from the start of fp into arr.
 *
 *   fp  : stream opened for reading; it is rewound to the start first
 *   arr : destination buffer with room for at least len ints
 *   len : number of integers to read
 *
 * The result of each fscanf call is not checked.
 */
void getData(FILE* fp, int* arr, int len)
{
    fseek(fp, 0, SEEK_SET);
    int* slot = arr;
    for (int remaining = len; remaining > 0; --remaining, ++slot)
    {
        fscanf(fp, "%d", slot);
    }
}
/*
 * Driver: load the integers from ../data.txt, merge-sort them and print the
 * CPU time spent in the sort itself (file I/O is outside the timed section).
 *
 * Always returns 0, including when the file cannot be opened or holds no
 * integers.
 */
int main()
{
    char rpath[] = "../data.txt";
    char wpath[] = "../result.txt";
    (void) wpath;  // output path; this program does not write the result

    FILE* fp = fopen(rpath, "r");
    if (fp == NULL)
    {
        printf("打開文件%s失敗\n", rpath);
        return 0;
    }

    int len = getDataNum(fp);
    if (len == 0)
    {
        fclose(fp);  // do not leak the handle on the early exit
        printf("文件爲空\n");
        return 0;
    }

    int* arr = new int[len];
    getData(fp, arr, len);
    fclose(fp);  // the file is no longer needed once the data is in memory

    clock_t start = clock();
    MergeSort(arr, 0, len - 1);
    clock_t end = clock();
    printf("cost time = %.4lf s", ((double) end - start) / CLOCKS_PER_SEC);

    delete[] arr;
    return 0;
}
Python
import time
def merge(l, r):
    """Merge two ascending sequences into one new ascending list.

    Args:
        l: Left input, already sorted in ascending order.
        r: Right input, already sorted in ascending order.

    Returns:
        A new list containing every element of ``l`` and ``r`` in ascending
        order. When elements compare equal, the one from ``l`` comes first,
        so the merge is stable. Neither input is modified.
    """
    tmp = []
    p1 = p2 = 0
    while p1 < len(l) and p2 < len(r):
        # "<=" (not "<") so that ties are taken from the left input first.
        if l[p1] <= r[p2]:
            tmp.append(l[p1])
            p1 += 1
        else:
            tmp.append(r[p2])
            p2 += 1
    # At most one input still has elements left; append the remainder.
    tmp.extend(l[p1:])
    tmp.extend(r[p2:])
    return tmp
def mergeSort(arr):
    """Return the elements of ``arr`` in ascending order using merge sort.

    A sequence with fewer than two elements is returned as-is (the same
    object). Otherwise the sequence is split at its midpoint, each half is
    sorted recursively, and the two sorted halves are combined with
    ``merge``; the input itself is not modified.
    """
    if len(arr) < 2:
        return arr
    half = len(arr) // 2
    return merge(mergeSort(arr[:half]), mergeSort(arr[half:]))
if __name__ == "__main__":
    rpath = "../data.txt"
    wpath = "../result.txt"

    # Only the first line of the data file is read. split() with no argument
    # tolerates repeated spaces and an empty file.
    with open(rpath, "r") as f1:
        tokens = f1.readline().split()

    # int() rather than eval(): the file holds data, not code, and eval()
    # would execute whatever expression a token contains.
    numbers = [int(tok) for tok in tokens]

    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring elapsed time. Only the sort is timed, not the file I/O.
    start = time.perf_counter()
    result = mergeSort(numbers)  # mergeSort returns a new list; keep it
    end = time.perf_counter()
    print("time cost = {:.4f} s".format(end - start))

    # Uncomment to write the sorted result to wpath:
    # with open(wpath, "w") as f2:
    #     for value in result:
    #         f2.write(str(value) + " ")
Java
import java. io. BufferedReader;
import java. io. File;
import java. io. FileReader;
import java. io. FileWriter;
public class MergeSort {
public static void merge ( int [ ] arr, int left, int mid, int right) {
int len = right - left + 1 ;
int [ ] tmp = new int [ len] ;
int p1 = left, p2 = mid + 1 ;
int i = 0 ;
while ( p1 <= mid && p2 <= right) {
if ( arr[ p1] < arr[ p2] ) {
tmp[ i++ ] = arr[ p1++ ] ;
}
else {
tmp[ i++ ] = arr[ p2++ ] ;
}
}
while ( p1 <= mid) {
tmp[ i++ ] = arr[ p1++ ] ;
}
while ( p2 <= right) {
tmp[ i++ ] = arr[ p2++ ] ;
}
for ( int j = 0 ; j < len; j++ ) {
arr[ left + j] = tmp[ j] ;
}
}
public static void mergeSort ( int [ ] arr, int left, int right) {
if ( left < right) {
int mid = ( left + right) / 2 ;
mergeSort ( arr, left, mid) ;
mergeSort ( arr, mid + 1 , right) ;
merge ( arr, left, mid, right) ;
}
}
public static int [ ] getLineFromTxt ( File file, String split) throws Exception{
BufferedReader br = new BufferedReader ( new FileReader ( file) ) ;
String Line = br. readLine ( ) ;
String[ ] arrs = Line. split ( " " ) ;
int [ ] arr = new int [ arrs. length] ;
for ( int i = 0 ; i< arr. length; i++ ) {
arr[ i] = Integer. parseInt ( arrs[ i] ) ;
}
if ( br!= null) {
br. close ( ) ;
br = null;
}
return arr;
}
public static void main ( String[ ] args) throws Exception{
String rpath = "..\\data.txt" ;
String wpath = "..\\result.txt" ;
File f1 = new File ( rpath) ;
int [ ] arr = getLineFromTxt ( f1, " " ) ;
long start = System. currentTimeMillis ( ) ;
mergeSort ( arr, 0 , arr. length - 1 ) ;
long end = System. currentTimeMillis ( ) ;
double cost = ( end - start) / 1000.0 ;
System. out. println ( "cost time = " + cost + "s" ) ;
}
}
Haskell
import Prelude
import System. CPUTime
import Control. DeepSeq
-- | Split a string into its space-separated tokens.
--
-- Runs of spaces act as a single separator, and leading or trailing spaces
-- never produce empty tokens. Only the space character is a separator.
splitBySpace :: String -> [String]
-- dropWhile skips the spaces in front of the next token; break cuts that
-- token off at the following space and returns the remainder.
splitBySpace str = case dropWhile (== ' ') str of
  "" -> []
  s' -> w : splitBySpace s''
    where (w, s'') = break (== ' ') s'
-- | Convert every token to an 'Int'.
--
-- Each token is parsed with 'read', so a token that is not a valid integer
-- raises a runtime error when its value is demanded.
toInt :: [String] -> [Int]
toInt li = [read x :: Int | x <- li]
-- | Render a list of numbers as text: each number is followed by one space
-- (so a non-empty result ends with a trailing space); the empty list gives
-- the empty string.
output :: [Int] -> String
output = concatMap (\n -> show n ++ " ")
-- | Merge sort: return the elements of the list in ascending order.
--
-- Lists of length 0 or 1 are already sorted. Longer lists are split at the
-- midpoint, each half is sorted recursively, and the halves are combined
-- with 'merge'.
mergeSort :: Ord a => [a] -> [a]
mergeSort []  = []
mergeSort [x] = [x]
mergeSort arr = merge (mergeSort left) (mergeSort right)
  where
    (left, right) = splitAt mid_pos arr
    mid_pos       = length arr `div` 2
-- | Merge two ascending lists into one ascending list.
--
-- When the two heads compare equal, the element from the first list is
-- emitted first, so the merge is stable.
merge :: Ord a => [a] -> [a] -> [a]
merge [] x = x
merge x [] = x
merge (x:xs) (y:ys)
  | x <= y    = x : merge xs (y:ys)
  | otherwise = y : merge (x:xs) ys
-- | Driver: load the integers from ../data.txt, merge-sort them and print
-- the CPU time spent in the sort.
main :: IO ()
main = do
  -- File paths
  let rpath = "../data.txt"
  let wpath = "../result.txt"
  file <- readFile rpath
  let arr = toInt (splitBySpace file)
  -- Haskell evaluates lazily, so both ends of the timed section must be
  -- forced explicitly with deepseq:
  --   * force the input first, so reading and parsing the file is finished
  --     before the clock starts;
  --   * force the sorted result before stopping the clock, otherwise the
  --     sort itself is never evaluated.
  start <- arr `deepseq` getCPUTime
  let ans = mergeSort arr
  end <- ans `deepseq` getCPUTime
  -- getCPUTime reports picoseconds; divide by 10^12 to get seconds.
  let cost_time = (fromIntegral (end - start)) / (10 ^ 12)
  print ("cost time = " ++ (show cost_time) ++ " s")
  -- writeFile wpath (output ans)
小結
在四個程序中,因爲程序設計語言的不同,C++,Python和Java這種命令式語言的算法實現基本類似,Haskell因爲是函數式語言,實現稍有不同。
並且,因爲Haskell爲惰性計算,在計算時間的時候我們要使用deepseq來嚴格計算,得到完整的計算時間。可以參考Stack Overflow上關於這方面的討論,
鏈接在這
對比分析
因爲我們分別計算了四種語言在執行歸併排序部分的時間(只計算了排序時間,沒有包括IO),然後我們就可以通過每個程序排序相同規模的隨機數來對比四種語言的效率。
以下爲我電腦上分別排序100w,200w,500w三種規模的隨機數時,四個程序的執行時間。
因爲計算機狀態時刻變化,每個程序分別執行了五次,取平均值(單位:秒)
其中:
C++將源文件編譯爲exe文件,終端直接運行(G++ 7.2.0);
Java編譯爲.class文件,終端java虛擬機運行(Java 11.0.6);
Python使用解釋器運行(Python 3.7.5);
Haskell分爲解釋執行和編譯執行(GHC 8.6.5)。
數據文件爲data.txt文件,放在相應目錄下(src的上級目錄)。
分析與結論
分析:對比後發現對於相同規模的數據,C++和Java的運行速度較快,Haskell其次,Python的運行速度最慢。同時,Haskell編譯爲exe文件後,其運行速度比解釋執行快了一倍多。C++和Haskell均編譯爲exe文件後,兩者的運行速度也相差近十倍。
因此,程序的運行速度,和運行方式有關,更與程序設計語言有關。