Java ForkJoin 框架實現詞頻統計的性能比較

比較用 ForkJoin 框架實現的詞頻統計與一般單線程詞頻統計的性能。

源碼如下:

  • Folder.java
package ForkJoin;

import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/**
 * In-memory model of a directory on disk: the documents it directly contains
 * plus its sub-folders, loaded recursively by {@link #fromDirectory(File)}.
 *
 * @author TreeNode
 */
public class Folder {
    // Sub-folders directly contained in this folder.
    private final List<Folder> subFolders;
    // Documents (non-directory entries) directly contained in this folder.
    private final List<Document> documents;

    /**
     * Creates a folder from already-loaded content. The lists are stored as given,
     * without copying.
     *
     * @param subFolders the folders directly inside this one
     * @param documents  the documents directly inside this one
     */
    Folder(List<Folder> subFolders, List<Document> documents) {
        this.subFolders = subFolders;
        this.documents = documents;
    }

    /**
     * @return the sub-folders directly inside this folder
     */
    public List<Folder> getSubFolders() {
        return subFolders;
    }

    /**
     * @return the documents directly inside this folder
     */
    public List<Document> getDocuments() {
        return documents;
    }

    /**
     * Recursively loads a directory tree: every sub-directory becomes a {@code Folder}
     * and every other entry is read through {@link Document#fromFile(File)}.
     *
     * @param dir the directory to load
     * @return the loaded folder model
     * @throws IOException if {@code dir} cannot be listed (it is not a directory, or an
     *                     I/O error occurred) or if reading one of its files fails
     */
    static Folder fromDirectory(File dir) throws IOException {
        // File.listFiles() returns null instead of throwing when the path is not a
        // directory or cannot be read; report that as an I/O failure rather than
        // letting the loop below fail with a NullPointerException.
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list directory: " + dir);
        }

        List<Document> documents = new LinkedList<>();
        List<Folder> subFolders = new LinkedList<>();
        for (File entry : entries) {
            if (entry.isDirectory()) {
                subFolders.add(Folder.fromDirectory(entry));
            } else {
                documents.add(Document.fromFile(entry));
            }
        }
        return new Folder(subFolders, documents);
    }
}

  • Document.java
package ForkJoin;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

/**
 * Holds the lines of a text file, in the order they were read.
 */
public class Document {
    private final List<String> lines;

    /**
     * Wraps the given list of lines; the list is stored as-is, without copying.
     *
     * @param lines the lines making up the document
     */
    public Document(List<String> lines) {
        this.lines = lines;
    }

    /**
     * @return the list of lines this document was created with
     */
    public List<String> getLines() {
        return this.lines;
    }

    /**
     * Reads a file line by line into a new {@code Document}.
     *
     * @param file the file to read
     * @return a document whose lines appear in file order
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException           if reading fails
     */
    static Document fromFile(File file) throws FileNotFoundException, IOException {
        // Lines are appended in reading order, so the list preserves file order.
        List<String> content = new LinkedList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            for (String current = in.readLine(); current != null; current = in.readLine()) {
                content.add(current);
            }
        }
        return new Document(content);
    }
}
  • WordCounter.java
package ForkJoin;

import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;
import java.util.regex.Pattern;

/**
 * Counts how often a word occurs in all documents of a folder tree, either on the
 * calling thread or in parallel on a fork/join pool, and (via {@link #main}) prints a
 * timing comparison of the two approaches.
 */
public class WordCounter {

    /** Directory scanned by {@link #main} when no directory argument is given. */
    private static final String DEFAULT_DIRECTORY = "E:\\eclipse38_wp\\c6713_gpio_test";

    /** Word searched by {@link #main} when no word argument is given. */
    private static final String DEFAULT_WORD = "device";

    /** Number of timed runs per strategy when no repeat-count argument is given. */
    private static final int DEFAULT_REPEAT_COUNT = 8;

    /** Divisor converting {@link System#nanoTime()} differences to milliseconds. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /**
     * Separator between words: one or more whitespace or punctuation characters.
     * Compiled once because {@link #wordsIn(String)} runs for every line of every document.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("(\\s|\\p{Punct})+");

    /** Pool on which the parallel search tasks are executed. */
    private final ForkJoinPool forkJoinPool = new ForkJoinPool();

    /**
     * Splits a line into words.
     *
     * @param line the line to tokenize
     * @return the tokens of the trimmed line, separated by whitespace/punctuation runs
     */
    String[] wordsIn(String line) {
        return WORD_SEPARATOR.split(line.trim());
    }

    /**
     * Counts the occurrences of a word in one document.
     *
     * @param document     the document to scan
     * @param searchedWord the word to look for (exact, case-sensitive match)
     * @return the number of tokens equal to {@code searchedWord}
     */
    Long occurrencesCount(Document document, String searchedWord) {
        long count = 0;
        for (String line : document.getLines()) {
            for (String word : wordsIn(line)) {
                if (searchedWord.equals(word)) {
                    count = count + 1;
                }
            }
        }
        return count;
    }

    /**
     * Single-threaded count: recursively walks the folder tree and sums the occurrences
     * found in every document.
     *
     * @param folder       the folder to search
     * @param searchedWord the word to look for
     * @return the total number of occurrences in the folder and all its sub-folders
     */
    Long countOccurrencesOnSingleThread(Folder folder, String searchedWord) {
        long count = 0;
        for (Folder subFolder : folder.getSubFolders()) {
            count = count + countOccurrencesOnSingleThread(subFolder, searchedWord);
        }
        for (Document document : folder.getDocuments()) {
            count = count + occurrencesCount(document, searchedWord);
        }
        return count;
    }

    /**
     * Fork/join task that counts the occurrences of a word in a single document.
     *
     * @author TreeNode
     */
    class DocumentSearchTask extends RecursiveTask<Long> {
        private final Document document;
        private final String searchedWord;

        DocumentSearchTask(Document document, String searchedWord) {
            super();
            this.document = document;
            this.searchedWord = searchedWord;
        }

        @Override
        protected Long compute() {
            return occurrencesCount(document, searchedWord);
        }
    }

    /**
     * Fork/join task that counts the occurrences of a word in a folder by splitting the
     * work into one subtask per sub-folder and one per document.
     *
     * @author TreeNode
     */
    class FolderSearchTask extends RecursiveTask<Long> {
        private final Folder folder;
        private final String searchedWord;

        FolderSearchTask(Folder folder, String searchedWord) {
            super();
            this.folder = folder;
            this.searchedWord = searchedWord;
        }

        @Override
        protected Long compute() {
            List<RecursiveTask<Long>> subTasks = new LinkedList<>();
            // Split: one subtask per sub-folder ...
            for (Folder subFolder : folder.getSubFolders()) {
                subTasks.add(new FolderSearchTask(subFolder, searchedWord));
            }
            // ... and one subtask per document.
            for (Document document : folder.getDocuments()) {
                subTasks.add(new DocumentSearchTask(document, searchedWord));
            }

            // invokeAll runs one subtask on the current worker and forks the rest,
            // then joins them in the order the framework recommends, instead of
            // forking everything and blocking on the oldest fork first.
            invokeAll(subTasks);

            // Merge: every subtask is complete here, so join() only fetches the result.
            long count = 0L;
            for (RecursiveTask<Long> task : subTasks) {
                count = count + task.join();
            }
            return count;
        }
    }

    /**
     * Parallel count: runs a {@link FolderSearchTask} for the folder on the fork/join pool.
     *
     * @param folder       the folder to search
     * @param searchedWord the word to look for
     * @return the total number of occurrences in the folder and all its sub-folders
     */
    Long countOccurrencesInParallel(Folder folder, String searchedWord) {
        return forkJoinPool.invoke(new FolderSearchTask(folder, searchedWord));
    }

    /**
     * Benchmarks the single-threaded and the fork/join search and prints the timings,
     * followed by a CSV summary.
     *
     * @param args optional: {@code [directory] [word] [repeatCount]}; missing arguments
     *             fall back to the built-in defaults
     * @throws IOException if the directory tree cannot be loaded
     */
    public static void main(String[] args) throws IOException {
        String directory = args.length > 0 ? args[0] : DEFAULT_DIRECTORY;
        String searchedWord = args.length > 1 ? args[1] : DEFAULT_WORD;
        final int repeatCount = args.length > 2 ? Integer.decode(args[2]) : DEFAULT_REPEAT_COUNT;
        if (repeatCount < 0) {
            throw new IllegalArgumentException("repeatCount must not be negative: " + repeatCount);
        }

        WordCounter wordCounter = new WordCounter();
        // The whole tree is loaded into memory up front, so the timed loops below
        // measure only the counting, not the file I/O.
        Folder folder = Folder.fromDirectory(new File(directory));

        long[] singleThreadTimes = new long[repeatCount];
        long[] forkedThreadTimes = new long[repeatCount];

        // System.nanoTime() is meant for measuring elapsed time; the wall clock read by
        // currentTimeMillis() may be adjusted while the benchmark runs.
        for (int i = 0; i < repeatCount; i++) {
            long startTime = System.nanoTime();
            long counts = wordCounter.countOccurrencesOnSingleThread(folder, searchedWord);
            singleThreadTimes[i] = (System.nanoTime() - startTime) / NANOS_PER_MILLI;
            System.out.println(counts + " , single thread search took " + singleThreadTimes[i]
                    + "ms");
        }

        for (int i = 0; i < repeatCount; i++) {
            long startTime = System.nanoTime();
            long counts = wordCounter.countOccurrencesInParallel(folder, searchedWord);
            forkedThreadTimes[i] = (System.nanoTime() - startTime) / NANOS_PER_MILLI;
            System.out
                    .println(counts + " , fork / join search took " + forkedThreadTimes[i] + "ms");
        }

        System.out.println("\nCSV Output:\n");
        System.out.println("Single thread,Fork/Join");
        for (int i = 0; i < repeatCount; i++) {
            System.out.println(singleThreadTimes[i] + "," + forkedThreadTimes[i]);
        }
        System.out.println();
    }

}
  • 輸出結果
10 , single thread search took 147ms
10 , single thread search took 35ms
10 , single thread search took 12ms
10 , single thread search took 10ms
10 , single thread search took 12ms
10 , single thread search took 10ms
10 , single thread search took 11ms
10 , single thread search took 11ms
10 , fork / join search took 8ms
10 , fork / join search took 12ms
10 , fork / join search took 9ms
10 , fork / join search took 6ms
10 , fork / join search took 8ms
10 , fork / join search took 6ms
10 , fork / join search took 7ms
10 , fork / join search took 8ms

CSV Output:

Single thread,Fork/Join
147,8
35,12
12,9
10,6
12,8
10,6
11,7
11,8
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章