用ForkJoin框架實現的統計詞頻和一般單線程統計的性能比較
源碼如下:
- Folder.java
package ForkJoin;
import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/**
 * In-memory model of a directory on disk: the sub-directories it contains
 * and the documents (non-directory entries) found directly inside it.
 *
 * @author TreeNode
 */
public class Folder {
    // Sub-directories found directly inside this folder.
    private final List<Folder> subFolders;
    // Files found directly inside this folder, already loaded into memory.
    private final List<Document> documents;

    /**
     * Creates a folder from already-built children. The lists are stored as
     * given (no copy is made).
     *
     * @param subFolders the nested folders
     * @param documents  the documents located directly in this folder
     */
    Folder(List<Folder> subFolders, List<Document> documents) {
        this.subFolders = subFolders;
        this.documents = documents;
    }

    /**
     * @return the nested folders (the same list instance that was passed to the constructor)
     */
    public List<Folder> getSubFolders() {
        return subFolders;
    }

    /**
     * @return the documents located directly in this folder (the same list
     *         instance that was passed to the constructor)
     */
    public List<Document> getDocuments() {
        return documents;
    }

    /**
     * Recursively loads a directory tree: every directory entry becomes a nested
     * {@code Folder}, every other entry is read through {@link Document#fromFile(File)}.
     *
     * @param dir the directory to load
     * @return the folder model rooted at {@code dir}
     * @throws IOException if {@code dir} cannot be listed (it is not a directory
     *                     or an I/O error occurred), or if reading any file fails
     */
    static Folder fromDirectory(File dir) throws IOException {
        // File.listFiles() returns null instead of throwing when the path is not
        // a directory or cannot be read; fail with a descriptive error rather
        // than a NullPointerException in the loop below.
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list directory contents: " + dir);
        }

        List<Document> documents = new LinkedList<>();
        List<Folder> subFolders = new LinkedList<>();
        for (File entry : entries) {
            if (entry.isDirectory()) {
                subFolders.add(Folder.fromDirectory(entry));
            } else {
                documents.add(Document.fromFile(entry));
            }
        }
        return new Folder(subFolders, documents);
    }
}
- Document.java
package ForkJoin;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/**
 * A text file held in memory as the list of its lines.
 */
public class Document {
    // Lines of the document, in the order they were supplied.
    private final List<String> lines;

    /**
     * Wraps the given lines; the list is stored as given (no copy is made).
     *
     * @param lines the lines of the document
     */
    public Document(List<String> lines) {
        this.lines = lines;
    }

    /**
     * @return the lines of the document (the same list instance that was passed to the constructor)
     */
    public List<String> getLines() {
        return lines;
    }

    /**
     * Reads a file line by line into a new {@code Document}.
     *
     * @param file the file to read
     * @return a document whose lines appear in file order
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException           if reading the file fails
     */
    static Document fromFile(File file) throws FileNotFoundException, IOException {
        // Lines are appended as they are read, so file order is preserved.
        List<String> content = new LinkedList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            String current;
            while ((current = in.readLine()) != null) {
                content.add(current);
            }
        }
        return new Document(content);
    }
}
- WordCounter.java
package ForkJoin;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;
import java.util.regex.Pattern;
/**
 * Counts occurrences of a word in a folder tree, either on the calling thread
 * or in parallel on a fork/join pool, and (via {@link #main}) prints the time
 * each approach takes.
 */
public class WordCounter {
    /**
     * Word separator: one or more whitespace or punctuation characters.
     * Compiled once instead of on every line that is split.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("(\\s|\\p{Punct})+");

    // Defaults used by main() when the corresponding argument is omitted.
    private static final String DEFAULT_DIRECTORY = "E:\\eclipse38_wp\\c6713_gpio_test";
    private static final String DEFAULT_WORD = "device";
    private static final int DEFAULT_REPEAT_COUNT = 8;

    /** Pool that executes the fork/join search tasks. */
    private final ForkJoinPool forkJoinPool = new ForkJoinPool();

    /**
     * Splits a line into words.
     *
     * @param line the line to tokenize
     * @return the tokens of the trimmed line; the first element is an empty
     *         string when the line is blank or starts with punctuation
     */
    String[] wordsIn(String line) {
        return WORD_SEPARATOR.split(line.trim());
    }

    /**
     * Counts how many tokens of a document are exactly equal to the searched word.
     *
     * @param document     the document to scan
     * @param searchedWord the word to look for (case-sensitive)
     * @return the number of matching tokens
     */
    Long occurrencesCount(Document document, String searchedWord) {
        long count = 0;
        for (String line : document.getLines()) {
            for (String word : wordsIn(line)) {
                if (searchedWord.equals(word)) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Single-threaded count: recursively walks the folder tree and sums the
     * occurrences found in every document.
     *
     * @param folder       the folder to search
     * @param searchedWord the word to look for
     * @return the total number of occurrences in the folder and all nested folders
     */
    Long countOccurrencesOnSingleThread(Folder folder, String searchedWord) {
        long count = 0;
        for (Folder subFolder : folder.getSubFolders()) {
            count += countOccurrencesOnSingleThread(subFolder, searchedWord);
        }
        for (Document document : folder.getDocuments()) {
            count += occurrencesCount(document, searchedWord);
        }
        return count;
    }

    /**
     * Fork/join task that counts the occurrences of a word in one document.
     *
     * @author TreeNode
     */
    class DocumentSearchTask extends RecursiveTask<Long> {
        private final Document document;
        private final String searchedWord;

        DocumentSearchTask(Document document, String searchedWord) {
            super();
            this.document = document;
            this.searchedWord = searchedWord;
        }

        @Override
        protected Long compute() {
            return occurrencesCount(document, searchedWord);
        }
    }

    /**
     * Fork/join task that counts the occurrences of a word in a folder by
     * splitting the work into one subtask per nested folder and per document.
     *
     * @author TreeNode
     */
    class FolderSearchTask extends RecursiveTask<Long> {
        private final Folder folder;
        private final String searchedWord;

        FolderSearchTask(Folder folder, String searchedWord) {
            super();
            this.folder = folder;
            this.searchedWord = searchedWord;
        }

        @Override
        protected Long compute() {
            List<Folder> subFolders = folder.getSubFolders();
            List<Document> documents = folder.getDocuments();

            // Split: one subtask per nested folder and per document.
            List<RecursiveTask<Long>> subTasks = new ArrayList<>(subFolders.size() + documents.size());
            for (Folder subFolder : subFolders) {
                subTasks.add(new FolderSearchTask(subFolder, searchedWord));
            }
            for (Document document : documents) {
                subTasks.add(new DocumentSearchTask(document, searchedWord));
            }

            // invokeAll schedules the subtasks and waits for all of them, instead
            // of forking every task and then joining them in fork order, which the
            // ForkJoinTask documentation describes as the less efficient ordering.
            long count = 0L;
            for (RecursiveTask<Long> task : invokeAll(subTasks)) {
                // Merge: every task is already complete here, join() only fetches the result.
                count += task.join();
            }
            return count;
        }
    }

    /**
     * Parallel count: runs a {@link FolderSearchTask} for the folder on the
     * fork/join pool and waits for its result.
     *
     * @param folder       the folder to search
     * @param searchedWord the word to look for
     * @return the total number of occurrences in the folder and all nested folders
     */
    Long countOccurrencesInParallel(Folder folder, String searchedWord) {
        return forkJoinPool.invoke(new FolderSearchTask(folder, searchedWord));
    }

    /**
     * Loads a directory tree into memory, then times the single-threaded and the
     * fork/join search and prints the timings, followed by a CSV summary.
     *
     * @param args optional: {@code [directory] [word] [repeatCount]}; omitted
     *             values fall back to the built-in defaults
     * @throws IOException              if the directory tree cannot be read
     * @throws NumberFormatException    if {@code repeatCount} is not an integer
     * @throws IllegalArgumentException if {@code repeatCount} is negative
     */
    public static void main(String[] args) throws IOException {
        String directory = args.length > 0 ? args[0] : DEFAULT_DIRECTORY;
        String searchedWord = args.length > 1 ? args[1] : DEFAULT_WORD;
        // Number of times each search variant is repeated.
        final int repeatCount = args.length > 2 ? Integer.decode(args[2]) : DEFAULT_REPEAT_COUNT;
        if (repeatCount < 0) {
            throw new IllegalArgumentException("repeatCount must not be negative: " + repeatCount);
        }

        WordCounter wordCounter = new WordCounter();
        // Load the whole tree up front so the timings below exclude file I/O.
        Folder folder = Folder.fromDirectory(new File(directory));

        long counts;
        long startTime;
        long[] singleThreadTimes = new long[repeatCount];
        long[] forkedThreadTimes = new long[repeatCount];

        for (int i = 0; i < repeatCount; i++) {
            // nanoTime is the clock intended for elapsed-time measurement;
            // currentTimeMillis follows the wall clock and may jump.
            startTime = System.nanoTime();
            counts = wordCounter.countOccurrencesOnSingleThread(folder, searchedWord);
            singleThreadTimes[i] = (System.nanoTime() - startTime) / 1_000_000L;
            System.out.println(counts + " , single thread search took " + singleThreadTimes[i]
                    + "ms");
        }

        for (int i = 0; i < repeatCount; i++) {
            startTime = System.nanoTime();
            counts = wordCounter.countOccurrencesInParallel(folder, searchedWord);
            forkedThreadTimes[i] = (System.nanoTime() - startTime) / 1_000_000L;
            System.out
                    .println(counts + " , fork / join search took " + forkedThreadTimes[i] + "ms");
        }

        System.out.println("\nCSV Output:\n");
        System.out.println("Single thread,Fork/Join");
        for (int i = 0; i < repeatCount; i++) {
            System.out.println(singleThreadTimes[i] + "," + forkedThreadTimes[i]);
        }
        System.out.println();
    }
}
- 輸出結果
10 , single thread search took 147ms
10 , single thread search took 35ms
10 , single thread search took 12ms
10 , single thread search took 10ms
10 , single thread search took 12ms
10 , single thread search took 10ms
10 , single thread search took 11ms
10 , single thread search took 11ms
10 , fork / join search took 8ms
10 , fork / join search took 12ms
10 , fork / join search took 9ms
10 , fork / join search took 6ms
10 , fork / join search took 8ms
10 , fork / join search took 6ms
10 , fork / join search took 7ms
10 , fork / join search took 8ms
CSV Output:
Single thread,Fork/Join
147,8
35,12
12,9
10,6
12,8
10,6
11,7
11,8