小测试-线程池

参考：《Java 网络编程》。

有很多数据结构可以作为池，不过队列可能是确保任务以先进先出顺序来执行的最有效的一种数据结构。但是，不管哪种数据结构实现池，都必须要非常注意同步的问题。

因为会有很多线程同时与之交互。避免这个问题最简单的方式就是使用java.util.Vector，或者Java集合API中的同步Collection，例如把ArrayList包装成同步列表：List list = Collections.synchronizedList(new ArrayList());

例如我们希望使用GZIPOutputStream压缩当前目录中的每一个文件。一方面，IO的操作繁重，每一个文件都必须进行读写；另一方面，数据压缩是“CPU密集”度很高的操作。

每个客户端线程负责压缩文件，主线程主要是确定哪些文件需要压缩，然后把它们加入到池中。主线程的速度可能会大大快于压缩线程，因为主线程的任务只是列出目录中的文件。因此完全可以先填充池再启动线程；不过在下面的示例中，压缩线程先启动，与主线程并行运行。

GZipThread类包含一个私有字段pool，它保存对池的引用。这个字段的类型为List，run()方法从池中删除File对象，并压缩每一个文件。当线程准备从池中获得新文件时，如果池为空，线程就会在pool对象上等待（pool.wait()）；当有新文件加入池时，主线程调用pool.notifyAll()将等待的线程唤醒。

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.zip.GZIPOutputStream;

/**
 * 一）实现线程池最简单的方法是在第一次创建池时分配固定数量的线程。
 * 当池空时，每个线程都在池中等待。当向池中添加一项任务时，所有等待的线程都得到通知。
 * 当一个线程结束其分配的任务时，它就再回到池中获取新的任务。
 * 如果没有得到，他就等待，直到新的任务添加到池中位置。
 * 
 * 二）另一种方法就是将线程本身放到池中，让主程序从池中取出线程，为其分配任务。
 * 如果必须要完成一项任务，而池中没有（可用的）线程时，主程序就生成一个新的线程。
 * 每个线程结束后都会返回到池中。
 * （把这种机制想象为一个联盟大厅，
 * 	只有在当前所有成员都被雇佣时，
 * 	才会有新的工作人员加入联盟。）
 * 
 * @author 00
 *
 */
public class GZipThread extends Thread{

	private List pool;
	private static int filesCompressed = 0;
	
	public GZipThread(List pool){
		this.pool = pool;
	}
	
	private static synchronized void incrementFilesCompressed(){
		filesCompressed ++;
	}
	
	public void run(){
		while(filesCompressed != GZipAllFiles.getNumberOfFilesToBeCompressed()){
			
			File input = null;
			
			synchronized (pool){
				while(pool.isEmpty()){
					if(filesCompressed == GZipAllFiles.getNumberOfFilesToBeCompressed()){
						System.out.println("Thresd ending...");
						return; 
					}
					try{
						pool.wait();
					}catch(InterruptedException e){
						
					}
				}
				input = (File)pool.remove(pool.size() - 1);
				incrementFilesCompressed();
			}
			
			//不压缩已经压缩过的文件
			if(!input.getName().endsWith(".gz")){
				try {
					InputStream in = new FileInputStream(input);
					in = new BufferedInputStream(in);
				
					File output = new File(input.getParent(), input.getName() + ".gz");
					if(!output.exists()){ //不覆盖已经存在的文件
						OutputStream out = new FileOutputStream(output);
						out = new GZIPOutputStream(out);
						out = new BufferedOutputStream(out);
						int b;
						while((b = in.read()) != -1){
							out.write(b);
						}
						out.flush();
						out.close();
						in.close();
					}
				} catch (IOException e){
					System.err.println(e);
				}
			}
		}
	}
}

GZipAllFiles是主线程，它构造了作为池的Vector对象，将其传递给新构造的GZipThread对象，启动了四个线程，然后处理文件和目录。这些文件会被添加到池中，最后会由这四个线程来处理。

import java.io.File;
import java.util.Vector;

/**
 * Main program: starts {@link #THREAD_COUNT} {@code GZipThread} workers that
 * share one pool, fills the pool with the files to compress, then publishes
 * the total and interrupts the workers so that idle ones can terminate.
 */
public class GZipAllFiles {

	/** Number of worker threads started by {@link #main(String[])}. */
	public final static int THREAD_COUNT = 4;
	/**
	 * Total number of files queued, or -1 while the total is not yet known.
	 * Volatile because it is written by the main thread and read by the workers.
	 */
	private static volatile int filesToBeCompressed = -1;
	
	/**
	 * Queues files for compression. Each argument may be a regular file or a
	 * directory; for a directory its direct children that are not directories
	 * are queued (no recursion).
	 *
	 * @param args paths to process; when none are given the default
	 *             {@code D://outdata} is used
	 */
	public static void main(String[] args){
		
		String[] filesName = (args != null && args.length > 0)
				? args
				: new String[] {"D://outdata"};
		
		Vector pool = new Vector();
		GZipThread[] threads = new GZipThread[THREAD_COUNT];
		
		for(int i=0; i<threads.length; i++){
			threads[i] = new GZipThread(pool);
			threads[i].start();
		}
		
		int totalFiles = 0;
		for(int i=0; i<filesName.length; i++){
			File f = new File(filesName[i]);
			if(!f.exists()){
				// Nothing to compress; queueing it would only fail in the worker.
				System.err.println("Skipping missing path: " + f);
				continue;
			}
			if(f.isDirectory()){
				File[] files = f.listFiles();
				if(files == null){
					// listFiles() returns null when the directory cannot be read.
					System.err.println("Cannot list directory: " + f);
					continue;
				}
				for(int j=0; j<files.length; j++){
					// Do not recurse: a directory cannot be opened as a file stream.
					if(!files[j].isDirectory()){
						totalFiles ++;
						enqueue(pool, files[j]);
					}
				}
			}
			else {
				totalFiles ++;
				enqueue(pool, f);
			}
		}
		
		// Publish the total before waking the workers so they can detect completion.
		filesToBeCompressed = totalFiles;
		
		// Make sure every waiting thread learns that no more files will be added to the pool.
		for (int i=0; i<threads.length; i++) {
			threads[i].interrupt();
		}
	}
	
	/** Inserts {@code file} at the front of the pool and wakes all waiting workers. */
	private static void enqueue(Vector pool, File file){
		synchronized(pool){
			pool.add(0, file);
			pool.notifyAll();
		}
	}
	
	/**
	 * @return the total number of files queued for compression, or -1 if the
	 *         main thread has not finished queueing yet
	 */
	public static int getNumberOfFilesToBeCompressed() {
		return filesToBeCompressed;
	}
}