多線程讀取大數據文件

代碼改錯,已更新爲:http://blog.csdn.net/lrq1988/article/details/17954715

工作之故,要讀取一個幾十萬條的文本,就寫了這個程序,倒騰了兩天,改來改去,並不一定是最終版,姑且先記錄下來。

1、本地讀取以後改爲網絡讀取

2、timer是爲了作定時刷新

3、容器啓動時,首先加載MobileUtil.init()方法

4、多核服務器,加載會更快,根據服務器內核切割獲取的內容來組裝map

5、基於線程安全考慮,HashMap可能改爲ConcurrentHashMap

6、之所以沒用NIO,是因爲文本行數的計算在JDK6不支持,另外實現的代價又高。貌似JDK7已提供相應API。

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;

import org.apache.commons.codec.digest.DigestUtils;

//import org.apache.commons.codec.digest.DigestUtils;

/**
 * Loads a tab-separated table of mobile-number data from a local file into an
 * in-memory map and answers province / city / operator lookups for 11-digit
 * mobile numbers. Records are looked up by the first seven digits of the number.
 *
 * <p>{@link #init()} schedules a background task that runs immediately and then
 * again one minute after each run finishes. The file is re-parsed only when the
 * CRC-32 computed over its lines differs from the last successful load. Parsing
 * is split over at most one worker per available processor.
 *
 * <p>All lookup methods block until the first load has completed, so
 * {@link #init()} must have been called before they can return.
 *
 * <p>Each data line is expected to contain four tab-separated fields: number,
 * province, city, operator. Lines with fewer fields are skipped.
 */
public class MobileUtil {
	/** Runs the periodic refresh; only one task is ever scheduled, so one thread suffices. */
	private static final ScheduledExecutorService timer = Executors
			.newSingleThreadScheduledExecutor();
	private static final String fileName = "/Users/leefelix/Downloads/all.csv";
	/** Charset used for the data file and for HTTP responses. */
	private static final java.nio.charset.Charset UTF8 = java.nio.charset.Charset
			.forName("UTF-8");
	/** An acceptable mobile number: the digit 1 followed by ten more digits. */
	private static final Pattern MOBILE_PATTERN = Pattern.compile("1\\d{10}");
	/** Number of tab-separated fields expected on each data line. */
	private static final int FIELD_COUNT = 4;
	/** Sleep between polls of {@link #started} while waiting for the first load. */
	private static final long WAIT_POLL_MILLIS = 10L;

	/** CRC-32 of the lines of the last successfully loaded file (refresh thread only). */
	private static long lastChecksum;
	/** Guards against scheduling the refresh task more than once. */
	private static boolean scheduled;

	/** Current lookup table; replaced wholesale after each successful reload. */
	public static volatile HashMap<String, Location> locationMap = new HashMap<String, Location>();
	/** Becomes {@code true} once the first load has been published. */
	public static volatile boolean started = false;

	private MobileUtil() {
	}

	/**
	 * Schedules the refresh task: first run immediately, then one minute after
	 * each run completes.
	 */
	private void initial() {
		timer.scheduleWithFixedDelay(new Runnable() {
			public void run() {
				try {
					reload();
				} catch (FileNotFoundException e) {
					e.printStackTrace();
					System.out.println("找不到文件" + fileName + "...");
				} catch (IOException e) {
					e.printStackTrace();
					System.out.println("與文件" + fileName + "通信異常...");
				} catch (InterruptedException e) {
					Thread.currentThread().interrupt();
					e.printStackTrace();
				} catch (ExecutionException e) {
					e.printStackTrace();
				} catch (RuntimeException e) {
					// An exception escaping run() would silently cancel every
					// future refresh, so unchecked failures are reported here.
					e.printStackTrace();
				}
			}
		}, 0, 1, TimeUnit.MINUTES);
	}

	/**
	 * Reads the data file and, if its checksum changed since the last
	 * successful load (or nothing has been loaded yet), parses it and publishes
	 * the new map.
	 *
	 * @throws IOException if the file cannot be opened or read
	 * @throws InterruptedException if interrupted while waiting for the parse workers
	 * @throws ExecutionException if a parse worker fails
	 */
	private static void reload() throws IOException, InterruptedException,
			ExecutionException {
		long begin = System.nanoTime();
		CRC32 crc = new CRC32();
		List<String> lines = readLines(fileName, crc);
		long checksum = crc.getValue();
		// "started" is checked too so that a first file whose checksum happens
		// to equal the field's initial value 0 is still loaded.
		if (started && checksum == lastChecksum) {
			return;
		}
		HashMap<String, Location> fresh = parseInParallel(lines);
		locationMap = fresh;
		lastChecksum = checksum;
		started = true;
		System.out.println("locationMap:" + fresh.size());
		System.out.println(System.nanoTime() - begin);
	}

	/**
	 * Reads every line of a UTF-8 text file and feeds the lines into the given
	 * checksum (each line followed by a single '\n', so differences in line
	 * terminators do not change the checksum).
	 *
	 * @param path file to read
	 * @param crc checksum updated with the file's lines
	 * @return the lines of the file, without line terminators
	 * @throws IOException if the file cannot be opened or read
	 */
	private static List<String> readLines(String path, CRC32 crc)
			throws IOException {
		FileInputStream in = new FileInputStream(path);
		try {
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					in, UTF8));
			List<String> lines = new ArrayList<String>();
			String line;
			while ((line = reader.readLine()) != null) {
				crc.update(line.getBytes(UTF8));
				crc.update('\n');
				lines.add(line);
			}
			return lines;
		} finally {
			in.close();
		}
	}

	/**
	 * Parses the lines using up to one worker per available processor and
	 * merges the partial maps in file order, so a later line wins over an
	 * earlier one with the same key.
	 *
	 * @param lines all lines of the data file
	 * @return the merged map; empty when {@code lines} is empty
	 * @throws InterruptedException if interrupted while waiting for the workers
	 * @throws ExecutionException if a worker fails
	 */
	private static HashMap<String, Location> parseInParallel(List<String> lines)
			throws InterruptedException, ExecutionException {
		HashMap<String, Location> merged = new HashMap<String, Location>();
		int total = lines.size();
		if (total == 0) {
			return merged;
		}
		// Never use more workers than there are lines.
		int workers = Math.min(Runtime.getRuntime().availableProcessors(),
				total);
		List<Callable<HashMap<String, Location>>> tasks = new ArrayList<Callable<HashMap<String, Location>>>(
				workers);
		for (int i = 0; i < workers; i++) {
			// long arithmetic keeps the boundaries correct for very large files
			int from = (int) ((long) i * total / workers);
			int to = (int) ((long) (i + 1) * total / workers);
			final List<String> slice = lines.subList(from, to);
			tasks.add(new Callable<HashMap<String, Location>>() {
				public HashMap<String, Location> call() {
					return parseLines(slice);
				}
			});
		}
		ExecutorService exec = Executors.newFixedThreadPool(workers);
		try {
			// Submit once, after ALL tasks have been created.
			for (Future<HashMap<String, Location>> future : exec
					.invokeAll(tasks)) {
				merged.putAll(future.get());
			}
		} finally {
			exec.shutdown();
		}
		return merged;
	}

	/**
	 * Converts data lines into a map keyed by the first field.
	 *
	 * <p>A line is split on tabs into number, province, city and operator; the
	 * operator field receives everything after the third tab. All fields are
	 * trimmed. Lines with fewer than four fields (for example blank lines) are
	 * skipped.
	 *
	 * @param lines data lines without line terminators
	 * @return map from the trimmed first field to its {@link Location}
	 */
	static HashMap<String, Location> parseLines(List<String> lines) {
		HashMap<String, Location> map = new HashMap<String, Location>();
		for (String line : lines) {
			String[] fields = line.split("\t", FIELD_COUNT);
			if (fields.length < FIELD_COUNT) {
				continue;
			}
			Location lc = new Location();
			lc.setNum(fields[0].trim());
			lc.setProvince(fields[1].trim());
			lc.setCity(fields[2].trim());
			lc.setOperator(fields[3].trim());
			map.put(lc.getNum(), lc);
		}
		return map;
	}

	/**
	 * Starts the periodic background load. Calling it more than once has no
	 * further effect.
	 */
	public static synchronized void init() {
		if (scheduled) {
			return;
		}
		scheduled = true;
		final MobileUtil mobileUtil = new MobileUtil();
		mobileUtil.initial();
	}

	/**
	 * Returns a new instance after the first load has completed.
	 *
	 * @return a new {@code MobileUtil}
	 */
	public static MobileUtil create() {
		final MobileUtil mobileUtil = new MobileUtil();
		awaitStarted();
		return mobileUtil;
	}

	/**
	 * Blocks until {@link #started} is {@code true}, sleeping between checks
	 * instead of spinning. An interrupt received while waiting does not end the
	 * wait; the thread's interrupt status is restored before returning.
	 */
	private static void awaitStarted() {
		boolean interrupted = false;
		while (!started) {
			try {
				Thread.sleep(WAIT_POLL_MILLIS);
			} catch (InterruptedException e) {
				interrupted = true;
			}
		}
		if (interrupted) {
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Demo entry point: starts the loader, looks up one number and prints its
	 * city and operator.
	 */
	public static void main(String args[]) throws IOException {
		init();
		try {
			System.out.println(getCity("13811014978"));
			System.out.println(getOperator("13811014978"));
		} finally {
			// The refresh thread is not a daemon; stop it so the JVM can exit.
			timer.shutdownNow();
		}
	}

	/** One record of the data file: number key, province, city and operator. */
	static class Location {

		private String num;
		private String province;
		private String city;
		private String operator;

		public Location() {
		}

		public String getNum() {
			return num;
		}

		public void setNum(String num) {
			this.num = num;
		}

		public String getProvince() {
			return province;
		}

		public void setProvince(String province) {
			this.province = province;
		}

		public String getCity() {
			return city;
		}

		public void setCity(String city) {
			this.city = city;
		}

		public String getOperator() {
			return operator;
		}

		public void setOperator(String operator) {
			this.operator = operator;
		}
	}

	/**
	 * Looks up the record for a mobile number. The number is validated first,
	 * then the call blocks until the first load has completed.
	 *
	 * @param mobile 11-digit mobile number starting with 1 (surrounding whitespace allowed)
	 * @return the matching record, or {@code null} if the prefix is unknown
	 * @throws IllegalArgumentException if {@code mobile} is null or malformed
	 */
	public static Location getLocation(String mobile) {
		String prefix = getNum7(mobile);
		awaitStarted();
		return locationMap.get(prefix);
	}

	/**
	 * Returns the city for a mobile number.
	 *
	 * @param mobile 11-digit mobile number starting with 1
	 * @return the city, or {@code null} if the prefix is unknown
	 * @throws IllegalArgumentException if {@code mobile} is null or malformed
	 */
	public static String getCity(String mobile) {
		// A single lookup: the map may be replaced between two separate gets.
		Location lc = getLocation(mobile);
		return lc == null ? null : lc.getCity();
	}

	/**
	 * Returns the province for a mobile number.
	 *
	 * @param mobile 11-digit mobile number starting with 1
	 * @return the province, or {@code null} if the prefix is unknown
	 * @throws IllegalArgumentException if {@code mobile} is null or malformed
	 */
	public static String getProvince(String mobile) {
		Location lc = getLocation(mobile);
		return lc == null ? null : lc.getProvince();
	}

	/**
	 * Returns the operator for a mobile number.
	 *
	 * @param mobile 11-digit mobile number starting with 1
	 * @return the operator, or {@code null} if the prefix is unknown
	 * @throws IllegalArgumentException if {@code mobile} is null or malformed
	 */
	public static String getOperator(String mobile) {
		Location lc = getLocation(mobile);
		return lc == null ? null : lc.getOperator();
	}

	/**
	 * Validates a mobile number and returns its first seven digits.
	 *
	 * @param mobile number to validate; leading/trailing whitespace is ignored
	 * @return the first seven digits of the trimmed number
	 * @throws IllegalArgumentException if the number is null, is not exactly
	 *             eleven digits, or does not start with 1
	 */
	private static String getNum7(String mobile) {
		String trimmed = mobile == null ? null : mobile.trim();
		if (trimmed == null || !MOBILE_PATTERN.matcher(trimmed).matches()) {
			throw new IllegalArgumentException("傳入的手機號碼" + trimmed
					+ "不正確,請使用正確的11位數字號碼");
		}
		return trimmed.substring(0, 7);
	}

	/**
	 * Sends a POST request to the given URI and returns the response body
	 * decoded as UTF-8, with every line terminated by "\r\n". Currently not
	 * called from within this class.
	 *
	 * @param uri an http/https URL
	 * @return the response body, or {@code null} if an I/O error occurred
	 */
	private static String getContentByUri(String uri) {
		HttpURLConnection httpConnection = null;
		try {
			URL url = new URL(uri);
			URLConnection urlconn = url.openConnection();
			httpConnection = (HttpURLConnection) urlconn;
			httpConnection.setConnectTimeout(1000000);
			httpConnection.setReadTimeout(1000000);
			httpConnection.setRequestProperty("User-Agent", "new");
			httpConnection.setRequestMethod("POST");
			InputStream in = httpConnection.getInputStream();
			try {
				BufferedReader br = new BufferedReader(new InputStreamReader(
						in, UTF8));
				StringBuilder body = new StringBuilder();
				String line;
				while ((line = br.readLine()) != null) {
					body.append(line).append("\r\n");
				}
				return body.toString();
			} finally {
				in.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		} finally {
			if (httpConnection != null) {
				httpConnection.disconnect();
			}
		}
	}

	/**
	 * Counts the non-overlapping matches of a regular expression in a string.
	 * Currently not called from within this class.
	 *
	 * @param des text to search
	 * @param reg regular expression
	 * @return number of matches found
	 */
	private static int getNumber(String des, String reg) {
		Matcher m = Pattern.compile(reg).matcher(des);
		int count = 0;
		while (m.find()) {
			count++;
		}
		return count;
	}

	/**
	 * Placeholder that always returns {@code null}. An earlier implementation
	 * based on Apache HttpClient (CloseableHttpClient / HttpGet) was commented
	 * out in this file; none of those classes are imported here.
	 *
	 * @param url ignored
	 * @return always {@code null}
	 */
	public static String getUrlContent(String url) {
		String value = null;
		return value;
	}

	/**
	 * Computes the CRC-32 of a string's UTF-8 bytes. Currently not called from
	 * within this class; the refresh task computes its checksum while reading.
	 *
	 * @param str text to checksum
	 * @return the CRC-32 value
	 */
	private static long crc32(String str) {
		CRC32 crc = new CRC32();
		crc.update(str.getBytes(UTF8));
		return crc.getValue();
	}
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章