日誌分析---勉勵自己

1. 在實習公司寫了 300 行日誌分析的代碼,不得不說真是太醜了,有不少問題,確實還差得很遠!
<pre code_snippet_id="1752919" snippet_file_name="blog_20160708_3_6683349" name="code" class="java">package meachine_learning;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small command-line log analysis tool.
 *
 * <p>It scans two log files for occurrences of {@code "token":"<value>"} and prints:
 * the number of distinct tokens overall, the number of distinct tokens per day,
 * the number of distinct tokens per week (from a start day read from standard input),
 * and the raw (non-deduplicated) number of token occurrences per day.
 *
 * <p>A line is attributed to a day when it has at least five characters and the
 * characters at index 1..4 end in a digit; those four characters are used as the day key.
 */
public class countOfToken {

	/** Matches one {@code "token":"..."} occurrence whose value is lowercase letters and digits. */
	private static final Pattern TOKEN_PATTERN = Pattern.compile("\"token\":\"[a-z0-9]*\"");

	/** Year prefix combined with the four-character day keys when printing and parsing dates. */
	private static final String YEAR = "2016";

	/** Log file used as the first input when no path is given on the command line. */
	private static final String DEFAULT_PATH_1 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract1.log";

	/** Log file used as the second input when no second path is given on the command line. */
	private static final String DEFAULT_PATH_2 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract2.log";

	/**
	 * Runs the whole analysis and prints every report to standard output.
	 *
	 * @param args optional; {@code args[0]} and {@code args[1]} override the two default log paths
	 * @throws IOException if a log file or standard input cannot be read
	 * @throws ParseException if a day key cannot be parsed as a date
	 */
	public static void main(String[] args) throws IOException, ParseException {
		String path1 = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH_1;
		String path2 = (args != null && args.length > 1) ? args[1] : DEFAULT_PATH_2;

		Set<String> set1 = getCount(path1);
		Set<String> set2 = getCount(path2);
		Set<String> finallySet = Union(set1, set2);
		System.out.println("token去重後統計的結果總數爲:" + finallySet.size());

		System.out.println("第一個文件去重後的結果:<day,set<String>>");
		HashMap<String, Set<String>> map1 = getCountByDay(path1);

		System.out.println("第二個文件去重後的結果:<day,set<String>>");
		HashMap<String, Set<String>> map2 = getCountByDay(path2);

		// Merge the per-day distinct token sets of both files, then order them by day key.
		HashMap<String, Set<String>> res = HashMapUnion(map1, map2);
		List<Map.Entry<String, Set<String>>> resInformation = sortedByKey(res);

		System.out.println("去重後統計結果個數打印:");
		for (Map.Entry<String, Set<String>> entry : resInformation) {
			System.out.println("時間:" + YEAR + entry.getKey() + ":數量" + entry.getValue().size());
		}

		System.out.println("以下是給定該年起始時間再按周統計的結果打印,起始時間爲\"0524\":");
		System.out.println("請輸入起始時間,比如:0524表示5月24日");
		// System.in is not owned by this method, so the reader wrapping it is not closed.
		BufferedReader readTime = new BufferedReader(new InputStreamReader(System.in));
		String startTime = readTime.readLine();
		printByWeek(resInformation, startTime);

		System.out.println("以下是對於每天不去重相關結果的統計:");
		System.out.println("統計第一個文件不去重token後的結果:<day,Integer>");
		HashMap<String, Integer> mapA = getCountByDay2(path1);
		System.out.println("統計第二個文件不去重token後的結果:<day,Integer>");
		HashMap<String, Integer> mapB = getCountByDay2(path2);

		// Merge the per-day occurrence counts of both files by adding them up.
		HashMap<String, Integer> ans = HashMapUnionByNumber(mapA, mapB);
		List<Map.Entry<String, Integer>> information = sortedByKey(ans);

		System.out.println("不重複最終結果打印:");
		for (Map.Entry<String, Integer> entry : information) {
			System.out.println("時間:" + YEAR + entry.getKey() + ":數量" + entry.getValue());
		}
	}

	/**
	 * Prints the number of distinct tokens per week of the year.
	 *
	 * <p>Entries are skipped until one whose key equals {@code startTime} is seen; that entry and
	 * every later one are grouped by the week number produced by {@code SimpleDateFormat("w")},
	 * which follows the default locale's week rules. The sets passed in are not modified.
	 *
	 * @param infoIds per-day token sets, expected in ascending day order
	 * @param startTime day key (for example {@code "0524"}) at which counting starts
	 * @throws IOException never thrown here; kept so existing callers keep compiling
	 * @throws ParseException if a day key combined with the year is not a valid {@code yyyyMMdd} date
	 */
	public static void printByWeek(List<Entry<String, Set<String>>> infoIds, String startTime) throws IOException, ParseException {
		SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMdd");
		SimpleDateFormat weekFormatter = new SimpleDateFormat("w");
		Map<Integer, Set<String>> tokensByWeek = new HashMap<Integer, Set<String>>();
		boolean started = false;
		for (Entry<String, Set<String>> day : infoIds) {
			if (day.getKey().equals(startTime)) {
				started = true;
			}
			if (!started) {
				continue;
			}
			Date date = dateFormatter.parse(YEAR + day.getKey());
			Integer weekNum = Integer.valueOf(weekFormatter.format(date));
			Set<String> weekTokens = tokensByWeek.get(weekNum);
			if (weekTokens == null) {
				// Accumulate into a fresh set so the caller's per-day sets stay untouched.
				weekTokens = new HashSet<String>();
				tokensByWeek.put(weekNum, weekTokens);
			}
			weekTokens.addAll(day.getValue());
		}

		// Report the start day that was actually requested, not a fixed date.
		System.out.println("這裏是從" + YEAR + startTime + "開始統計的結果:");
		List<Integer> weekNums = new ArrayList<Integer>(tokensByWeek.keySet());
		Collections.sort(weekNums);
		for (Integer weekNum : weekNums) {
			System.out.println("這是該年第" + weekNum + "周的統計結果:" + tokensByWeek.get(weekNum).size());
		}
	}

	/**
	 * Collects the distinct {@code "token":"..."} matches found anywhere in a file.
	 *
	 * @param path path of the log file to scan
	 * @return the distinct matched strings (the full match, including the {@code "token":} prefix)
	 * @throws IOException if the file cannot be opened or read
	 */
	public static Set<String> getCount(String path) throws IOException {
		Set<String> tokens = new HashSet<String>();
		System.out.println("以行爲單位讀取文件內容,一次讀一整行:");
		BufferedReader reader = openReader(path);
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				Matcher m = TOKEN_PATTERN.matcher(line);
				while (m.find()) {
					tokens.add(m.group(0));
				}
			}
		} finally {
			// Always release the file handle, including when reading fails.
			reader.close();
		}
		// Only reached when the whole file was read without error.
		System.out.println("success!");
		return tokens;
	}

	/**
	 * Returns the union of two sets without modifying either argument.
	 *
	 * @param setA first set
	 * @param setB second set
	 * @return a new {@link HashSet} containing every element of both sets
	 */
	public static Set<String> Union(Set<String>setA,Set<String>setB) {
		Set<String> union = new HashSet<String>(setB);
		union.addAll(setA);
		return union;
	}

	/**
	 * Merges two day-to-tokens maps; for a day present in both, the token sets are united.
	 * Neither input map nor any of its sets is modified.
	 *
	 * @param setA first per-day map
	 * @param setB second per-day map
	 * @return a new map holding independent copies of the merged sets
	 */
	public static HashMap<String,Set<String>> HashMapUnion(HashMap<String,Set<String>>setA,HashMap<String,Set<String>>setB) {
		HashMap<String, Set<String>> merged = new HashMap<String, Set<String>>();
		mergeSets(merged, setA);
		mergeSets(merged, setB);
		return merged;
	}

	/**
	 * Collects the distinct tokens per day found in a file.
	 *
	 * <p>A day that appears on at least one qualifying line gets an entry even when no token
	 * was matched on any of its lines.
	 *
	 * @param path path of the log file to scan
	 * @return map from four-character day key to the distinct token matches of that day
	 * @throws IOException if the file cannot be opened or read
	 */
	public static HashMap<String, Set<String>> getCountByDay(String path) throws IOException {
		HashMap<String, Set<String>> tokensByDay = new HashMap<String, Set<String>>();
		System.out.println("以行爲單位讀取文件內容,一次讀一整行:");
		BufferedReader reader = openReader(path);
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				String day = extractDay(line);
				if (day == null) {
					continue;
				}
				Set<String> tokens = tokensByDay.get(day);
				if (tokens == null) {
					tokens = new HashSet<String>();
					tokensByDay.put(day, tokens);
				}
				Matcher m = TOKEN_PATTERN.matcher(line);
				while (m.find()) {
					tokens.add(m.group(0));
				}
			}
		} finally {
			reader.close();
		}
		System.out.println("success!");
		return tokensByDay;
	}

	/**
	 * Counts token occurrences per day in a file without deduplication.
	 *
	 * <p>A day that appears on at least one qualifying line gets an entry even when its count is 0.
	 *
	 * @param path path of the log file to scan
	 * @return map from four-character day key to the number of token matches on that day's lines
	 * @throws IOException if the file cannot be opened or read
	 */
	public static HashMap<String, Integer> getCountByDay2(String path) throws IOException {
		HashMap<String, Integer> countsByDay = new HashMap<String, Integer>();
		System.out.println("以行爲單位讀取文件內容,一次讀一整行:");
		BufferedReader reader = openReader(path);
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				String day = extractDay(line);
				if (day == null) {
					continue;
				}
				Integer soFar = countsByDay.get(day);
				int count = (soFar == null) ? 0 : soFar.intValue();
				Matcher m = TOKEN_PATTERN.matcher(line);
				while (m.find()) {
					count++;
				}
				countsByDay.put(day, Integer.valueOf(count));
			}
		} finally {
			reader.close();
		}
		System.out.println("success!");
		return countsByDay;
	}

	/**
	 * Merges two day-to-count maps; for a day present in both, the counts are added.
	 *
	 * @param mapA first per-day count map
	 * @param mapB second per-day count map
	 * @return a new map with the summed counts
	 */
	public static HashMap<String,Integer> HashMapUnionByNumber(HashMap<String,Integer> mapA,HashMap<String,Integer> mapB) {
		HashMap<String, Integer> totals = new HashMap<String, Integer>();
		addCounts(totals, mapA);
		addCounts(totals, mapB);
		return totals;
	}

	/** Opens a buffered reader on the file at {@code path}, decoding with the platform default charset. */
	private static BufferedReader openReader(String path) throws IOException {
		return new BufferedReader(new InputStreamReader(new FileInputStream(new File(path))));
	}

	/** Returns characters 1..4 of the line as the day key, or {@code null} if the line does not qualify. */
	private static String extractDay(String line) {
		if (line.length() < 5) {
			return null;
		}
		String day = line.substring(1, 5);
		char last = day.charAt(day.length() - 1);
		return (last >= '0' && last <= '9') ? day : null;
	}

	/** Adds every set of {@code source} into {@code target}, copying sets so {@code source} is never aliased. */
	private static void mergeSets(Map<String, Set<String>> target, Map<String, Set<String>> source) {
		for (Map.Entry<String, Set<String>> entry : source.entrySet()) {
			Set<String> existing = target.get(entry.getKey());
			if (existing == null) {
				target.put(entry.getKey(), new HashSet<String>(entry.getValue()));
			} else {
				existing.addAll(entry.getValue());
			}
		}
	}

	/** Adds every count of {@code source} onto the matching key of {@code target}. */
	private static void addCounts(Map<String, Integer> target, Map<String, Integer> source) {
		for (Map.Entry<String, Integer> entry : source.entrySet()) {
			Integer soFar = target.get(entry.getKey());
			int base = (soFar == null) ? 0 : soFar.intValue();
			target.put(entry.getKey(), Integer.valueOf(base + entry.getValue().intValue()));
		}
	}

	/** Returns the entries of {@code map} as a list sorted by ascending key. */
	private static <V> List<Map.Entry<String, V>> sortedByKey(Map<String, V> map) {
		List<Map.Entry<String, V>> entries = new ArrayList<Map.Entry<String, V>>(map.entrySet());
		Collections.sort(entries, new Comparator<Map.Entry<String, V>>() {
			@Override
			public int compare(Map.Entry<String, V> o1, Map.Entry<String, V> o2) {
				return o1.getKey().compareTo(o2.getKey());
			}
		});
		return entries;
	}
}
</pre>
南無大慈大悲觀世音菩薩


發佈了39 篇原創文章 · 獲贊 3 · 訪問量 2萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章