// 在實習公司寫了 300 行日誌分析的代碼,不得不說真是太醜了,問題不少,確實還差得很遠!
package meachine_learning;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line log analysis: counts {@code "token":"..."} occurrences in two log files.
 *
 * <p>Three reports are printed: the number of distinct tokens over both files, the number of
 * distinct tokens per day (and, from a user-supplied start day, per week of the year), and the
 * raw (non-deduplicated) number of token occurrences per day.
 *
 * <p>A line's day key is taken from characters 1..4 of the line (presumably {@code MMdd}
 * following a one-character prefix; the year is hard-coded as 2016 wherever a full date is
 * needed). Lines shorter than five characters, or whose fourth key character is not an ASCII
 * digit, are ignored by the per-day methods.
 *
 * <p>None of the merge helpers modify their arguments; they always return fresh collections.
 */
public class countOfToken {

    /** Matches one {@code "token":"value"} pair; the value may contain only {@code [a-z0-9]}. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\"token\":\"[a-z0-9]*\"");

    /** Log file used when no first command-line argument is given. */
    private static final String DEFAULT_PATH_1 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract1.log";

    /** Log file used when no second command-line argument is given. */
    private static final String DEFAULT_PATH_2 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract2.log";

    /**
     * Runs all three reports.
     *
     * @param args optional: {@code args[0]} is the first log file, {@code args[1]} the second;
     *             missing arguments fall back to the built-in default paths
     * @throws IOException    if a log file or standard input cannot be read
     * @throws ParseException if a day key cannot be parsed as a date for the weekly report
     */
    public static void main(String[] args) throws IOException, ParseException {
        String path1 = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH_1;
        String path2 = (args != null && args.length > 1) ? args[1] : DEFAULT_PATH_2;

        // Report 1: distinct tokens over both files.
        Set<String> set1 = getCount(path1);
        Set<String> set2 = getCount(path2);
        Set<String> finallySet = Union(set1, set2);
        System.out.println("token去重後統計的結果總數爲:" + finallySet.size());

        // Report 2: distinct tokens per day, merged over both files and sorted by day.
        System.out.println("第一個文件去重後的結果:<day,set<String>>");
        HashMap<String, Set<String>> map1 = getCountByDay(path1);
        System.out.println("第二個文件去重後的結果:<day,set<String>>");
        HashMap<String, Set<String>> map2 = getCountByDay(path2);
        HashMap<String, Set<String>> res = HashMapUnion(map1, map2);
        List<Map.Entry<String, Set<String>>> resInformation = sortedByKey(res);
        System.out.println("去重後統計結果個數打印:");
        for (Map.Entry<String, Set<String>> day : resInformation) {
            System.out.println("時間:2016" + day.getKey() + ":數量" + day.getValue().size());
        }

        // Report 3: distinct tokens per week, starting at a day typed in by the user.
        System.out.println("以下是給定該年起始時間再按周統計的結果打印,起始時間爲\"0524\":");
        System.out.println("請輸入起始時間,比如:0524表示5月24日");
        // System.in is deliberately left open; closing the wrapper would close it process-wide.
        BufferedReader readTime = new BufferedReader(new InputStreamReader(System.in));
        String startTime = readTime.readLine();
        // readLine() yields null on end-of-input; stray whitespace would never match a day key.
        startTime = (startTime == null) ? "" : startTime.trim();
        printByWeek(resInformation, startTime);

        // Report 4: raw token occurrences per day, summed over both files.
        System.out.println("以下是對於每天不去重相關結果的統計:");
        System.out.println("統計第一個文件不去重token後的結果:<day,Integer>");
        HashMap<String, Integer> mapA = getCountByDay2(path1);
        System.out.println("統計第二個文件不去重token後的結果:<day,Integer>");
        HashMap<String, Integer> mapB = getCountByDay2(path2);
        HashMap<String, Integer> ans = HashMapUnionByNumber(mapA, mapB);
        List<Map.Entry<String, Integer>> information = sortedByKey(ans);
        System.out.println("不重複最終結果打印:");
        for (Map.Entry<String, Integer> day : information) {
            System.out.println("時間:2016" + day.getKey() + ":數量" + day.getValue());
        }
    }

    /**
     * Prints the number of distinct tokens per week of the year.
     *
     * <p>Entries are skipped until one whose key equals {@code startTime} is met; that entry and
     * every entry after it (in list order) are grouped by the week number that
     * {@link SimpleDateFormat} pattern {@code "w"} yields for {@code "2016" + key}. If no key
     * equals {@code startTime}, only the header line is printed. The sets inside
     * {@code infoIds} are not modified.
     *
     * @param infoIds   per-day token sets, expected to be ordered by day key
     * @param startTime day key ({@code MMdd}) at which the weekly statistics begin
     * @throws IOException    never thrown by this implementation; kept for caller compatibility
     * @throws ParseException if {@code "2016" + key} cannot be parsed with {@code yyyyMMdd}
     */
    public static void printByWeek(List<Entry<String, Set<String>>> infoIds, String startTime)
            throws IOException, ParseException {
        // Index = week-of-year number; 54 slots cover the largest possible week number.
        @SuppressWarnings("unchecked")
        Set<String>[] week = new Set[54];
        SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMdd");
        SimpleDateFormat weekFormatter = new SimpleDateFormat("w");

        boolean started = false;
        for (Entry<String, Set<String>> day : infoIds) {
            if (day.getKey().equals(startTime)) {
                started = true;
            }
            if (!started) {
                continue;
            }
            Date date = dateFormatter.parse("2016" + day.getKey());
            int weekNum = Integer.parseInt(weekFormatter.format(date));
            if (week[weekNum] == null) {
                // Copy, so later merges into this week never touch the caller's daily set.
                week[weekNum] = new HashSet<String>(day.getValue());
            } else {
                week[weekNum].addAll(day.getValue());
            }
        }

        System.out.println("這裏是從2016" + startTime + "開始統計的結果:");
        for (int i = 0; i < week.length; ++i) {
            if (week[i] != null) {
                System.out.println("這是該年第" + i + "周的統計結果:" + week[i].size());
            }
        }
    }

    /**
     * Collects the distinct token matches found anywhere in a file.
     *
     * @param path path of the log file
     * @return the set of distinct matches; each element is the full matched text,
     *         i.e. {@code "token":"value"} including the key and the quotes
     * @throws IOException if the file cannot be opened or read
     */
    public static Set<String> getCount(String path) throws IOException {
        Set<String> tokens = new HashSet<String>();
        BufferedReader reader = openLog(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = TOKEN_PATTERN.matcher(line);
                while (matcher.find()) {
                    tokens.add(matcher.group());
                }
            }
        } finally {
            reader.close();
        }
        System.out.println("success!");
        return tokens;
    }

    /**
     * Returns the union of two sets as a new set; neither argument is modified.
     *
     * @param setA first set
     * @param setB second set
     * @return a new set holding every element of {@code setA} and {@code setB}
     */
    public static Set<String> Union(Set<String> setA, Set<String> setB) {
        Set<String> merged = new HashSet<String>(setB);
        merged.addAll(setA);
        return merged;
    }

    /**
     * Merges two day-to-tokens maps; sets stored under the same day are united.
     *
     * <p>The result and all sets in it are new objects, so neither input map nor any of the
     * sets they hold is modified.
     *
     * @param setA first map (day key to token set)
     * @param setB second map (day key to token set)
     * @return a new map containing every day of both inputs
     */
    public static HashMap<String, Set<String>> HashMapUnion(HashMap<String, Set<String>> setA,
                                                            HashMap<String, Set<String>> setB) {
        HashMap<String, Set<String>> ans = new HashMap<String, Set<String>>();
        for (Map.Entry<String, Set<String>> entry : setA.entrySet()) {
            ans.put(entry.getKey(), new HashSet<String>(entry.getValue()));
        }
        for (Map.Entry<String, Set<String>> entry : setB.entrySet()) {
            Set<String> existing = ans.get(entry.getKey());
            if (existing == null) {
                ans.put(entry.getKey(), new HashSet<String>(entry.getValue()));
            } else {
                existing.addAll(entry.getValue());
            }
        }
        return ans;
    }

    /**
     * Collects the distinct token matches of a file, grouped by day key.
     *
     * <p>A day whose lines contain no token still gets an (empty) entry.
     *
     * @param path path of the log file
     * @return map from day key to the distinct matches seen on that day
     * @throws IOException if the file cannot be opened or read
     */
    public static HashMap<String, Set<String>> getCountByDay(String path) throws IOException {
        HashMap<String, Set<String>> map = new HashMap<String, Set<String>>();
        BufferedReader reader = openLog(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String day = dayKeyOf(line);
                if (day == null) {
                    continue;
                }
                Set<String> tokensOfDay = map.get(day);
                if (tokensOfDay == null) {
                    tokensOfDay = new HashSet<String>();
                    map.put(day, tokensOfDay);
                }
                Matcher matcher = TOKEN_PATTERN.matcher(line);
                while (matcher.find()) {
                    tokensOfDay.add(matcher.group());
                }
            }
        } finally {
            reader.close();
        }
        System.out.println("success!");
        return map;
    }

    /**
     * Counts token matches per day without deduplication.
     *
     * <p>A day whose lines contain no token still gets an entry with count 0.
     *
     * @param path path of the log file
     * @return map from day key to the number of matches seen on that day
     * @throws IOException if the file cannot be opened or read
     */
    public static HashMap<String, Integer> getCountByDay2(String path) throws IOException {
        HashMap<String, Integer> ansMap = new HashMap<String, Integer>();
        BufferedReader reader = openLog(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String day = dayKeyOf(line);
                if (day == null) {
                    continue;
                }
                Integer previous = ansMap.get(day);
                int count = (previous == null) ? 0 : previous.intValue();
                Matcher matcher = TOKEN_PATTERN.matcher(line);
                while (matcher.find()) {
                    ++count;
                }
                ansMap.put(day, Integer.valueOf(count));
            }
        } finally {
            reader.close();
        }
        System.out.println("success!");
        return ansMap;
    }

    /**
     * Merges two day-to-count maps; counts stored under the same day are added.
     *
     * @param mapA first map (day key to count)
     * @param mapB second map (day key to count)
     * @return a new map containing every day of both inputs; the inputs are not modified
     */
    public static HashMap<String, Integer> HashMapUnionByNumber(HashMap<String, Integer> mapA,
                                                                HashMap<String, Integer> mapB) {
        HashMap<String, Integer> map = new HashMap<String, Integer>(mapA);
        for (Map.Entry<String, Integer> entry : mapB.entrySet()) {
            String key = entry.getKey();
            if (!map.containsKey(key)) {
                map.put(key, entry.getValue());
            } else {
                int tempNum = map.get(key);
                map.put(key, tempNum + entry.getValue());
            }
        }
        return map;
    }

    /** Announces the read on standard output and opens the file with the platform charset. */
    private static BufferedReader openLog(String path) throws IOException {
        System.out.println("以行爲單位讀取文件內容,一次讀一整行:");
        return new BufferedReader(new InputStreamReader(new FileInputStream(path)));
    }

    /** Returns characters 1..4 of the line as its day key, or null if the line carries none. */
    private static String dayKeyOf(String line) {
        if (line.length() < 5) {
            return null;
        }
        String day = line.substring(1, 5);
        char last = day.charAt(day.length() - 1);
        // Only the last key character is validated, exactly as the format check always did.
        return (last >= '0' && last <= '9') ? day : null;
    }

    /** Returns the entries of the map as a new list sorted ascending by key. */
    private static <V> List<Map.Entry<String, V>> sortedByKey(Map<String, V> map) {
        List<Map.Entry<String, V>> entries = new ArrayList<Map.Entry<String, V>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, V>>() {
            @Override
            public int compare(Map.Entry<String, V> o1, Map.Entry<String, V> o2) {
                return o1.getKey().compareTo(o2.getKey());
            }
        });
        return entries;
    }
}
// 南無大慈大悲觀世音菩薩