統計文本文件中單詞出現頻率,自己編寫的Java小程序

要求:

從文本文件中讀取文本,文本文件名作爲命令行參數傳遞;統計單詞時不區分大小寫。

1.以字典序顯示輸出,每個單詞後面緊跟它的出現次數

2.統計並輸出出現頻率最高的單詞及其出現次數

******************************************************************************************

WordCont類代碼如下:

package wordcont;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import wordcont.WordEntity;

/**
 * Counts how often each word occurs in a text file and prints the results
 * to standard output.
 *
 * <p>Both public methods share one definition of a "word": the text is
 * lower-cased (counting is case-insensitive) and split on whitespace and on
 * the punctuation marks {@code , . !}. Empty tokens are never counted.
 */
public class WordCont {

	/** Regular expression matching a run of word separators: whitespace or one of , . ! */
	private static final String WORD_DELIMITERS = "[\\s,.!]+";

	/**
	 * Prints every distinct word of the file in dictionary order, one
	 * {@code word=count} pair per line, preceded by a header line.
	 *
	 * <p>If the file cannot be opened or read, a short message is printed
	 * instead and nothing is thrown.
	 *
	 * @param fileName path of the text file to analyse
	 */
	public void displayWordCount(String fileName){
		try {
			TreeMap<String,Integer> counts = countWords(fileName);

			System.out.println("按字典序輸出爲:");
			// TreeMap iterates in ascending key order, i.e. dictionary order.
			for (Map.Entry<String,Integer> entry : counts.entrySet()) {
				System.out.println(entry);
			}
		} catch (FileNotFoundException e) {
			System.out.println("文件未找到~!");
		} catch (IOException e) {
			System.out.println("文件讀異常~!");
		}
	}

	/**
	 * Prints the word (or words, on a tie) with the highest occurrence count,
	 * preceded by a header line. Tied words are listed in dictionary order.
	 * For a file that contains no words only the header is printed.
	 *
	 * <p>If the file cannot be opened or read, a short message is printed
	 * instead and nothing is thrown.
	 *
	 * @param fileName path of the text file to analyse
	 */
	public void displayFrequencyWord(String fileName){
		try {
			TreeMap<String,Integer> counts = countWords(fileName);

			// Single pass to find the highest count; stays 0 when there are no words.
			int highest = 0;
			for (Integer occurrences : counts.values()) {
				if (occurrences.intValue() > highest) {
					highest = occurrences.intValue();
				}
			}

			System.out.println("出現頻率最高的單詞:");
			for (Map.Entry<String,Integer> entry : counts.entrySet()) {
				if (entry.getValue().intValue() == highest) {
					System.out.println(entry.getKey() + " 出現的次數爲: " + entry.getValue());
				}
			}
		} catch (FileNotFoundException e) {
			System.out.println("文件未找到~!");
		} catch (IOException e) {
			System.out.println("文件讀異常~!");
		}
	}

	/**
	 * Reads the file line by line and tallies each word, case-insensitively.
	 *
	 * @param fileName path of the text file to read
	 * @return map from lower-cased word to its number of occurrences, sorted by word
	 * @throws IOException if the file cannot be opened or read
	 */
	private static TreeMap<String,Integer> countWords(String fileName) throws IOException {
		TreeMap<String,Integer> counts = new TreeMap<String,Integer>();
		BufferedReader reader = new BufferedReader(new FileReader(fileName));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				// Locale.ROOT keeps lower-casing independent of the platform locale.
				String[] tokens = line.toLowerCase(java.util.Locale.ROOT).split(WORD_DELIMITERS);
				for (String token : tokens) {
					// split() yields "" for blank lines and for lines that start with a delimiter.
					if (token.length() == 0) {
						continue;
					}
					Integer seen = counts.get(token);
					counts.put(token, seen == null ? 1 : seen + 1);
				}
			}
		} finally {
			// Always release the file handle, even when reading fails part-way.
			reader.close();
		}
		return counts;
	}

}
WordEntity類代碼如下:

package wordcont;

import wordcont.WordEntity;

/**
 * A word paired with the number of times it occurs.
 *
 * <p>The natural ordering sorts by count, highest first, and breaks ties by
 * the word itself in ascending order, so a sorted collection such as
 * {@code TreeSet<WordEntity>} lists the most frequent words first.
 */
public class WordEntity implements Comparable<WordEntity>{

	private String key;
	private Integer count;

	/**
	 * Creates an entity for the given word and occurrence count.
	 *
	 * @param key   the word
	 * @param count how many times the word occurs
	 */
	public WordEntity ( String key,Integer count) {
		this.key = key;
		this.count = count;
	}

	/**
	 * Creates an entity whose word and count are both {@code null}.
	 * Such an instance cannot be compared until it has values.
	 */
	public WordEntity(){

	}

	/**
	 * Orders by count descending, then by word ascending.
	 *
	 * @param o the entity to compare with
	 * @return a negative value if this entity sorts first, a positive value if
	 *         {@code o} sorts first, zero if word and count are both equal
	 */
	@Override
	public int compareTo(WordEntity o) {
		// Comparing o to this (not this to o) gives descending order by count;
		// swap the operands for ascending order. Integer.compareTo avoids the
		// overflow that subtracting the two counts can cause.
		int byCountDescending = o.count.compareTo(count);
		if (byCountDescending != 0) {
			return byCountDescending;
		}
		return key.compareTo(o.key);
	}

	@Override
	public String toString() {
		return key + " 出現的次數爲:" + count;
	}

	/** @return the word */
	public String getKey() {
		return key;
	}

	/** @return the occurrence count */
	public Integer getCount() {
		return count;
	}
}
Main主函數代碼如下:
package wordcont;

import java.util.Scanner;

import wordcont.WordCont;

/**
 * Command-line entry point: prints the per-word counts of a text file in
 * dictionary order, followed by its most frequent word(s).
 */
public class Main {

	/**
	 * Runs both reports on one file.
	 *
	 * @param args optional; when {@code args[0]} is a non-blank string it is
	 *             used as the path of the text file. Otherwise the path is
	 *             read interactively from standard input.
	 */
	public static void main(String[] args) {
		String fileName;
		if (args != null && args.length > 0 && args[0].trim().length() > 0) {
			// The file name is expected as a command-line argument.
			fileName = args[0].trim();
		} else {
			// No argument given: fall back to prompting for the path.
			System.out.println("輸入文件路徑:\n");
			// The Scanner is deliberately not closed, because closing it would close System.in.
			Scanner in = new Scanner(System.in);
			fileName = in.nextLine().trim();
		}

		WordCont wc = new WordCont();
		wc.displayWordCount(fileName);
		wc.displayFrequencyWord(fileName);
	}

}
以上代碼只是自己寫的小程序,可能存在一些冗餘的部分,尚未整理;如有需要,可以自行拷貝和修改。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章