要求:
從文本文件中讀取文本,文本文件名被作爲命令行參數傳遞,統計單詞不區分大小寫。
1. 以字典序顯示輸出,每個單詞後面緊跟它的出現次數;2. 統計並輸出出現頻率最高的單詞。
******************************************************************************************
WordCont類代碼如下:
package wordcont;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import wordcont.WordEntity;
/**
 * Counts the words of a text file, ignoring case, and reports the results on
 * standard output.
 *
 * <p>Both reports are derived from the same word count, so the most frequent
 * word always agrees with the per-word totals.
 */
public class WordCont {
    /** Regex for the gaps between words: whitespace plus the punctuation {@code , . !}. */
    private static final String WORD_DELIMITERS = "[\\s,.!]+";

    /**
     * Prints every distinct word of the file in dictionary order, one per
     * line, formatted as {@code word=count}.
     *
     * <p>If the file cannot be opened or read, a short message is printed
     * instead and nothing is thrown.
     *
     * @param fileName path of the text file to analyse
     */
    public void displayWordCount(String fileName) {
        try {
            Map<String, Integer> counts = countWords(fileName);
            System.out.println("按字典序輸出爲:");
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                System.out.println(entry.getKey() + "=" + entry.getValue());
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件讀異常~!");
        }
    }

    /**
     * Prints the word (or words, when several tie) with the highest number
     * of occurrences in the file. Tied words are listed in dictionary order.
     * For a file without any words only the heading is printed.
     *
     * <p>If the file cannot be opened or read, a short message is printed
     * instead and nothing is thrown.
     *
     * @param fileName path of the text file to analyse
     */
    public void displayFrequencyWord(String fileName) {
        try {
            Map<String, Integer> counts = countWords(fileName);
            System.out.println("出現頻率最高的單詞:");

            // First pass: find the highest count (stays 0 when there are no words).
            int highest = 0;
            for (Integer occurrences : counts.values()) {
                if (occurrences.intValue() > highest) {
                    highest = occurrences.intValue();
                }
            }

            // Second pass: report every word that reaches that count. The map is
            // sorted by key, so ties come out in dictionary order.
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                if (entry.getValue().intValue() == highest) {
                    System.out.println(entry.getKey() + " 出現的次數爲: " + entry.getValue());
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件讀異常~!");
        }
    }

    /**
     * Reads the file line by line and tallies each lower-cased word.
     *
     * @param fileName path of the text file to read
     * @return word counts keyed by word, sorted in dictionary order
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    private static Map<String, Integer> countWords(String fileName) throws IOException {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : line.toLowerCase().split(WORD_DELIMITERS)) {
                    // split() yields "" for blank lines and for lines that start
                    // with a delimiter; that is not a word.
                    if (token.isEmpty()) {
                        continue;
                    }
                    Integer previous = counts.get(token);
                    counts.put(token, previous == null ? 1 : previous + 1);
                }
            }
        } finally {
            // Always release the file handle, even when reading fails.
            reader.close();
        }
        return counts;
    }
}
WordEntity類代碼如下:
package wordcont;
import wordcont.WordEntity;
/**
 * A word paired with the number of times it occurs.
 *
 * <p>The natural ordering puts higher counts first and breaks ties by the
 * word itself in ascending order, so a sorted collection of entities starts
 * with the most frequent word. {@code equals} and {@code hashCode} agree with
 * that ordering.
 *
 * <p>An instance created with the no-argument constructor has a {@code null}
 * word and count; {@link #compareTo} must not be called on such an instance.
 */
public class WordEntity implements Comparable<WordEntity> {
    private String key;
    private Integer count;

    /**
     * Creates an entity for the given word and count.
     *
     * @param key the word
     * @param count how many times the word occurs
     */
    public WordEntity(String key, Integer count) {
        this.key = key;
        this.count = count;
    }

    /** Creates an entity whose word and count are both {@code null}. */
    public WordEntity() {
    }

    /**
     * Orders by count descending, then by word ascending.
     *
     * @param o the entity to compare with
     * @return a negative value if this entity sorts before {@code o}, zero if
     *         both have the same word and count, a positive value otherwise
     */
    @Override
    public int compareTo(WordEntity o) {
        // Comparing o against this (instead of this against o) gives the
        // descending order; swap the operands for ascending. Integer.compareTo
        // is used rather than subtraction, which can overflow.
        int byCount = o.count.compareTo(count);
        return byCount != 0 ? byCount : key.compareTo(o.key);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof WordEntity)) {
            return false;
        }
        WordEntity other = (WordEntity) obj;
        boolean sameKey = key == null ? other.key == null : key.equals(other.key);
        boolean sameCount = count == null ? other.count == null : count.equals(other.count);
        return sameKey && sameCount;
    }

    @Override
    public int hashCode() {
        int result = key == null ? 0 : key.hashCode();
        return 31 * result + (count == null ? 0 : count.hashCode());
    }

    @Override
    public String toString() {
        return key + " 出現的次數爲:" + count;
    }

    /** @return the word */
    public String getKey() {
        return key;
    }

    /** @return the number of occurrences */
    public Integer getCount() {
        return count;
    }
}
Main主函數代碼如下:
package wordcont;
import java.util.Scanner;
import wordcont.WordCont;
/**
 * Entry point: prints the dictionary-ordered word counts of a text file,
 * followed by its most frequent word(s).
 */
public class Main {
    /**
     * Runs both reports on one file.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             text file. Without arguments the path is read from standard
     *             input after a prompt.
     */
    public static void main(String[] args) {
        String fileName;
        if (args != null && args.length > 0) {
            // The file name is expected on the command line.
            fileName = args[0].trim();
        } else {
            // No argument given: fall back to asking for the path.
            System.out.println("輸入文件路徑:\n");
            Scanner in = new Scanner(System.in);
            if (!in.hasNextLine()) {
                // Standard input ended before a path was entered.
                System.out.println("未提供文件路徑");
                return;
            }
            fileName = in.nextLine().trim();
        }
        WordCont wc = new WordCont();
        wc.displayWordCount(fileName);
        wc.displayFrequencyWord(fileName);
    }
}
以上代碼僅是自己寫的一些小程序,可能存在一些冗餘的部分,沒有進行修改,如果需要可以自行拷貝和修改。