要求:
从文本文件中读取文本,文本文件名被作为命令行参数传递,统计单词不区分大小写。
1. 以字典序显示输出,每个单词后面紧跟它的出现次数。
2. 统计并输出出现频率最高的单词。
******************************************************************************************
WordCont类代码如下:
package wordcont;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import wordcont.WordEntity;
/**
 * Counts the words of a text file and prints the results to standard output.
 *
 * <p>Words are compared case-insensitively (every line is lower-cased before it is
 * split) and are separated by whitespace or by one of the characters
 * {@code , . !}. Both public methods share the same counting routine so that they
 * always agree on what a word is.
 */
public class WordCont {

    /** Regular expression matching one run of word separators. */
    private static final String WORD_SEPARATORS = "[\\s,.!]+";

    /**
     * Prints every distinct word of the file in dictionary order, one per line, in
     * the form {@code word=count}.
     *
     * <p>If the file cannot be opened or read, the stack trace of the exception is
     * printed and nothing else is written.
     *
     * @param fileName path of the text file to analyse
     */
    public void displayWordCount(String fileName) {
        try {
            TreeMap<String, Integer> counts = countWords(fileName);
            System.out.println("按字典序输出为:");
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                System.out.println(entry);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the word (or words, when several are tied) that occurs most often in
     * the file, each followed by its number of occurrences. Tied words are printed
     * in dictionary order. For a file without any word only the heading is printed.
     *
     * <p>If the file cannot be opened or read, a short message is printed instead.
     *
     * @param fileName path of the text file to analyse
     */
    public void displayFrequencyWord(String fileName) {
        try {
            TreeMap<String, Integer> counts = countWords(fileName);
            System.out.println("出现频率最高的单词:");

            // First pass: find the highest count (stays 0 when there are no words).
            int highest = 0;
            for (Integer occurrences : counts.values()) {
                if (occurrences.intValue() > highest) {
                    highest = occurrences.intValue();
                }
            }

            // Second pass: the map is sorted by word, so ties come out in dictionary order.
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                if (entry.getValue().intValue() == highest) {
                    System.out.println(entry.getKey() + " 出现的次数为: " + entry.getValue());
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件读异常~!");
        }
    }

    /**
     * Reads the file line by line and counts how often each word occurs.
     *
     * @param fileName path of the text file to read
     * @return the lower-cased words mapped to their counts, sorted by word
     * @throws IOException if the file cannot be opened or read
     */
    private TreeMap<String, Integer> countWords(String fileName) throws IOException {
        TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // A fixed locale keeps the lower-casing independent of the default locale.
                String[] words = line.toLowerCase(java.util.Locale.ROOT).split(WORD_SEPARATORS);
                for (String word : words) {
                    // split() yields an empty first element for blank lines and for
                    // lines that start with a separator; that is not a word.
                    if (word.length() == 0) {
                        continue;
                    }
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Always release the file handle, even when reading fails.
            reader.close();
        }
        return counts;
    }
}
WordEntity类代码如下:
package wordcont;
import wordcont.WordEntity;
/**
 * A word together with the number of times it occurs.
 *
 * <p>The natural ordering places higher counts first; entries with the same count
 * are ordered by word. A sorted collection such as {@code TreeSet} therefore
 * iterates from the most frequent word to the least frequent one.
 *
 * <p>Instances created with the no-argument constructor have no word and no count;
 * {@link #compareTo(WordEntity)} throws {@code NullPointerException} for them.
 */
public class WordEntity implements Comparable<WordEntity> {

    private String key;
    private Integer count;

    /**
     * Creates an entry for the given word.
     *
     * @param key   the word
     * @param count how many times the word occurs
     */
    public WordEntity(String key, Integer count) {
        this.key = key;
        this.count = count;
    }

    /** Creates an entry whose word and count are both {@code null}. */
    public WordEntity() {
    }

    /**
     * Orders by count descending, then by word ascending.
     *
     * <p>The counts are compared with {@code Integer.compareTo} rather than by
     * subtracting them, because a difference (or its negation) can overflow and
     * report the wrong order.
     *
     * @param o the entry to compare with
     * @return a negative value if this entry sorts first, a positive value if it
     *         sorts last, {@code 0} if word and count are both equal
     */
    @Override
    public int compareTo(WordEntity o) {
        // Swapping the operands (o.count vs. count) is what makes the order descending.
        int byCount = o.count.compareTo(count);
        return byCount != 0 ? byCount : key.compareTo(o.key);
    }

    /**
     * Two entries are equal when both their word and their count are equal, which
     * keeps {@code equals} consistent with {@link #compareTo(WordEntity)}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof WordEntity)) {
            return false;
        }
        WordEntity other = (WordEntity) obj;
        boolean sameKey = key == null ? other.key == null : key.equals(other.key);
        boolean sameCount = count == null ? other.count == null : count.equals(other.count);
        return sameKey && sameCount;
    }

    @Override
    public int hashCode() {
        int result = key == null ? 0 : key.hashCode();
        return 31 * result + (count == null ? 0 : count.hashCode());
    }

    @Override
    public String toString() {
        return key + " 出现的次数为:" + count;
    }

    /** @return the word */
    public String getKey() {
        return key;
    }

    /** @return the number of occurrences */
    public Integer getCount() {
        return count;
    }
}
Main主函数代码如下:
package wordcont;
import java.util.Scanner;
import wordcont.WordCont;
/**
 * Entry point of the word-count program.
 */
public class Main {
    /**
     * Prints the per-word counts and the most frequent word(s) of a text file.
     *
     * <p>The file is taken from the first command-line argument. When no argument
     * (or only a blank one) is given, the program prompts for the path and reads
     * one line from standard input instead.
     *
     * @param args optional; {@code args[0]} is the path of the text file
     */
    public static void main(String[] args) {
        String fileName;
        if (args != null && args.length > 0 && args[0].trim().length() > 0) {
            fileName = args[0].trim();
        } else {
            // No usable argument: ask for the path interactively.
            System.out.println("输入文件路径:\n");
            Scanner in = new Scanner(System.in);
            fileName = in.nextLine().trim();
        }
        WordCont wc = new WordCont();
        wc.displayWordCount(fileName);
        wc.displayFrequencyWord(fileName);
    }
}
以上代码仅是自己写的一些小程序,可能存在一些冗余的部分,没有进行修改,如果需要可以自行拷贝和修改。