package com.huaban.analysis.jieba.test;
import java.util.Iterator;
import java.util.List;
import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.SegToken;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
/**
 * Console demo for {@link JiebaSegmenter}: runs several sample sentences through
 * {@code process(..., SegMode.INDEX)} and prints the returned tokens to standard output.
 */
public class Test {

    /**
     * Program entry point; builds a {@code Test} instance and runs {@link #testDemo()}.
     *
     * @param args command-line arguments (not read)
     */
    public static void main(String[] args) {
        Test demo = new Test();
        demo.testDemo();
    }

    /**
     * Runs the demo in three steps:
     * <ol>
     *   <li>segments a batch of sentences and prints each token list via its {@code toString()};</li>
     *   <li>does the same for one single sentence;</li>
     *   <li>segments that sentence again and prints the words in a custom format: every word
     *       that is not exactly a single space, each followed by a comma.</li>
     * </ol>
     * If the last segmentation yields no tokens, the method returns without printing the
     * custom-format line.
     */
    public void testDemo() {
        JiebaSegmenter segmenter = new JiebaSegmenter();

        // Step 1: a batch of sentences, one printed token list per sentence.
        String[] sentences = {
            "這是一個伸手不見五指的黑夜。我叫孫悟空,我愛北京,我愛Python和C++。",
            "我不喜歡日本和服。",
            "雷猴迴歸人間。",
            "工信處女幹事每月經過下屬科室都要親口交代交換機等技術性器件的安裝工作",
            "結果婚的和尚未結過婚的"
        };
        for (int i = 0; i < sentences.length; i++) {
            List<SegToken> batchTokens = segmenter.process(sentences[i], SegMode.INDEX);
            System.out.println(batchTokens.toString());
        }

        // Step 2: a single sentence, printed the same way.
        String sentence = "這是一個伸手不見五指的黑夜。我叫孫悟空,我愛北京,我愛Python和C++。";
        List<SegToken> singleTokens = segmenter.process(sentence, SegMode.INDEX);
        System.out.println(singleTokens.toString());

        // Step 3: build a custom output format from the individual tokens.
        List<SegToken> resultList = segmenter.process(sentence, SegMode.INDEX);
        if (resultList.isEmpty()) {
            // Nothing to format, so no output line is produced.
            return;
        }

        StringBuilder joined = new StringBuilder();
        for (SegToken token : resultList) {
            String word = token.getWord();
            if (" ".equals(word)) {
                // Tokens consisting of exactly one space are left out of the output.
                continue;
            }
            // Each kept word is followed by a comma, including the last one.
            joined.append(word);
            joined.append(',');
        }
        System.out.println(joined.toString());
    }
}
// (PS: 原項目已打包上傳 — the original project has been packaged and uploaded.)