今天遇到一個需要探測文件字符集編碼的問題,查了一下資料,發現了一個探測器:jchardet(需要 chardet.jar)。
例子如下:
**
* 獲取文件的編碼
*
* @param file
* @param det
* @return
* @throws
FileNotFoundException
* @throws IOException
*
private String
geestFileEncoding(File file, nsDetector det)
throws
FileNotFoundException, IOException {
det.Init(new
nsICharsetDetectionObserver() {
public
void Notify(String charset) {
found
= true;
encoding
= charset;
}
});
BufferedInputStream
imp = new BufferedInputStream(new FileInputStream(
file));
byte[]
buf = new byte[1024];
int
len;
boolean done
= false;
boolean
isAscii = true;
while
((len = imp.read(buf, 0, buf.length)) != -1) {
if
(isAscii)
isAscii
= det.isAscii(buf, len);
if
(!isAscii && !done)
done
= det.DoIt(buf, len, false);
}
det.DataEnd();
if
(isAscii) {
encoding
= "ASCII";
found
= true;
}
if
(!found) {
String
prob[] = det.getProbableCharsets();
if
(prob.length > 0) {
encoding
= prob[0];
}
else {
return
null;
}
}
return
encoding;
}