Java讀取UTF-8文本文件時第一個字符多出一個問號的解決方法

1.創建工具類

import java.io.*;

/**
 * A {@link Reader} that looks for a Unicode byte-order mark (BOM) at the start
 * of a byte stream, skips it, and decodes the remaining bytes with the encoding
 * the BOM announces. When no BOM is present, the caller-supplied default
 * encoding is used and no bytes are skipped.
 *
 * <p>Recognised BOMs: UTF-32BE, UTF-32LE, UTF-8, UTF-16BE and UTF-16LE.
 * Detection is lazy: nothing is read from the stream until the first call to
 * {@link #read(char[], int, int)} (or an explicit {@link #init()}).
 */
public class UnicodeReader extends Reader {
  /** Raw byte stream, wrapped so bytes read while probing for a BOM can be pushed back. */
  PushbackInputStream internalIn;
  /** Decoding reader created by {@link #init()}; {@code null} until detection has run. */
  InputStreamReader   internalIn2 = null;
  /** Encoding used when no BOM is found; {@code null} selects the platform default. */
  String              defaultEnc;

  /** Length in bytes of the longest BOM that is recognised (UTF-32). */
  private static final int BOM_SIZE = 4;

  /**
   * Creates a reader over {@code in}.
   *
   * @param in         the byte stream to decode
   * @param defaultEnc name of the encoding to use when the stream has no BOM,
   *                   or {@code null} for the platform default
   */
  public UnicodeReader(InputStream in, String defaultEnc) {
     internalIn = new PushbackInputStream(in, BOM_SIZE);
     this.defaultEnc = defaultEnc;
  }

  /**
   * Returns the fallback encoding name given to the constructor.
   *
   * @return the default encoding name, possibly {@code null}
   */
  public String getDefaultEncoding() {
     return defaultEnc;
  }

  /**
   * Returns the encoding actually used for decoding.
   *
   * @return the name reported by the underlying {@link InputStreamReader}, or
   *         {@code null} if BOM detection has not run yet
   */
  public String getEncoding() {
     if (internalIn2 == null) return null;
     return internalIn2.getEncoding();
  }

  /**
   * Reads up to {@link #BOM_SIZE} bytes, determines the encoding from a leading
   * BOM if there is one, pushes back every byte that is not part of the BOM and
   * creates the decoding reader. Calling this method again is a no-op.
   *
   * @throws IOException if reading from the stream fails or the chosen
   *                     encoding is not supported
   */
  protected void init() throws IOException {
     if (internalIn2 != null) return;

     // A single read() may legally return fewer bytes than requested, so keep
     // reading until the probe buffer is full or the stream ends.
     byte[] bom = new byte[BOM_SIZE];
     int n = 0;
     while (n < BOM_SIZE) {
        int got = internalIn.read(bom, n, BOM_SIZE - n);
        if (got <= 0) {
           break;
        }
        n += got;
     }

     // The 4-byte UTF-32 marks must be tested before the 2-byte UTF-16 marks
     // because the UTF-32LE mark begins with the UTF-16LE mark.
     String encoding;
     int bomLength;
     if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
        encoding = "UTF-32BE";
        bomLength = 4;
     } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
        encoding = "UTF-32LE";
        bomLength = 4;
     } else if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
        encoding = "UTF-8";
        bomLength = 3;
     } else if (startsWith(bom, n, 0xFE, 0xFF)) {
        encoding = "UTF-16BE";
        bomLength = 2;
     } else if (startsWith(bom, n, 0xFF, 0xFE)) {
        encoding = "UTF-16LE";
        bomLength = 2;
     } else {
        // Unicode BOM mark not found, unread all bytes
        encoding = defaultEnc;
        bomLength = 0;
     }

     // Give back everything that was read past the BOM.
     int unread = n - bomLength;
     if (unread > 0) {
        internalIn.unread(bom, bomLength, unread);
     }

     // Use given encoding
     if (encoding == null) {
        internalIn2 = new InputStreamReader(internalIn);
     } else {
        internalIn2 = new InputStreamReader(internalIn, encoding);
     }
  }

  /**
   * Tells whether the first {@code len} valid bytes of {@code buf} begin with
   * the given signature. Bytes beyond {@code len} are never inspected, so the
   * zero-filled tail of a partially filled buffer cannot produce a match.
   */
  private static boolean startsWith(byte[] buf, int len, int... signature) {
     if (len < signature.length) {
        return false;
     }
     for (int i = 0; i < signature.length; i++) {
        if (buf[i] != (byte) signature[i]) {
           return false;
        }
     }
     return true;
  }

  /**
   * Closes the reader and the underlying stream. No BOM detection is triggered:
   * if nothing has been read yet, the byte stream is closed directly.
   *
   * @throws IOException if closing the stream fails
   */
  @Override
  public void close() throws IOException {
     if (internalIn2 != null) {
        internalIn2.close();
     } else {
        internalIn.close();
     }
  }

  /**
   * Reads decoded characters into {@code cbuf}, running BOM detection first if
   * it has not happened yet.
   *
   * @param cbuf destination buffer
   * @param off  offset in {@code cbuf} at which to start storing characters
   * @param len  maximum number of characters to read
   * @return the number of characters read, or -1 at end of stream
   * @throws IOException if an I/O error occurs
   */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
     init();
     return internalIn2.read(cbuf, off, len);
  }

}

2.使用工具類讀取文件

// Open the file through UnicodeReader; the platform default charset name is
// passed as the encoding to fall back on when the file carries no BOM.
FileInputStream rawIn = new FileInputStream(sqlFile);
String fallbackEncoding = Charset.defaultCharset().name();
BufferedReader br = new BufferedReader(new UnicodeReader(rawIn, fallbackEncoding));



3.會出現問號的寫法

  1. File f  = new File("./utf.txt");  
  2.         FileInputStream in = new FileInputStream(f);  
  3.         // 指定讀取文件時以UTF-8的格式讀取  
  4.         BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));  
  5.           
  6.         String line = br.readLine();  
  7.         while(line != null)  
  8.         {  
  9.             System.out.println(line);  
  10.             line = br.readLine();  
  11.         }  

只需編寫工具類,將new InputStreamReader(in, "UTF-8")替換成

new UnicodeReader(in, "UTF-8")就可以解決該問題(第二個參數是文件開頭沒有BOM時使用的預設編碼,也可以像第2步那樣傳入Charset.defaultCharset().name())。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章