用MASM實現讀UCS-2文件

關於UCS-2編碼就不多說了,Google一下就大把大把的了

直接上代碼

;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
; Target: 32-bit x86, MASM syntax, Win32 flat memory model.
; Default calling convention is stdcall; C runtime imports are declared
; with an explicit "C" language type in their prototypes.
.386
.model  flat, stdcall           ; memory model and language type must be comma-separated
option  casemap :none           ; identifiers are case-sensitive (required by windows.inc)

include  windows.inc
include  kernel32.inc
include  user32.inc

includelib  kernel32.lib
includelib  msvcrt.lib
includelib  user32.lib

; C runtime routines used by this program (cdecl, resolved from msvcrt.lib).
; The equivalent Win32 file/heap APIs could be used instead.
; Every argument is a 32-bit value; the names are listed in the trailing comments.

fopen   PROTO C :DWORD, :DWORD                      ; (filename, mode)
fread   PROTO C :DWORD, :DWORD, :DWORD, :DWORD      ; (buffer, nSize, count, fp)
fclose  PROTO C :DWORD                              ; (fp)
feof    PROTO C :DWORD                              ; (fp)
fseek   PROTO C :DWORD, :DWORD, :DWORD              ; (fp, lOffset, origin)
ftell   PROTO C :DWORD                              ; (fp)
calloc  PROTO C :DWORD, :DWORD                      ; (num, len)
free    PROTO C :DWORD                              ; (memblock)

.data

; Name of the input file, resolved relative to the current directory.
szTestFile        db  'test.txt', 0
; The file must be opened in BINARY mode: in text mode the C runtime
; translates CR/LF pairs and treats 1Ah as end-of-file, which corrupts
; 16-bit code units that happen to contain those byte values.
szMode            db  'rb', 0
; Messages shown in message boxes by _Test.
szOpenErr         db  'Failed to open file', 0
szNotUCS2         db  'File not  encoding by UCS-2', 0

.code

;-----------------------------------------------------------------------
; _ReadUCS2AsUTF8 - read 16-bit UCS-2 code units from an already opened
;                   stream and store them in buf encoded as UTF-8.
;
; ABI:   stdcall (callee cleans the stack), 32-bit flat model
; In:    fp    = C runtime FILE pointer, positioned at the first code unit
;                (i.e. just after the byte-order mark)
;        buf   = destination buffer
;        len   = size of buf in bytes; one byte is always kept in reserve
;                for the terminating zero
;        fIsBe = non-zero if the file is big endian, 0 if little endian
; Out:   eax   = number of UTF-8 bytes stored (terminator not counted);
;                0 if buf is NULL, len is 0 or fp is NULL
; Clobb: ecx, edx, flags (ebx and edi are saved and restored)
;
; Notes: buf is zero-filled first, so the result is always zero terminated.
;        A character is only emitted if its whole UTF-8 sequence fits, so
;        the output never ends in the middle of a multi-byte sequence.
;        Surrogate code units (0D800h-0DFFFh) are not paired up; UCS-2
;        input is not expected to contain them.
;-----------------------------------------------------------------------
_ReadUCS2AsUTF8    proc     uses ebx edi fp:dword , buf:dword , len:dword , fIsBe:byte
                   local    @word :word

        .if         buf == 0 || len == 0
                    xor     eax , eax
                    ret
        .endif

        ; Zero the whole buffer up front; this also provides the terminator.
        invoke     RtlZeroMemory, buf, len

        .if         fp == 0
                    xor     eax , eax
                    ret
        .endif

        mov         edi , buf               ; edi = write cursor
        mov         ebx , len
        dec         ebx                     ; ebx = bytes still free, terminator excluded
        cld                                 ; stosb must advance edi upwards

        .while     ebx != 0

                    ; Read one 16-bit code unit; stop at end of file, on a
                    ; read error, or on a trailing odd byte.
                    invoke     fread, addr  @word , sizeof word , 1, fp
                    .break     .if eax != 1

                    movzx       eax , @word
                    .if         fIsBe != 0          ; big endian: swap low and high byte
                                xchg     al , ah
                    .endif
                    mov         edx , eax           ; edx = code unit, kept across the shifts

                    .if         eax < 080h          ; U+0000..U+007F -> 0xxxxxxx
                                stosb
                                dec     ebx

                    .elseif     eax < 0800h         ; U+0080..U+07FF -> 110xxxxx 10xxxxxx
                                .break  .if ebx < 2
                                shr     eax , 6
                                or      al , 0C0h
                                stosb

                                mov     eax , edx
                                and     al , 03Fh
                                or      al , 080h
                                stosb

                                sub     ebx , 2

                    .else                           ; U+0800..U+FFFF -> 1110xxxx 10xxxxxx 10xxxxxx
                                .break  .if ebx < 3
                                shr     eax , 12
                                or      al , 0E0h
                                stosb

                                mov     eax , edx
                                shr     eax , 6
                                and     al , 03Fh
                                or      al , 080h
                                stosb

                                mov     eax , edx
                                and     al , 03Fh
                                or      al , 080h
                                stosb

                                sub     ebx , 3
                    .endif
        .endw

        mov         eax , edi
        sub         eax , buf               ; eax = bytes written
        ret
_ReadUCS2AsUTF8    endp

;-----------------------------------------------------------------------
; _Test - open szTestFile, check its byte-order mark, convert the UCS-2
;         content to UTF-8 with _ReadUCS2AsUTF8, then to UTF-16 and on to
;         the ANSI code page, and display the result in a message box.
;
; ABI:   stdcall, no arguments, no meaningful return value
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
_Test        proc
            local  @buffer
            local  @fp
            local  @len
            local  @mark[2]:byte
            local  @unicode

        ; Start with NULL heap pointers so the common exit path can free
        ; them unconditionally (free(NULL) is a no-op).
        mov         @buffer, 0
        mov         @unicode, 0

        invoke     fopen, addr  szTestFile, addr  szMode
        .if         !eax
                    invoke     MessageBox, 0, addr  szOpenErr, 0, 0
                    ret
        .endif

        mov         @fp, eax

        ; Determine the file length in bytes.
        invoke     fseek, @fp, 0, SEEK_END
        invoke     ftell, @fp
        cmp         eax , 2
        jl          _notUCS2                ; signed: too short for a BOM, or ftell returned -1

        ; Each 2-byte code unit expands to at most 3 UTF-8 bytes, so
        ; reserve (bytes / 2) * 3 plus one byte for the terminator.
        shr         eax , 1
        lea         eax , [eax + eax*2 + 1]
        mov         @len, eax

        invoke     fseek, @fp, 0, SEEK_SET
        invoke     calloc, @len, 1
        .if         !eax
                    jmp     _exit
        .endif
        mov         @buffer, eax

        ; Read the 2-byte byte-order mark. UCS-2 uses a two byte mark;
        ; other encodings (UCS-4, ...) use similar schemes. This check is
        ; still not a strict validation of the encoding.
        invoke     fread, addr  @mark, 2, 1, @fp
        .if         eax != 1
                    jmp     _notUCS2
        .endif

        movzx       eax , word ptr @mark    ; loaded little endian: first file byte ends up in al
        .if         eax == 0FFFEh           ; bytes FE FF -> Big Endian
                    invoke     _ReadUCS2AsUTF8, @fp, @buffer, @len, 1
        .elseif     eax == 0FEFFh           ; bytes FF FE -> Little Endian
                    invoke     _ReadUCS2AsUTF8, @fp, @buffer, @len, 0
        .else                               ; any other format
                    jmp     _notUCS2
        .endif

        ; UTF-8 -> UTF-16: first ask for the required length, then convert.
        invoke     MultiByteToWideChar, CP_UTF8, 0, @buffer, -1, 0, 0
        .if         !eax
                    jmp     _exit
        .endif
        mov         @len, eax
        invoke     calloc, @len, 2
        .if         !eax
                    jmp     _exit
        .endif
        mov         @unicode, eax
        invoke     MultiByteToWideChar, CP_UTF8, 0, @buffer, -1, @unicode, @len
        invoke     free, @buffer
        mov         @buffer, 0              ; prevent a double free on the exit path

        ; UTF-16 -> ANSI code page, again sizing the buffer first.
        invoke     WideCharToMultiByte, CP_ACP, 0, @unicode, -1, 0, 0, 0, 0
        .if         !eax
                    jmp     _exit
        .endif
        mov         @len, eax
        invoke     calloc, @len, 1
        .if         !eax
                    jmp     _exit
        .endif
        mov         @buffer, eax
        invoke     WideCharToMultiByte, CP_ACP, 0, @unicode, -1, @buffer, @len, 0, 0
        invoke     MessageBox, 0, @buffer, 0, 0
        jmp         _exit

_notUCS2:
        invoke     MessageBox, 0, addr  szNotUCS2, 0, 0

_exit:
        invoke     fclose, @fp
        invoke     free, @unicode
        invoke     free, @buffer

        ret
_Test        endp

; Program entry point: run the demo once, then end the process with
; exit code 0.
start:
        invoke      _Test
        invoke      ExitProcess, 0

end     start

;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

 

挺簡單的一個測試程序,新建一個test.txt放到程序根目錄下即可

可用Notepad++轉換成其它編碼多測試幾下

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章