1.ICUUC簡介
ICU4C是ICU在C/C++平臺下的版本。ICU(International Components for Unicode)是基於「IBM公共許可證」、與開源組織合作研究的、用於支持軟件國際化的開源項目。ICU4C提供了C/C++平臺強大的國際化開發能力,軟件開發者幾乎可以使用ICU4C解決任何國際化的問題:根據各地的風俗和語言習慣,實現對數字、貨幣、時間、日期和消息的格式化與解析,以及對字符串進行大小寫轉換、整理、搜索和排序等功能。值得一提的是,ICU4C提供了強大的BIDI算法,對阿拉伯語等BIDI語言提供了完善的支持。
2.安裝
2.1 在 http://www.icu-project.org/download/4.2.html 下載ICU4C庫,我下載的是 icu4c-49_1_2-src.tgz。
2.2 執行如下命令,安裝成功:
# Unpack the ICU4C source archive.
tar -zxvf icu4c-49_1_2-src.tgz
# Change into the source/ subdirectory of the unpacked tree, where configure is run.
cd icu/source
# Configure the build with default options.
./configure
# Build ICU4C.
make
# Install the build results (presumably under /usr/local, which may require root — confirm for your setup).
make install
1
2
3
4
5
3.代碼
3.1 myicu.h
#ifndef MYICU_H
#define MYICU_H
#include "unicode/utypes.h"
#include "unicode/ucsdet.h"
#include "unicode/ucnv.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
#define BUF_MAX 4096
/**
 * Detects the character encoding of a file with ICU's charset detector and
 * prints the file's contents converted to UTF-8.
 *
 * The object owns a C stdio stream and a heap-allocated charset name, both
 * released by the destructor, so copying is disabled.
 */
class MyIcu{
public:
    /**
     * @param filename Path of the file to inspect. Only the pointer is stored,
     *                 so the string must outlive this object.
     */
    MyIcu(const char* filename);

    /**
     * Detects the charset of the file and remembers its name.
     * @return false when the file cannot be opened or detection fails.
     */
    bool detectTextEncoding();

    /**
     * Converts the file to UTF-8 and prints the result to standard output.
     * @return false when the file cannot be opened.
     */
    bool convertoUtf8();

    /**
     * Thin wrapper around ICU's ucnv_convert().
     *
     * @param toConverterName   Name of the target charset, e.g. "UTF-8".
     * @param fromConverterName Name of the source charset.
     * @param target            Output buffer.
     * @param targetCapacity    Size of the output buffer in bytes.
     * @param source            Input bytes.
     * @param sourceLength      Number of input bytes.
     * @return The resulting UErrorCode as an int; U_ZERO_ERROR on success.
     */
    int convert(const char *toConverterName, const char *fromConverterName,
                char *target, int32_t targetCapacity, const char *source, int32_t sourceLength);

    ~MyIcu();

private:
    // Declared but never defined: copying would make two objects close the
    // same stream and free the same buffer.
    MyIcu(const MyIcu&);
    MyIcu& operator=(const MyIcu&);

    const char *m_filename;  // path of the file being processed (not owned)
    FILE *file;              // stdio stream closed by the destructor
    char *detected;          // name of the detected charset, released by the destructor
};
#endif //MYICU_H
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
3.2 myicu.cpp
#include “myicu.h”
const int BUFFSIZE=8192;
// Remembers the path of the file to process. The stream and the charset name
// start out as NULL so that the destructor and the `detected == NULL` check in
// convertoUtf8() never read an indeterminate pointer.
MyIcu::MyIcu(const char* filename)
    : m_filename(filename), file(NULL), detected(NULL) {
}
// Releases the stream and the charset-name buffer owned by the object.
MyIcu::~MyIcu(){
    // `file` is NULL when fopen() failed; passing NULL to fclose() is not
    // allowed, so only close a stream that was actually opened.
    if (file != NULL) {
        fclose(file);
        file = NULL;
    }
    // `detected` is released with delete[], so whatever assigns it must have
    // allocated it with new[]. delete[] on NULL is a no-op.
    delete [] detected;
    detected = NULL;
}
bool MyIcu::detectTextEncoding(){
UCharsetDetector* csd;
const UCharsetMatch **csm;
UErrorCode status = U_ZERO_ERROR;
char buffer[BUFFSIZE];
int inputLength,match, matchCount = 0;
file = fopen(m_filename, “rb”);
if (file == NULL) {
cout<<“open file error”<<endl;
return 0;
}
inputLength = (int32_t) fread(buffer, 1, BUFFSIZE, file);
csd = ucsdet_open(&status);
ucsdet_setText(csd, buffer,inputLength, &status);
csm = ucsdet_detectAll(csd, &matchCount, &status);
if(csm == NULL){
ucsdet_close(csd);
return 0;
}
detected = new char[128];
#if 0
for(match = 0; match < matchCount; match += 1) {
const char *name = ucsdet_getName(csm[match], &status);
const char *lang = ucsdet_getLanguage(csm[match], &status);
int32_t confidence = ucsdet_getConfidence(csm[match], &status);
if (lang == NULL || strlen(lang) == 0) {
lang = "**";
}
cout<<name <<"("<<lang<<")"<<confidence<<endl;
}
#endif
if(matchCount > 0)
{
detected = strdup(ucsdet_getName(csm[0], &status)); //分配了內存, 需要釋放
if(status != U_ZERO_ERROR)
return false;
}
cout<<"charset = "<<detected<<endl;
ucsdet_close(csd);
return 1;
}
bool MyIcu::convertoUtf8(){
file = fopen(m_filename, “rb”);
if(file == NULL)
{
cout<<“open file error”<<endl;
return 0;
}
int len = 0;
//char *detected;
char *buffer = new char[BUF_MAX];
char *target = new char[BUF_MAX * 2];
while(true)
{
memset(buffer, 0, BUF_MAX);
memset(target, 0, BUF_MAX * 2);
len = (int32_t)fread(buffer, sizeof(char), BUF_MAX, file);
if(detected == NULL)
{
if(!detectTextEncoding()) //編碼探測
break;
}
//轉換爲utf8字符編碼
if(convert("UTF-8", detected, target, BUF_MAX * 2, (const char*)buffer, len) != U_ZERO_ERROR)
{
cout<<"ucnv_convert error"<<endl;
break;
}
cout<<target<<endl;//打印出轉換的文件的字符串
if(len < BUF_MAX)
break;
}
delete [] buffer;
delete [] target;
return 1;
}
int MyIcu::convert(const char *toConverterName, const char *fromConverterName,
char *target, int32_t targetCapacity, const char *source, int32_t sourceLength){
UErrorCode error = U_ZERO_ERROR;
ucnv_convert(toConverterName, fromConverterName, target, targetCapacity,
source, sourceLength, &error);
return error;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
3.3 main.cpp
#include “myicu.h”
#include
#include
#define BUF_MAX 4096
int main(){
const char* filename = “123.txt”;
MyIcu myicu(filename);
//char* buff = new char[126];
bool flag = myicu.detectTextEncoding();
if(!flag){
std::cout<<“解析錯誤!”<<endl;
}
bool flag2 = myicu.convertoUtf8();
if(!flag2){
std::cout<<“轉換錯誤!”<<endl;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
4.編譯
# Compile main.cpp and myicu.cpp into the executable "target", linking against the icuuc and icui18n libraries.
g++ -o target main.cpp myicu.cpp -licuuc -licui18n
1
如果找不到icuuc和icui18n動態庫的話,執行如下命令:
vim /etc/ld.so.conf
1
將ICU動態庫所在的目錄(默認安裝時爲 /usr/local/lib)加進去,然後再執行
ldconfig
1
就行了。
你們可以試下自己準備的文件。
參考文檔:
http://icu-project.org/apiref/icu4c/index.html
————————————————
版權聲明:本文爲CSDN博主「扮豬吃餃子」的原創文章,遵循 CC 4.0 BY-SA 版權協議,轉載請附上原文出處鏈接及本聲明。
原文鏈接:https://blog.csdn.net/weixin_28712713/article/details/77894404