libxml2使用指南

一、Libxml2介紹：

Libxml2 是一個xml的c語言版的解析器，本來是爲Gnome項目開發的工具，是一個基於MIT License的免費開源軟件。它除了支持c語言版以外，還支持c++、PHP、Pascal、Ruby、Tcl等語言的綁定，能在Windows、Linux、Solaris、MacOsX等平臺上運行。功能還是相當強大的，相信滿足一般用戶需求沒有任何問題。

二、Libxml2安裝：

一般如果在安裝系統的時候選中了所有開發庫和開發工具的話(Fedora Core系列下)，應該不用安裝，下面介紹一下手動安裝：
    1) 從xmlsoft站點或ftp(ftp.xmlsoft.org)站點下載libxml壓縮包(libxml2-xxxx.tar.gz)
    2) 對壓縮包進行解壓縮：tar xvzf libxml2-xxxx.tar.gz
    3) 進入解壓縮後的文件夾中運行：A）./configure B）make C）make install

4）編譯：$gcc test.c -o test -I /usr/local/include/libxml2/ -L /usr/local/lib -lxml2

安裝完成後就可以使用簡單的代碼解析XML文件，包括本地和遠程的文件，但是在編碼上有一些問題。Libxml默認只支持UTF-8編碼，無論輸入輸出都是UTF-8，所以解析一個XML得到的結果都是UTF-8的。如果需要輸出GB2312或者其它編碼，需要用iconv來做轉碼(生成UTF-8編碼的文件也可以用它做)；如果系統中沒有安裝iconv的話，需要安裝libiconv。

1) 下載libiconv壓縮包(例如libiconv-1.11.tar.gz)

2) 對壓縮包進行解壓縮：tar xvzf libiconv-1.11.tar.gz

3) 進入解壓縮後的文件夾中運行：A）./configure B）make C）make install
三、關於XML：

在開始研究 Libxml2 庫之前，先了解一下XML的相關基礎。XML 是一種基於文本的格式，它可用來創建能夠通過各種語言和平臺訪問的結構化數據。它包括一系列類似 HTML 的標記，並以樹型結構來對這些標記進行排列。

例如，可參見清單 1 中介紹的簡單文檔。爲了更清楚地顯示 XML 的一般概念，下面是一個簡化的XML文件。

清單 1. 一個簡單的 XML 文件

<?xml version="1.0" encoding="UTF-8"?>

<files>

<action>delete</action>

</files>

清單 1 中的第一行是 XML 聲明，它告訴負責處理 XML 的應用程序，即解析器，將要處理的 XML 的版本。大部分的文件使用版本 1.0 編寫，但也有少量的版本 1.1 的文件。它還定義了所使用的編碼。大部分文件使用 UTF-8，但是，XML 設計用來集成各種語言中的數據，包括那些不使用英語字母的語言。

接下來出現的是元素。一個元素以開始標記開始(如 <files>)，並以結束標記結束(如 </files>)，其中使用斜線 (/) 來區別於開始標記。元素是 Node 的一種類型。XML 文檔對象模型 (DOM) 定義了幾種不同的 Nodes 類型，包括:

Elements(如 files 或者 action)

Attributes(如後文示例1生成的文檔中 node3 元素的 attribute 屬性)

Text(如 delete)

元素可以具有子節點。例如，action 元素有一個子節點，即文本節點 delete。

XML 解析器可以利用這種父子結構來遍歷文檔，甚至修改文檔的結構或內容。LibXML2 是這樣的解析器中的其中一種，並且文中的示例應用程序正是使用這種結構來實現該目的。對於各種不同的環境，有許多不同的解析器和庫。LibXML2 是用於 UNIX 環境的解析器和庫中最好的一種，並且經過擴展，它提供了對幾種腳本語言的支持，如 Perl 和 Python。

四、使用Libxml2

項目中要實現一個管理XML文件的後臺程序，需要對XML文件進行創建，解析，修改，查找等操作，下面介紹如何利用libxml2提供的庫來實現上述功能。

1、創建XML文檔：

我們使用xmlNewDoc()來創建XML文檔，然後使用

xmlNewNode(),xmlNewChild(),xmlNewProp(),xmlNewText()等函數向XML文件中添加節點及子節點，設置元素和屬性，創建完畢後用xmlSaveFormatFileEnc()來保存XML文件到磁盤(該函數可以設置保存XML文件時的編碼格式)。

示例1：

#include <stdio.h>

#include <libxml/parser.h>

#include <libxml/tree.h>

/*
 * Example 1: build a small XML document in memory and save it as UTF-8.
 *
 * The output target is argv[1] when given, otherwise "-" (the document is
 * then dumped to standard output instead of a file).
 *
 * Returns 0 on success, 1 when the document could not be created or saved.
 */
int main(int argc, char **argv)
{
    xmlDocPtr doc = NULL;
    xmlNodePtr root_node = NULL, node = NULL, node1 = NULL;
    int status = 0;

    /* Create a new document and a node, and install the node as its root. */
    doc = xmlNewDoc(BAD_CAST "1.0");
    if (doc == NULL) {
        fprintf(stderr, "failed to create the XML document\n");
        return 1;
    }

    root_node = xmlNewNode(NULL, BAD_CAST "root");
    if (root_node == NULL) {
        fprintf(stderr, "failed to create the root node\n");
        xmlFreeDoc(doc);
        return 1;
    }
    xmlDocSetRootElement(doc, root_node);

    /* xmlNewChild() creates a node that is attached as a child of root_node. */
    xmlNewChild(root_node, NULL, BAD_CAST "node1", BAD_CAST "content of node1");

    /* xmlNewProp() creates an attribute that is attached to a node. */
    node = xmlNewChild(root_node, NULL, BAD_CAST "node3", BAD_CAST "node has attributes");
    if (node != NULL) {
        xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "yes");
    }

    /* Another way to create nodes: build element and text separately, then link them. */
    node = xmlNewNode(NULL, BAD_CAST "node4");
    node1 = xmlNewText(BAD_CAST "other way to create content");
    xmlAddChild(node, node1);
    xmlAddChild(root_node, node);

    /* Dump the document to stdio or to a file; a negative result means the save failed. */
    if (xmlSaveFormatFileEnc(argc > 1 ? argv[1] : "-", doc, "UTF-8", 1) < 0) {
        fprintf(stderr, "failed to save the XML document\n");
        status = 1;
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    xmlMemoryDump(); /* debug memory for regression tests */

    return status;
}

2、解析XML文檔

解析文檔時僅僅需要文件名並只調用一個函數，並有錯誤檢查，常用的相關函數有 xmlParseFile()、xmlParseDoc()、xmlReadFile()(例2中使用)。獲取文檔指針後，就可以使用xmlDocGetRootElement()來獲取根元素節點指針，利用該指針就可以在DOM樹裏漫遊了，結束後要調用xmlFreeDoc()釋放。

例2：

xmlDocPtr doc; //定義解析文檔指針

xmlNodePtr cur; //定義結點指針(你需要它爲了在各個結點間移動)

xmlChar *key;

doc = xmlReadFile(url, MY_ENCODING, 256); //解析文件

if (doc == NULL ) {

fprintf(stderr,"Document not parsed successfully. \n");

return;

}

cur = xmlDocGetRootElement(doc); //確定文檔根元素

if (cur == NULL) {

fprintf(stderr,"empty document\n");

xmlFreeDoc(doc);

return;

}

if (xmlStrcmp(cur->name, (const xmlChar *) "root")) {

fprintf(stderr,"document of the wrong type, root node != root");

xmlFreeDoc(doc);

return;

}

cur = cur->xmlChildrenNode;

while(cur!=NULL) {

if ((!xmlStrcmp(cur->name, (const xmlChar *)"keyword"))) {

key = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);

printf("keyword: %s\n", key);

xmlFree(key);

}

cur = cur->next;

}

xmlFreeDoc(doc);

3、修改XML元素及屬性等信息

要修改XML文檔裏的元素及屬性等信息，先需要解析XML文檔，獲得一個節點指針(xmlNodePtr node),利用該節點指針漫遊DOM樹，就可以在XML文檔中獲取，修改，添加相關信息。

示例3：

得到一個節點的內容：

xmlChar *value = xmlNodeGetContent(node);

返回值value應該使用xmlFree(value)釋放內存

得到一個節點的某屬性值：

xmlChar *value = xmlGetProp(node, (const xmlChar *)"prop1");

返回值需要xmlFree(value)釋放內存

設置一個節點的內容：

xmlNodeSetContent(node, (const xmlChar *)"test");

設置一個節點的某屬性值：

xmlSetProp(node, (const xmlChar *)"prop1", (const xmlChar *)"v1");

添加一個節點元素：

xmlNewTextChild(node, NULL, (const xmlChar *)"keyword", (const xmlChar *)"test Element");

添加一個節點屬性：

xmlNewProp(node, (const xmlChar *)"prop1", (const xmlChar *)"test Prop");

4、查找XML節點

有時候對一個XML文檔我們可能只關心其中某一個或某幾個特定的Element的值或其屬性，如果漫遊DOM樹將是很痛苦也很無聊的事，利用XPath可以非常方便地得到你想的Element。下面是一個自定義函數：

示例4：

/*
 * Evaluate the XPath expression `xpath` against `doc`.
 *
 * Returns the evaluation result when it contains at least one node; the
 * caller must release it with xmlXPathFreeObject().  Returns NULL (after
 * printing a diagnostic) when the context cannot be created, when the
 * evaluation fails, or when the resulting node set is empty.
 */
xmlXPathObjectPtr get_nodeset(xmlDocPtr doc, const xmlChar *xpath)
{
    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("context is NULL\n");
        return NULL;
    }

    result = xmlXPathEvalExpression(xpath, context);
    /* The context is only needed for the evaluation itself. */
    xmlXPathFreeContext(context);

    if (result == NULL) {
        printf("xmlXPathExpression return NULL\n");
        return NULL;
    }

    if (xmlXPathNodeSetIsEmpty(result->nodesetval)) {
        xmlXPathFreeObject(result);
        printf("nodeset is empty\n");
        return NULL;
    }

    return result;
}

在doc指向的XML文檔中查詢滿足xpath表達式條件的節點，返回滿足這一條件的節點集合。查詢條件xpath的寫法參見xpath相關資料。在查詢完畢獲取結果集後，就可以通過返回的 xmlXPathObjectPtr 結構訪問該節點：

示例5：

xmlChar *xpath = ("/root/node/[@key='keyword']");

xmlXPathObjectPtr app_result = get_nodeset(doc,xpath);

if (app_result == NULL) {

printf("app_result is NULL\n");

return;

}

int i = 0;

xmlChar *value;

if(app_result) {

xmlNodeSetPtr nodeset = app_result->nodesetval;

for (i=0; i < nodeset->nodeNr; i++) {

cur = nodeset->nodeTab[i];

cur = cur->xmlChildrenNode;

while(cur!=NULL) {

value = xmlGetProp(cur,(const xmlChar *)"key");

if (value != NULL) {

printf("value: %s\n\n", d_ConvertCharset("utf-8", "GBK", (char *)value));

xmlFree(value);

}

value = xmlNodeGetContent(cur);

if (value != NULL) {

printf("value: %s\n\n", d_ConvertCharset("utf-8", "GBK", (char *)value));

xmlFree(value);

}}}

xmlXPathFreeObject (app_result);

}

通過get_nodeset()返回的結果集，我們可以獲取該節點的元素及屬性，也可以修改該節點的值。示例中在獲取值打印的時候用到 d_ConvertCharset()函數來改變編碼格式爲GBK，以方便正確讀取可能的中文字符。

5、編碼問題

由於Libxml一般以UTF-8格式保存和操縱數據，如果你的程序使用其它的數據格式，比如中文字符(GB2312，GBK編碼)，就必須使用Libxml函數轉換到UTF-8。如果你想你的程序以除UTF-8外的其它編碼方式輸出也必須做轉換。

下面的示例程序提供幾個函數來實現對數據編碼格式的轉換，其中有的要用到Libiconv，因此爲了確保它們能正常工作，先檢查一下系統中是否已經安裝libiconv庫。

示例6：

xmlChar *ConvertInput(const char *in, const char *encoding) {

unsigned char *out;

int ret;

int size;

int out_size;

int temp;

xmlCharEncodingHandlerPtr handler;

if (in == 0)

return 0;

handler = xmlFindCharEncodingHandler(encoding);

if (!handler) {

printf("ConvertInput: no encoding handler found for '%s'\n", encoding ? encoding : "");

return 0;

}

size = (int) strlen(in) + 1;

out_size = size * 2 - 1;

out = (unsigned char *) xmlMalloc((size_t) out_size);

if (out != 0) {

temp = size - 1;

ret = handler->input(out, &out_size, (const unsigned char *) in, &temp);

if ((ret < 0) || (temp - size + 1)) {

if (ret < 0) {

printf("ConvertInput: conversion wasn't successful.\n");

} else {

printf("ConvertInput:conversion wasn't successful. converted: %i octets.\n", temp);

}

xmlFree(out);

out = 0;

} else {

out = (unsigned char *) xmlRealloc(out, out_size + 1);

out[out_size] = 0;

}

} else {printf("ConvertInput: no mem\n");}

return out;

}

示例7：

/*
 * Convert the NUL-terminated string `in` from encoding `encFrom` to encoding
 * `encTo` with iconv.
 *
 * Returns a pointer to an internal static 1024-byte buffer holding the
 * converted, NUL-terminated text, or NULL when `in` is NULL, the conversion
 * descriptor cannot be opened, or the conversion fails (including output
 * that does not fit the buffer).  The buffer is overwritten by the next
 * call, so the function is not reentrant and the result must not be freed.
 */
char * Convert( char *encFrom, char *encTo, const char * in) {
    static char bufout[1024];
    char *src;
    char *dst;
    size_t lenin;
    size_t lenout;
    size_t ret;
    iconv_t c_pt;

    if (in == NULL) {
        return NULL;
    }

    if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1) {
        printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
        return NULL;
    }

    /* Put the descriptor into its initial conversion state. */
    iconv(c_pt, NULL, NULL, NULL, NULL);

    /* The +1 converts the terminating NUL too, so the output is terminated. */
    lenin = strlen(in) + 1;
    lenout = sizeof bufout;
    src = (char *)in; /* iconv() takes a non-const input pointer */
    dst = bufout;

    ret = iconv(c_pt, &src, &lenin, &dst, &lenout);

    /* Release the descriptor on both the success and the failure path. */
    iconv_close(c_pt);

    if (ret == (size_t)-1) {
        return NULL;
    }
    return bufout;
}

示例8：

/*
 * Convert the NUL-terminated string `cpInput` from encoding `cpEncodeFrom`
 * to encoding `cpEncodeTo` with iconv.
 *
 * Returns a pointer to an internal static 1024-byte buffer holding the
 * converted, NUL-terminated text, or NULL when `cpInput` is NULL, the
 * conversion descriptor cannot be opened, or the conversion fails (including
 * output that does not fit the buffer).  The buffer is overwritten by the
 * next call, so the function is not reentrant and the result must not be
 * freed.
 */
char *d_ConvertCharset(char *cpEncodeFrom, char *cpEncodeTo, const char *cpInput) {
    static char s_strBufOut[1024];
    char *cpIn;
    char *cpOut;
    size_t iInputLen;
    size_t iOutLen;
    size_t iReturn;
    iconv_t c_pt;

    if (cpInput == NULL) {
        return NULL;
    }

    if ((c_pt = iconv_open(cpEncodeTo, cpEncodeFrom)) == (iconv_t)-1) {
        printf("iconv_open failed!\n");
        return NULL;
    }

    /* Put the descriptor into its initial conversion state. */
    iconv(c_pt, NULL, NULL, NULL, NULL);

    /* The +1 converts the terminating NUL too, so the output is terminated. */
    iInputLen = strlen(cpInput) + 1;
    iOutLen = sizeof s_strBufOut;
    cpIn = (char *)cpInput; /* iconv() takes a non-const input pointer */
    cpOut = s_strBufOut;

    iReturn = iconv(c_pt, &cpIn, &iInputLen, &cpOut, &iOutLen);

    /* Release the descriptor on both the success and the failure path. */
    iconv_close(c_pt);

    if (iReturn == (size_t)-1) {
        return NULL;
    }
    return s_strBufOut;
}

<?xml version="1.0" encoding="UTF-8"?>

<root>

<node1>content of node 1</node1>

<node3 attribute="yes">node has attributes</node3>

<node4>other way to create content</node4>

</root>

遍歷程序代碼

#include <iostream>

#include <libxml/parser.h>

#include <libxml/tree.h>

using namespace std;

/*
 * Walk the direct children of the root element of the XML file named by
 * argv[1] and print each child's name and text content.
 *
 * Returns 0 on success, -1 when no file name is given, 1 when the file
 * cannot be loaded, and 2 when the document has no root element.
 */
int main(int argc,char** argv)
{
    xmlDocPtr doc=NULL;
    xmlNodePtr cur=NULL;
    char* name=NULL;
    char* value=NULL;

    // Ignore whitespace between nodes so that the first child is a real element.
    xmlKeepBlanksDefault (0);

    if(argc<2)
    {
        cout<<"argc must be 2 or above."<<endl;
        return -1;
    }

    doc=xmlParseFile(argv[1]);// build the DOM tree
    if(doc==NULL)
    {
        cout<<"Loading xml file failed."<<endl;
        return 1;
    }

    cur=xmlDocGetRootElement(doc);// fetch the root node
    if(cur==NULL)
    {
        cout<<"empty file"<<endl;
        xmlFreeDoc(doc);
        return 2;
    }

    // Walk the tree: iterate over the root's direct children.
    for(cur=cur->xmlChildrenNode; cur!=NULL; cur=cur->next)
    {
        name=(char*)(cur->name);
        value=(char*)xmlNodeGetContent(cur);

        // Streaming a null char* is undefined, so substitute an empty string.
        cout<<"name is: "<<(name ? name : "")<<", value is: "<<(value ? value : "")<<endl;

        // The content string is allocated by libxml2 and must be released with xmlFree().
        xmlFree(value);
    }

    xmlFreeDoc(doc);// release the resources held for the parsed document
    xmlCleanupParser();
    return 0;
}

說明：

1. 當使用dom樹來解析xml文檔時，由於默認的方式是把節點間的空白當作第一個子節點，所以

爲了能和常說的第一個子節點相符，需調用xmlKeepBlanksDefault (0)函數來忽略這種空白。

2. 對於使用xmlChar* xmlNodeGetContent(xmlNodePtr cur)函數獲取節點內容後，必須調用xmlFree()來對所分配的內存進行釋放。

使用Xpath獲取特定結點的內容(使用的xml文檔見上面)：

#include <iostream>

#include <string>

using namespace std;

#include <libxml/tree.h>

#include <libxml/parser.h>

#include <libxml/xpath.h>

#include <libxml/xpathInternals.h>

/*
 * Evaluate the XPath expression "/root/node3" against the XML file named by
 * argv[1] and print the text of every matching node.
 *
 * Returns 0 on success, -1 when no file name is given or the file cannot be
 * parsed, -2 when the XPath context cannot be created, -3 when the
 * expression cannot be evaluated, and -4 when no node matches.
 */
int main(int argc,char** argv)
{
    xmlDocPtr doc;
    xmlXPathContextPtr xpathCtx;
    xmlXPathObjectPtr xpathObj;
    xmlNodeSetPtr nodeset;
    string xpathExpr;
    char* val=NULL;
    int size,i;

    if(argc<2)
    {
        cout<<"argc must be 2 or above."<<endl;
        return -1;
    }

    /* Load XML document */
    doc = xmlParseFile(argv[1]);
    if (doc == NULL)
    {
        cout<<"Error: unable to parse file: "<<argv[1]<<endl;
        return -1;
    }

    /* Create xpath evaluation context */
    xpathCtx = xmlXPathNewContext(doc);
    if(xpathCtx == NULL)
    {
        cout<<"Error: unable to create new XPath context"<<endl;
        /* There is no context to free here; only the document is released. */
        xmlFreeDoc(doc);
        return -2;
    }

    xpathExpr="/root/node3";

    /* Evaluate xpath expression */
    xpathObj = xmlXPathEvalExpression((const xmlChar*)(xpathExpr.c_str()), xpathCtx);
    if(xpathObj == NULL)
    {
        cout<<"Error: unable to evaluate xpath expression"<<xpathExpr<<endl;
        xmlXPathFreeContext(xpathCtx);
        xmlFreeDoc(doc);
        return -3;
    }

    /* Get the selected nodes; xmlXPathNodeSetIsEmpty() also covers a NULL set. */
    nodeset=xpathObj->nodesetval;
    if(xmlXPathNodeSetIsEmpty(nodeset))
    {
        cout<<"No such nodes."<<endl;
        xmlXPathFreeObject(xpathObj);
        xmlXPathFreeContext(xpathCtx);
        xmlFreeDoc(doc);
        return -4;
    }

    /* Print the text of each selected node. */
    size = nodeset->nodeNr;
    for(i = 0; i <size; i++)
    {
        val=(char*)xmlNodeListGetString(doc,nodeset->nodeTab[i]->xmlChildrenNode,1);

        // Streaming a null char* is undefined, so substitute an empty string.
        cout<<"the results are: "<<(val ? val : "")<<endl;
        xmlFree(val);
    }

    /* Cleanup of XPath data */
    xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);

    /* free the document */
    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 0;
}

u010437105

發佈了7 篇原創文章 · 獲贊 7 · 訪問量 4萬+

私信關注

libxml2使用指南

Android啓動過程-萬字長文(Android14)

這種嵌套字典類型的數據，我想把它讀取到df裏，如何操作？

微調真的能讓LLM學到新東西嗎:引入新知識可能讓模型產生更多的幻覺

iNeuOS工業互聯網操作系統，增加電力IEC104協議

微服務實踐k8s&dapr開發部署實驗（3）訂閱發佈

kbgressdb之數據結構V0.2

驅動工程師面試題彙編

Vim的IDE配置（詳細教程）

Android安全分析挑戰：運行時篡改Dalvik字節碼 .

Git的Patch功能

Dalvik opcodes

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結