引言
最近有個項目需要解析 XML 文件,獲取其中的節點內容,小編選擇了編碼簡單又高效的 dom4j 來完成。
1、xml內容
<?xml version="1.0" encoding="UTF-8"?>
<RecognizeResult>
<Speech Uri="/Sub/2019-12-03.3/file/5149-15892322607-20191202141010-rJKTcXfpB_datang.wav" Duration="252840">
<ResultCode>0</ResultCode>
<Confidence>100</Confidence>
<Subject Name="RecognizeText">
<Role Name="R0">
<EndPoint Count="44">
<Item Begin="13340" End="13450">
<Text>喂。 </Text>
<Time>13340,13450 </Time>
</Item>
<Item Begin="15860" End="16240">
<Text>喂。 </Text>
<Time>15860,16240 </Time>
</Item>
</EndPoint>
</Role>
<Role Name="R1">
<EndPoint Count="35">
<Item Begin="17990" End="20080">
<Text>哎 喂 是 王 斌 先生 是嗎 啊! </Text>
<Time>17990,18100 18100,18340 18340,18550 18550,18940 18940,19120 19120,19510 19510,19820 19860,20080 </Time>
</Item>
<Item Begin="20630" End="21190">
<Text>對 是啊! </Text>
<Time>20630,20860 20860,21190 </Time>
</Item>
</EndPoint>
</Role>
</Subject>
</Speech>
</RecognizeResult>
需求是:將其中的漢字按角色(R0、R1)解析出來,並且拼接成字符串,然後發送給消息隊列。
2、引入jar包
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
3、代碼實現
package com.zqf.zj.zhijian.service;
import com.zqf.common.utils.DateUtils;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author zhenghao
* @description: 解析xml
* @date 2019/12/3 18:33
*/
@Service
public class ParseXmlService {
private static Logger log = LoggerFactory.getLogger(ParseXmlService.class);
@Value("${base.file.path}")
private String baseFilePath;
//獲得文件 測試方法
/**
 * Test driver that feeds a fixed set of directories to {@link #parseXml(String)}.
 *
 * <p>Directory names are built as {@code baseFilePath + "2019-12-0<d>." + <n> + "/file/"}
 * for d = 1..4 and n = 0..23 (n is presumably the hour of day — confirm against the
 * directory layout). Each path is logged before it is parsed.
 */
public void xmlFile() {
    // Hard-coded month for this test run; the commented-out alternative in earlier
    // revisions was DateUtils.getToDayStartYMD().
    final String monthPrefix = "2019-12";
    int day = 1;
    while (day <= 4) {
        final String dayStamp = monthPrefix + "-0" + day;
        int bucket = 0;
        while (bucket < 24) {
            final String directory = baseFilePath + dayStamp + "." + bucket + "/file/";
            log.info("文件路徑" + directory);
            parseXml(directory);
            bucket++;
        }
        day++;
    }
}
/**
 * Scans a directory for recognition-result XML files and collects the text of every
 * {@code Item} grouped by role.
 *
 * <p>Every directory entry whose name ends in {@code .xml} is parsed with dom4j. For each
 * child of {@code Speech/Subject}, the {@code Text} of every element under its
 * {@code EndPoint} is appended to one of two lists: role {@code R0} goes to the first
 * list, any other role name goes to the second. Files whose {@code Duration} attribute
 * divided by 1000 is 45 or less are skipped.
 *
 * <p>A file that cannot be parsed, or that lacks the expected nodes, is logged and
 * skipped so that one bad file does not abort the rest of the directory.
 *
 * @param strFile path of the directory to scan; the method returns immediately when the
 *                path cannot be listed or contains no entries
 */
public void parseXml(String strFile) {
    long startMillis = System.currentTimeMillis();
    // NOTE(review): both lists are filled but never consumed in the code visible here;
    // presumably a later step (publishing to a message queue) uses them — confirm.
    List<String> R0List = new ArrayList<>();
    List<String> R1List = new ArrayList<>();

    File directory = new File(strFile);
    String[] fileNames = directory.list();
    if (fileNames == null || fileNames.length == 0) {
        return;
    }
    log.info("xml個數" + fileNames.length);

    for (String fileName : fileNames) {
        // endsWith, not contains: names such as "a.xml.bak" must not be parsed.
        if (!fileName.endsWith(".xml")) {
            continue;
        }
        // File(parent, child) works whether or not strFile ends with a separator.
        File xmlFile = new File(directory, fileName);
        try {
            collectRoleTexts(xmlFile, R0List, R1List);
        } catch (DocumentException e) {
            // Isolate the failure to this file and keep processing the others.
            log.error("Failed to parse XML file {}", xmlFile, e);
        }
    }
    log.info("parseXml finished in {} ms", System.currentTimeMillis() - startMillis);
}

/**
 * Parses one result file and appends each item's text to the list matching its role.
 *
 * @param xmlFile    the XML file to read
 * @param r0Texts    receives the text of items whose role name is {@code R0}
 * @param otherTexts receives the text of items of every other role
 * @throws DocumentException if dom4j cannot read or parse the file
 */
private void collectRoleTexts(File xmlFile, List<String> r0Texts, List<String> otherTexts)
        throws DocumentException {
    // NOTE(review): a default SAXReader does not disable DOCTYPE/external entities; if
    // these files can come from an untrusted source, harden the reader against XXE.
    SAXReader reader = new SAXReader();
    Document document = reader.read(xmlFile);

    Element speech = document.getRootElement().element("Speech");
    if (speech == null) {
        log.warn("Skipping {}: no Speech element", xmlFile);
        return;
    }

    String durationText = speech.attributeValue("Duration");
    if (durationText == null) {
        log.warn("Skipping {}: Speech has no Duration attribute", xmlFile);
        return;
    }
    int callSeconds;
    try {
        // Duration is presumably in milliseconds — confirm; the result is compared to 45.
        callSeconds = Integer.parseInt(durationText) / 1000;
    } catch (NumberFormatException e) {
        log.warn("Skipping {}: non-numeric Duration '{}'", xmlFile, durationText);
        return;
    }
    if (callSeconds <= 45) {
        return;
    }

    // element(name) returns the first matching child, or null when there is none.
    Element subject = speech.element("Subject");
    if (subject == null) {
        return;
    }

    for (Iterator<?> roles = subject.elementIterator(); roles.hasNext();) {
        Element role = (Element) roles.next();
        String roleName = role.attributeValue("Name");
        Element endPoint = role.element("EndPoint");
        if (roleName == null || endPoint == null) {
            log.warn("Skipping a role in {}: missing Name attribute or EndPoint element", xmlFile);
            continue;
        }
        List<String> target = "R0".equals(roleName) ? r0Texts : otherTexts;
        for (Iterator<?> items = endPoint.elementIterator(); items.hasNext();) {
            Element item = (Element) items.next();
            Element text = item.element("Text");
            if (text != null) {
                target.add(text.getStringValue());
            }
        }
    }
}
4、多種解析xml方式對比,請參考下面文章