XML(可擴展標記語言)是一種用於標記電子文件、使其具有結構性的標記語言,可以用來標記數據、定義數據類型,是一種允許用戶對自己的標記語言進行定義的源語言,被廣泛地運用於數據傳輸和存儲。請編寫一段程序,不使用語言之外的開源庫,解析對應的XML文件,並格式化後在屏幕上打印出來。
舉個例子如下,當給定下述XML文件時:
<?xml version="1.0" ?>
<Books>
<Book>
<Name = "The C++ Programming Language" Author="Bjarne Stroustrup" />
</Book>
<Book>
<Name = "Effective C++" Author = "Scott Meyers" />
</Book>
</Books>
它對應的輸出應該是:
Books
	Book 1
		Name:The C++ Programming Language
		Author:Bjarne Stroustrup
	Book 2
		Name:Effective C++
		Author:Scott Meyers
輸入:簡化的一段xml文件,用字符串表示,如下(屬性名字不包含引號和等號,也不包含大於小於等特殊字符,詳細規則見下面的答題說明)
string in = "<?xml version=\"1.0\" ?><Books><Book><Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\" /></Book><Book><Name = \"Effective C++\" Author = \"Scott Meyers\" /></Book></Books>";
輸出:對輸入的xml字符串解析,得到輸出如下:
string out = "Books\r\n\tBook 1\r\n\t\tName:The C++ Programming Language\r\n\t\tAuthor:Bjarne Stroustrup\r\n\tBook 2\r\n\t\tName:Effective C++\r\n\t\tAuthor:Scott Meyers";
我的結果:
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Formats a simplified, two-level XML document as an indented text listing.
 *
 * <p>The supported shape is a root element that contains repeated child
 * elements of one tag name, each of which carries {@code name = "value"}
 * attribute pairs, for example:
 *
 * <pre>
 * &lt;?xml version="1.0" ?&gt;
 * &lt;Books&gt;&lt;Book&gt;&lt;Name = "Effective C++" Author = "Scott Meyers" /&gt;&lt;/Book&gt;&lt;/Books&gt;
 * </pre>
 *
 * <p>Only the JDK regular-expression classes are used; no XML library is involved.
 * Child elements whose tag differs from the first child's tag are not listed.
 */
public class XMLparse
{
    /** Line separator used between output lines. */
    private static final String CRLF = "\r\n";

    /** XML declaration such as {@code <?xml version="1.0" ?>}. */
    private static final Pattern HEADER =
            Pattern.compile("<\\?\\s*xml.*?\\?>", Pattern.DOTALL);

    /** A plain opening tag without attributes, e.g. {@code <Books>}; group 1 is the tag name. */
    private static final Pattern OPEN_TAG =
            Pattern.compile("<\\s*([^\\s<>/=\"?]+)\\s*>");

    /**
     * One attribute assignment {@code name = "value"}.
     * Group 1 is the name, group 2 is the value without the surrounding quotes.
     */
    private static final Pattern ATTRIBUTE =
            Pattern.compile("([^\\s\"=<>/]+)\\s*=\\s*\"([^\"]*)\"");

    /**
     * Parses the simplified XML text and renders it as an indented listing.
     *
     * <p>The first line is the root tag name. Every child element follows on its
     * own line, indented by one tab and suffixed with its 1-based position
     * ({@code Book 1}, {@code Book 2}, ...). Each attribute of a child follows,
     * indented by two tabs, as {@code name:value}. Lines are separated by
     * {@code \r\n}; there is no trailing separator.
     *
     * @param in the XML text; an optional {@code <?xml ... ?>} declaration is ignored
     * @return the formatted listing, or an empty string when {@code in} is
     *         {@code null}, empty, or contains no opening tag
     */
    public static String ParsingXML(String in)
    {
        if (in == null || in.length() == 0) {
            return "";
        }
        String xml = removeHeader(in);

        String rootTag = firstTagName(xml);
        if (rootTag.length() == 0) {
            return "";
        }
        StringBuilder out = new StringBuilder(rootTag);

        List<String> rootBodies = elementBodies(xml, rootTag);
        if (rootBodies.isEmpty()) {
            // Root tag is never closed: there is nothing reliable to list below it.
            return out.toString();
        }
        String rootBody = rootBodies.get(0);

        String childTag = firstTagName(rootBody);
        if (childTag.length() == 0) {
            return out.toString();
        }

        int position = 0;
        for (String childBody : elementBodies(rootBody, childTag)) {
            position++;
            out.append(CRLF).append('\t').append(childTag).append(' ').append(position);
            appendAttributes(out, childBody);
        }
        return out.toString();
    }

    /**
     * Removes every XML declaration from the text.
     *
     * @param xml the raw document text, not {@code null}
     * @return the text without declarations; the input itself when it has none
     */
    private static String removeHeader(String xml)
    {
        return HEADER.matcher(xml).replaceAll("");
    }

    /**
     * Finds the name of the first attribute-less opening tag.
     *
     * @param xml the text to scan, not {@code null}
     * @return the tag name, or an empty string when no such tag exists
     */
    private static String firstTagName(String xml)
    {
        Matcher m = OPEN_TAG.matcher(xml);
        return m.find() ? m.group(1) : "";
    }

    /**
     * Collects the inner text of every {@code <tag>...</tag>} element, in document order.
     *
     * <p>Matching is non-greedy, so elements nested inside an element of the same
     * name are not supported.
     *
     * @param xml the text to scan, not {@code null}
     * @param tag the element name; it is quoted, so regex metacharacters are taken literally
     * @return the inner texts, possibly empty
     */
    private static List<String> elementBodies(String xml, String tag)
    {
        String quoted = Pattern.quote(tag);
        Pattern element = Pattern.compile(
                "<\\s*" + quoted + "\\s*>(.*?)<\\s*/\\s*" + quoted + "\\s*>",
                Pattern.DOTALL);

        List<String> bodies = new ArrayList<String>();
        Matcher m = element.matcher(xml);
        while (m.find()) {
            bodies.add(m.group(1));
        }
        return bodies;
    }

    /**
     * Appends one {@code \r\n\t\tname:value} line per attribute found in the element text.
     *
     * <p>Name and value come from the regex capture groups, so whitespace around
     * {@code =} and the surrounding quotes are dropped, and a value may itself
     * contain {@code =}.
     *
     * @param out         the buffer receiving the lines
     * @param elementBody the inner text of one child element
     */
    private static void appendAttributes(StringBuilder out, String elementBody)
    {
        Matcher m = ATTRIBUTE.matcher(elementBody);
        while (m.find()) {
            out.append(CRLF).append("\t\t").append(m.group(1)).append(':').append(m.group(2));
        }
    }

    // The region between the start/end marker comments below is read by an
    // automatic grader and is therefore kept verbatim, including its layout.
//start 提示:自動閱卷起始唯一標識,請勿刪除或增加。
public static void main(String args[])
{
String in = "<?xml version=\"1.0\" ?><Books><Book><Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\" /></Book><Book><Name = \"Effective C++\" Author = \"Scott Meyers\" /></Book></Books>";
System.out.println(ParsingXML(in));
}
//end //提示:自動閱卷結束唯一標識,請勿刪除或增加。
}