Java StAX解析XML

1. DOM提供了一個易於使用的API。與SAX和StAX相比,它的優勢在於支持XPath;不過,它也迫使將整個文檔讀入存儲器中,這對於小文檔來說沒什麼,但會影響大文檔的性能,而對於非常大的文檔來說,這根本不可行。

2. SAX通過作爲一種“推”機制的解析器來解決這個問題,也就是說,對於該解析器在文檔中遇到的每種結構,都會生成相應的事件,程序員可以選擇自己感興趣的事件進行處理。不足之處在於SAX通常生成的大量事件是程序員並不關心的。而且,SAX API不提供對文檔的迭代處理,只能從頭到尾一次性走完整個事件流。

 

3. 使用StAX方法解析XML

StAX(Streaming API for XML)是當前最有效的XML處理方法,因此特別適合於處理複雜流程,比如數據庫綁定和SOAP消息。StAX創建的信息集非常小,可以直接作爲垃圾收集的候選對象。這讓XML處理任務佔用較小的空間,使得它不僅適用於小型堆設備,比如移動電話,而且適用於長期運行的服務器端應用程序。

與SAX不同,StAX能夠對XML文檔進行寫操作,這減少了需要處理的API數量。

StAX提供兩種不同的解析數據模型:光標模型和迭代器模型。

Catalog.xml

<?xml version="1.0" encoding="UTF-8"?>
<catalog>
    <book sku="123_xaa">
        <title>King Lear</title>
        <author>William Shakespeare</author>
        <price>6.95</price>
        <category>classics</category>
    </book>
    <book sku="988_yty">
        <title>Hamlet</title>
        <author>William Shakespeare</author>
        <price>5.95</price>
        <category>classics</category>
    </book>
    <book sku="434_asd">
        <title>1984</title>
        <author>George Orwell</author>
        <price>12.95</price>
        <category>classics</category>
    </book>
    <book sku="876_pep">
        <title>Java Generics and Collections</title>
        <authors>
            <author>Maurice Naftalin</author>
            <author>Phillip Wadler</author>
        </authors>
        <price>34.99</price>
        <category>programming</category>
    </book>
</catalog>

 使用StAX光標模型:XMLStreamReader

import static java.lang.System.out;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.XMLEvent;
/**
 * StAX cursor-model example: pulls events from an {@link XMLStreamReader},
 * prints the {@code sku} attribute of every {@code book} element and collects
 * the distinct text values of all {@code author} elements.
 *
 * @author K
 */
public class StaxCursor {
	/** Classpath location of the catalog parsed by {@link #find()}. */
	private static final String db = "/ch02/Catalog.xml";

	// author names found so far; a TreeSet keeps them unique and sorted
	private final Set<String> uniqueAuthors;

	public static void main(String... args) {
		StaxCursor p = new StaxCursor();
		p.find();
	}

	/** Creates a parser with an empty set of collected authors. */
	public StaxCursor() {
		uniqueAuthors = new TreeSet<String>();
	}

	/**
	 * Parses the bundled catalog resource and prints the results to standard
	 * output. A missing resource is reported instead of being handed to the
	 * parser as {@code null}, and the stream is always closed.
	 */
	public void find() {
		final InputStream is = StaxCursor.class.getResourceAsStream(db);
		if (is == null) {
			out.println("Cannot parse: resource not found: " + db);
			return;
		}
		try {
			find(is);
		} finally {
			try {
				is.close();
			} catch (java.io.IOException ignored) {
				// the stream was only read from; a failed close loses nothing
			}
		}
	}

	/**
	 * Parses the XML document supplied by {@code in}, printing one line per
	 * {@code book} that carries a {@code sku} attribute followed by the set of
	 * unique authors collected by this instance. Parse failures are reported
	 * on standard output. The caller remains responsible for closing
	 * {@code in}.
	 *
	 * @param in the XML document to read; must not be {@code null}
	 * @throws NullPointerException if {@code in} is {@code null}
	 */
	public void find(InputStream in) {
		if (in == null) {
			throw new NullPointerException("in");
		}

		XMLInputFactory xif = XMLInputFactory.newInstance();
		// deliver each run of text as ONE CHARACTERS event; otherwise a name
		// containing an entity or CDATA section may arrive in several pieces
		xif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
		// this method accepts arbitrary streams, so do not process DTDs or
		// resolve external entities
		xif.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
		xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

		// forward-only, most efficient way to read
		XMLStreamReader reader = null;
		// local name of the element whose start tag was seen most recently
		String current = "";

		try {
			reader = xif.createXMLStreamReader(in);

			while (reader.hasNext()) {
				// the cursor API hands back an int token describing the event
				switch (reader.next()) {
				case XMLEvent.START_ELEMENT:
					// remember the element so the following text can be attributed to it
					current = reader.getLocalName();
					printSkus(current, reader);
					break;

				case XMLEvent.CHARACTERS:
					findAuthors(current, reader);
					break;

				case XMLEvent.END_ELEMENT:
					// text that follows a closing tag no longer belongs to that element
					current = "";
					break;

				default:
					break;
				}
			}
			out.println("Unique Authors=" + uniqueAuthors);

		} catch (XMLStreamException e) {
			out.println("Cannot parse: " + e);
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (XMLStreamException ignored) {
					// parsing is already finished; nothing to recover
				}
			}
		}
	}

	/**
	 * Prints the {@code sku} attribute of the current element when that
	 * element is a {@code book}. Books without a {@code sku} are skipped.
	 *
	 * @param current local name of the element the reader is positioned on
	 * @param r       reader positioned on a START_ELEMENT event
	 */
	private void printSkus(String current, XMLStreamReader r) {
		if ("book".equals(current)) {
			// look the attribute up by name: index 0 may not exist or may be another attribute
			String v = r.getAttributeValue(null, "sku");
			if (v != null) {
				out.println("AttribName sku=" + v);
			}
		}
	}

	/**
	 * Records the reader's current text as an author name when the enclosing
	 * element is {@code author}.
	 *
	 * @param current local name of the most recently opened element
	 * @param r       reader positioned on a CHARACTERS event
	 */
	private void findAuthors(String current, XMLStreamReader r)
			throws XMLStreamException {

		if ("author".equals(current)) {
			String v = r.getText().trim();

			// whitespace-only text carries no name, so ignore it
			if (v.length() > 0) {
				uniqueAuthors.add(v);
			}
		}
	}
}

 使用StAX迭代器模型:迭代器API比較靈活,而且易於擴展

import static java.lang.System.out;
import java.io.InputStream;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;

/**
 * StAX iterator-model example: pulls {@link XMLEvent} objects from an
 * {@link XMLEventReader} and prints the {@code sku} attribute of every start
 * element that has one.
 *
 * @author K
 */
public class StaxIterator {
	/** Classpath location of the catalog parsed by {@link #find()}. */
	private static final String db = "/ch02/Catalog.xml";

	/** Name of the attribute to report; built once instead of per element. */
	private static final QName SKU = new QName("sku");

	public static void main(String... args) {
		StaxIterator p = new StaxIterator();
		p.find();
	}

	/**
	 * Parses the bundled catalog resource and prints the results to standard
	 * output. A missing resource is reported instead of being handed to the
	 * parser as {@code null}, and the stream is always closed.
	 */
	public void find() {
		final InputStream is = StaxIterator.class.getResourceAsStream(db);
		if (is == null) {
			out.println("Cannot parse: resource not found: " + db);
			return;
		}
		try {
			find(is);
		} finally {
			try {
				is.close();
			} catch (java.io.IOException ignored) {
				// the stream was only read from; a failed close loses nothing
			}
		}
	}

	/**
	 * Parses the XML document supplied by {@code in} and prints the
	 * {@code sku} attribute of each start element that declares one. Parse
	 * failures are reported on standard output. The caller remains
	 * responsible for closing {@code in}.
	 *
	 * @param in the XML document to read; must not be {@code null}
	 * @throws NullPointerException if {@code in} is {@code null}
	 */
	public void find(InputStream in) {
		if (in == null) {
			throw new NullPointerException("in");
		}

		XMLInputFactory xif = XMLInputFactory.newInstance();
		// this method accepts arbitrary streams, so do not process DTDs or
		// resolve external entities
		xif.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
		xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

		// forward-only, most efficient way to read
		XMLEventReader reader = null;

		try {
			reader = xif.createXMLEventReader(in);

			while (reader.hasNext()) {
				XMLEvent event = reader.nextEvent();

				if (event.isStartElement()) {
					javax.xml.stream.events.Attribute sku =
							event.asStartElement().getAttributeByName(SKU);
					// getAttributeByName yields null when the element has no such attribute
					if (sku != null) {
						out.println(sku);
					}
				}
			}

		} catch (XMLStreamException e) {
			out.println("Cannot parse: " + e);
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (XMLStreamException ignored) {
					// parsing is already finished; nothing to recover
				}
			}
		}
	}
}

 使用StAX光標API編寫XML數據流

import static java.lang.System.out;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

/**
 * StAX cursor-API writing example: emits a small {@code book} document to
 * {@code result.xml} in the working directory using an
 * {@link XMLStreamWriter}.
 */
public class WriteStax {
	/** Factory property that lets the writer declare/repair namespaces itself. */
	private static final String REPAIR_NS = "javax.xml.stream.isRepairingNamespaces";

	/** Namespace URI used for the book vocabulary. */
	private static final String NS = "http://ns.example.com/books";

	/**
	 * Writes the sample document to {@code result.xml}. Failures are printed
	 * as stack traces; the writer and the file are closed in every case.
	 *
	 * @param args ignored
	 */
	public static void main(String... args) {
		XMLOutputFactory factory = XMLOutputFactory.newInstance();
		factory.setProperty(REPAIR_NS, Boolean.TRUE);

		FileOutputStream fos = null;
		XMLStreamWriter xsw = null;
		try {
			// setup a destination file
			fos = new FileOutputStream("result.xml");

			// create the writer
			xsw = factory.createXMLStreamWriter(fos);
			xsw.setDefaultNamespace(NS);

			// open the document. Can also add encoding, etc
			xsw.writeStartDocument("1.0");

			xsw.writeComment("Powered by StAX");

			// make enclosing book
			xsw.writeStartElement("book");
			xsw.writeNamespace("b", NS);
			xsw.writeAttribute("sku", "345_iui");

			// make title child element
			xsw.writeStartElement(NS, "title");
			xsw.writeCharacters("White Noise");
			xsw.writeEndElement(); // close title

			xsw.writeEndElement(); // close book

			// end the document only after all of its content has been written
			xsw.writeEndDocument();
			xsw.flush();

			out.print("All done.");
		} catch (FileNotFoundException fnfe) {
			fnfe.printStackTrace();
		} catch (XMLStreamException xse) {
			xse.printStackTrace();
		} finally {
			// close the writer before the stream it writes to, even after a failure
			if (xsw != null) {
				try {
					xsw.close();
				} catch (XMLStreamException xse) {
					xse.printStackTrace();
				}
			}
			if (fos != null) {
				try {
					fos.close();
				} catch (IOException ioe) {
					ioe.printStackTrace();
				}
			}
		}
	}
}

 該API非常靈活,允許按照不同程度的規範化和合法性來編寫XML。可以快速、清晰地生成這樣的XML片段:適合於傳輸到SOAP主體的有效載荷中或其他任何希望粘貼某種標記的地方。

一般來說,在兩種模式中進行抉擇時,如果希望能夠修改事件流和採用更靈活的API,就選擇迭代器API。如果希望得到更快的運行性能和更小的空間佔用,就使用光標API。

 

使用過濾器來提高應用程序的性能和清晰度,方法是指示解析器只提供我們所感興趣的事件,使光標模式解析更有效率。實現StreamFilter接口的accept方法,然後使用它構造XMLStreamReader。當使用XMLEventReader時,要做的所有事情就是實現EventFilter接口的accept方法。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章