1   
2   
3   
4   
5   
6   
7   
8   package org.dom4j.io;
9   
10  import java.io.InputStream;
11  import java.io.Reader;
12  import java.util.Iterator;
13  
14  import javax.xml.namespace.QName;
15  import javax.xml.stream.XMLEventReader;
16  import javax.xml.stream.XMLInputFactory;
17  import javax.xml.stream.XMLStreamConstants;
18  import javax.xml.stream.XMLStreamException;
19  import javax.xml.stream.events.Attribute;
20  import javax.xml.stream.events.Characters;
21  import javax.xml.stream.events.Comment;
22  import javax.xml.stream.events.EndElement;
23  import javax.xml.stream.events.EntityReference;
24  import javax.xml.stream.events.Namespace;
25  import javax.xml.stream.events.ProcessingInstruction;
26  import javax.xml.stream.events.StartDocument;
27  import javax.xml.stream.events.StartElement;
28  import javax.xml.stream.events.XMLEvent;
29  
30  import org.dom4j.CharacterData;
31  import org.dom4j.Document;
32  import org.dom4j.DocumentFactory;
33  import org.dom4j.Element;
34  import org.dom4j.Entity;
35  import org.dom4j.Node;
36  
37  /***
38   * Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
39   * {@link XMLEventReader}.
40   * 
41   * @author Christian Niles
42   */
43  public class STAXEventReader {
44      /*** Reference to the DocumentFactory used to build DOM4J nodes. */
45      private DocumentFactory factory;
46  
47      /*** A StAX input factory, used to construct streams from IO streams. */
48      private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
49  
50      /***
51       * Constructs a default <code>STAXEventReader</code> instance with a
52       * default {@link DocumentFactory}.
53       */
54      public STAXEventReader() {
55          this.factory = DocumentFactory.getInstance();
56      }
57  
58      /***
59       * Constructs a <code>STAXEventReader</code> instance that uses the
60       * specified {@link DocumentFactory}to construct DOM4J {@link Node}s.
61       * 
62       * @param factory
63       *            The DocumentFactory to use when constructing DOM4J nodes, or
64       *            <code>null</code> if a default should be used.
65       */
66      public STAXEventReader(DocumentFactory factory) {
67          if (factory != null) {
68              this.factory = factory;
69          } else {
70              this.factory = DocumentFactory.getInstance();
71          }
72      }
73  
74      /***
75       * Sets the DocumentFactory to be used when constructing DOM4J nodes.
76       * 
77       * @param documentFactory
78       *            The DocumentFactory to use when constructing DOM4J nodes, or
79       *            <code>null</code> if a default should be used.
80       */
81      public void setDocumentFactory(DocumentFactory documentFactory) {
82          if (documentFactory != null) {
83              this.factory = documentFactory;
84          } else {
85              this.factory = DocumentFactory.getInstance();
86          }
87      }
88  
89      /***
90       * Constructs a StAX event stream from the provided I/O stream and reads a
91       * DOM4J document from it.
92       * 
93       * @param is
94       *            The I/O stream from which the Document will be read.
95       * 
96       * @return The Document that was read from the stream.
97       * 
98       * @throws XMLStreamException
99       *             If an error occurs reading content from the stream.
100      */
101     public Document readDocument(InputStream is) throws XMLStreamException {
102         return readDocument(is, null);
103     }
104 
105     /***
106      * Constructs a StAX event stream from the provided I/O character stream and
107      * reads a DOM4J document from it.
108      * 
109      * @param reader
110      *            The character stream from which the Document will be read.
111      * 
112      * @return The Document that was read from the stream.
113      * 
114      * @throws XMLStreamException
115      *             If an error occurs reading content from the stream.
116      */
117     public Document readDocument(Reader reader) throws XMLStreamException {
118         return readDocument(reader, null);
119     }
120 
121     /***
122      * Constructs a StAX event stream from the provided I/O stream and reads a
123      * DOM4J document from it.
124      * 
125      * @param is
126      *            The I/O stream from which the Document will be read.
127      * @param systemId
128      *            A system id used to resolve entities.
129      * 
130      * @return The Document that was read from the stream.
131      * 
132      * @throws XMLStreamException
133      *             If an error occurs reading content from the stream.
134      */
135     public Document readDocument(InputStream is, String systemId)
136             throws XMLStreamException {
137         XMLEventReader eventReader = inputFactory.createXMLEventReader(
138                 systemId, is);
139 
140         try {
141             return readDocument(eventReader);
142         } finally {
143             eventReader.close();
144         }
145     }
146 
147     /***
148      * Constructs a StAX event stream from the provided I/O character stream and
149      * reads a DOM4J document from it.
150      * 
151      * @param reader
152      *            The character stream from which the Document will be read.
153      * @param systemId
154      *            A system id used to resolve entities.
155      * 
156      * @return The Document that was read from the stream.
157      * 
158      * @throws XMLStreamException
159      *             If an error occurs reading content from the stream.
160      */
161     public Document readDocument(Reader reader, String systemId)
162             throws XMLStreamException {
163         XMLEventReader eventReader = inputFactory.createXMLEventReader(
164                 systemId, reader);
165 
166         try {
167             return readDocument(eventReader);
168         } finally {
169             eventReader.close();
170         }
171     }
172 
173     /***
174      * Reads a {@link Node}from the event stream. If the next event is a
175      * {@link StartElement}, all events until the closing {@link EndElement}
176      * will be read, and the resulting nodes will be added to the returned
177      * {@link Element}.
178      * 
179      * <p>
180      * <strong>Pre-Conditions </strong>: The stream must be positioned before an
181      * event other than an <code>EndElement</code>,<code>EndDocument</code>,
182      * or any DTD-related events, which are not currently supported.
183      * </p>
184      * 
185      * @param reader
186      *            The reader from which events will be read.
187      * 
188      * @return A DOM4J {@link Node}constructed from the read events.
189      * 
190      * @throws XMLStreamException
191      *             If an error occurs reading from the stream, or the stream was
192      *             positioned before an unsupported event.
193      */
194     public Node readNode(XMLEventReader reader) throws XMLStreamException {
195         XMLEvent event = reader.peek();
196 
197         if (event.isStartElement()) {
198             return readElement(reader);
199         } else if (event.isCharacters()) {
200             return readCharacters(reader);
201         } else if (event.isStartDocument()) {
202             return readDocument(reader);
203         } else if (event.isProcessingInstruction()) {
204             return readProcessingInstruction(reader);
205         } else if (event.isEntityReference()) {
206             return readEntityReference(reader);
207         } else if (event.isAttribute()) {
208             return readAttribute(reader);
209         } else if (event.isNamespace()) {
210             return readNamespace(reader);
211         } else {
212             throw new XMLStreamException("Unsupported event: " + event);
213         }
214     }
215 
216     /***
217      * Reads a DOM4J {@link Document}from the provided stream. The stream
218      * should be positioned at the start of a document, or before a {@link
219      * StartElement} event.
220      * 
221      * @param reader
222      *            The event stream from which to read the {@link Document}.
223      * 
224      * @return The {@link Document}that was read from the stream.
225      * 
226      * @throws XMLStreamException
227      *             If an error occurs reading events from the stream.
228      */
229     public Document readDocument(XMLEventReader reader)
230             throws XMLStreamException {
231         Document doc = null;
232 
233         while (reader.hasNext()) {
234             XMLEvent nextEvent = reader.peek();
235             int type = nextEvent.getEventType();
236 
237             switch (type) {
238                 case XMLStreamConstants.START_DOCUMENT:
239 
240                     StartDocument event = (StartDocument) reader.nextEvent();
241 
242                     if (doc == null) {
243                         
244                         if (event.encodingSet()) {
245                             String encodingScheme = event
246                                     .getCharacterEncodingScheme();
247                             doc = factory.createDocument(encodingScheme);
248                         } else {
249                             doc = factory.createDocument();
250                         }
251                     } else {
252                         
253                         String msg = "Unexpected StartDocument event";
254                         throw new XMLStreamException(msg, event.getLocation());
255                     }
256 
257                     break;
258 
259                 case XMLStreamConstants.END_DOCUMENT:
260                 case XMLStreamConstants.SPACE:
261                 case XMLStreamConstants.CHARACTERS:
262 
263                     
264                     reader.nextEvent();
265 
266                     break;
267 
268                 default:
269 
270                     if (doc == null) {
271                         
272                         doc = factory.createDocument();
273                     }
274 
275                     Node n = readNode(reader);
276                     doc.add(n);
277             }
278         }
279 
280         return doc;
281     }
282 
283     /***
284      * Reads a DOM4J Element from the provided event stream. The stream must be
285      * positioned before an {@link StartElement}event. In addition to the
286      * initial start event, all events up to and including the closing {@link
287      * EndElement} will be read, and included with the returned element.
288      * 
289      * @param eventReader
290      *            The event stream from which to read the Element.
291      * 
292      * @return The Element that was read from the stream.
293      * 
294      * @throws XMLStreamException
295      *             If an error occured reading events from the stream, or the
296      *             stream was not positioned before a {@linkStartElement}event.
297      */
298     public Element readElement(XMLEventReader eventReader)
299             throws XMLStreamException {
300         XMLEvent event = eventReader.peek();
301 
302         if (event.isStartElement()) {
303             
304             StartElement startTag = eventReader.nextEvent().asStartElement();
305             Element elem = createElement(startTag);
306 
307             
308             while (true) {
309                 if (!eventReader.hasNext()) {
310                     String msg = "Unexpected end of stream while reading"
311                             + " element content";
312                     throw new XMLStreamException(msg);
313                 }
314 
315                 XMLEvent nextEvent = eventReader.peek();
316 
317                 if (nextEvent.isEndElement()) {
318                     EndElement endElem = eventReader.nextEvent().asEndElement();
319 
320                     if (!endElem.getName().equals(startTag.getName())) {
321                         throw new XMLStreamException("Expected "
322                                 + startTag.getName() + " end-tag, but found"
323                                 + endElem.getName());
324                     }
325 
326                     break;
327                 }
328 
329                 Node child = readNode(eventReader);
330                 elem.add(child);
331             }
332 
333             return elem;
334         } else {
335             throw new XMLStreamException("Expected Element event, found: "
336                     + event);
337         }
338     }
339 
340     /***
341      * Constructs a DOM4J Attribute from the provided event stream. The stream
342      * must be positioned before an {@link Attribute}event.
343      * 
344      * @param reader
345      *            The event stream from which to read the Attribute.
346      * 
347      * @return The Attribute that was read from the stream.
348      * 
349      * @throws XMLStreamException
350      *             If an error occured reading events from the stream, or the
351      *             stream was not positioned before an {@linkAttribute}event.
352      */
353     public org.dom4j.Attribute readAttribute(XMLEventReader reader)
354             throws XMLStreamException {
355         XMLEvent event = reader.peek();
356 
357         if (event.isAttribute()) {
358             Attribute attr = (Attribute) reader.nextEvent();
359 
360             return createAttribute(null, attr);
361         } else {
362             throw new XMLStreamException("Expected Attribute event, found: "
363                     + event);
364         }
365     }
366 
367     /***
368      * Constructs a DOM4J Namespace from the provided event stream. The stream
369      * must be positioned before a {@link Namespace}event.
370      * 
371      * @param reader
372      *            The event stream from which to read the Namespace.
373      * 
374      * @return The Namespace that was read from the stream.
375      * 
376      * @throws XMLStreamException
377      *             If an error occured reading events from the stream, or the
378      *             stream was not positioned before a {@linkNamespace}event.
379      */
380     public org.dom4j.Namespace readNamespace(XMLEventReader reader)
381             throws XMLStreamException {
382         XMLEvent event = reader.peek();
383 
384         if (event.isNamespace()) {
385             Namespace ns = (Namespace) reader.nextEvent();
386 
387             return createNamespace(ns);
388         } else {
389             throw new XMLStreamException("Expected Namespace event, found: "
390                     + event);
391         }
392     }
393 
394     /***
395      * Constructs a DOM4J Text or CDATA section from the provided event stream.
396      * The stream must be positioned before a {@link Characters}event.
397      * 
398      * @param reader
399      *            The event stream from which to read the Text or CDATA.
400      * 
401      * @return The Text or CDATA that was read from the stream.
402      * 
403      * @throws XMLStreamException
404      *             If an error occured reading events from the stream, or the
405      *             stream was not positioned before a {@linkCharacters}event.
406      */
407     public CharacterData readCharacters(XMLEventReader reader)
408             throws XMLStreamException {
409         XMLEvent event = reader.peek();
410 
411         if (event.isCharacters()) {
412             Characters characters = reader.nextEvent().asCharacters();
413 
414             return createCharacterData(characters);
415         } else {
416             throw new XMLStreamException("Expected Characters event, found: "
417                     + event);
418         }
419     }
420 
421     /***
422      * Constructs a DOM4J Comment from the provided event stream. The stream
423      * must be positioned before a {@link Comment}event.
424      * 
425      * @param reader
426      *            The event stream from which to read the Comment.
427      * 
428      * @return The Comment that was read from the stream.
429      * 
430      * @throws XMLStreamException
431      *             If an error occured reading events from the stream, or the
432      *             stream was not positioned before a {@linkComment}event.
433      */
434     public org.dom4j.Comment readComment(XMLEventReader reader)
435             throws XMLStreamException {
436         XMLEvent event = reader.peek();
437 
438         if (event instanceof Comment) {
439             return createComment((Comment) reader.nextEvent());
440         } else {
441             throw new XMLStreamException("Expected Comment event, found: "
442                     + event);
443         }
444     }
445 
446     /***
447      * Constructs a DOM4J Entity from the provided event stream. The stream must
448      * be positioned before an {@link EntityReference}event.
449      * 
450      * @param reader
451      *            The event stream from which to read the {@link
452      *            EntityReference}.
453      * 
454      * @return The {@link org.dom4j.Entity}that was read from the stream.
455      * 
456      * @throws XMLStreamException
457      *             If an error occured reading events from the stream, or the
458      *             stream was not positioned before an {@linkEntityReference}
459      *             event.
460      */
461     public Entity readEntityReference(XMLEventReader reader)
462             throws XMLStreamException {
463         XMLEvent event = reader.peek();
464 
465         if (event.isEntityReference()) {
466             EntityReference entityRef = (EntityReference) reader.nextEvent();
467 
468             return createEntity(entityRef);
469         } else {
470             throw new XMLStreamException("Expected EntityRef event, found: "
471                     + event);
472         }
473     }
474 
475     /***
476      * Constructs a DOM4J ProcessingInstruction from the provided event stream.
477      * The stream must be positioned before a {@link ProcessingInstruction}
478      * event.
479      * 
480      * @param reader
481      *            The event stream from which to read the ProcessingInstruction.
482      * 
483      * @return The ProcessingInstruction that was read from the stream.
484      * 
485      * @throws XMLStreamException
486      *             If an error occured reading events from the stream, or the
487      *             stream was not positioned before a {@link
488      *             ProcessingInstruction} event.
489      */
490     public org.dom4j.ProcessingInstruction readProcessingInstruction(
491             XMLEventReader reader) throws XMLStreamException {
492         XMLEvent event = reader.peek();
493 
494         if (event.isProcessingInstruction()) {
495             ProcessingInstruction pi = (ProcessingInstruction) reader
496                     .nextEvent();
497 
498             return createProcessingInstruction(pi);
499         } else {
500             throw new XMLStreamException("Expected PI event, found: " + event);
501         }
502     }
503 
504     /***
505      * Constructs a new DOM4J Element from the provided StartElement event. All
506      * attributes and namespaces will be added to the returned element.
507      * 
508      * @param startEvent
509      *            The StartElement event from which to construct the new DOM4J
510      *            Element.
511      * 
512      * @return The Element constructed from the provided StartElement event.
513      */
514     public Element createElement(StartElement startEvent) {
515         QName qname = startEvent.getName();
516         org.dom4j.QName elemName = createQName(qname);
517 
518         Element elem = factory.createElement(elemName);
519 
520         
521         for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
522             Attribute attr = (Attribute) i.next();
523             elem.addAttribute(createQName(attr.getName()), attr.getValue());
524         }
525 
526         
527         for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
528             Namespace ns = (Namespace) i.next();
529             elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
530         }
531 
532         return elem;
533     }
534 
535     /***
536      * Constructs a new DOM4J Attribute from the provided StAX Attribute event.
537      * 
538      * @param elem
539      *            DOCUMENT ME!
540      * @param attr
541      *            The Attribute event from which to construct the new DOM4J
542      *            Attribute.
543      * 
544      * @return The Attribute constructed from the provided Attribute event.
545      */
546     public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
547         return factory.createAttribute(elem, createQName(attr.getName()), attr
548                 .getValue());
549     }
550 
551     /***
552      * Constructs a new DOM4J Namespace from the provided StAX Namespace event.
553      * 
554      * @param ns
555      *            The Namespace event from which to construct the new DOM4J
556      *            Namespace.
557      * 
558      * @return The Namespace constructed from the provided Namespace event.
559      */
560     public org.dom4j.Namespace createNamespace(Namespace ns) {
561         return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
562     }
563 
564     /***
565      * Constructs a new DOM4J Text or CDATA object from the provided Characters
566      * event.
567      * 
568      * @param characters
569      *            The Characters event from which to construct the new DOM4J
570      *            Text or CDATA object.
571      * 
572      * @return The Text or CDATA object constructed from the provided Characters
573      *         event.
574      */
575     public CharacterData createCharacterData(Characters characters) {
576         String data = characters.getData();
577 
578         if (characters.isCData()) {
579             return factory.createCDATA(data);
580         } else {
581             return factory.createText(data);
582         }
583     }
584 
585     /***
586      * Constructs a new DOM4J Comment from the provided StAX Comment event.
587      * 
588      * @param comment
589      *            The Comment event from which to construct the new DOM4J
590      *            Comment.
591      * 
592      * @return The Comment constructed from the provided Comment event.
593      */
594     public org.dom4j.Comment createComment(Comment comment) {
595         return factory.createComment(comment.getText());
596     }
597 
598     /***
599      * Constructs a new DOM4J Entity from the provided StAX EntityReference
600      * event.
601      * 
602      * @param entityRef
603      *            The EntityReference event from which to construct the new
604      *            DOM4J Entity.
605      * 
606      * @return The Entity constructed from the provided EntityReference event.
607      */
608     public org.dom4j.Entity createEntity(EntityReference entityRef) {
609         return factory.createEntity(entityRef.getName(), entityRef
610                 .getDeclaration().getReplacementText());
611     }
612 
613     /***
614      * Constructs a new DOM4J ProcessingInstruction from the provided StAX
615      * ProcessingInstruction event.
616      * 
617      * @param pi
618      *            The ProcessingInstruction event from which to construct the
619      *            new DOM4J ProcessingInstruction.
620      * 
621      * @return The ProcessingInstruction constructed from the provided
622      *         ProcessingInstruction event.
623      */
624     public org.dom4j.ProcessingInstruction createProcessingInstruction(
625             ProcessingInstruction pi) {
626         return factory
627                 .createProcessingInstruction(pi.getTarget(), pi.getData());
628     }
629 
630     /***
631      * Constructs a new DOM4J QName from the provided JAXP QName.
632      * 
633      * @param qname
634      *            The JAXP QName from which to create a DOM4J QName.
635      * 
636      * @return The newly constructed DOM4J QName.
637      */
638     public org.dom4j.QName createQName(QName qname) {
639         return factory.createQName(qname.getLocalPart(), qname.getPrefix(),
640                 qname.getNamespaceURI());
641     }
642 }
643 
644 
645 
646 
647 
648 
649 
650 
651 
652 
653 
654 
655 
656 
657 
658 
659 
660 
661 
662 
663 
664 
665 
666 
667 
668 
669 
670 
671 
672 
673 
674 
675 
676 
677 
678 
679