1   
2   
3   
4   
5   
6   
7   
8   package org.dom4j.io;
9   
/**
 * <p>
 * <code>OutputFormat</code> represents the format configuration used by
 * {@link XMLWriter} and its base classes to format the XML output.
 * </p>
 *
 * <p>
 * An instance is a plain mutable holder of formatting settings; every
 * setting has a getter and a setter, and two static factory methods
 * ({@link #createPrettyPrint()} and {@link #createCompactFormat()}) build
 * commonly used configurations.
 * </p>
 *
 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
 * @version $Revision: 1.17 $
 */
public class OutputFormat implements Cloneable {
    /** Standard value to indent by, if we are indenting (two spaces). */
    protected static final String STANDARD_INDENT = "  ";

    /**
     * Whether or not to suppress the XML declaration - default is
     * <code>false</code>.
     */
    private boolean suppressDeclaration = false;

    /**
     * Whether or not to print a new line after the XML declaration - default
     * is <code>true</code>.
     */
    private boolean newLineAfterDeclaration = true;

    /** The encoding format - default is <code>UTF-8</code>. */
    private String encoding = "UTF-8";

    /**
     * Whether or not to omit the encoding from the XML declaration - default
     * is <code>false</code> (the encoding is written).
     */
    private boolean omitEncoding = false;

    /** The default indent is no spaces (as original document). */
    private String indent = null;

    /**
     * Whether or not to expand empty elements to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code> - default is
     * <code>false</code>.
     */
    private boolean expandEmptyElements = false;

    /**
     * The default new line flag, set to do new lines only as in original
     * document.
     */
    private boolean newlines = false;

    /** New line separator. */
    private String lineSeparator = "\n";

    /** Whether text nodes are trimmed (true) or written verbatim (false). */
    private boolean trimText = false;

    /** Pad string-element boundaries with whitespace. */
    private boolean padText = false;

    /** Whether or not to use XHTML standard. */
    private boolean doXHTML = false;

    /**
     * Controls when to output a line separator every so many tags in case of
     * no new lines and total text trimming. Zero disables the feature.
     */
    private int newLineAfterNTags = 0;

    /** Quote character to use when writing attributes. */
    private char attributeQuoteChar = '\"';

    /**
     * Creates an <code>OutputFormat</code> with no additional whitespace
     * (indent or new lines) added. The whitespace from the element text content
     * is fully preserved.
     */
    public OutputFormat() {
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added but no
     * new lines added. All whitespace from element text will be included.
     *
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces).
     */
    public OutputFormat(String indent) {
        this.indent = indent;
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added with
     * optional newlines between the Elements. All whitespace from element text
     * will be included.
     *
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces).
     * @param newlines
     *            whether new lines are added to layout the output
     */
    public OutputFormat(String indent, boolean newlines) {
        this.indent = indent;
        this.newlines = newlines;
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added with
     * optional newlines between the Elements and the given encoding format.
     *
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces).
     * @param newlines
     *            whether new lines are added to layout the output
     * @param encoding
     *            is the text encoding to use for writing the XML; if
     *            <code>null</code> the default (<code>UTF-8</code>) is kept,
     *            matching the behaviour of {@link #setEncoding(String)}
     */
    public OutputFormat(String indent, boolean newlines, String encoding) {
        this.indent = indent;
        this.newlines = newlines;

        // Same rule as setEncoding(String): a null encoding never replaces
        // the default, so getEncoding() cannot return null through this path.
        if (encoding != null) {
            this.encoding = encoding;
        }
    }

    /**
     * Returns the new-line separator.
     *
     * @return the string written as a line break
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * <p>
     * This will set the new-line separator. The default is <code>\n</code>.
     * Note that if the "newlines" property is false, this value is irrelevant.
     * To make it output the system default line ending string, call
     * <code>setLineSeparator(System.getProperty("line.separator"))</code>
     * </p>
     *
     * @param separator
     *            <code>String</code> line separator to use.
     *
     * @see #setNewlines(boolean)
     */
    public void setLineSeparator(String separator) {
        lineSeparator = separator;
    }

    /**
     * Returns whether new lines should be printed.
     *
     * @return the current value of the "newlines" property
     */
    public boolean isNewlines() {
        return newlines;
    }

    /**
     * Sets whether new lines should be printed.
     *
     * @param newlines
     *            <code>true</code> indicates new lines should be printed,
     *            else new lines are ignored (compacted).
     *
     * @see #setLineSeparator(String)
     */
    public void setNewlines(boolean newlines) {
        this.newlines = newlines;
    }

    /**
     * Returns the encoding format.
     *
     * @return the encoding name; <code>UTF-8</code> unless changed
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Sets the encoding format. A <code>null</code> argument is ignored and
     * the current encoding is kept.
     *
     * @param encoding
     *            encoding format
     */
    public void setEncoding(String encoding) {
        if (encoding != null) {
            this.encoding = encoding;
        }
    }

    /**
     * Returns whether the encoding is omitted from the XML declaration.
     *
     * @return the current value of the "omitEncoding" property
     */
    public boolean isOmitEncoding() {
        return omitEncoding;
    }

    /**
     * <p>
     * This will set whether the XML declaration
     * (<code>&lt;?xml version="1.0" encoding="UTF-8"?&gt;</code>)
     * includes the encoding of the document. It is common to suppress this in
     * protocols such as WML and SOAP.
     * </p>
     *
     * @param omitEncoding
     *            <code>boolean</code> indicating whether or not the encoding
     *            should be left out of the XML declaration.
     */
    public void setOmitEncoding(boolean omitEncoding) {
        this.omitEncoding = omitEncoding;
    }

    /**
     * <p>
     * This will set whether the XML declaration
     * (<code>&lt;?xml version="1.0" encoding="UTF-8"?&gt;</code>)
     * is included or not. It is common to suppress this in protocols such as
     * WML and SOAP.
     * </p>
     *
     * @param suppressDeclaration
     *            <code>boolean</code> indicating whether or not the XML
     *            declaration should be suppressed.
     */
    public void setSuppressDeclaration(boolean suppressDeclaration) {
        this.suppressDeclaration = suppressDeclaration;
    }

    /**
     * Returns whether the XML declaration is suppressed.
     *
     * @return true if the output of the XML declaration
     *         (<code>&lt;?xml version="1.0"?&gt;</code>)
     *         should be suppressed else false.
     */
    public boolean isSuppressDeclaration() {
        return suppressDeclaration;
    }

    /**
     * <p>
     * This will set whether a new line is printed after the XML declaration
     * (assuming it is not suppressed.)
     * </p>
     *
     * @param newLineAfterDeclaration
     *            <code>boolean</code> indicating whether or not to print new
     *            line following the XML declaration. The default is true.
     */
    public void setNewLineAfterDeclaration(boolean newLineAfterDeclaration) {
        this.newLineAfterDeclaration = newLineAfterDeclaration;
    }

    /**
     * Returns whether a new line follows the XML declaration.
     *
     * @return true if a new line should be printed following XML declaration
     */
    public boolean isNewLineAfterDeclaration() {
        return newLineAfterDeclaration;
    }

    /**
     * Returns whether empty elements are expanded.
     *
     * @return the current value of the "expandEmptyElements" property
     */
    public boolean isExpandEmptyElements() {
        return expandEmptyElements;
    }

    /**
     * <p>
     * This will set whether empty elements are expanded from
     * <code>&lt;tagName/&gt;</code> to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
     * </p>
     *
     * @param expandEmptyElements
     *            <code>boolean</code> indicating whether or not empty
     *            elements should be expanded.
     */
    public void setExpandEmptyElements(boolean expandEmptyElements) {
        this.expandEmptyElements = expandEmptyElements;
    }

    /**
     * Returns whether text is trimmed.
     *
     * @return the current value of the "trimText" property
     */
    public boolean isTrimText() {
        return trimText;
    }

    /**
     * <p>
     * This will set whether the text is output verbatim (false) or with
     * whitespace stripped as per
     * {@link org.dom4j.Element#getTextTrim()}.
     * </p>
     *
     * <p>
     * Default: false
     * </p>
     *
     * @param trimText
     *            <code>boolean</code> true=&gt;trim the whitespace,
     *            false=&gt;use text verbatim
     */
    public void setTrimText(boolean trimText) {
        this.trimText = trimText;
    }

    /**
     * Returns whether string-element boundaries are padded.
     *
     * @return the current value of the "padText" property
     */
    public boolean isPadText() {
        return padText;
    }

    /**
     * <p>
     * Ensure that text immediately preceded by or followed by an element will
     * be "padded" with a single space. This is used to make
     * browser-friendly HTML, avoiding trimText's transformation of, e.g.,
     * <code>The quick &lt;b&gt;brown&lt;/b&gt; fox</code> into
     * <code>The quick&lt;b&gt;brown&lt;/b&gt;fox</code>
     * (the latter will run the three separate words together into a single
     * word). This setting is not too useful if you haven't also called
     * {@link #setTrimText}.
     * </p>
     *
     * <p>
     * The padding string will only be added if the text itself starts or ends
     * with some whitespace characters.
     * </p>
     *
     * <p>
     * Default: false
     * </p>
     *
     * @param padText
     *            <code>boolean</code> if true, pad string-element boundaries
     */
    public void setPadText(boolean padText) {
        this.padText = padText;
    }

    /**
     * Returns the indent string.
     *
     * @return the indent string, or <code>null</code> when none is set
     */
    public String getIndent() {
        return indent;
    }

    /**
     * <p>
     * This will set the indent <code>String</code> to use; this is usually a
     * <code>String</code> of empty spaces. If you pass null, or the empty
     * string (""), then no indentation will happen.
     * </p>
     * Default: none (null)
     *
     * @param indent
     *            <code>String</code> to use for indentation.
     */
    public void setIndent(String indent) {
        // The empty string is normalised to null so that "no indentation"
        // has a single stored representation when set through this method.
        if ((indent != null) && (indent.length() <= 0)) {
            indent = null;
        }

        this.indent = indent;
    }

    /**
     * Set the indent on or off. If setting on, will use the value of
     * STANDARD_INDENT, which is two spaces.
     *
     * @param doIndent
     *            if true, set indenting on; if false, set indenting off
     */
    public void setIndent(boolean doIndent) {
        if (doIndent) {
            this.indent = STANDARD_INDENT;
        } else {
            this.indent = null;
        }
    }

    /**
     * <p>
     * This will set the indent <code>String</code>'s size; an indentSize of
     * 4 would result in the indention being equivalent to the
     * <code>String</code> "    " (four space characters). A size of zero or
     * less results in the empty string.
     * </p>
     *
     * @param indentSize
     *            <code>int</code> number of spaces in indentation.
     */
    public void setIndentSize(int indentSize) {
        // Clamp so that a negative size yields an empty indent rather than
        // a NegativeArraySizeException.
        char[] spaces = new char[Math.max(indentSize, 0)];

        for (int i = 0; i < spaces.length; i++) {
            spaces[i] = ' ';
        }

        this.indent = new String(spaces);
    }

    /**
     * <p>
     * Whether or not to use the XHTML standard: like HTML but passes an XML
     * parser with real, closed tags. Also, XHTML CDATA sections will be output
     * with the CDATA delimiters: ( "<b>&lt;![CDATA[</b>" and
     * "<b>]]&gt;</b>" ) otherwise, the class HTMLWriter will output the
     * CDATA text, but not the delimiters.
     * </p>
     *
     * <p>
     * Default is <code>false</code>
     * </p>
     *
     * @return the current value of the XHTML flag
     */
    public boolean isXHTML() {
        return doXHTML;
    }

    /**
     * <p>
     * This will set whether or not to use the XHTML standard: like HTML but
     * passes an XML parser with real, closed tags. Also, XHTML CDATA sections
     * will be output with the CDATA delimiters: ( "<b>&lt;![CDATA[</b>"
     * and "<b>]]&gt;</b>" ) otherwise, the class HTMLWriter
     * will output the CDATA text, but not the delimiters.
     * </p>
     *
     * <p>
     * Default: false
     * </p>
     *
     * @param xhtml
     *            <code>boolean</code> true=&gt;conform to XHTML,
     *            false=&gt;conform to HTML, can have unclosed tags, etc.
     */
    public void setXHTML(boolean xhtml) {
        doXHTML = xhtml;
    }

    /**
     * Returns the number of tags after which a line separator is output.
     *
     * @return the current tag count; zero means the feature is off
     */
    public int getNewLineAfterNTags() {
        return newLineAfterNTags;
    }

    /**
     * Controls output of a line.separator every tagCount tags when isNewlines
     * is false. If tagCount equals zero, it means don't do anything special. If
     * greater than zero, then a line.separator will be output after tagCount
     * tags have been output. Used when you would like to squeeze the html as
     * much as possible, but some browsers don't like really long lines. A tag
     * count of 10 would produce a line.separator in the output after 10 close
     * tags (including single tags).
     *
     * @param tagCount
     *            number of tags after which a line separator is output, or
     *            zero to disable
     */
    public void setNewLineAfterNTags(int tagCount) {
        newLineAfterNTags = tagCount;
    }

    /**
     * Returns the character used to quote attribute values.
     *
     * @return the attribute quote character; a double quote unless changed
     */
    public char getAttributeQuoteCharacter() {
        return attributeQuoteChar;
    }

    /**
     * Sets the character used to quote attribute values. The specified
     * character must be a valid XML attribute quote character, otherwise an
     * <code>IllegalArgumentException</code> will be thrown.
     *
     * @param quoteChar
     *            The character to use when quoting attribute values.
     *
     * @throws IllegalArgumentException
     *             If the specified character is not a valid XML attribute quote
     *             character.
     */
    public void setAttributeQuoteCharacter(char quoteChar) {
        if ((quoteChar == '\'') || (quoteChar == '"')) {
            attributeQuoteChar = quoteChar;
        } else {
            throw new IllegalArgumentException("Invalid attribute quote "
                    + "character (" + quoteChar + ")");
        }
    }

    /**
     * Parses command line arguments of the form <code>-omitEncoding
     * -indentSize 3 -newlines -trimText</code>.
     *
     * <p>
     * Recognised options are <code>-suppressDeclaration</code>,
     * <code>-omitEncoding</code>, <code>-indent VALUE</code>,
     * <code>-indentSize N</code>, <code>-expandEmpty*</code>,
     * <code>-encoding VALUE</code>, <code>-newlines</code>,
     * <code>-lineSeparator VALUE</code>, <code>-trimText</code>,
     * <code>-padText</code> and <code>-xhtml*</code> (the two starred
     * options are matched by prefix). Parsing stops at the first argument
     * that is not one of these.
     * </p>
     *
     * @param args
     *            is the array of command line arguments
     * @param i
     *            is the index in args to start parsing options
     *
     * @return the index of first parameter that we didn't understand, or
     *         <code>args.length</code> if every argument was consumed
     *
     * @throws IllegalArgumentException
     *             if an option that takes a value is the last argument, or
     *             (as <code>NumberFormatException</code>) if the value of
     *             <code>-indentSize</code> is not an integer
     */
    public int parseOptions(String[] args, int i) {
        for (int size = args.length; i < size; i++) {
            String option = args[i];

            if (option.equals("-suppressDeclaration")) {
                setSuppressDeclaration(true);
            } else if (option.equals("-omitEncoding")) {
                setOmitEncoding(true);
            } else if (option.equals("-indent")) {
                setIndent(optionValue(args, ++i, option));
            } else if (option.equals("-indentSize")) {
                setIndentSize(Integer.parseInt(optionValue(args, ++i, option)));
            } else if (option.startsWith("-expandEmpty")) {
                setExpandEmptyElements(true);
            } else if (option.equals("-encoding")) {
                setEncoding(optionValue(args, ++i, option));
            } else if (option.equals("-newlines")) {
                setNewlines(true);
            } else if (option.equals("-lineSeparator")) {
                setLineSeparator(optionValue(args, ++i, option));
            } else if (option.equals("-trimText")) {
                setTrimText(true);
            } else if (option.equals("-padText")) {
                setPadText(true);
            } else if (option.startsWith("-xhtml")) {
                setXHTML(true);
            } else {
                return i;
            }
        }

        return i;
    }

    /**
     * Fetches the value that follows a value-taking option, failing with a
     * descriptive error instead of an array index error when it is absent.
     *
     * @param args
     *            the command line arguments
     * @param valueIndex
     *            index at which the option's value is expected
     * @param option
     *            the option name, used in the error message
     *
     * @return the argument at <code>valueIndex</code>
     *
     * @throws IllegalArgumentException
     *             if <code>valueIndex</code> is past the end of args
     */
    private static String optionValue(String[] args, int valueIndex,
            String option) {
        if (valueIndex >= args.length) {
            throw new IllegalArgumentException("Missing value for option "
                    + option);
        }

        return args[valueIndex];
    }

    /**
     * A static helper method to create the default pretty printing format. This
     * format consists of an indent of 2 spaces, newlines after each element,
     * all other whitespace trimmed, text padding enabled, and XHTML left
     * false.
     *
     * @return a new <code>OutputFormat</code> configured for pretty printing
     */
    public static OutputFormat createPrettyPrint() {
        OutputFormat format = new OutputFormat();
        format.setIndentSize(2);
        format.setNewlines(true);
        format.setTrimText(true);
        format.setPadText(true);

        return format;
    }

    /**
     * A static helper method to create the default compact format. This format
     * does not have any indentation or newlines after an element and all other
     * whitespace trimmed.
     *
     * @return a new <code>OutputFormat</code> configured for compact output
     */
    public static OutputFormat createCompactFormat() {
        OutputFormat format = new OutputFormat();
        format.setIndent(false);
        format.setNewlines(false);
        format.setTrimText(true);

        return format;
    }
}
555 
556 
557 
558 
559 
560 
561 
562 
563 
564 
565 
566 
567 
568 
569 
570 
571 
572 
573 
574 
575 
576 
577 
578 
579 
580 
581 
582 
583 
584 
585 
586 
587 
588 
589 
590 
591