123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608 |
- /*--
-
- $Id: Format.java,v 1.13 2007/11/10 05:29:01 jhunter Exp $
-
- Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions, and the disclaimer that follows
- these conditions in the documentation and/or other materials
- provided with the distribution.
-
- 3. The name "JDOM" must not be used to endorse or promote products
- derived from this software without prior written permission. For
- written permission, please contact <request_AT_jdom_DOT_org>.
-
- 4. Products derived from this software may not be called "JDOM", nor
- may "JDOM" appear in their name, without prior written permission
- from the JDOM Project Management <request_AT_jdom_DOT_org>.
-
- In addition, we request (but do not require) that you include in the
- end-user documentation provided with the redistribution and/or in the
- software itself an acknowledgement equivalent to the following:
- "This product includes software developed by the
- JDOM Project (http://www.jdom.org/)."
- Alternatively, the acknowledgment may be graphical using the logos
- available at http://www.jdom.org/images/logos.
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- SUCH DAMAGE.
-
- This software consists of voluntary contributions made by many
- individuals on behalf of the JDOM Project and was originally
- created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
- Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
- on the JDOM Project, please see <http://www.jdom.org/>.
-
- */
-
- package org.jdom.output;
-
- import java.lang.reflect.Method;
-
- /**
- * Class to encapsulate XMLOutputter format options.
- * Typical users can use the standard format configurations obtained by
- * {@link #getRawFormat} (no whitespace changes),
- * {@link #getPrettyFormat} (whitespace beautification), and
- * {@link #getCompactFormat} (whitespace normalization).
- * <p>
- * Several modes are available to effect the way textual content is printed.
- * See the documentation for {@link TextMode} for details.
- *
- * @version $Revision: 1.13 $, $Date: 2007/11/10 05:29:01 $
- * @author Jason Hunter
- */
- public class Format implements Cloneable {
-
- private static final String CVS_ID =
- "@(#) $RCSfile: Format.java,v $ $Revision: 1.13 $ $Date: 2007/11/10 05:29:01 $ $Name: jdom_1_1 $";
-
- /**
- * Returns a new Format object that performs no whitespace changes, uses
- * the UTF-8 encoding, doesn't expand empty elements, includes the
- * declaration and encoding, and uses the default entity escape strategy.
- * Tweaks can be made to the returned Format instance without affecting
- * other instances.
-
- * @return a Format with no whitespace changes
- */
- public static Format getRawFormat() {
- return new Format();
- }
-
- /**
- * Returns a new Format object that performs whitespace beautification with
- * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
- * includes the declaration and encoding, and uses the default entity
- * escape strategy.
- * Tweaks can be made to the returned Format instance without affecting
- * other instances.
- *
- * @return a Format with whitespace beautification
- */
- public static Format getPrettyFormat() {
- Format f = new Format();
- f.setIndent(STANDARD_INDENT);
- f.setTextMode(TextMode.TRIM);
- return f;
- }
-
- /**
- * Returns a new Format object that performs whitespace normalization, uses
- * the UTF-8 encoding, doesn't expand empty elements, includes the
- * declaration and encoding, and uses the default entity escape strategy.
- * Tweaks can be made to the returned Format instance without affecting
- * other instances.
- *
- * @return a Format with whitespace normalization
- */
- public static Format getCompactFormat() {
- Format f = new Format();
- f.setTextMode(TextMode.NORMALIZE);
- return f;
- }
-
- /** standard value to indent by, if we are indenting */
- private static final String STANDARD_INDENT = " ";
-
- /** standard string with which to end a line */
- private static final String STANDARD_LINE_SEPARATOR = "\r\n";
-
- /** standard encoding */
- private static final String STANDARD_ENCODING = "UTF-8";
-
-
- /** The default indent is no spaces (as original document) */
- String indent = null;
-
- /** New line separator */
- String lineSeparator = STANDARD_LINE_SEPARATOR;
-
- /** The encoding format */
- String encoding = STANDARD_ENCODING;
-
- /** Whether or not to output the XML declaration
- * - default is <code>false</code> */
- boolean omitDeclaration = false;
-
- /** Whether or not to output the encoding in the XML declaration
- * - default is <code>false</code> */
- boolean omitEncoding = false;
-
- /** Whether or not to expand empty elements to
- * <tagName></tagName> - default is <code>false</code> */
- boolean expandEmptyElements = false;
-
- /** Whether TrAX output escaping disabling/enabling PIs are ignored
- * or processed - default is <code>false</code> */
- boolean ignoreTrAXEscapingPIs = false;
-
- /** text handling mode */
- TextMode mode = TextMode.PRESERVE;
-
- /** entity escape logic */
- EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
-
- /**
- * Creates a new Format instance with default (raw) behavior.
- */
- private Format() { }
-
- /**
- * Sets the {@link EscapeStrategy} to use for character escaping.
- *
- * @param strategy the EscapeStrategy to use
- * @return a pointer to this Format for chaining
- */
- public Format setEscapeStrategy(EscapeStrategy strategy) {
- escapeStrategy = strategy;
- return this;
- }
-
- /**
- * Returns the current escape strategy
- *
- * @return the current escape strategy
- */
- public EscapeStrategy getEscapeStrategy() {
- return escapeStrategy;
- }
-
- /**
- * This will set the newline separator (<code>lineSeparator</code>).
- * The default is <code>\r\n</code>. To make it output
- * the system default line ending string, call
- * <code>setLineSeparator(System.getProperty("line.separator"))</code>.
- *
- * <p>
- * To output "UNIX-style" documents, call
- * <code>setLineSeparator("\n")</code>. To output "Mac-style"
- * documents, call <code>setLineSeparator("\r")</code>. DOS-style
- * documents use CR-LF ("\r\n"), which is the default.
- * </p>
- *
- * <p>
- * Note that this only applies to newlines generated by the
- * outputter. If you parse an XML document that contains newlines
- * embedded inside a text node, and you do not set TextMode.NORMALIZE,
- * then the newlines will be output
- * verbatim, as "\n" which is how parsers normalize them.
- * </p>
- *
- * <p>
- * If the format's "indent" property is null (as is the default
- * for the Raw and Compact formats), then this value only effects the
- * newlines written after the declaration and doctype.
- * </p>
- *
- * @see #setTextMode
- *
- * @param separator <code>String</code> line separator to use.
- * @return a pointer to this Format for chaining
- */
- public Format setLineSeparator(String separator) {
- this.lineSeparator = separator;
- return this;
- }
-
- /**
- * Returns the current line separator.
- *
- * @return the current line separator
- */
- public String getLineSeparator() {
- return lineSeparator;
- }
-
- /**
- * This will set whether the XML declaration
- * (<code><?xml version="1.0"
- * encoding="UTF-8"?></code>)
- * includes the encoding of the document. It is common to omit
- * this in uses such as WML and other wireless device protocols.
- *
- * @param omitEncoding <code>boolean</code> indicating whether or not
- * the XML declaration should indicate the document encoding.
- * @return a pointer to this Format for chaining
- */
- public Format setOmitEncoding(boolean omitEncoding) {
- this.omitEncoding = omitEncoding;
- return this;
- }
-
- /**
- * Returns whether the XML declaration encoding will be omitted.
- *
- * @return whether the XML declaration encoding will be omitted
- */
- public boolean getOmitEncoding() {
- return omitEncoding;
- }
-
- /**
- * This will set whether the XML declaration
- * (<code><?xml version="1.0"?gt;</code>)
- * will be omitted or not. It is common to omit this in uses such
- * as SOAP and XML-RPC calls.
- *
- * @param omitDeclaration <code>boolean</code> indicating whether or not
- * the XML declaration should be omitted.
- * @return a pointer to this Format for chaining
- */
- public Format setOmitDeclaration(boolean omitDeclaration) {
- this.omitDeclaration = omitDeclaration;
- return this;
- }
-
- /**
- * Returns whether the XML declaration will be omitted.
- *
- * @return whether the XML declaration will be omitted
- */
- public boolean getOmitDeclaration() {
- return omitDeclaration;
- }
-
- /**
- * This will set whether empty elements are expanded from
- * <code><tagName/></code> to
- * <code><tagName></tagName></code>.
- *
- * @param expandEmptyElements <code>boolean</code> indicating whether or not
- * empty elements should be expanded.
- * @return a pointer to this Format for chaining
- */
- public Format setExpandEmptyElements(boolean expandEmptyElements) {
- this.expandEmptyElements = expandEmptyElements;
- return this;
- }
-
- /**
- * Returns whether empty elements are expanded.
- *
- * @return whether empty elements are expanded
- */
- public boolean getExpandEmptyElements() {
- return expandEmptyElements;
- }
-
- /**
- * This will set whether JAXP TrAX processing instructions for
- * disabling/enabling output escaping are ignored. Disabling
- * output escaping allows using XML text as element content and
- * outputing it verbatim, i.e. as element children would be.
- * <p>
- * When processed, these processing instructions are removed from
- * the generated XML text and control whether the element text
- * content is output verbatim or with escaping of the pre-defined
- * entities in XML 1.0. The text to be output verbatim shall be
- * surrounded by the
- * <code><?javax.xml.transform.disable-output-escaping ?></code>
- * and <code><?javax.xml.transform.enable-output-escaping ?></code>
- * PIs.</p>
- * <p>
- * When ignored, the processing instructions are present in the
- * generated XML text and the pre-defined entities in XML 1.0 are
- * escaped.
- * <p>
- * Default: <code>false</code>.</p>
- *
- * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
- * whether or not TrAX ouput escaping PIs are ignored.
- *
- * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
- * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
- */
- public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
- this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
- }
-
- /**
- * Returns whether JAXP TrAX processing instructions for
- * disabling/enabling output escaping are ignored.
- *
- * @return whether or not TrAX ouput escaping PIs are ignored.
- */
- public boolean getIgnoreTrAXEscapingPIs() {
- return ignoreTrAXEscapingPIs;
- }
-
- /**
- * This sets the text output style. Options are available as static
- * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
- *
- * @return a pointer to this Format for chaining
- */
- public Format setTextMode(Format.TextMode mode) {
- this.mode = mode;
- return this;
- }
-
- /**
- * Returns the current text output style.
- *
- * @return the current text output style
- */
- public Format.TextMode getTextMode() {
- return mode;
- }
-
- /**
- * This will set the indent <code>String</code> to use; this
- * is usually a <code>String</code> of empty spaces. If you pass
- * the empty string (""), then no indentation will happen but newlines
- * will still be generated. Passing null will result in no indentation
- * and no newlines generated. Default: none (null)
- *
- * @param indent <code>String</code> to use for indentation.
- * @return a pointer to this Format for chaining
- */
- public Format setIndent(String indent) {
- this.indent = indent;
- return this;
- }
-
- /**
- * Returns the indent string in use.
- *
- * @return the indent string in use
- */
- public String getIndent() {
- return indent;
- }
-
- /**
- * Sets the output encoding. The name should be an accepted XML
- * encoding.
- *
- * @param encoding the encoding format. Use XML-style names like
- * "UTF-8" or "ISO-8859-1" or "US-ASCII"
- * @return a pointer to this Format for chaining
- */
- public Format setEncoding(String encoding) {
- this.encoding = encoding;
- escapeStrategy = new DefaultEscapeStrategy(encoding);
- return this;
- }
-
- /**
- * Returns the configured output encoding.
- *
- * @return the output encoding
- */
- public String getEncoding() {
- return encoding;
- }
-
- protected Object clone() {
- Format format = null;
-
- try {
- format = (Format) super.clone();
- }
- catch (CloneNotSupportedException ce) {
- }
-
- return format;
- }
-
-
- /**
- * Handle common charsets quickly and easily. Use reflection
- * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
- * If JDK 1.4 isn't around, default to no special encoding.
- */
- class DefaultEscapeStrategy implements EscapeStrategy {
- private int bits;
- Object encoder;
- Method canEncode;
-
- public DefaultEscapeStrategy(String encoding) {
- if ("UTF-8".equalsIgnoreCase(encoding) ||
- "UTF-16".equalsIgnoreCase(encoding)) {
- bits = 16;
- }
- else if ("ISO-8859-1".equalsIgnoreCase(encoding) ||
- "Latin1".equalsIgnoreCase(encoding)) {
- bits = 8;
- }
- else if ("US-ASCII".equalsIgnoreCase(encoding) ||
- "ASCII".equalsIgnoreCase(encoding)) {
- bits = 7;
- }
- else {
- bits = 0;
- //encoder = Charset.forName(encoding).newEncoder();
- try {
- Class charsetClass = Class.forName("java.nio.charset.Charset");
- Class encoderClass = Class.forName("java.nio.charset.CharsetEncoder");
- Method forName = charsetClass.getMethod("forName", new Class[]{String.class});
- Object charsetObj = forName.invoke(null, new Object[]{encoding});
- Method newEncoder = charsetClass.getMethod("newEncoder", null);
- encoder = newEncoder.invoke(charsetObj, null);
- canEncode = encoderClass.getMethod("canEncode", new Class[]{char.class});
- }
- catch (Exception ignored) {
- }
- }
- }
-
- public boolean shouldEscape(char ch) {
- if (bits == 16) {
- return false;
- }
- if (bits == 8) {
- if ((int) ch > 255)
- return true;
- else
- return false;
- }
- if (bits == 7) {
- if ((int) ch > 127)
- return true;
- else
- return false;
- }
- else {
- if (canEncode != null && encoder != null) {
- try {
- Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)});
- return !val.booleanValue();
- }
- catch (Exception ignored) {
- }
- }
- // Return false if we don't know. This risks not escaping
- // things which should be escaped, but also means people won't
- // start getting loads of unnecessary escapes.
- return false;
- }
- }
- }
-
-
- /**
- * Class to signify how text should be handled on output. The following
- * table provides details.
- *
- * <table>
- * <tr>
- * <th align="left">
- * Text Mode
- * </th>
- * <th>
- * Resulting behavior.
- * </th>
- * </tr>
- *
- * <tr valign="top">
- * <td>
- * <i>PRESERVE (Default)</i>
- * </td>
- * <td>
- * All content is printed in the format it was created, no whitespace
- * or line separators are are added or removed.
- * </td>
- * </tr>
- *
- * <tr valign="top">
- * <td>
- * TRIM_FULL_WHITE
- * </td>
- * <td>
- * Content between tags consisting of all whitespace is not printed.
- * If the content contains even one non-whitespace character, it is
- * printed verbatim, whitespace and all.
- * </td>
- * </tr>
- *
- * <tr valign="top">
- * <td>
- * TRIM
- * </td>
- * <td>
- * Same as TrimAllWhite, plus leading/trailing whitespace are
- * trimmed.
- * </td>
- * </tr>
- *
- * <tr valign="top">
- * <td>
- * NORMALIZE
- * </td>
- * <td>
- * Same as TextTrim, plus addition interior whitespace is compressed
- * to a single space.
- * </td>
- * </tr>
- * </table>
- *
- * In most cases textual content is aligned with the surrounding tags
- * (after the appropriate text mode is applied). In the case where the only
- * content between the start and end tags is textual, the start tag, text,
- * and end tag are all printed on the same line. If the document being
- * output already has whitespace, it's wise to turn on TRIM mode so the
- * pre-existing whitespace can be trimmed before adding new whitespace.
- * <p>
- * When a element has a xml:space attribute with the value of "preserve",
- * all formating is turned off and reverts back to the default until the
- * element and its contents have been printed. If a nested element contains
- * another xml:space with the value "default" formatting is turned back on
- * for the child element and then off for the remainder of the parent
- * element.
- */
- public static class TextMode {
- /**
- * Mode for literal text preservation.
- */
- public static final TextMode PRESERVE = new TextMode("PRESERVE");
-
- /**
- * Mode for text trimming (left and right trim).
- */
- public static final TextMode TRIM = new TextMode("TRIM");
-
- /**
- * Mode for text normalization (left and right trim plus internal
- * whitespace is normalized to a single space.
- * @see org.jdom.Element#getTextNormalize
- */
- public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
-
- /**
- * Mode for text trimming of content consisting of nothing but
- * whitespace but otherwise not changing output.
- */
- public static final TextMode TRIM_FULL_WHITE =
- new TextMode("TRIM_FULL_WHITE");
-
- private final String name;
-
- private TextMode(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
- }
- }
|