123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047 |
- /*--
-
- $Id: SAXBuilder.java,v 1.92 2007/11/10 05:29:00 jhunter Exp $
-
- Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions, and the disclaimer that follows
- these conditions in the documentation and/or other materials
- provided with the distribution.
-
- 3. The name "JDOM" must not be used to endorse or promote products
- derived from this software without prior written permission. For
- written permission, please contact <request_AT_jdom_DOT_org>.
-
- 4. Products derived from this software may not be called "JDOM", nor
- may "JDOM" appear in their name, without prior written permission
- from the JDOM Project Management <request_AT_jdom_DOT_org>.
-
- In addition, we request (but do not require) that you include in the
- end-user documentation provided with the redistribution and/or in the
- software itself an acknowledgement equivalent to the following:
- "This product includes software developed by the
- JDOM Project (http://www.jdom.org/)."
- Alternatively, the acknowledgment may be graphical using the logos
- available at http://www.jdom.org/images/logos.
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- SUCH DAMAGE.
-
- This software consists of voluntary contributions made by many
- individuals on behalf of the JDOM Project and was originally
- created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
- Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
- on the JDOM Project, please see <http://www.jdom.org/>.
-
- */
-
- package org.jdom.input;
-
- import java.io.*;
- import java.lang.reflect.*;
- import java.net.*;
- import java.util.*;
-
- import org.jdom.*;
-
- import org.xml.sax.*;
- import org.xml.sax.helpers.XMLReaderFactory;
-
- /**
- * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
- * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
- * third-party SAX parser (chosen by JAXP by default, or you can choose
- * manually) to handle the parsing duties and simply listens to the SAX events
- * to construct a document. Details which SAX does not provide, such as
- * whitespace outside the root element, are not represented in the JDOM
- * document. Information about SAX can be found at <a
- * href="http://www.saxproject.org">http://www.saxproject.org</a>.
- * <p>
- * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
- * be converted by the SAX parser into absolute paths.
- *
- * @version $Revision: 1.92 $, $Date: 2007/11/10 05:29:00 $
- * @author Jason Hunter
- * @author Brett McLaughlin
- * @author Dan Schaffer
- * @author Philip Nelson
- * @author Alex Rosen
- */
- public class SAXBuilder {
-
- private static final String CVS_ID =
- "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.92 $ $Date: 2007/11/10 05:29:00 $ $Name: jdom_1_1 $";
-
- /**
- * Default parser class to use. This is used when no other parser
- * is given and JAXP isn't available.
- */
- private static final String DEFAULT_SAX_DRIVER =
- "org.apache.xerces.parsers.SAXParser";
-
- /** Whether validation should occur */
- private boolean validate;
-
- /** Whether expansion of entities should occur */
- private boolean expand = true;
-
- /** Adapter class to use */
- private String saxDriverClass;
-
- /** ErrorHandler class to use */
- private ErrorHandler saxErrorHandler = null;
-
- /** EntityResolver class to use */
- private EntityResolver saxEntityResolver = null;
-
- /** DTDHandler class to use */
- private DTDHandler saxDTDHandler = null;
-
- /** XMLFilter instance to use */
- private XMLFilter saxXMLFilter = null;
-
- /** The factory for creating new JDOM objects */
- private JDOMFactory factory = new DefaultJDOMFactory();
-
- /** Whether to ignore ignorable whitespace */
- private boolean ignoringWhite = false;
-
- /** Whether to ignore all whitespace content */
- private boolean ignoringBoundaryWhite = false;
-
- /** User-specified features to be set on the SAX parser */
- private HashMap features = new HashMap(5);
-
- /** User-specified properties to be set on the SAX parser */
- private HashMap properties = new HashMap(5);
-
- /**
- * Whether parser reuse is allowed.
- * <p>Default: <code>true</code></p>
- */
- private boolean reuseParser = true;
-
- /** The current SAX parser, if parser reuse has been activated. */
- private XMLReader saxParser = null;
-
- /**
- * Creates a new SAXBuilder which will attempt to first locate
- * a parser via JAXP, then will try to use a set of default
- * SAX Drivers. The underlying parser will not validate.
- */
- public SAXBuilder() {
- this(false);
- }
-
- /**
- * Creates a new SAXBuilder which will attempt to first locate
- * a parser via JAXP, then will try to use a set of default
- * SAX Drivers. The underlying parser will validate or not
- * according to the given parameter.
- *
- * @param validate <code>boolean</code> indicating if
- * validation should occur.
- */
- public SAXBuilder(boolean validate) {
- this.validate = validate;
- }
-
- /**
- * Creates a new SAXBuilder using the specified SAX parser.
- * The underlying parser will not validate.
- *
- * @param saxDriverClass <code>String</code> name of SAX Driver
- * to use for parsing.
- */
- public SAXBuilder(String saxDriverClass) {
- this(saxDriverClass, false);
- }
-
- /**
- * Creates a new SAXBuilder using the specified SAX parser.
- * The underlying parser will validate or not
- * according to the given parameter.
- *
- * @param saxDriverClass <code>String</code> name of SAX Driver
- * to use for parsing.
- * @param validate <code>boolean</code> indicating if
- * validation should occur.
- */
- public SAXBuilder(String saxDriverClass, boolean validate) {
- this.saxDriverClass = saxDriverClass;
- this.validate = validate;
- }
-
- /**
- * Returns the driver class assigned in the constructor, or null if none.
- *
- * @return the driver class assigned in the constructor
- */
- public String getDriverClass() {
- return saxDriverClass;
- }
-
- /**
- * Returns the current {@link org.jdom.JDOMFactory} in use.
- * @return the factory in use
- */
- public JDOMFactory getFactory() {
- return factory;
- }
-
- /**
- * This sets a custom JDOMFactory for the builder. Use this to build
- * the tree with your own subclasses of the JDOM classes.
- *
- * @param factory <code>JDOMFactory</code> to use
- */
- public void setFactory(JDOMFactory factory) {
- this.factory = factory;
- }
-
- /**
- * Returns whether validation is to be performed during the build.
- *
- * @return whether validation is to be performed during the build
- */
- public boolean getValidation() {
- return validate;
- }
-
- /**
- * This sets validation for the builder.
- *
- * @param validate <code>boolean</code> indicating whether validation
- * should occur.
- */
- public void setValidation(boolean validate) {
- this.validate = validate;
- }
-
- /**
- * Returns the {@link ErrorHandler} assigned, or null if none.
- * @return the ErrorHandler assigned, or null if none
- */
- public ErrorHandler getErrorHandler() {
- return saxErrorHandler;
- }
-
- /**
- * This sets custom ErrorHandler for the <code>Builder</code>.
- *
- * @param errorHandler <code>ErrorHandler</code>
- */
- public void setErrorHandler(ErrorHandler errorHandler) {
- saxErrorHandler = errorHandler;
- }
-
- /**
- * Returns the {@link EntityResolver} assigned, or null if none.
- *
- * @return the EntityResolver assigned
- */
- public EntityResolver getEntityResolver() {
- return saxEntityResolver;
- }
-
- /**
- * This sets custom EntityResolver for the <code>Builder</code>.
- *
- * @param entityResolver <code>EntityResolver</code>
- */
- public void setEntityResolver(EntityResolver entityResolver) {
- saxEntityResolver = entityResolver;
- }
-
- /**
- * Returns the {@link DTDHandler} assigned, or null if none.
- *
- * @return the DTDHandler assigned
- */
- public DTDHandler getDTDHandler() {
- return saxDTDHandler;
- }
-
- /**
- * This sets custom DTDHandler for the <code>Builder</code>.
- *
- * @param dtdHandler <code>DTDHandler</code>
- */
- public void setDTDHandler(DTDHandler dtdHandler) {
- saxDTDHandler = dtdHandler;
- }
-
- /**
- * Returns the {@link XMLFilter} used during parsing, or null if none.
- *
- * @return the XMLFilter used during parsing
- */
- public XMLFilter getXMLFilter() {
- return saxXMLFilter;
- }
-
- /**
- * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
- *
- * @param xmlFilter the filter to use
- */
- public void setXMLFilter(XMLFilter xmlFilter) {
- saxXMLFilter = xmlFilter;
- }
-
- /**
- * Returns whether element content whitespace is to be ignored during the
- * build.
- *
- * @return whether element content whitespace is to be ignored during the
- * build
- */
- public boolean getIgnoringElementContentWhitespace() {
- return ignoringWhite;
- }
-
- /**
- * Specifies whether or not the parser should elminate whitespace in
- * element content (sometimes known as "ignorable whitespace") when
- * building the document. Only whitespace which is contained within
- * element content that has an element only content model will be
- * eliminated (see XML Rec 3.2.1). For this setting to take effect
- * requires that validation be turned on. The default value of this
- * setting is <code>false</code>.
- *
- * @param ignoringWhite Whether to ignore ignorable whitespace
- */
- public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
- this.ignoringWhite = ignoringWhite;
- }
-
- /**
- * Returns whether or not the parser will elminate element content
- * containing only whitespace.
- *
- * @return <code>boolean</code> - whether only whitespace content will
- * be ignored during build.
- *
- * @see #setIgnoringBoundaryWhitespace
- */
- public boolean getIgnoringBoundaryWhitespace() {
- return ignoringBoundaryWhite;
- }
-
- /**
- * Specifies whether or not the parser should elminate boundary whitespace,
- * a term that indicates whitespace-only text between element tags. This
- * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)}
- * but this feature is more aggressive and doesn't require validation be
- * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)}
- * call impacts the SAX parse process while this method impacts the JDOM
- * build process, so it can be beneficial to turn both on for efficiency.
- * For implementation efficiency, this method actually removes all
- * whitespace-only text() nodes. That can, in some cases (like beteween an
- * element tag and a comment), include whitespace that isn't just boundary
- * whitespace. The default is <code>false</code>.
- *
- * @param ignoringBoundaryWhite Whether to ignore whitespace-only text
- * noes
- */
- public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
- this.ignoringBoundaryWhite = ignoringBoundaryWhite;
- }
-
- /**
- * Returns whether the contained SAX parser instance is reused across
- * multiple parses. The default is true.
- *
- * @return whether the contained SAX parser instance is reused across
- * multiple parses
- */
- public boolean getReuseParser() {
- return reuseParser;
- }
-
- /**
- * Specifies whether this builder shall reuse the same SAX parser
- * when performing subsequent parses or allocate a new parser for
- * each parse. The default value of this setting is
- * <code>true</code> (parser reuse).
- * <p>
- * <strong>Note</strong>: As SAX parser instances are not thread safe,
- * the parser reuse feature should not be used with SAXBuilder instances
- * shared among threads.</p>
- *
- * @param reuseParser Whether to reuse the SAX parser.
- */
- public void setReuseParser(boolean reuseParser) {
- this.reuseParser = reuseParser;
- this.saxParser = null;
- }
-
- /**
- * This sets a feature on the SAX parser. See the SAX documentation for
- * </p>
- * <p>
- * NOTE: SAXBuilder requires that some particular features of the SAX parser be
- * set up in certain ways for it to work properly. The list of such features
- * may change in the future. Therefore, the use of this method may cause
- * parsing to break, and even if it doesn't break anything today it might
- * break parsing in a future JDOM version, because what JDOM parsers require
- * may change over time. Use with caution.
- * </p>
- *
- * @param name The feature name, which is a fully-qualified URI.
- * @param value The requested state of the feature (true or false).
- */
- public void setFeature(String name, boolean value) {
- // Save the specified feature for later.
- features.put(name, new Boolean(value));
- }
-
- /**
- * This sets a property on the SAX parser. See the SAX documentation for
- * more information.
- * <p>
- * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
- * set up in certain ways for it to work properly. The list of such properties
- * may change in the future. Therefore, the use of this method may cause
- * parsing to break, and even if it doesn't break anything today it might
- * break parsing in a future JDOM version, because what JDOM parsers require
- * may change over time. Use with caution.
- * </p>
- *
- * @param name The property name, which is a fully-qualified URI.
- * @param value The requested value for the property.
- */
- public void setProperty(String name, Object value) {
- // Save the specified property for later.
- properties.put(name, value);
- }
-
- /**
- * This builds a document from the supplied
- * input source.
- *
- * @param in <code>InputSource</code> to read from
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(InputSource in)
- throws JDOMException, IOException {
- SAXHandler contentHandler = null;
-
- try {
- // Create and configure the content handler.
- contentHandler = createContentHandler();
- configureContentHandler(contentHandler);
-
- XMLReader parser = this.saxParser;
- if (parser == null) {
- // Create and configure the parser.
- parser = createParser();
-
- // Install optional filter
- if (saxXMLFilter != null) {
- // Connect filter chain to parser
- XMLFilter root = saxXMLFilter;
- while (root.getParent() instanceof XMLFilter) {
- root = (XMLFilter)root.getParent();
- }
- root.setParent(parser);
-
- // Read from filter
- parser = saxXMLFilter;
- }
-
- // Configure parser
- configureParser(parser, contentHandler);
-
- if (reuseParser == true) {
- this.saxParser = parser;
- }
- }
- else {
- // Reset content handler as SAXHandler instances cannot
- // be reused
- configureParser(parser, contentHandler);
- }
-
- // Parse the document.
- parser.parse(in);
-
- return contentHandler.getDocument();
- }
- catch (SAXParseException e) {
- Document doc = contentHandler.getDocument();
- if (doc.hasRootElement() == false) {
- doc = null;
- }
-
- String systemId = e.getSystemId();
- if (systemId != null) {
- throw new JDOMParseException("Error on line " +
- e.getLineNumber() + " of document " + systemId, e, doc);
- } else {
- throw new JDOMParseException("Error on line " +
- e.getLineNumber(), e, doc);
- }
- }
- catch (SAXException e) {
- throw new JDOMParseException("Error in building: " +
- e.getMessage(), e, contentHandler.getDocument());
- }
- finally {
- // Explicitly nullify the handler to encourage GC
- // It's a stack var so this shouldn't be necessary, but it
- // seems to help on some JVMs
- contentHandler = null;
- }
- }
-
- /**
- * This creates the SAXHandler that will be used to build the Document.
- *
- * @return <code>SAXHandler</code> - resultant SAXHandler object.
- */
- protected SAXHandler createContentHandler() {
- SAXHandler contentHandler = new SAXHandler(factory);
- return contentHandler;
- }
-
- /**
- * This configures the SAXHandler that will be used to build the Document.
- * <p>
- * The default implementation simply passes through some configuration
- * settings that were set on the SAXBuilder: setExpandEntities() and
- * setIgnoringElementContentWhitespace().
- * </p>
- */
- protected void configureContentHandler(SAXHandler contentHandler) {
- // Setup pass through behavior
- contentHandler.setExpandEntities(expand);
- contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
- contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite);
- }
-
- /**
- * This creates the XMLReader to be used for reading the XML document.
- * <p>
- * The default behavior is to (1) use the saxDriverClass, if it has been
- * set, (2) try to obtain a parser from JAXP, if it is available, and
- * (3) if all else fails, use a hard-coded default parser (currently
- * the Xerces parser). Subclasses may override this method to determine
- * the parser to use in a different way.
- * </p>
- *
- * @return <code>XMLReader</code> - resultant XMLReader object.
- */
- protected XMLReader createParser() throws JDOMException {
- XMLReader parser = null;
- if (saxDriverClass != null) {
- // The user knows that they want to use a particular class
- try {
- parser = XMLReaderFactory.createXMLReader(saxDriverClass);
-
- // Configure parser
- setFeaturesAndProperties(parser, true);
- }
- catch (SAXException e) {
- throw new JDOMException("Could not load " + saxDriverClass, e);
- }
- } else {
- // Try using JAXP...
- // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
- // available then the getXMLReader call fails and we skip
- // to the hard coded default parser
- try {
- // Get factory class and method.
- Class factoryClass =
- Class.forName("org.jdom.input.JAXPParserFactory");
-
- Method createParser =
- factoryClass.getMethod("createParser",
- new Class[] { boolean.class, Map.class, Map.class });
-
- // Create SAX parser.
- parser = (XMLReader)createParser.invoke(null,
- new Object[] { new Boolean(validate),
- features, properties });
-
- // Configure parser
- setFeaturesAndProperties(parser, false);
- }
- catch (JDOMException e) {
- throw e;
- }
- catch (NoClassDefFoundError e) {
- // The class loader failed to resolve the dependencies
- // of org.jdom.input.JAXPParserFactory. This probably means
- // that no JAXP parser is present in its class path.
- // => Ignore and try allocating default SAX parser instance.
- }
- catch (Exception e) {
- // Ignore and try allocating default SAX parser instance.
- }
- }
-
- // Check to see if we got a parser yet, if not, try to use a
- // hard coded default
- if (parser == null) {
- try {
- parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
- // System.out.println("using default " + DEFAULT_SAX_DRIVER);
- saxDriverClass = parser.getClass().getName();
-
- // Configure parser
- setFeaturesAndProperties(parser, true);
- }
- catch (SAXException e) {
- throw new JDOMException("Could not load default SAX parser: "
- + DEFAULT_SAX_DRIVER, e);
- }
- }
-
- return parser;
- }
-
- /**
- * This configures the XMLReader to be used for reading the XML document.
- * <p>
- * The default implementation sets various options on the given XMLReader,
- * such as validation, DTD resolution, entity handlers, etc., according
- * to the options that were set (e.g. via <code>setEntityResolver</code>)
- * and set various SAX properties and features that are required for JDOM
- * internals. These features may change in future releases, so change this
- * behavior at your own risk.
- * </p>
- */
- protected void configureParser(XMLReader parser, SAXHandler contentHandler)
- throws JDOMException {
-
- // Setup SAX handlers.
-
- parser.setContentHandler(contentHandler);
-
- if (saxEntityResolver != null) {
- parser.setEntityResolver(saxEntityResolver);
- }
-
- if (saxDTDHandler != null) {
- parser.setDTDHandler(saxDTDHandler);
- } else {
- parser.setDTDHandler(contentHandler);
- }
-
- if (saxErrorHandler != null) {
- parser.setErrorHandler(saxErrorHandler);
- } else {
- parser.setErrorHandler(new BuilderErrorHandler());
- }
-
- // Setup lexical reporting.
- boolean lexicalReporting = false;
- try {
- parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
- contentHandler);
- lexicalReporting = true;
- } catch (SAXNotSupportedException e) {
- // No lexical reporting available
- } catch (SAXNotRecognizedException e) {
- // No lexical reporting available
- }
-
- // Some parsers use alternate property for lexical handling (grr...)
- if (!lexicalReporting) {
- try {
- parser.setProperty(
- "http://xml.org/sax/properties/lexical-handler",
- contentHandler);
- lexicalReporting = true;
- } catch (SAXNotSupportedException e) {
- // No lexical reporting available
- } catch (SAXNotRecognizedException e) {
- // No lexical reporting available
- }
- }
-
- // Try setting the DeclHandler if entity expansion is off
- if (!expand) {
- try {
- parser.setProperty(
- "http://xml.org/sax/properties/declaration-handler",
- contentHandler);
- } catch (SAXNotSupportedException e) {
- // No lexical reporting available
- } catch (SAXNotRecognizedException e) {
- // No lexical reporting available
- }
- }
- }
-
- private void setFeaturesAndProperties(XMLReader parser,
- boolean coreFeatures)
- throws JDOMException {
- // Set any user-specified features on the parser.
- Iterator iter = features.keySet().iterator();
- while (iter.hasNext()) {
- String name = (String)iter.next();
- Boolean value = (Boolean)features.get(name);
- internalSetFeature(parser, name, value.booleanValue(), name);
- }
-
- // Set any user-specified properties on the parser.
- iter = properties.keySet().iterator();
- while (iter.hasNext()) {
- String name = (String)iter.next();
- internalSetProperty(parser, name, properties.get(name), name);
- }
-
- if (coreFeatures) {
- // Set validation.
- try {
- internalSetFeature(parser,
- "http://xml.org/sax/features/validation",
- validate, "Validation");
- } catch (JDOMException e) {
- // If validation is not supported, and the user is requesting
- // that we don't validate, that's fine - don't throw an
- // exception.
- if (validate)
- throw e;
- }
-
- // Setup some namespace features.
- internalSetFeature(parser,
- "http://xml.org/sax/features/namespaces",
- true, "Namespaces");
- internalSetFeature(parser,
- "http://xml.org/sax/features/namespace-prefixes",
- true, "Namespace prefixes");
- }
-
- // Set entity expansion
- // Note SAXHandler can work regardless of how this is set, but when
- // entity expansion it's worth it to try to tell the parser not to
- // even bother with external general entities.
- // Apparently no parsers yet support this feature.
- // XXX It might make sense to setEntityResolver() with a resolver
- // that simply ignores external general entities
- try {
- if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
- parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
- }
- }
- catch (SAXNotRecognizedException e) { /* Ignore... */ }
- catch (SAXNotSupportedException e) { /* Ignore... */ }
- }
-
- /**
- * Tries to set a feature on the parser. If the feature cannot be set,
- * throws a JDOMException describing the problem.
- */
- private void internalSetFeature(XMLReader parser, String feature,
- boolean value, String displayName) throws JDOMException {
- try {
- parser.setFeature(feature, value);
- } catch (SAXNotSupportedException e) {
- throw new JDOMException(
- displayName + " feature not supported for SAX driver " + parser.getClass().getName());
- } catch (SAXNotRecognizedException e) {
- throw new JDOMException(
- displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
- }
- }
-
- /**
- * <p>
- * Tries to set a property on the parser. If the property cannot be set,
- * throws a JDOMException describing the problem.
- * </p>
- */
- private void internalSetProperty(XMLReader parser, String property,
- Object value, String displayName) throws JDOMException {
- try {
- parser.setProperty(property, value);
- } catch (SAXNotSupportedException e) {
- throw new JDOMException(
- displayName + " property not supported for SAX driver " + parser.getClass().getName());
- } catch (SAXNotRecognizedException e) {
- throw new JDOMException(
- displayName + " property not recognized for SAX driver " + parser.getClass().getName());
- }
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * input stream.
- * </p>
- *
- * @param in <code>InputStream</code> to read from
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed.
- */
- public Document build(InputStream in)
- throws JDOMException, IOException {
- return build(new InputSource(in));
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * filename.
- * </p>
- *
- * @param file <code>File</code> to read from
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(File file)
- throws JDOMException, IOException {
- try {
- URL url = fileToURL(file);
- return build(url);
- } catch (MalformedURLException e) {
- throw new JDOMException("Error in building", e);
- }
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * URL.
- * </p>
- *
- * @param url <code>URL</code> to read from.
- * @return <code>Document</code> - resultant Document object.
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed.
- */
- public Document build(URL url)
- throws JDOMException, IOException {
- String systemID = url.toExternalForm();
- return build(new InputSource(systemID));
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * input stream.
- * </p>
- *
- * @param in <code>InputStream</code> to read from.
- * @param systemId base for resolving relative URIs
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(InputStream in, String systemId)
- throws JDOMException, IOException {
-
- InputSource src = new InputSource(in);
- src.setSystemId(systemId);
- return build(src);
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * Reader. It's the programmer's responsibility to make sure
- * the reader matches the encoding of the file. It's often easier
- * and safer to use an InputStream rather than a Reader, and to let the
- * parser auto-detect the encoding from the XML declaration.
- * </p>
- *
- * @param characterStream <code>Reader</code> to read from
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(Reader characterStream)
- throws JDOMException, IOException {
- return build(new InputSource(characterStream));
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * Reader. It's the programmer's responsibility to make sure
- * the reader matches the encoding of the file. It's often easier
- * and safer to use an InputStream rather than a Reader, and to let the
- * parser auto-detect the encoding from the XML declaration.
- * </p>
- *
- * @param characterStream <code>Reader</code> to read from.
- * @param systemId base for resolving relative URIs
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(Reader characterStream, String systemId)
- throws JDOMException, IOException {
-
- InputSource src = new InputSource(characterStream);
- src.setSystemId(systemId);
- return build(src);
- }
-
- /**
- * <p>
- * This builds a document from the supplied
- * URI.
- * </p>
- * @param systemId URI for the input
- * @return <code>Document</code> resultant Document object
- * @throws JDOMException when errors occur in parsing
- * @throws IOException when an I/O error prevents a document
- * from being fully parsed
- */
- public Document build(String systemId)
- throws JDOMException, IOException {
- return build(new InputSource(systemId));
- }
-
- // /**
- // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
- // * here to work with JDK 1.1.
- // *
- // * @see java.io.File
- // *
- // * @param f the file to convert
- // * @return the file path converted to a file: URL
- // */
- // protected URL fileToURL(File f) throws MalformedURLException {
- // String path = f.getAbsolutePath();
- // if (File.separatorChar != '/') {
- // path = path.replace(File.separatorChar, '/');
- // }
- // if (!path.startsWith("/")) {
- // path = "/" + path;
- // }
- // if (!path.endsWith("/") && f.isDirectory()) {
- // path = path + "/";
- // }
- // return new URL("file", "", path);
- // }
-
- /** Custom File.toUrl() implementation to handle special chars in file names
- *
- * @param file file object whose path will be converted
- * @return URL form of the file, with special characters handled
- * @throws MalformedURLException if there's a problem constructing a URL
- */
- private static URL fileToURL(File file) throws MalformedURLException {
- StringBuffer buffer = new StringBuffer();
- String path = file.getAbsolutePath();
-
- // Convert non-URL style file separators
- if (File.separatorChar != '/') {
- path = path.replace(File.separatorChar, '/');
- }
-
- // Make sure it starts at root
- if (!path.startsWith("/")) {
- buffer.append('/');
- }
-
- // Copy, converting URL special characters as we go
- int len = path.length();
- for (int i = 0; i < len; i++) {
- char c = path.charAt(i);
- if (c == ' ')
- buffer.append("%20");
- else if (c == '#')
- buffer.append("%23");
- else if (c == '%')
- buffer.append("%25");
- else if (c == '&')
- buffer.append("%26");
- else if (c == ';')
- buffer.append("%3B");
- else if (c == '<')
- buffer.append("%3C");
- else if (c == '=')
- buffer.append("%3D");
- else if (c == '>')
- buffer.append("%3E");
- else if (c == '?')
- buffer.append("%3F");
- else if (c == '~')
- buffer.append("%7E");
- else
- buffer.append(c);
- }
-
- // Make sure directories end with slash
- if (!path.endsWith("/") && file.isDirectory()) {
- buffer.append('/');
- }
-
- // Return URL
- return new URL("file", "", buffer.toString());
- }
-
- /**
- * Returns whether or not entities are being expanded into normal text
- * content.
- *
- * @return whether entities are being expanded
- */
- public boolean getExpandEntities() {
- return expand;
- }
-
- /**
- * <p>
- * This sets whether or not to expand entities for the builder.
- * A true means to expand entities as normal content. A false means to
- * leave entities unexpanded as <code>EntityRef</code> objects. The
- * default is true.
- * </p>
- * <p>
- * When this setting is false, the internal DTD subset is retained; when
- * this setting is true, the internal DTD subset is not retained.
- * </p>
- * <p>
- * Note that Xerces (at least up to 1.4.4) has a bug where entities
- * in attribute values will be misreported if this flag is turned off,
- * resulting in entities to appear within element content. When turning
- * entity expansion off either avoid entities in attribute values, or
- * use another parser like Crimson.
- * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
- * </p>
- *
- * @param expand <code>boolean</code> indicating whether entity expansion
- * should occur.
- */
- public void setExpandEntities(boolean expand) {
- this.expand = expand;
- }
- }
|