Desktop tool for browsing account info from EVE-Online
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

SAXBuilder.java 37KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047
  1. /*--
  2. $Id: SAXBuilder.java,v 1.92 2007/11/10 05:29:00 jhunter Exp $
  3. Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
  4. All rights reserved.
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions
  7. are met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions, and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions, and the disclaimer that follows
  12. these conditions in the documentation and/or other materials
  13. provided with the distribution.
  14. 3. The name "JDOM" must not be used to endorse or promote products
  15. derived from this software without prior written permission. For
  16. written permission, please contact <request_AT_jdom_DOT_org>.
  17. 4. Products derived from this software may not be called "JDOM", nor
  18. may "JDOM" appear in their name, without prior written permission
  19. from the JDOM Project Management <request_AT_jdom_DOT_org>.
  20. In addition, we request (but do not require) that you include in the
  21. end-user documentation provided with the redistribution and/or in the
  22. software itself an acknowledgement equivalent to the following:
  23. "This product includes software developed by the
  24. JDOM Project (http://www.jdom.org/)."
  25. Alternatively, the acknowledgment may be graphical using the logos
  26. available at http://www.jdom.org/images/logos.
  27. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  28. WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  29. OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
  31. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  32. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  33. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  34. USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  35. ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  36. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  37. OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  38. SUCH DAMAGE.
  39. This software consists of voluntary contributions made by many
  40. individuals on behalf of the JDOM Project and was originally
  41. created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
  42. Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
  43. on the JDOM Project, please see <http://www.jdom.org/>.
  44. */
  45. package org.jdom.input;
  46. import java.io.*;
  47. import java.lang.reflect.*;
  48. import java.net.*;
  49. import java.util.*;
  50. import org.jdom.*;
  51. import org.xml.sax.*;
  52. import org.xml.sax.helpers.XMLReaderFactory;
  53. /**
  54. * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
  55. * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
  56. * third-party SAX parser (chosen by JAXP by default, or you can choose
  57. * manually) to handle the parsing duties and simply listens to the SAX events
  58. * to construct a document. Details which SAX does not provide, such as
  59. * whitespace outside the root element, are not represented in the JDOM
  60. * document. Information about SAX can be found at <a
  61. * href="http://www.saxproject.org">http://www.saxproject.org</a>.
  62. * <p>
  63. * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
  64. * be converted by the SAX parser into absolute paths.
  65. *
  66. * @version $Revision: 1.92 $, $Date: 2007/11/10 05:29:00 $
  67. * @author Jason Hunter
  68. * @author Brett McLaughlin
  69. * @author Dan Schaffer
  70. * @author Philip Nelson
  71. * @author Alex Rosen
  72. */
  73. public class SAXBuilder {
  74. private static final String CVS_ID =
  75. "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.92 $ $Date: 2007/11/10 05:29:00 $ $Name: jdom_1_1 $";
  76. /**
  77. * Default parser class to use. This is used when no other parser
  78. * is given and JAXP isn't available.
  79. */
  80. private static final String DEFAULT_SAX_DRIVER =
  81. "org.apache.xerces.parsers.SAXParser";
  82. /** Whether validation should occur */
  83. private boolean validate;
  84. /** Whether expansion of entities should occur */
  85. private boolean expand = true;
  86. /** Adapter class to use */
  87. private String saxDriverClass;
  88. /** ErrorHandler class to use */
  89. private ErrorHandler saxErrorHandler = null;
  90. /** EntityResolver class to use */
  91. private EntityResolver saxEntityResolver = null;
  92. /** DTDHandler class to use */
  93. private DTDHandler saxDTDHandler = null;
  94. /** XMLFilter instance to use */
  95. private XMLFilter saxXMLFilter = null;
  96. /** The factory for creating new JDOM objects */
  97. private JDOMFactory factory = new DefaultJDOMFactory();
  98. /** Whether to ignore ignorable whitespace */
  99. private boolean ignoringWhite = false;
  100. /** Whether to ignore all whitespace content */
  101. private boolean ignoringBoundaryWhite = false;
  102. /** User-specified features to be set on the SAX parser */
  103. private HashMap features = new HashMap(5);
  104. /** User-specified properties to be set on the SAX parser */
  105. private HashMap properties = new HashMap(5);
  106. /**
  107. * Whether parser reuse is allowed.
  108. * <p>Default: <code>true</code></p>
  109. */
  110. private boolean reuseParser = true;
  111. /** The current SAX parser, if parser reuse has been activated. */
  112. private XMLReader saxParser = null;
  /**
   * Creates a non-validating SAXBuilder. The parser is located through
   * JAXP first, falling back to a default SAX driver.
   */
  public SAXBuilder() {
      this(false);
  }
  /**
   * Creates a SAXBuilder whose parser is located through JAXP first,
   * falling back to a default SAX driver, and which validates or not
   * as requested.
   *
   * @param validate <code>true</code> if the underlying parser should
   *                 validate, <code>false</code> otherwise.
   */
  public SAXBuilder(boolean validate) {
      this.validate = validate;
  }
  /**
   * Creates a non-validating SAXBuilder backed by the named SAX parser.
   *
   * @param saxDriverClass name of the SAX driver class to parse with.
   */
  public SAXBuilder(String saxDriverClass) {
      this(saxDriverClass, false);
  }
  /**
   * Creates a SAXBuilder backed by the named SAX parser, validating or
   * not as requested.
   *
   * @param saxDriverClass name of the SAX driver class to parse with.
   * @param validate       <code>true</code> if the underlying parser
   *                       should validate, <code>false</code> otherwise.
   */
  public SAXBuilder(String saxDriverClass, boolean validate) {
      this.validate = validate;
      this.saxDriverClass = saxDriverClass;
  }
  157. /**
  158. * Returns the driver class assigned in the constructor, or null if none.
  159. *
  160. * @return the driver class assigned in the constructor
  161. */
  162. public String getDriverClass() {
  163. return saxDriverClass;
  164. }
  165. /**
  166. * Returns the current {@link org.jdom.JDOMFactory} in use.
  167. * @return the factory in use
  168. */
  169. public JDOMFactory getFactory() {
  170. return factory;
  171. }
  172. /**
  173. * This sets a custom JDOMFactory for the builder. Use this to build
  174. * the tree with your own subclasses of the JDOM classes.
  175. *
  176. * @param factory <code>JDOMFactory</code> to use
  177. */
  178. public void setFactory(JDOMFactory factory) {
  179. this.factory = factory;
  180. }
  181. /**
  182. * Returns whether validation is to be performed during the build.
  183. *
  184. * @return whether validation is to be performed during the build
  185. */
  186. public boolean getValidation() {
  187. return validate;
  188. }
  189. /**
  190. * This sets validation for the builder.
  191. *
  192. * @param validate <code>boolean</code> indicating whether validation
  193. * should occur.
  194. */
  195. public void setValidation(boolean validate) {
  196. this.validate = validate;
  197. }
  198. /**
  199. * Returns the {@link ErrorHandler} assigned, or null if none.
  200. * @return the ErrorHandler assigned, or null if none
  201. */
  202. public ErrorHandler getErrorHandler() {
  203. return saxErrorHandler;
  204. }
  205. /**
  206. * This sets custom ErrorHandler for the <code>Builder</code>.
  207. *
  208. * @param errorHandler <code>ErrorHandler</code>
  209. */
  210. public void setErrorHandler(ErrorHandler errorHandler) {
  211. saxErrorHandler = errorHandler;
  212. }
  213. /**
  214. * Returns the {@link EntityResolver} assigned, or null if none.
  215. *
  216. * @return the EntityResolver assigned
  217. */
  218. public EntityResolver getEntityResolver() {
  219. return saxEntityResolver;
  220. }
  221. /**
  222. * This sets custom EntityResolver for the <code>Builder</code>.
  223. *
  224. * @param entityResolver <code>EntityResolver</code>
  225. */
  226. public void setEntityResolver(EntityResolver entityResolver) {
  227. saxEntityResolver = entityResolver;
  228. }
  229. /**
  230. * Returns the {@link DTDHandler} assigned, or null if none.
  231. *
  232. * @return the DTDHandler assigned
  233. */
  234. public DTDHandler getDTDHandler() {
  235. return saxDTDHandler;
  236. }
  237. /**
  238. * This sets custom DTDHandler for the <code>Builder</code>.
  239. *
  240. * @param dtdHandler <code>DTDHandler</code>
  241. */
  242. public void setDTDHandler(DTDHandler dtdHandler) {
  243. saxDTDHandler = dtdHandler;
  244. }
  245. /**
  246. * Returns the {@link XMLFilter} used during parsing, or null if none.
  247. *
  248. * @return the XMLFilter used during parsing
  249. */
  250. public XMLFilter getXMLFilter() {
  251. return saxXMLFilter;
  252. }
  253. /**
  254. * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
  255. *
  256. * @param xmlFilter the filter to use
  257. */
  258. public void setXMLFilter(XMLFilter xmlFilter) {
  259. saxXMLFilter = xmlFilter;
  260. }
  261. /**
  262. * Returns whether element content whitespace is to be ignored during the
  263. * build.
  264. *
  265. * @return whether element content whitespace is to be ignored during the
  266. * build
  267. */
  268. public boolean getIgnoringElementContentWhitespace() {
  269. return ignoringWhite;
  270. }
  271. /**
  272. * Specifies whether or not the parser should elminate whitespace in
  273. * element content (sometimes known as "ignorable whitespace") when
  274. * building the document. Only whitespace which is contained within
  275. * element content that has an element only content model will be
  276. * eliminated (see XML Rec 3.2.1). For this setting to take effect
  277. * requires that validation be turned on. The default value of this
  278. * setting is <code>false</code>.
  279. *
  280. * @param ignoringWhite Whether to ignore ignorable whitespace
  281. */
  282. public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
  283. this.ignoringWhite = ignoringWhite;
  284. }
  285. /**
  286. * Returns whether or not the parser will elminate element content
  287. * containing only whitespace.
  288. *
  289. * @return <code>boolean</code> - whether only whitespace content will
  290. * be ignored during build.
  291. *
  292. * @see #setIgnoringBoundaryWhitespace
  293. */
  294. public boolean getIgnoringBoundaryWhitespace() {
  295. return ignoringBoundaryWhite;
  296. }
  297. /**
  298. * Specifies whether or not the parser should elminate boundary whitespace,
  299. * a term that indicates whitespace-only text between element tags. This
  300. * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)}
  301. * but this feature is more aggressive and doesn't require validation be
  302. * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)}
  303. * call impacts the SAX parse process while this method impacts the JDOM
  304. * build process, so it can be beneficial to turn both on for efficiency.
  305. * For implementation efficiency, this method actually removes all
  306. * whitespace-only text() nodes. That can, in some cases (like beteween an
  307. * element tag and a comment), include whitespace that isn't just boundary
  308. * whitespace. The default is <code>false</code>.
  309. *
  310. * @param ignoringBoundaryWhite Whether to ignore whitespace-only text
  311. * noes
  312. */
  313. public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
  314. this.ignoringBoundaryWhite = ignoringBoundaryWhite;
  315. }
  316. /**
  317. * Returns whether the contained SAX parser instance is reused across
  318. * multiple parses. The default is true.
  319. *
  320. * @return whether the contained SAX parser instance is reused across
  321. * multiple parses
  322. */
  323. public boolean getReuseParser() {
  324. return reuseParser;
  325. }
  326. /**
  327. * Specifies whether this builder shall reuse the same SAX parser
  328. * when performing subsequent parses or allocate a new parser for
  329. * each parse. The default value of this setting is
  330. * <code>true</code> (parser reuse).
  331. * <p>
  332. * <strong>Note</strong>: As SAX parser instances are not thread safe,
  333. * the parser reuse feature should not be used with SAXBuilder instances
  334. * shared among threads.</p>
  335. *
  336. * @param reuseParser Whether to reuse the SAX parser.
  337. */
  338. public void setReuseParser(boolean reuseParser) {
  339. this.reuseParser = reuseParser;
  340. this.saxParser = null;
  341. }
  342. /**
  343. * This sets a feature on the SAX parser. See the SAX documentation for
  344. * </p>
  345. * <p>
  346. * NOTE: SAXBuilder requires that some particular features of the SAX parser be
  347. * set up in certain ways for it to work properly. The list of such features
  348. * may change in the future. Therefore, the use of this method may cause
  349. * parsing to break, and even if it doesn't break anything today it might
  350. * break parsing in a future JDOM version, because what JDOM parsers require
  351. * may change over time. Use with caution.
  352. * </p>
  353. *
  354. * @param name The feature name, which is a fully-qualified URI.
  355. * @param value The requested state of the feature (true or false).
  356. */
  357. public void setFeature(String name, boolean value) {
  358. // Save the specified feature for later.
  359. features.put(name, new Boolean(value));
  360. }
  361. /**
  362. * This sets a property on the SAX parser. See the SAX documentation for
  363. * more information.
  364. * <p>
  365. * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
  366. * set up in certain ways for it to work properly. The list of such properties
  367. * may change in the future. Therefore, the use of this method may cause
  368. * parsing to break, and even if it doesn't break anything today it might
  369. * break parsing in a future JDOM version, because what JDOM parsers require
  370. * may change over time. Use with caution.
  371. * </p>
  372. *
  373. * @param name The property name, which is a fully-qualified URI.
  374. * @param value The requested value for the property.
  375. */
  376. public void setProperty(String name, Object value) {
  377. // Save the specified property for later.
  378. properties.put(name, value);
  379. }
  380. /**
  381. * This builds a document from the supplied
  382. * input source.
  383. *
  384. * @param in <code>InputSource</code> to read from
  385. * @return <code>Document</code> resultant Document object
  386. * @throws JDOMException when errors occur in parsing
  387. * @throws IOException when an I/O error prevents a document
  388. * from being fully parsed
  389. */
  390. public Document build(InputSource in)
  391. throws JDOMException, IOException {
  392. SAXHandler contentHandler = null;
  393. try {
  394. // Create and configure the content handler.
  395. contentHandler = createContentHandler();
  396. configureContentHandler(contentHandler);
  397. XMLReader parser = this.saxParser;
  398. if (parser == null) {
  399. // Create and configure the parser.
  400. parser = createParser();
  401. // Install optional filter
  402. if (saxXMLFilter != null) {
  403. // Connect filter chain to parser
  404. XMLFilter root = saxXMLFilter;
  405. while (root.getParent() instanceof XMLFilter) {
  406. root = (XMLFilter)root.getParent();
  407. }
  408. root.setParent(parser);
  409. // Read from filter
  410. parser = saxXMLFilter;
  411. }
  412. // Configure parser
  413. configureParser(parser, contentHandler);
  414. if (reuseParser == true) {
  415. this.saxParser = parser;
  416. }
  417. }
  418. else {
  419. // Reset content handler as SAXHandler instances cannot
  420. // be reused
  421. configureParser(parser, contentHandler);
  422. }
  423. // Parse the document.
  424. parser.parse(in);
  425. return contentHandler.getDocument();
  426. }
  427. catch (SAXParseException e) {
  428. Document doc = contentHandler.getDocument();
  429. if (doc.hasRootElement() == false) {
  430. doc = null;
  431. }
  432. String systemId = e.getSystemId();
  433. if (systemId != null) {
  434. throw new JDOMParseException("Error on line " +
  435. e.getLineNumber() + " of document " + systemId, e, doc);
  436. } else {
  437. throw new JDOMParseException("Error on line " +
  438. e.getLineNumber(), e, doc);
  439. }
  440. }
  441. catch (SAXException e) {
  442. throw new JDOMParseException("Error in building: " +
  443. e.getMessage(), e, contentHandler.getDocument());
  444. }
  445. finally {
  446. // Explicitly nullify the handler to encourage GC
  447. // It's a stack var so this shouldn't be necessary, but it
  448. // seems to help on some JVMs
  449. contentHandler = null;
  450. }
  451. }
  452. /**
  453. * This creates the SAXHandler that will be used to build the Document.
  454. *
  455. * @return <code>SAXHandler</code> - resultant SAXHandler object.
  456. */
  457. protected SAXHandler createContentHandler() {
  458. SAXHandler contentHandler = new SAXHandler(factory);
  459. return contentHandler;
  460. }
  461. /**
  462. * This configures the SAXHandler that will be used to build the Document.
  463. * <p>
  464. * The default implementation simply passes through some configuration
  465. * settings that were set on the SAXBuilder: setExpandEntities() and
  466. * setIgnoringElementContentWhitespace().
  467. * </p>
  468. */
  469. protected void configureContentHandler(SAXHandler contentHandler) {
  470. // Setup pass through behavior
  471. contentHandler.setExpandEntities(expand);
  472. contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
  473. contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite);
  474. }
  475. /**
  476. * This creates the XMLReader to be used for reading the XML document.
  477. * <p>
  478. * The default behavior is to (1) use the saxDriverClass, if it has been
  479. * set, (2) try to obtain a parser from JAXP, if it is available, and
  480. * (3) if all else fails, use a hard-coded default parser (currently
  481. * the Xerces parser). Subclasses may override this method to determine
  482. * the parser to use in a different way.
  483. * </p>
  484. *
  485. * @return <code>XMLReader</code> - resultant XMLReader object.
  486. */
  487. protected XMLReader createParser() throws JDOMException {
  488. XMLReader parser = null;
  489. if (saxDriverClass != null) {
  490. // The user knows that they want to use a particular class
  491. try {
  492. parser = XMLReaderFactory.createXMLReader(saxDriverClass);
  493. // Configure parser
  494. setFeaturesAndProperties(parser, true);
  495. }
  496. catch (SAXException e) {
  497. throw new JDOMException("Could not load " + saxDriverClass, e);
  498. }
  499. } else {
  500. // Try using JAXP...
  501. // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
  502. // available then the getXMLReader call fails and we skip
  503. // to the hard coded default parser
  504. try {
  505. // Get factory class and method.
  506. Class factoryClass =
  507. Class.forName("org.jdom.input.JAXPParserFactory");
  508. Method createParser =
  509. factoryClass.getMethod("createParser",
  510. new Class[] { boolean.class, Map.class, Map.class });
  511. // Create SAX parser.
  512. parser = (XMLReader)createParser.invoke(null,
  513. new Object[] { new Boolean(validate),
  514. features, properties });
  515. // Configure parser
  516. setFeaturesAndProperties(parser, false);
  517. }
  518. catch (JDOMException e) {
  519. throw e;
  520. }
  521. catch (NoClassDefFoundError e) {
  522. // The class loader failed to resolve the dependencies
  523. // of org.jdom.input.JAXPParserFactory. This probably means
  524. // that no JAXP parser is present in its class path.
  525. // => Ignore and try allocating default SAX parser instance.
  526. }
  527. catch (Exception e) {
  528. // Ignore and try allocating default SAX parser instance.
  529. }
  530. }
  531. // Check to see if we got a parser yet, if not, try to use a
  532. // hard coded default
  533. if (parser == null) {
  534. try {
  535. parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
  536. // System.out.println("using default " + DEFAULT_SAX_DRIVER);
  537. saxDriverClass = parser.getClass().getName();
  538. // Configure parser
  539. setFeaturesAndProperties(parser, true);
  540. }
  541. catch (SAXException e) {
  542. throw new JDOMException("Could not load default SAX parser: "
  543. + DEFAULT_SAX_DRIVER, e);
  544. }
  545. }
  546. return parser;
  547. }
  548. /**
  549. * This configures the XMLReader to be used for reading the XML document.
  550. * <p>
  551. * The default implementation sets various options on the given XMLReader,
  552. * such as validation, DTD resolution, entity handlers, etc., according
  553. * to the options that were set (e.g. via <code>setEntityResolver</code>)
  554. * and set various SAX properties and features that are required for JDOM
  555. * internals. These features may change in future releases, so change this
  556. * behavior at your own risk.
  557. * </p>
  558. */
  559. protected void configureParser(XMLReader parser, SAXHandler contentHandler)
  560. throws JDOMException {
  561. // Setup SAX handlers.
  562. parser.setContentHandler(contentHandler);
  563. if (saxEntityResolver != null) {
  564. parser.setEntityResolver(saxEntityResolver);
  565. }
  566. if (saxDTDHandler != null) {
  567. parser.setDTDHandler(saxDTDHandler);
  568. } else {
  569. parser.setDTDHandler(contentHandler);
  570. }
  571. if (saxErrorHandler != null) {
  572. parser.setErrorHandler(saxErrorHandler);
  573. } else {
  574. parser.setErrorHandler(new BuilderErrorHandler());
  575. }
  576. // Setup lexical reporting.
  577. boolean lexicalReporting = false;
  578. try {
  579. parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
  580. contentHandler);
  581. lexicalReporting = true;
  582. } catch (SAXNotSupportedException e) {
  583. // No lexical reporting available
  584. } catch (SAXNotRecognizedException e) {
  585. // No lexical reporting available
  586. }
  587. // Some parsers use alternate property for lexical handling (grr...)
  588. if (!lexicalReporting) {
  589. try {
  590. parser.setProperty(
  591. "http://xml.org/sax/properties/lexical-handler",
  592. contentHandler);
  593. lexicalReporting = true;
  594. } catch (SAXNotSupportedException e) {
  595. // No lexical reporting available
  596. } catch (SAXNotRecognizedException e) {
  597. // No lexical reporting available
  598. }
  599. }
  600. // Try setting the DeclHandler if entity expansion is off
  601. if (!expand) {
  602. try {
  603. parser.setProperty(
  604. "http://xml.org/sax/properties/declaration-handler",
  605. contentHandler);
  606. } catch (SAXNotSupportedException e) {
  607. // No lexical reporting available
  608. } catch (SAXNotRecognizedException e) {
  609. // No lexical reporting available
  610. }
  611. }
  612. }
  613. private void setFeaturesAndProperties(XMLReader parser,
  614. boolean coreFeatures)
  615. throws JDOMException {
  616. // Set any user-specified features on the parser.
  617. Iterator iter = features.keySet().iterator();
  618. while (iter.hasNext()) {
  619. String name = (String)iter.next();
  620. Boolean value = (Boolean)features.get(name);
  621. internalSetFeature(parser, name, value.booleanValue(), name);
  622. }
  623. // Set any user-specified properties on the parser.
  624. iter = properties.keySet().iterator();
  625. while (iter.hasNext()) {
  626. String name = (String)iter.next();
  627. internalSetProperty(parser, name, properties.get(name), name);
  628. }
  629. if (coreFeatures) {
  630. // Set validation.
  631. try {
  632. internalSetFeature(parser,
  633. "http://xml.org/sax/features/validation",
  634. validate, "Validation");
  635. } catch (JDOMException e) {
  636. // If validation is not supported, and the user is requesting
  637. // that we don't validate, that's fine - don't throw an
  638. // exception.
  639. if (validate)
  640. throw e;
  641. }
  642. // Setup some namespace features.
  643. internalSetFeature(parser,
  644. "http://xml.org/sax/features/namespaces",
  645. true, "Namespaces");
  646. internalSetFeature(parser,
  647. "http://xml.org/sax/features/namespace-prefixes",
  648. true, "Namespace prefixes");
  649. }
  650. // Set entity expansion
  651. // Note SAXHandler can work regardless of how this is set, but when
  652. // entity expansion it's worth it to try to tell the parser not to
  653. // even bother with external general entities.
  654. // Apparently no parsers yet support this feature.
  655. // XXX It might make sense to setEntityResolver() with a resolver
  656. // that simply ignores external general entities
  657. try {
  658. if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
  659. parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
  660. }
  661. }
  662. catch (SAXNotRecognizedException e) { /* Ignore... */ }
  663. catch (SAXNotSupportedException e) { /* Ignore... */ }
  664. }
  665. /**
  666. * Tries to set a feature on the parser. If the feature cannot be set,
  667. * throws a JDOMException describing the problem.
  668. */
  669. private void internalSetFeature(XMLReader parser, String feature,
  670. boolean value, String displayName) throws JDOMException {
  671. try {
  672. parser.setFeature(feature, value);
  673. } catch (SAXNotSupportedException e) {
  674. throw new JDOMException(
  675. displayName + " feature not supported for SAX driver " + parser.getClass().getName());
  676. } catch (SAXNotRecognizedException e) {
  677. throw new JDOMException(
  678. displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
  679. }
  680. }
  681. /**
  682. * <p>
  683. * Tries to set a property on the parser. If the property cannot be set,
  684. * throws a JDOMException describing the problem.
  685. * </p>
  686. */
  687. private void internalSetProperty(XMLReader parser, String property,
  688. Object value, String displayName) throws JDOMException {
  689. try {
  690. parser.setProperty(property, value);
  691. } catch (SAXNotSupportedException e) {
  692. throw new JDOMException(
  693. displayName + " property not supported for SAX driver " + parser.getClass().getName());
  694. } catch (SAXNotRecognizedException e) {
  695. throw new JDOMException(
  696. displayName + " property not recognized for SAX driver " + parser.getClass().getName());
  697. }
  698. }
  699. /**
  700. * <p>
  701. * This builds a document from the supplied
  702. * input stream.
  703. * </p>
  704. *
  705. * @param in <code>InputStream</code> to read from
  706. * @return <code>Document</code> resultant Document object
  707. * @throws JDOMException when errors occur in parsing
  708. * @throws IOException when an I/O error prevents a document
  709. * from being fully parsed.
  710. */
  711. public Document build(InputStream in)
  712. throws JDOMException, IOException {
  713. return build(new InputSource(in));
  714. }
  715. /**
  716. * <p>
  717. * This builds a document from the supplied
  718. * filename.
  719. * </p>
  720. *
  721. * @param file <code>File</code> to read from
  722. * @return <code>Document</code> resultant Document object
  723. * @throws JDOMException when errors occur in parsing
  724. * @throws IOException when an I/O error prevents a document
  725. * from being fully parsed
  726. */
  727. public Document build(File file)
  728. throws JDOMException, IOException {
  729. try {
  730. URL url = fileToURL(file);
  731. return build(url);
  732. } catch (MalformedURLException e) {
  733. throw new JDOMException("Error in building", e);
  734. }
  735. }
  736. /**
  737. * <p>
  738. * This builds a document from the supplied
  739. * URL.
  740. * </p>
  741. *
  742. * @param url <code>URL</code> to read from.
  743. * @return <code>Document</code> - resultant Document object.
  744. * @throws JDOMException when errors occur in parsing
  745. * @throws IOException when an I/O error prevents a document
  746. * from being fully parsed.
  747. */
  748. public Document build(URL url)
  749. throws JDOMException, IOException {
  750. String systemID = url.toExternalForm();
  751. return build(new InputSource(systemID));
  752. }
  753. /**
  754. * <p>
  755. * This builds a document from the supplied
  756. * input stream.
  757. * </p>
  758. *
  759. * @param in <code>InputStream</code> to read from.
  760. * @param systemId base for resolving relative URIs
  761. * @return <code>Document</code> resultant Document object
  762. * @throws JDOMException when errors occur in parsing
  763. * @throws IOException when an I/O error prevents a document
  764. * from being fully parsed
  765. */
  766. public Document build(InputStream in, String systemId)
  767. throws JDOMException, IOException {
  768. InputSource src = new InputSource(in);
  769. src.setSystemId(systemId);
  770. return build(src);
  771. }
  772. /**
  773. * <p>
  774. * This builds a document from the supplied
  775. * Reader. It's the programmer's responsibility to make sure
  776. * the reader matches the encoding of the file. It's often easier
  777. * and safer to use an InputStream rather than a Reader, and to let the
  778. * parser auto-detect the encoding from the XML declaration.
  779. * </p>
  780. *
  781. * @param characterStream <code>Reader</code> to read from
  782. * @return <code>Document</code> resultant Document object
  783. * @throws JDOMException when errors occur in parsing
  784. * @throws IOException when an I/O error prevents a document
  785. * from being fully parsed
  786. */
  787. public Document build(Reader characterStream)
  788. throws JDOMException, IOException {
  789. return build(new InputSource(characterStream));
  790. }
  791. /**
  792. * <p>
  793. * This builds a document from the supplied
  794. * Reader. It's the programmer's responsibility to make sure
  795. * the reader matches the encoding of the file. It's often easier
  796. * and safer to use an InputStream rather than a Reader, and to let the
  797. * parser auto-detect the encoding from the XML declaration.
  798. * </p>
  799. *
  800. * @param characterStream <code>Reader</code> to read from.
  801. * @param systemId base for resolving relative URIs
  802. * @return <code>Document</code> resultant Document object
  803. * @throws JDOMException when errors occur in parsing
  804. * @throws IOException when an I/O error prevents a document
  805. * from being fully parsed
  806. */
  807. public Document build(Reader characterStream, String systemId)
  808. throws JDOMException, IOException {
  809. InputSource src = new InputSource(characterStream);
  810. src.setSystemId(systemId);
  811. return build(src);
  812. }
  813. /**
  814. * <p>
  815. * This builds a document from the supplied
  816. * URI.
  817. * </p>
  818. * @param systemId URI for the input
  819. * @return <code>Document</code> resultant Document object
  820. * @throws JDOMException when errors occur in parsing
  821. * @throws IOException when an I/O error prevents a document
  822. * from being fully parsed
  823. */
  824. public Document build(String systemId)
  825. throws JDOMException, IOException {
  826. return build(new InputSource(systemId));
  827. }
  828. // /**
  829. // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
  830. // * here to work with JDK 1.1.
  831. // *
  832. // * @see java.io.File
  833. // *
  834. // * @param f the file to convert
  835. // * @return the file path converted to a file: URL
  836. // */
  837. // protected URL fileToURL(File f) throws MalformedURLException {
  838. // String path = f.getAbsolutePath();
  839. // if (File.separatorChar != '/') {
  840. // path = path.replace(File.separatorChar, '/');
  841. // }
  842. // if (!path.startsWith("/")) {
  843. // path = "/" + path;
  844. // }
  845. // if (!path.endsWith("/") && f.isDirectory()) {
  846. // path = path + "/";
  847. // }
  848. // return new URL("file", "", path);
  849. // }
  850. /** Custom File.toUrl() implementation to handle special chars in file names
  851. *
  852. * @param file file object whose path will be converted
  853. * @return URL form of the file, with special characters handled
  854. * @throws MalformedURLException if there's a problem constructing a URL
  855. */
  856. private static URL fileToURL(File file) throws MalformedURLException {
  857. StringBuffer buffer = new StringBuffer();
  858. String path = file.getAbsolutePath();
  859. // Convert non-URL style file separators
  860. if (File.separatorChar != '/') {
  861. path = path.replace(File.separatorChar, '/');
  862. }
  863. // Make sure it starts at root
  864. if (!path.startsWith("/")) {
  865. buffer.append('/');
  866. }
  867. // Copy, converting URL special characters as we go
  868. int len = path.length();
  869. for (int i = 0; i < len; i++) {
  870. char c = path.charAt(i);
  871. if (c == ' ')
  872. buffer.append("%20");
  873. else if (c == '#')
  874. buffer.append("%23");
  875. else if (c == '%')
  876. buffer.append("%25");
  877. else if (c == '&')
  878. buffer.append("%26");
  879. else if (c == ';')
  880. buffer.append("%3B");
  881. else if (c == '<')
  882. buffer.append("%3C");
  883. else if (c == '=')
  884. buffer.append("%3D");
  885. else if (c == '>')
  886. buffer.append("%3E");
  887. else if (c == '?')
  888. buffer.append("%3F");
  889. else if (c == '~')
  890. buffer.append("%7E");
  891. else
  892. buffer.append(c);
  893. }
  894. // Make sure directories end with slash
  895. if (!path.endsWith("/") && file.isDirectory()) {
  896. buffer.append('/');
  897. }
  898. // Return URL
  899. return new URL("file", "", buffer.toString());
  900. }
  901. /**
  902. * Returns whether or not entities are being expanded into normal text
  903. * content.
  904. *
  905. * @return whether entities are being expanded
  906. */
  907. public boolean getExpandEntities() {
  908. return expand;
  909. }
  910. /**
  911. * <p>
  912. * This sets whether or not to expand entities for the builder.
  913. * A true means to expand entities as normal content. A false means to
  914. * leave entities unexpanded as <code>EntityRef</code> objects. The
  915. * default is true.
  916. * </p>
  917. * <p>
  918. * When this setting is false, the internal DTD subset is retained; when
  919. * this setting is true, the internal DTD subset is not retained.
  920. * </p>
  921. * <p>
  922. * Note that Xerces (at least up to 1.4.4) has a bug where entities
  923. * in attribute values will be misreported if this flag is turned off,
  924. * resulting in entities to appear within element content. When turning
  925. * entity expansion off either avoid entities in attribute values, or
  926. * use another parser like Crimson.
  927. * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
  928. * </p>
  929. *
  930. * @param expand <code>boolean</code> indicating whether entity expansion
  931. * should occur.
  932. */
  933. public void setExpandEntities(boolean expand) {
  934. this.expand = expand;
  935. }
  936. }