Desktop tool for browsing account info from EVE-Online
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Verifier.java 51KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236
  1. /*--
  2. $Id: Verifier.java,v 1.55 2007/11/10 05:28:59 jhunter Exp $
  3. Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
  4. All rights reserved.
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions
  7. are met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions, and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions, and the disclaimer that follows
  12. these conditions in the documentation and/or other materials
  13. provided with the distribution.
  14. 3. The name "JDOM" must not be used to endorse or promote products
  15. derived from this software without prior written permission. For
  16. written permission, please contact <request_AT_jdom_DOT_org>.
  17. 4. Products derived from this software may not be called "JDOM", nor
  18. may "JDOM" appear in their name, without prior written permission
  19. from the JDOM Project Management <request_AT_jdom_DOT_org>.
  20. In addition, we request (but do not require) that you include in the
  21. end-user documentation provided with the redistribution and/or in the
  22. software itself an acknowledgement equivalent to the following:
  23. "This product includes software developed by the
  24. JDOM Project (http://www.jdom.org/)."
  25. Alternatively, the acknowledgment may be graphical using the logos
  26. available at http://www.jdom.org/images/logos.
  27. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  28. WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  29. OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
  31. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  32. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  33. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  34. USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  35. ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  36. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  37. OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  38. SUCH DAMAGE.
  39. This software consists of voluntary contributions made by many
  40. individuals on behalf of the JDOM Project and was originally
  41. created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
  42. Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
  43. on the JDOM Project, please see <http://www.jdom.org/>.
  44. */
  45. package org.jdom;
  46. import java.util.*;
  47. /**
  48. * A utility class to handle well-formedness checks on names, data, and other
  49. * verification tasks for JDOM. The class is final and may not be subclassed.
  50. *
  51. * @version $Revision: 1.55 $, $Date: 2007/11/10 05:28:59 $
  52. * @author Brett McLaughlin
  53. * @author Elliotte Rusty Harold
  54. * @author Jason Hunter
  55. * @author Bradley S. Huffman
  56. */
  57. final public class Verifier {
  58. private static final String CVS_ID =
  59. "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.55 $ $Date: 2007/11/10 05:28:59 $ $Name: jdom_1_1 $";
  /**
   * Private constructor: the class exposes only static helpers, so
   * instantiation is prevented.
   */
  private Verifier() {
    // Intentionally empty.
  }
  64. /**
  65. * This will check the supplied name to see if it is legal for use as
  66. * a JDOM <code>{@link Element}</code> name.
  67. *
  68. * @param name <code>String</code> name to check.
  69. * @return <code>String</code> reason name is illegal, or
  70. * <code>null</code> if name is OK.
  71. */
  72. public static String checkElementName(String name) {
  73. // Check basic XML name rules first
  74. String reason;
  75. if ((reason = checkXMLName(name)) != null) {
  76. return reason;
  77. }
  78. // No colons allowed, since elements handle this internally
  79. if (name.indexOf(":") != -1) {
  80. return "Element names cannot contain colons";
  81. }
  82. // If we got here, everything is OK
  83. return null;
  84. }
  85. /**
  86. * This will check the supplied name to see if it is legal for use as
  87. * a JDOM <code>{@link Attribute}</code> name.
  88. *
  89. * @param name <code>String</code> name to check.
  90. * @return <code>String</code> reason name is illegal, or
  91. * <code>null</code> if name is OK.
  92. */
  93. public static String checkAttributeName(String name) {
  94. // Check basic XML name rules first
  95. String reason;
  96. if ((reason = checkXMLName(name)) != null) {
  97. return reason;
  98. }
  99. // No colons are allowed, since attributes handle this internally
  100. if (name.indexOf(":") != -1) {
  101. return "Attribute names cannot contain colons";
  102. }
  103. // Attribute names may not be xmlns since we do this internally too
  104. if (name.equals("xmlns")) {
  105. return "An Attribute name may not be \"xmlns\"; " +
  106. "use the Namespace class to manage namespaces";
  107. }
  108. // If we got here, everything is OK
  109. return null;
  110. }
  111. /**
  112. * This will check the supplied string to see if it only contains
  113. * characters allowed by the XML 1.0 specification. The C0 controls
  114. * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded
  115. * except for carriage return, linefeed, and the horizontal tab.
  116. * Surrogates are also excluded.
  117. * <p>
  118. * This method is useful for checking element content and attribute
  119. * values. Note that characters
  120. * like " and &lt; are allowed in attribute values and element content.
  121. * They will simply be escaped as &quot; or &lt;
  122. * when the value is serialized.
  123. * </p>
  124. *
  125. * @param text <code>String</code> value to check.
  126. * @return <code>String</code> reason name is illegal, or
  127. * <code>null</code> if name is OK.
  128. */
  129. public static String checkCharacterData(String text) {
  130. if (text == null) {
  131. return "A null is not a legal XML value";
  132. }
  133. // Do check
  134. for (int i = 0, len = text.length(); i<len; i++) {
  135. int ch = text.charAt(i);
  136. // Check if high part of a surrogate pair
  137. if (ch >= 0xD800 && ch <= 0xDBFF) {
  138. // Check if next char is the low-surrogate
  139. i++;
  140. if (i < len) {
  141. char low = text.charAt(i);
  142. if (low < 0xDC00 || low > 0xDFFF) {
  143. return "Illegal Surrogate Pair";
  144. }
  145. // It's a good pair, calculate the true value of
  146. // the character to then fall thru to isXMLCharacter
  147. ch = 0x10000 + (ch - 0xD800) * 0x400 + (low - 0xDC00);
  148. }
  149. else {
  150. return "Surrogate Pair Truncated";
  151. }
  152. }
  153. if (!isXMLCharacter(ch)) {
  154. // Likely this character can't be easily displayed
  155. // because it's a control so we use it'd hexadecimal
  156. // representation in the reason.
  157. return ("0x" + Integer.toHexString(ch) +
  158. " is not a legal XML character");
  159. }
  160. }
  161. // If we got here, everything is OK
  162. return null;
  163. }
  164. /**
  165. * This will check the supplied data to see if it is legal for use as
  166. * JDOM <code>{@link CDATA}</code>.
  167. *
  168. * @param data <code>String</code> data to check.
  169. * @return <code>String</code> reason data is illegal, or
  170. * <code>null</code> is name is OK.
  171. */
  172. public static String checkCDATASection(String data) {
  173. String reason = null;
  174. if ((reason = checkCharacterData(data)) != null) {
  175. return reason;
  176. }
  177. if (data.indexOf("]]>") != -1) {
  178. return "CDATA cannot internally contain a CDATA ending " +
  179. "delimiter (]]>)";
  180. }
  181. // If we got here, everything is OK
  182. return null;
  183. }
  184. /**
  185. * This will check the supplied name to see if it is legal for use as
  186. * a JDOM <code>{@link Namespace}</code> prefix.
  187. *
  188. * @param prefix <code>String</code> prefix to check.
  189. * @return <code>String</code> reason name is illegal, or
  190. * <code>null</code> if name is OK.
  191. */
  192. public static String checkNamespacePrefix(String prefix) {
  193. // Manually do rules, since URIs can be null or empty
  194. if ((prefix == null) || (prefix.equals(""))) {
  195. return null;
  196. }
  197. // Cannot start with a number
  198. char first = prefix.charAt(0);
  199. if (isXMLDigit(first)) {
  200. return "Namespace prefixes cannot begin with a number";
  201. }
  202. // Cannot start with a $
  203. if (first == '$') {
  204. return "Namespace prefixes cannot begin with a dollar sign ($)";
  205. }
  206. // Cannot start with a -
  207. if (first == '-') {
  208. return "Namespace prefixes cannot begin with a hyphen (-)";
  209. }
  210. // Cannot start with a .
  211. if (first == '.') {
  212. return "Namespace prefixes cannot begin with a period (.)";
  213. }
  214. // Cannot start with "xml" in any character case
  215. if (prefix.toLowerCase().startsWith("xml")) {
  216. return "Namespace prefixes cannot begin with " +
  217. "\"xml\" in any combination of case";
  218. }
  219. // Ensure legal content
  220. for (int i=0, len = prefix.length(); i<len; i++) {
  221. char c = prefix.charAt(i);
  222. if (!isXMLNameCharacter(c)) {
  223. return "Namespace prefixes cannot contain the character \"" +
  224. c + "\"";
  225. }
  226. }
  227. // No colons allowed
  228. if (prefix.indexOf(":") != -1) {
  229. return "Namespace prefixes cannot contain colons";
  230. }
  231. // If we got here, everything is OK
  232. return null;
  233. }
  234. /**
  235. * This will check the supplied name to see if it is legal for use as
  236. * a JDOM <code>{@link Namespace}</code> URI.
  237. *
  238. * @param uri <code>String</code> URI to check.
  239. * @return <code>String</code> reason name is illegal, or
  240. * <code>null</code> if name is OK.
  241. */
  242. public static String checkNamespaceURI(String uri) {
  243. // Manually do rules, since URIs can be null or empty
  244. if ((uri == null) || (uri.equals(""))) {
  245. return null;
  246. }
  247. // Cannot start with a number
  248. char first = uri.charAt(0);
  249. if (Character.isDigit(first)) {
  250. return "Namespace URIs cannot begin with a number";
  251. }
  252. // Cannot start with a $
  253. if (first == '$') {
  254. return "Namespace URIs cannot begin with a dollar sign ($)";
  255. }
  256. // Cannot start with a -
  257. if (first == '-') {
  258. return "Namespace URIs cannot begin with a hyphen (-)";
  259. }
  260. // If we got here, everything is OK
  261. return null;
  262. }
  263. /**
  264. * Check if two namespaces collide.
  265. *
  266. * @param namespace <code>Namespace</code> to check.
  267. * @param other <code>Namespace</code> to check against.
  268. * @return <code>String</code> reason for collision, or
  269. * <code>null</code> if no collision.
  270. */
  271. public static String checkNamespaceCollision(Namespace namespace,
  272. Namespace other) {
  273. String p1,p2,u1,u2,reason;
  274. reason = null;
  275. p1 = namespace.getPrefix();
  276. u1 = namespace.getURI();
  277. p2 = other.getPrefix();
  278. u2 = other.getURI();
  279. if (p1.equals(p2) && !u1.equals(u2)) {
  280. reason = "The namespace prefix \"" + p1 + "\" collides";
  281. }
  282. return reason;
  283. }
  284. /**
  285. * Check if <code>{@link Attribute}</code>'s namespace collides with a
  286. * <code>{@link Element}</code>'s namespace.
  287. *
  288. * @param attribute <code>Attribute</code> to check.
  289. * @param element <code>Element</code> to check against.
  290. * @return <code>String</code> reason for collision, or
  291. * <code>null</code> if no collision.
  292. */
  293. public static String checkNamespaceCollision(Attribute attribute,
  294. Element element) {
  295. Namespace namespace = attribute.getNamespace();
  296. String prefix = namespace.getPrefix();
  297. if ("".equals(prefix)) {
  298. return null;
  299. }
  300. return checkNamespaceCollision(namespace, element);
  301. }
  302. /**
  303. * Check if a <code>{@link Namespace}</code> collides with a
  304. * <code>{@link Element}</code>'s namespace.
  305. *
  306. * @param namespace <code>Namespace</code> to check.
  307. * @param element <code>Element</code> to check against.
  308. * @return <code>String</code> reason for collision, or
  309. * <code>null</code> if no collision.
  310. */
  311. public static String checkNamespaceCollision(Namespace namespace,
  312. Element element) {
  313. String reason = checkNamespaceCollision(namespace,
  314. element.getNamespace());
  315. if (reason != null) {
  316. return reason + " with the element namespace prefix";
  317. }
  318. reason = checkNamespaceCollision(namespace,
  319. element.getAdditionalNamespaces());
  320. if (reason != null) {
  321. return reason;
  322. }
  323. reason = checkNamespaceCollision(namespace, element.getAttributes());
  324. if (reason != null) {
  325. return reason;
  326. }
  327. return null;
  328. }
  329. /**
  330. * Check if a <code>{@link Namespace}</code> collides with a
  331. * <code>{@link Attribute}</code>'s namespace.
  332. *
  333. * @param namespace <code>Namespace</code> to check.
  334. * @param attribute <code>Attribute</code> to check against.
  335. * @return <code>String</code> reason for collision, or
  336. * <code>null</code> if no collision.
  337. */
  338. public static String checkNamespaceCollision(Namespace namespace,
  339. Attribute attribute) {
  340. String reason = checkNamespaceCollision(namespace,
  341. attribute.getNamespace());
  342. if (reason != null) {
  343. reason += " with an attribute namespace prefix on the element";
  344. }
  345. return reason;
  346. }
  347. /**
  348. * Check if a <code>{@link Namespace}</code> collides with any namespace
  349. * from a list of objects.
  350. *
  351. * @param namespace <code>Namespace</code> to check.
  352. * @param list <code>List</code> to check against.
  353. * @return <code>String</code> reason for collision, or
  354. * <code>null</code> if no collision.
  355. */
  356. public static String checkNamespaceCollision(Namespace namespace,
  357. List list) {
  358. if (list == null) {
  359. return null;
  360. }
  361. String reason = null;
  362. Iterator i = list.iterator();
  363. while ((reason == null) && i.hasNext()) {
  364. Object obj = i.next();
  365. if (obj instanceof Attribute) {
  366. reason = checkNamespaceCollision(namespace, (Attribute) obj);
  367. }
  368. else if (obj instanceof Element) {
  369. reason = checkNamespaceCollision(namespace, (Element) obj);
  370. }
  371. else if (obj instanceof Namespace) {
  372. reason = checkNamespaceCollision(namespace, (Namespace) obj);
  373. if (reason != null) {
  374. reason += " with an additional namespace declared" +
  375. " by the element";
  376. }
  377. }
  378. }
  379. return reason;
  380. }
  381. /**
  382. * This will check the supplied data to see if it is legal for use as
  383. * a JDOM <code>{@link ProcessingInstruction}</code> target.
  384. *
  385. * @param target <code>String</code> target to check.
  386. * @return <code>String</code> reason target is illegal, or
  387. * <code>null</code> if target is OK.
  388. */
  389. public static String checkProcessingInstructionTarget(String target) {
  390. // Check basic XML name rules first
  391. String reason;
  392. if ((reason = checkXMLName(target)) != null) {
  393. return reason;
  394. }
  395. // No colons allowed, per Namespace Specification Section 6
  396. if (target.indexOf(":") != -1) {
  397. return "Processing instruction targets cannot contain colons";
  398. }
  399. // Cannot begin with 'xml' in any case
  400. if (target.equalsIgnoreCase("xml")) {
  401. return "Processing instructions cannot have a target of " +
  402. "\"xml\" in any combination of case. (Note that the " +
  403. "\"<?xml ... ?>\" declaration at the beginning of a " +
  404. "document is not a processing instruction and should not " +
  405. "be added as one; it is written automatically during " +
  406. "output, e.g. by XMLOutputter.)";
  407. }
  408. // If we got here, everything is OK
  409. return null;
  410. }
  411. /**
  412. * This will check the supplied data to see if it is legal for use as
  413. * <code>{@link ProcessingInstruction}</code> data. Besides checking that
  414. * all the characters are allowed in XML, this also checks
  415. * that the data does not contain the PI end-string "?&gt;".
  416. *
  417. * @param data <code>String</code> data to check.
  418. * @return <code>String</code> reason data is illegal, or
  419. * <code>null</code> if data is OK.
  420. */
  421. public static String checkProcessingInstructionData(String data) {
  422. // Check basic XML name rules first
  423. String reason = checkCharacterData(data);
  424. if (reason == null) {
  425. if (data.indexOf("?>") >= 0) {
  426. return "Processing instructions cannot contain " +
  427. "the string \"?>\"";
  428. }
  429. }
  430. return reason;
  431. }
  432. /**
  433. * This will check the supplied data to see if it is legal for use as
  434. * JDOM <code>{@link Comment}</code> data.
  435. *
  436. * @param data <code>String</code> data to check.
  437. * @return <code>String</code> reason data is illegal, or
  438. * <code>null</code> if data is OK.
  439. */
  440. public static String checkCommentData(String data) {
  441. String reason = null;
  442. if ((reason = checkCharacterData(data)) != null) {
  443. return reason;
  444. }
  445. if (data.indexOf("--") != -1) {
  446. return "Comments cannot contain double hyphens (--)";
  447. }
  448. if (data.endsWith("-")) {
  449. return "Comment data cannot end with a hyphen.";
  450. }
  451. // If we got here, everything is OK
  452. return null;
  453. }
  454. // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
  455. // [-'()+,./:=?;*#@$_%]
  456. public static boolean isXMLPublicIDCharacter(char c) {
  457. if (c >= 'a' && c <= 'z') return true;
  458. if (c >= '?' && c <= 'Z') return true;
  459. if (c >= '\'' && c <= ';') return true;
  460. if (c == ' ') return true;
  461. if (c == '!') return true;
  462. if (c == '=') return true;
  463. if (c == '#') return true;
  464. if (c == '$') return true;
  465. if (c == '_') return true;
  466. if (c == '%') return true;
  467. if (c == '\n') return true;
  468. if (c == '\r') return true;
  469. if (c == '\t') return true;
  470. return false;
  471. }
  472. /**
  473. * This will ensure that the data for a public identifier
  474. * is legal.
  475. *
  476. * @param publicID <code>String</code> public ID to check.
  477. * @return <code>String</code> reason public ID is illegal, or
  478. * <code>null</code> if public ID is OK.
  479. */
  480. public static String checkPublicID(String publicID) {
  481. String reason = null;
  482. if (publicID == null) return null;
  483. // This indicates there is no public ID
  484. for (int i = 0; i < publicID.length(); i++) {
  485. char c = publicID.charAt(i);
  486. if (!isXMLPublicIDCharacter(c)) {
  487. reason = c + " is not a legal character in public IDs";
  488. break;
  489. }
  490. }
  491. return reason;
  492. }
  493. /**
  494. * This will ensure that the data for a system literal
  495. * is legal.
  496. *
  497. * @param systemLiteral <code>String</code> system literal to check.
  498. * @return <code>String</code> reason system literal is illegal, or
  499. * <code>null</code> if system literal is OK.
  500. */
  501. public static String checkSystemLiteral(String systemLiteral) {
  502. String reason = null;
  503. if (systemLiteral == null) return null;
  504. // This indicates there is no system ID
  505. if (systemLiteral.indexOf('\'') != -1
  506. && systemLiteral.indexOf('"') != -1) {
  507. reason =
  508. "System literals cannot simultaneously contain both single and double quotes.";
  509. }
  510. else {
  511. reason = checkCharacterData(systemLiteral);
  512. }
  513. return reason;
  514. }
  515. /**
  516. * This is a utility function for sharing the base process of checking
  517. * any XML name.
  518. *
  519. * @param name <code>String</code> to check for XML name compliance.
  520. * @return <code>String</code> reason the name is illegal, or
  521. * <code>null</code> if OK.
  522. */
  523. public static String checkXMLName(String name) {
  524. // Cannot be empty or null
  525. if ((name == null) || (name.length() == 0)
  526. || (name.trim().equals(""))) {
  527. return "XML names cannot be null or empty";
  528. }
  529. // Cannot start with a number
  530. char first = name.charAt(0);
  531. if (!isXMLNameStartCharacter(first)) {
  532. return "XML names cannot begin with the character \"" +
  533. first + "\"";
  534. }
  535. // Ensure legal content for non-first chars
  536. for (int i=1, len = name.length(); i<len; i++) {
  537. char c = name.charAt(i);
  538. if (!isXMLNameCharacter(c)) {
  539. return "XML names cannot contain the character \"" + c + "\"";
  540. }
  541. }
  542. // We got here, so everything is OK
  543. return null;
  544. }
  545. /**
  546. * <p>
  547. * Checks a string to see if it is a legal RFC 2396 URI.
  548. * Both absolute and relative URIs are supported.
  549. * </p>
  550. *
  551. * @param uri <code>String</code> to check.
  552. * @return <code>String</code> reason the URI is illegal, or
  553. * <code>null</code> if OK.
  554. */
  555. public static String checkURI(String uri) {
  556. // URIs can be null or empty
  557. if ((uri == null) || (uri.equals(""))) {
  558. return null;
  559. }
  560. for (int i = 0; i < uri.length(); i++) {
  561. char test = uri.charAt(i);
  562. if (!isURICharacter(test)) {
  563. String msgNumber = "0x" + Integer.toHexString(test);
  564. if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
  565. return "URIs cannot contain " + msgNumber;
  566. } // end if
  567. if (test == '%') { // must be followed by two hexadecimal digits
  568. try {
  569. char firstDigit = uri.charAt(i+1);
  570. char secondDigit = uri.charAt(i+2);
  571. if (!isHexDigit(firstDigit) ||
  572. !isHexDigit(secondDigit)) {
  573. return "Percent signs in URIs must be followed by "
  574. + "exactly two hexadecimal digits.";
  575. }
  576. }
  577. catch (StringIndexOutOfBoundsException e) {
  578. return "Percent signs in URIs must be followed by "
  579. + "exactly two hexadecimal digits.";
  580. }
  581. }
  582. } // end for
  583. // If we got here, everything is OK
  584. return null;
  585. }
  586. /**
  587. * <p>
  588. * This is a utility function for determining whether a specified
  589. * Unicode character is a hexadecimal digit as defined in RFC 2396;
  590. * that is, one of the ASCII characters 0-9, a-f, or A-F.
  591. * </p>
  592. *
  593. * @param c to check for hex digit.
  594. * @return true if it's allowed, false otherwise.
  595. */
  596. public static boolean isHexDigit(char c) {
  597. // I suspect most characters passed to this method will be
  598. // correct hexadecimal digits, so I test for the true cases
  599. // first. If this proves to be a performance bottleneck
  600. // a switch statement or lookup table
  601. // might optimize this.
  602. if (c >= '0' && c <= '9') return true;
  603. if (c >= 'A' && c <= 'F') return true;
  604. if (c >= 'a' && c <= 'f') return true;
  605. return false;
  606. }
  607. /**
  608. * <p>
  609. * This is a utility function for determining whether
  610. * a specified Unicode character is legal in URI references
  611. * as determined by RFC 2396.
  612. * </p>
  613. *
  614. * @param c <code>char</code> to check for URI reference compliance.
  615. * @return true if it's allowed, false otherwise.
  616. */
  617. public static boolean isURICharacter(char c) {
  618. if (c >= 'a' && c <= 'z') return true;
  619. if (c >= 'A' && c <= 'Z') return true;
  620. if (c >= '0' && c <= '9') return true;
  621. if (c == '/') return true;
  622. if (c == '-') return true;
  623. if (c == '.') return true;
  624. if (c == '?') return true;
  625. if (c == ':') return true;
  626. if (c == '@') return true;
  627. if (c == '&') return true;
  628. if (c == '=') return true;
  629. if (c == '+') return true;
  630. if (c == '$') return true;
  631. if (c == ',') return true;
  632. if (c == '%') return true;
  633. if (c == '_') return true;
  634. if (c == '!') return true;
  635. if (c == '~') return true;
  636. if (c == '*') return true;
  637. if (c == '\'') return true;
  638. if (c == '(') return true;
  639. if (c == ')') return true;
  640. return false;
  641. }
  642. /**
  643. * This is a utility function for determining whether a specified
  644. * character is a character according to production 2 of the
  645. * XML 1.0 specification.
  646. *
  647. * @param c <code>char</code> to check for XML compliance
  648. * @return <code>boolean</code> true if it's a character,
  649. * false otherwise
  650. */
  651. public static boolean isXMLCharacter(int c) {
  652. if (c == '\n') return true;
  653. if (c == '\r') return true;
  654. if (c == '\t') return true;
  655. if (c < 0x20) return false; if (c <= 0xD7FF) return true;
  656. if (c < 0xE000) return false; if (c <= 0xFFFD) return true;
  657. if (c < 0x10000) return false; if (c <= 0x10FFFF) return true;
  658. return false;
  659. }
  660. /**
  661. * This is a utility function for determining whether a specified
  662. * character is a name character according to production 4 of the
  663. * XML 1.0 specification.
  664. *
  665. * @param c <code>char</code> to check for XML name compliance.
  666. * @return <code>boolean</code> true if it's a name character,
  667. * false otherwise.
  668. */
  669. public static boolean isXMLNameCharacter(char c) {
  670. return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-'
  671. || c == '_' || c == ':' || isXMLCombiningChar(c)
  672. || isXMLExtender(c));
  673. }
  674. /**
  675. * This is a utility function for determining whether a specified
  676. * character is a legal name start character according to production 5
  677. * of the XML 1.0 specification. This production does allow names
  678. * to begin with colons which the Namespaces in XML Recommendation
  679. * disallows.
  680. *
  681. * @param c <code>char</code> to check for XML name start compliance.
  682. * @return <code>boolean</code> true if it's a name start character,
  683. * false otherwise.
  684. */
  685. public static boolean isXMLNameStartCharacter(char c) {
  686. return (isXMLLetter(c) || c == '_' || c ==':');
  687. }
  688. /**
  689. * This is a utility function for determining whether a specified
  690. * character is a letter or digit according to productions 84 and 88
  691. * of the XML 1.0 specification.
  692. *
  693. * @param c <code>char</code> to check.
  694. * @return <code>boolean</code> true if it's letter or digit,
  695. * false otherwise.
  696. */
  697. public static boolean isXMLLetterOrDigit(char c) {
  698. return (isXMLLetter(c) || isXMLDigit(c));
  699. }
  700. /**
  701. * This is a utility function for determining whether a specified character
  702. * is a letter according to production 84 of the XML 1.0 specification.
  703. *
  704. * @param c <code>char</code> to check for XML name compliance.
  705. * @return <code>String</code> true if it's a letter, false otherwise.
  706. */
  707. public static boolean isXMLLetter(char c) {
  708. // Note that order is very important here. The search proceeds
  709. // from lowest to highest values, so that no searching occurs
  710. // above the character's value. BTW, the first line is equivalent to:
  711. // if (c >= 0x0041 && c <= 0x005A) return true;
  712. if (c < 0x0041) return false; if (c <= 0x005a) return true;
  713. if (c < 0x0061) return false; if (c <= 0x007A) return true;
  714. if (c < 0x00C0) return false; if (c <= 0x00D6) return true;
  715. if (c < 0x00D8) return false; if (c <= 0x00F6) return true;
  716. if (c < 0x00F8) return false; if (c <= 0x00FF) return true;
  717. if (c < 0x0100) return false; if (c <= 0x0131) return true;
  718. if (c < 0x0134) return false; if (c <= 0x013E) return true;
  719. if (c < 0x0141) return false; if (c <= 0x0148) return true;
  720. if (c < 0x014A) return false; if (c <= 0x017E) return true;
  721. if (c < 0x0180) return false; if (c <= 0x01C3) return true;
  722. if (c < 0x01CD) return false; if (c <= 0x01F0) return true;
  723. if (c < 0x01F4) return false; if (c <= 0x01F5) return true;
  724. if (c < 0x01FA) return false; if (c <= 0x0217) return true;
  725. if (c < 0x0250) return false; if (c <= 0x02A8) return true;
  726. if (c < 0x02BB) return false; if (c <= 0x02C1) return true;
  727. if (c == 0x0386) return true;
  728. if (c < 0x0388) return false; if (c <= 0x038A) return true;
  729. if (c == 0x038C) return true;
  730. if (c < 0x038E) return false; if (c <= 0x03A1) return true;
  731. if (c < 0x03A3) return false; if (c <= 0x03CE) return true;
  732. if (c < 0x03D0) return false; if (c <= 0x03D6) return true;
  733. if (c == 0x03DA) return true;
  734. if (c == 0x03DC) return true;
  735. if (c == 0x03DE) return true;
  736. if (c == 0x03E0) return true;
  737. if (c < 0x03E2) return false; if (c <= 0x03F3) return true;
  738. if (c < 0x0401) return false; if (c <= 0x040C) return true;
  739. if (c < 0x040E) return false; if (c <= 0x044F) return true;
  740. if (c < 0x0451) return false; if (c <= 0x045C) return true;
  741. if (c < 0x045E) return false; if (c <= 0x0481) return true;
  742. if (c < 0x0490) return false; if (c <= 0x04C4) return true;
  743. if (c < 0x04C7) return false; if (c <= 0x04C8) return true;
  744. if (c < 0x04CB) return false; if (c <= 0x04CC) return true;
  745. if (c < 0x04D0) return false; if (c <= 0x04EB) return true;
  746. if (c < 0x04EE) return false; if (c <= 0x04F5) return true;
  747. if (c < 0x04F8) return false; if (c <= 0x04F9) return true;
  748. if (c < 0x0531) return false; if (c <= 0x0556) return true;
  749. if (c == 0x0559) return true;
  750. if (c < 0x0561) return false; if (c <= 0x0586) return true;
  751. if (c < 0x05D0) return false; if (c <= 0x05EA) return true;
  752. if (c < 0x05F0) return false; if (c <= 0x05F2) return true;
  753. if (c < 0x0621) return false; if (c <= 0x063A) return true;
  754. if (c < 0x0641) return false; if (c <= 0x064A) return true;
  755. if (c < 0x0671) return false; if (c <= 0x06B7) return true;
  756. if (c < 0x06BA) return false; if (c <= 0x06BE) return true;
  757. if (c < 0x06C0) return false; if (c <= 0x06CE) return true;
  758. if (c < 0x06D0) return false; if (c <= 0x06D3) return true;
  759. if (c == 0x06D5) return true;
  760. if (c < 0x06E5) return false; if (c <= 0x06E6) return true;
  761. if (c < 0x0905) return false; if (c <= 0x0939) return true;
  762. if (c == 0x093D) return true;
  763. if (c < 0x0958) return false; if (c <= 0x0961) return true;
  764. if (c < 0x0985) return false; if (c <= 0x098C) return true;
  765. if (c < 0x098F) return false; if (c <= 0x0990) return true;
  766. if (c < 0x0993) return false; if (c <= 0x09A8) return true;
  767. if (c < 0x09AA) return false; if (c <= 0x09B0) return true;
  768. if (c == 0x09B2) return true;
  769. if (c < 0x09B6) return false; if (c <= 0x09B9) return true;
  770. if (c < 0x09DC) return false; if (c <= 0x09DD) return true;
  771. if (c < 0x09DF) return false; if (c <= 0x09E1) return true;
  772. if (c < 0x09F0) return false; if (c <= 0x09F1) return true;
  773. if (c < 0x0A05) return false; if (c <= 0x0A0A) return true;
  774. if (c < 0x0A0F) return false; if (c <= 0x0A10) return true;
  775. if (c < 0x0A13) return false; if (c <= 0x0A28) return true;
  776. if (c < 0x0A2A) return false; if (c <= 0x0A30) return true;
  777. if (c < 0x0A32) return false; if (c <= 0x0A33) return true;
  778. if (c < 0x0A35) return false; if (c <= 0x0A36) return true;
  779. if (c < 0x0A38) return false; if (c <= 0x0A39) return true;
  780. if (c < 0x0A59) return false; if (c <= 0x0A5C) return true;
  781. if (c == 0x0A5E) return true;
  782. if (c < 0x0A72) return false; if (c <= 0x0A74) return true;
  783. if (c < 0x0A85) return false; if (c <= 0x0A8B) return true;
  784. if (c == 0x0A8D) return true;
  785. if (c < 0x0A8F) return false; if (c <= 0x0A91) return true;
  786. if (c < 0x0A93) return false; if (c <= 0x0AA8) return true;
  787. if (c < 0x0AAA) return false; if (c <= 0x0AB0) return true;
  788. if (c < 0x0AB2) return false; if (c <= 0x0AB3) return true;
  789. if (c < 0x0AB5) return false; if (c <= 0x0AB9) return true;
  790. if (c == 0x0ABD) return true;
  791. if (c == 0x0AE0) return true;
  792. if (c < 0x0B05) return false; if (c <= 0x0B0C) return true;
  793. if (c < 0x0B0F) return false; if (c <= 0x0B10) return true;
  794. if (c < 0x0B13) return false; if (c <= 0x0B28) return true;
  795. if (c < 0x0B2A) return false; if (c <= 0x0B30) return true;
  796. if (c < 0x0B32) return false; if (c <= 0x0B33) return true;
  797. if (c < 0x0B36) return false; if (c <= 0x0B39) return true;
  798. if (c == 0x0B3D) return true;
  799. if (c < 0x0B5C) return false; if (c <= 0x0B5D) return true;
  800. if (c < 0x0B5F) return false; if (c <= 0x0B61) return true;
  801. if (c < 0x0B85) return false; if (c <= 0x0B8A) return true;
  802. if (c < 0x0B8E) return false; if (c <= 0x0B90) return true;
  803. if (c < 0x0B92) return false; if (c <= 0x0B95) return true;
  804. if (c < 0x0B99) return false; if (c <= 0x0B9A) return true;
  805. if (c == 0x0B9C) return true;
  806. if (c < 0x0B9E) return false; if (c <= 0x0B9F) return true;
  807. if (c < 0x0BA3) return false; if (c <= 0x0BA4) return true;
  808. if (c < 0x0BA8) return false; if (c <= 0x0BAA) return true;
  809. if (c < 0x0BAE) return false; if (c <= 0x0BB5) return true;
  810. if (c < 0x0BB7) return false; if (c <= 0x0BB9) return true;
  811. if (c < 0x0C05) return false; if (c <= 0x0C0C) return true;
  812. if (c < 0x0C0E) return false; if (c <= 0x0C10) return true;
  813. if (c < 0x0C12) return false; if (c <= 0x0C28) return true;
  814. if (c < 0x0C2A) return false; if (c <= 0x0C33) return true;
  815. if (c < 0x0C35) return false; if (c <= 0x0C39) return true;
  816. if (c < 0x0C60) return false; if (c <= 0x0C61) return true;
  817. if (c < 0x0C85) return false; if (c <= 0x0C8C) return true;
  818. if (c < 0x0C8E) return false; if (c <= 0x0C90) return true;
  819. if (c < 0x0C92) return false; if (c <= 0x0CA8) return true;
  820. if (c < 0x0CAA) return false; if (c <= 0x0CB3) return true;
  821. if (c < 0x0CB5) return false; if (c <= 0x0CB9) return true;
  822. if (c == 0x0CDE) return true;
  823. if (c < 0x0CE0) return false; if (c <= 0x0CE1) return true;
  824. if (c < 0x0D05) return false; if (c <= 0x0D0C) return true;
  825. if (c < 0x0D0E) return false; if (c <= 0x0D10) return true;
  826. if (c < 0x0D12) return false; if (c <= 0x0D28) return true;
  827. if (c < 0x0D2A) return false; if (c <= 0x0D39) return true;
  828. if (c < 0x0D60) return false; if (c <= 0x0D61) return true;
  829. if (c < 0x0E01) return false; if (c <= 0x0E2E) return true;
  830. if (c == 0x0E30) return true;
  831. if (c < 0x0E32) return false; if (c <= 0x0E33) return true;
  832. if (c < 0x0E40) return false; if (c <= 0x0E45) return true;
  833. if (c < 0x0E81) return false; if (c <= 0x0E82) return true;
  834. if (c == 0x0E84) return true;
  835. if (c < 0x0E87) return false; if (c <= 0x0E88) return true;
  836. if (c == 0x0E8A) return true;
  837. if (c == 0x0E8D) return true;
  838. if (c < 0x0E94) return false; if (c <= 0x0E97) return true;
  839. if (c < 0x0E99) return false; if (c <= 0x0E9F) return true;
  840. if (c < 0x0EA1) return false; if (c <= 0x0EA3) return true;
  841. if (c == 0x0EA5) return true;
  842. if (c == 0x0EA7) return true;
  843. if (c < 0x0EAA) return false; if (c <= 0x0EAB) return true;
  844. if (c < 0x0EAD) return false; if (c <= 0x0EAE) return true;
  845. if (c == 0x0EB0) return true;
  846. if (c < 0x0EB2) return false; if (c <= 0x0EB3) return true;
  847. if (c == 0x0EBD) return true;
  848. if (c < 0x0EC0) return false; if (c <= 0x0EC4) return true;
  849. if (c < 0x0F40) return false; if (c <= 0x0F47) return true;
  850. if (c < 0x0F49) return false; if (c <= 0x0F69) return true;
  851. if (c < 0x10A0) return false; if (c <= 0x10C5) return true;
  852. if (c < 0x10D0) return false; if (c <= 0x10F6) return true;
  853. if (c == 0x1100) return true;
  854. if (c < 0x1102) return false; if (c <= 0x1103) return true;
  855. if (c < 0x1105) return false; if (c <= 0x1107) return true;
  856. if (c == 0x1109) return true;
  857. if (c < 0x110B) return false; if (c <= 0x110C) return true;
  858. if (c < 0x110E) return false; if (c <= 0x1112) return true;
  859. if (c == 0x113C) return true;
  860. if (c == 0x113E) return true;
  861. if (c == 0x1140) return true;
  862. if (c == 0x114C) return true;
  863. if (c == 0x114E) return true;
  864. if (c == 0x1150) return true;
  865. if (c < 0x1154) return false; if (c <= 0x1155) return true;
  866. if (c == 0x1159) return true;
  867. if (c < 0x115F) return false; if (c <= 0x1161) return true;
  868. if (c == 0x1163) return true;
  869. if (c == 0x1165) return true;
  870. if (c == 0x1167) return true;
  871. if (c == 0x1169) return true;
  872. if (c < 0x116D) return false; if (c <= 0x116E) return true;
  873. if (c < 0x1172) return false; if (c <= 0x1173) return true;
  874. if (c == 0x1175) return true;
  875. if (c == 0x119E) return true;
  876. if (c == 0x11A8) return true;
  877. if (c == 0x11AB) return true;
  878. if (c < 0x11AE) return false; if (c <= 0x11AF) return true;
  879. if (c < 0x11B7) return false; if (c <= 0x11B8) return true;
  880. if (c == 0x11BA) return true;
  881. if (c < 0x11BC) return false; if (c <= 0x11C2) return true;
  882. if (c == 0x11EB) return true;
  883. if (c == 0x11F0) return true;
  884. if (c == 0x11F9) return true;
  885. if (c < 0x1E00) return false; if (c <= 0x1E9B) return true;
  886. if (c < 0x1EA0) return false; if (c <= 0x1EF9) return true;
  887. if (c < 0x1F00) return false; if (c <= 0x1F15) return true;
  888. if (c < 0x1F18) return false; if (c <= 0x1F1D) return true;
  889. if (c < 0x1F20) return false; if (c <= 0x1F45) return true;
  890. if (c < 0x1F48) return false; if (c <= 0x1F4D) return true;
  891. if (c < 0x1F50) return false; if (c <= 0x1F57) return true;
  892. if (c == 0x1F59) return true;
  893. if (c == 0x1F5B) return true;
  894. if (c == 0x1F5D) return true;
  895. if (c < 0x1F5F) return false; if (c <= 0x1F7D) return true;
  896. if (c < 0x1F80) return false; if (c <= 0x1FB4) return true;
  897. if (c < 0x1FB6) return false; if (c <= 0x1FBC) return true;
  898. if (c == 0x1FBE) return true;
  899. if (c < 0x1FC2) return false; if (c <= 0x1FC4) return true;
  900. if (c < 0x1FC6) return false; if (c <= 0x1FCC) return true;
  901. if (c < 0x1FD0) return false; if (c <= 0x1FD3) return true;
  902. if (c < 0x1FD6) return false; if (c <= 0x1FDB) return true;
  903. if (c < 0x1FE0) return false; if (c <= 0x1FEC) return true;
  904. if (c < 0x1FF2) return false; if (c <= 0x1FF4) return true;
  905. if (c < 0x1FF6) return false; if (c <= 0x1FFC) return true;
  906. if (c == 0x2126) return true;
  907. if (c < 0x212A) return false; if (c <= 0x212B) return true;
  908. if (c == 0x212E) return true;
  909. if (c < 0x2180) return false; if (c <= 0x2182) return true;
  910. if (c == 0x3007) return true; // ideographic
  911. if (c < 0x3021) return false; if (c <= 0x3029) return true; // ideo
  912. if (c < 0x3041) return false; if (c <= 0x3094) return true;
  913. if (c < 0x30A1) return false; if (c <= 0x30FA) return true;
  914. if (c < 0x3105) return false; if (c <= 0x312C) return true;
  915. if (c < 0x4E00) return false; if (c <= 0x9FA5) return true; // ideo
  916. if (c < 0xAC00) return false; if (c <= 0xD7A3) return true;
  917. return false;
  918. }
  919. /**
  920. * This is a utility function for determining whether a specified character
  921. * is a combining character according to production 87
  922. * of the XML 1.0 specification.
  923. *
  924. * @param c <code>char</code> to check.
  925. * @return <code>boolean</code> true if it's a combining character,
  926. * false otherwise.
  927. */
  928. public static boolean isXMLCombiningChar(char c) {
  929. // CombiningChar
  930. if (c < 0x0300) return false; if (c <= 0x0345) return true;
  931. if (c < 0x0360) return false; if (c <= 0x0361) return true;
  932. if (c < 0x0483) return false; if (c <= 0x0486) return true;
  933. if (c < 0x0591) return false; if (c <= 0x05A1) return true;
  934. if (c < 0x05A3) return false; if (c <= 0x05B9) return true;
  935. if (c < 0x05BB) return false; if (c <= 0x05BD) return true;
  936. if (c == 0x05BF) return true;
  937. if (c < 0x05C1) return false; if (c <= 0x05C2) return true;
  938. if (c == 0x05C4) return true;
  939. if (c < 0x064B) return false; if (c <= 0x0652) return true;
  940. if (c == 0x0670) return true;
  941. if (c < 0x06D6) return false; if (c <= 0x06DC) return true;
  942. if (c < 0x06DD) return false; if (c <= 0x06DF) return true;
  943. if (c < 0x06E0) return false; if (c <= 0x06E4) return true;
  944. if (c < 0x06E7) return false; if (c <= 0x06E8) return true;
  945. if (c < 0x06EA) return false; if (c <= 0x06ED) return true;
  946. if (c < 0x0901) return false; if (c <= 0x0903) return true;
  947. if (c == 0x093C) return true;
  948. if (c < 0x093E) return false; if (c <= 0x094C) return true;
  949. if (c == 0x094D) return true;
  950. if (c < 0x0951) return false; if (c <= 0x0954) return true;
  951. if (c < 0x0962) return false; if (c <= 0x0963) return true;
  952. if (c < 0x0981) return false; if (c <= 0x0983) return true;
  953. if (c == 0x09BC) return true;
  954. if (c == 0x09BE) return true;
  955. if (c == 0x09BF) return true;
  956. if (c < 0x09C0) return false; if (c <= 0x09C4) return true;
  957. if (c < 0x09C7) return false; if (c <= 0x09C8) return true;
  958. if (c < 0x09CB) return false; if (c <= 0x09CD) return true;
  959. if (c == 0x09D7) return true;
  960. if (c < 0x09E2) return false; if (c <= 0x09E3) return true;
  961. if (c == 0x0A02) return true;
  962. if (c == 0x0A3C) return true;
  963. if (c == 0x0A3E) return true;
  964. if (c == 0x0A3F) return true;
  965. if (c < 0x0A40) return false; if (c <= 0x0A42) return true;
  966. if (c < 0x0A47) return false; if (c <= 0x0A48) return true;
  967. if (c < 0x0A4B) return false; if (c <= 0x0A4D) return true;
  968. if (c < 0x0A70) return false; if (c <= 0x0A71) return true;
  969. if (c < 0x0A81) return false; if (c <= 0x0A83) return true;
  970. if (c == 0x0ABC) return true;
  971. if (c < 0x0ABE) return false; if (c <= 0x0AC5) return true;
  972. if (c < 0x0AC7) return false; if (c <= 0x0AC9) return true;
  973. if (c < 0x0ACB) return false; if (c <= 0x0ACD) return true;
  974. if (c < 0x0B01) return false; if (c <= 0x0B03) return true;
  975. if (c == 0x0B3C) return true;
  976. if (c < 0x0B3E) return false; if (c <= 0x0B43) return true;
  977. if (c < 0x0B47) return false; if (c <= 0x0B48) return true;
  978. if (c < 0x0B4B) return false; if (c <= 0x0B4D) return true;
  979. if (c < 0x0B56) return false; if (c <= 0x0B57) return true;
  980. if (c < 0x0B82) return false; if (c <= 0x0B83) return true;
  981. if (c < 0x0BBE) return false; if (c <= 0x0BC2) return true;
  982. if (c < 0x0BC6) return false; if (c <= 0x0BC8) return true;
  983. if (c < 0x0BCA) return false; if (c <= 0x0BCD) return true;
  984. if (c == 0x0BD7) return true;
  985. if (c < 0x0C01) return false; if (c <= 0x0C03) return true;
  986. if (c < 0x0C3E) return false; if (c <= 0x0C44) return true;
  987. if (c < 0x0C46) return false; if (c <= 0x0C48) return true;
  988. if (c < 0x0C4A) return false; if (c <= 0x0C4D) return true;
  989. if (c < 0x0C55) return false; if (c <= 0x0C56) return true;
  990. if (c < 0x0C82) return false; if (c <= 0x0C83) return true;
  991. if (c < 0x0CBE) return false; if (c <= 0x0CC4) return true;
  992. if (c < 0x0CC6) return false; if (c <= 0x0CC8) return true;
  993. if (c < 0x0CCA) return false; if (c <= 0x0CCD) return true;
  994. if (c < 0x0CD5) return false; if (c <= 0x0CD6) return true;
  995. if (c < 0x0D02) return false; if (c <= 0x0D03) return true;
  996. if (c < 0x0D3E) return false; if (c <= 0x0D43) return true;
  997. if (c < 0x0D46) return false; if (c <= 0x0D48) return true;
  998. if (c < 0x0D4A) return false; if (c <= 0x0D4D) return true;
  999. if (c == 0x0D57) return true;
  1000. if (c == 0x0E31) return true;
  1001. if (c < 0x0E34) return false; if (c <= 0x0E3A) return true;
  1002. if (c < 0x0E47) return false; if (c <= 0x0E4E) return true;
  1003. if (c == 0x0EB1) return true;
  1004. if (c < 0x0EB4) return false; if (c <= 0x0EB9) return true;
  1005. if (c < 0x0EBB) return false; if (c <= 0x0EBC) return true;
  1006. if (c < 0x0EC8) return false; if (c <= 0x0ECD) return true;
  1007. if (c < 0x0F18) return false; if (c <= 0x0F19) return true;
  1008. if (c == 0x0F35) return true;
  1009. if (c == 0x0F37) return true;
  1010. if (c == 0x0F39) return true;
  1011. if (c == 0x0F3E) return true;
  1012. if (c == 0x0F3F) return true;
  1013. if (c < 0x0F71) return false; if (c <= 0x0F84) return true;
  1014. if (c < 0x0F86) return false; if (c <= 0x0F8B) return true;
  1015. if (c < 0x0F90) return false; if (c <= 0x0F95) return true;
  1016. if (c == 0x0F97) return true;
  1017. if (c < 0x0F99) return false; if (c <= 0x0FAD) return true;
  1018. if (c < 0x0FB1) return false; if (c <= 0x0FB7) return true;
  1019. if (c == 0x0FB9) return true;
  1020. if (c < 0x20D0) return false; if (c <= 0x20DC) return true;
  1021. if (c == 0x20E1) return true;
  1022. if (c < 0x302A) return false; if (c <= 0x302F) return true;
  1023. if (c == 0x3099) return true;
  1024. if (c == 0x309A) return true;
  1025. return false;
  1026. }
  1027. /**
  1028. * This is a utility function for determining whether a specified
  1029. * character is an extender according to production 88 of the XML 1.0
  1030. * specification.
  1031. *
  1032. * @param c <code>char</code> to check.
  1033. * @return <code>String</code> true if it's an extender, false otherwise.
  1034. */
  1035. public static boolean isXMLExtender(char c) {
  1036. if (c < 0x00B6) return false; // quick short circuit
  1037. // Extenders
  1038. if (c == 0x00B7) return true;
  1039. if (c == 0x02D0) return true;
  1040. if (c == 0x02D1) return true;
  1041. if (c == 0x0387) return true;
  1042. if (c == 0x0640) return true;
  1043. if (c == 0x0E46) return true;
  1044. if (c == 0x0EC6) return true;
  1045. if (c == 0x3005) return true;
  1046. if (c < 0x3031) return false; if (c <= 0x3035) return true;
  1047. if (c < 0x309D) return false; if (c <= 0x309E) return true;
  1048. if (c < 0x30FC) return false; if (c <= 0x30FE) return true;
  1049. return false;
  1050. }
  1051. /**
  1052. * This is a utility function for determining whether a specified
  1053. * Unicode character
  1054. * is a digit according to production 88 of the XML 1.0 specification.
  1055. *
  1056. * @param c <code>char</code> to check for XML digit compliance
  1057. * @return <code>boolean</code> true if it's a digit, false otherwise
  1058. */
  1059. public static boolean isXMLDigit(char c) {
  1060. if (c < 0x0030) return false; if (c <= 0x0039) return true;
  1061. if (c < 0x0660) return false; if (c <= 0x0669) return true;
  1062. if (c < 0x06F0) return false; if (c <= 0x06F9) return true;
  1063. if (c < 0x0966) return false; if (c <= 0x096F) return true;
  1064. if (c < 0x09E6) return false; if (c <= 0x09EF) return true;
  1065. if (c < 0x0A66) return false; if (c <= 0x0A6F) return true;
  1066. if (c < 0x0AE6) return false; if (c <= 0x0AEF) return true;
  1067. if (c < 0x0B66) return false; if (c <= 0x0B6F) return true;
  1068. if (c < 0x0BE7) return false; if (c <= 0x0BEF) return true;
  1069. if (c < 0x0C66) return false; if (c <= 0x0C6F) return true;
  1070. if (c < 0x0CE6) return false; if (c <= 0x0CEF) return true;
  1071. if (c < 0x0D66) return false; if (c <= 0x0D6F) return true;
  1072. if (c < 0x0E50) return false; if (c <= 0x0E59) return true;
  1073. if (c < 0x0ED0) return false; if (c <= 0x0ED9) return true;
  1074. if (c < 0x0F20) return false; if (c <= 0x0F29) return true;
  1075. return false;
  1076. }
  1077. /**
  1078. * This is a utility function for determining whether a specified
  1079. * Unicode character is a whitespace character according to production 3
  1080. * of the XML 1.0 specification.
  1081. *
  1082. * @param c <code>char</code> to check for XML whitespace compliance
  1083. * @return <code>boolean</code> true if it's a whitespace, false otherwise
  1084. */
  1085. public static boolean isXMLWhitespace(char c) {
  1086. if (c==' ' || c=='\n' || c=='\t' || c=='\r' ){
  1087. return true;
  1088. }
  1089. return false;
  1090. }
  1091. }