You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

IRCReader.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /*
  2. * Copyright (c) 2006-2017 DMDirc Developers
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. * SOFTWARE.
  21. */
  22. package com.dmdirc.parser.irc;
  23. import com.dmdirc.parser.interfaces.Encoder;
  24. import java.io.Closeable;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.nio.ByteBuffer;
  28. import java.nio.charset.CharacterCodingException;
  29. import java.nio.charset.Charset;
  30. import java.nio.charset.CharsetDecoder;
  31. import java.nio.charset.CodingErrorAction;
  32. import java.util.HashMap;
  33. import java.util.Map;
  34. /**
  35. * A {@link java.io.BufferedReader}-style reader that is aware of the IRC
  36. * protocol and can transcode text appropriately.
  37. *
  38. * @since 0.6.5
  39. */
  40. public class IRCReader implements Closeable {
  41. /** Maximum length for an IRC line in bytes. */
  42. private static final int LINE_LENGTH = 1024;
  43. /** The input stream to read input from. */
  44. private final InputStream stream;
  45. /** The encoder to use to encode lines. */
  46. private final Encoder encoder;
  47. /** Decoder to use for parts not handled by the encoder. */
  48. private final CharsetDecoder decoder;
  49. /**
  50. * Creates a new IRCReader which will read from the specified stream.
  51. * Protocol-level elements (e.g. channel and user names) will be encoded
  52. * using the system default charset.
  53. *
  54. * @param inputStream The stream to read input from
  55. * @param encoder The encoder to use to encode lines
  56. */
  57. public IRCReader(final InputStream inputStream, final Encoder encoder) {
  58. this(inputStream, encoder, Charset.defaultCharset());
  59. }
  60. /**
  61. * Creates a new IRCReader which will read from the specified stream.
  62. *
  63. * @param inputStream The stream to read input from
  64. * @param encoder The encoder to use to encode lines
  65. * @param charset The charset to use for protocol-level elements
  66. */
  67. public IRCReader(final InputStream inputStream, final Encoder encoder,
  68. final Charset charset) {
  69. this.stream = inputStream;
  70. this.encoder = encoder;
  71. this.decoder = charset.newDecoder();
  72. this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
  73. this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  74. }
  75. /**
  76. * Reads a line from the underlying input stream, tokenises it, and
  77. * requests that this reader's encoder encodes the message part of the
  78. * line, if any.
  79. *
  80. * @return A wrapped line tokenised per RFC1459, or null if the stream ends
  81. * @throws IOException If an IOException is encountered reading the
  82. * underlying stream
  83. */
  84. public ReadLine readLine() throws IOException {
  85. final byte[] line = new byte[LINE_LENGTH];
  86. int offset = 0;
  87. int paramOffset = -1;
  88. int chr = 0, lastChr = 0;
  89. boolean hasTags = false;
  90. boolean endOfTags = false;
  91. boolean hasV3Tags = false;
  92. boolean foundFirstSpace = false;
  93. while (offset < LINE_LENGTH && (chr = stream.read()) > -1) {
  94. if (chr == '\r') {
  95. continue;
  96. } else if (chr == '\n') {
  97. // End of the line
  98. break;
  99. }
  100. if (hasTags && !endOfTags) {
  101. // Tags end either at the first @ for non-v3 tags or space for v3
  102. if (offset > 0 && ((chr == '@' && !hasV3Tags) || chr == ' ')) {
  103. endOfTags = true;
  104. hasV3Tags = (chr == ' ');
  105. }
  106. // If we are still possibly looking at tags, and we find a non-numeric
  107. // character, then we probably have v3Tags
  108. if (!endOfTags && (chr < '0' || chr > '9')) {
  109. hasV3Tags = true;
  110. }
  111. } else if (offset == 0 && chr == '@') {
  112. hasTags = true;
  113. } else if (offset == 0) {
  114. endOfTags = true;
  115. }
  116. line[offset++] = (byte) chr;
  117. if (lastChr == ' ' && chr == ':' && paramOffset == -1) {
  118. // We've found the last param
  119. if (!hasV3Tags || foundFirstSpace) {
  120. paramOffset = offset;
  121. } else if (hasV3Tags) {
  122. foundFirstSpace = true;
  123. }
  124. }
  125. lastChr = chr;
  126. }
  127. if (chr == -1) {
  128. // Hit the end of the stream
  129. return null;
  130. }
  131. return processLine(line, offset, paramOffset);
  132. }
  133. /**
  134. * Processes the specified line into a wrapped {@link ReadLine} instance.
  135. *
  136. * @param line The line as read from the wire
  137. * @param length The length of the line in bytes
  138. * @param paramOffset The offset of the first byte of the trailing parameter
  139. * (i.e., the first byte following the ASCII sequence ' :'), or -1 if no
  140. * such parameter exists.
  141. * @return A corresponding {@link ReadLine} instance
  142. */
  143. private ReadLine processLine(final byte[] line, final int length, final int paramOffset) {
  144. try {
  145. final String firstPart = this.decoder.decode(ByteBuffer.wrap(line,
  146. 0, paramOffset == -1 ? length : paramOffset - 2)).toString();
  147. final String[] firstTokens = firstPart.split("[ ]+");
  148. final String[] tokens;
  149. if (paramOffset > -1) {
  150. final String source = getSource(firstTokens);
  151. final String destination = getDestination(firstTokens);
  152. final String lastPart = encoder.encode(source, destination,
  153. line, paramOffset, length - paramOffset);
  154. tokens = new String[firstTokens.length + 1];
  155. System.arraycopy(firstTokens, 0, tokens, 0, firstTokens.length);
  156. tokens[firstTokens.length] = lastPart;
  157. } else {
  158. tokens = firstTokens;
  159. }
  160. return new ReadLine(new String(line, 0, length), tokens);
  161. } catch (CharacterCodingException ex) {
  162. // Shouldn't happen, as we're replacing errors.
  163. return null;
  164. }
  165. }
  166. /**
  167. * Determines the 'source' of a line made up of the specified tokens. A
  168. * source is described by the first token if and only if that token starts
  169. * with a colon.
  170. *
  171. * @param tokens The tokens to extract a source from
  172. * @return The relevant source or null if none specified
  173. */
  174. private String getSource(final String... tokens) {
  175. if (tokens.length > 0 && tokens[0].length() > 1 && tokens[0].charAt(0) == ':') {
  176. return tokens[0].substring(1);
  177. }
  178. return null;
  179. }
  180. /**
  181. * Determines the 'destination' of a line made up of the specified tokens.
  182. * A destination exists only if a source exists
  183. * (see {@link #getSource(java.lang.String[])}), and is contained within
  184. * the third argument for non-numeric lines, and fourth for numerics.
  185. *
  186. * @param tokens The tokens to extract a destination from
  187. * @return The relevant destination or null if none specified
  188. */
  189. private String getDestination(final String... tokens) {
  190. if (tokens.length > 0 && tokens[0].length() >= 3 && tokens[0].charAt(0) == ':') {
  191. final int target = tokens[1].matches("^[0-9]+$") ? 3 : 2;
  192. if (tokens.length > target) {
  193. return tokens[target];
  194. }
  195. }
  196. return null;
  197. }
  198. @Override
  199. public void close() throws IOException {
  200. stream.close();
  201. }
  202. /**
  203. * Represents a line that has been read from the IRC server and encoded
  204. * appropriately.
  205. */
  206. public static class ReadLine {
  207. /** A representation of the read-line using a default encoding. */
  208. private final String line;
  209. /** The tokens found in the line, individually encoded as appropriate. */
  210. private final String[] tokens;
  211. /** The tags (if any) found in the line, individually encoded as appropriate. */
  212. private final Map<String,String> tags = new HashMap<>();
  213. /**
  214. * Creates a new instance of {@link ReadLine} with the specified line
  215. * and tokens.
  216. *
  217. * @param line A string representation of the line
  218. * @param lineTokens The tokens which make up the line
  219. */
  220. public ReadLine(final String line, final String... lineTokens) {
  221. this.line = line;
  222. String[] tokens = lineTokens;
  223. // In the case where TSIRC and message tags are used, the TSIRC tag can appear in 1 of 2 places depending
  224. // on interpretation of the spec - Either right at the start of the line, or as part of the actual message.
  225. // EG:
  226. // @123@@tag=value :test ing
  227. // @tag=value @123@:test ing
  228. //
  229. // are both functionally equivalent.
  230. // Look for old-style TSIRC timestamp first.
  231. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  232. final int tsEnd = tokens[0].indexOf('@', 1);
  233. if (tsEnd > -1) {
  234. try {
  235. final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
  236. tags.put("tsirc date", tokens[0].substring(1, tsEnd));
  237. tokens[0] = tokens[0].substring(tsEnd + 1);
  238. } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
  239. }
  240. }
  241. // Now look for message tags.
  242. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  243. final String[] lineTags = tokens[0].substring(1).split(";");
  244. for (final String keyVal : lineTags) {
  245. if (!keyVal.isEmpty()) {
  246. final String[] keyValue = keyVal.split("=", 2);
  247. tags.put(keyValue[0], keyValue.length > 1 ? keyValue[1] : "");
  248. }
  249. }
  250. tokens = new String[lineTokens.length - 1];
  251. System.arraycopy(lineTokens, 1, tokens, 0, lineTokens.length - 1);
  252. }
  253. // Look again for tsirc, as it may be after the message tags.
  254. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  255. final int tsEnd = tokens[0].indexOf('@', 1);
  256. if (tsEnd > -1) {
  257. try {
  258. final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
  259. tags.put("tsirc date", tokens[0].substring(1, tsEnd));
  260. tokens[0] = tokens[0].substring(tsEnd + 1);
  261. } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
  262. }
  263. }
  264. this.tokens = tokens;
  265. }
  266. /**
  267. * Retrieves a string representation of the line that has been read.
  268. * This may be encoded using a charset which is not appropriate for
  269. * displaying all of the line's contents, and is intended for debug
  270. * purposes only.
  271. *
  272. * @return A string representation of the line
  273. */
  274. public String getLine() {
  275. return line;
  276. }
  277. /**
  278. * Retrieves an array of tokens extracted from the specified line.
  279. * Each token may have a different encoding.
  280. *
  281. * @return The line's tokens
  282. */
  283. public String[] getTokens() {
  284. return tokens;
  285. }
  286. /**
  287. * Retrieves a map of tags extracted from the specified line.
  288. *
  289. * @return The line's tags
  290. */
  291. public Map<String,String> getTags() {
  292. return tags;
  293. }
  294. }
  295. }