You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

IRCReader.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /*
  2. * Copyright (c) 2006-2017 DMDirc Developers
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. * SOFTWARE.
  21. */
  22. package com.dmdirc.parser.irc;
  23. import com.dmdirc.parser.interfaces.Encoder;
  24. import java.io.Closeable;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.nio.ByteBuffer;
  28. import java.nio.charset.CharacterCodingException;
  29. import java.nio.charset.Charset;
  30. import java.nio.charset.CharsetDecoder;
  31. import java.nio.charset.CodingErrorAction;
  32. import java.util.HashMap;
  33. import java.util.Map;
  34. /**
  35. * A {@link java.io.BufferedReader}-style reader that is aware of the IRC
  36. * protocol and can transcode text appropriately.
  37. *
  38. * @since 0.6.5
  39. */
  40. public class IRCReader implements Closeable {
  41. /** Maximum length for an IRC line in bytes. */
  42. private static final int LINE_LENGTH = 1024;
  43. /** The input stream to read input from. */
  44. private final InputStream stream;
  45. /** The encoder to use to encode lines. */
  46. private final Encoder encoder;
  47. /** Decoder to use for parts not handled by the encoder. */
  48. private final CharsetDecoder decoder;
  49. /**
  50. * Creates a new IRCReader which will read from the specified stream.
  51. * Protocol-level elements (e.g. channel and user names) will be encoded
  52. * using the system default charset.
  53. *
  54. * @param inputStream The stream to read input from
  55. * @param encoder The encoder to use to encode lines
  56. */
  57. public IRCReader(final InputStream inputStream, final Encoder encoder) {
  58. this(inputStream, encoder, Charset.defaultCharset());
  59. }
  60. /**
  61. * Creates a new IRCReader which will read from the specified stream.
  62. *
  63. * @param inputStream The stream to read input from
  64. * @param encoder The encoder to use to encode lines
  65. * @param charset The charset to use for protocol-level elements
  66. */
  67. public IRCReader(final InputStream inputStream, final Encoder encoder,
  68. final Charset charset) {
  69. this.stream = inputStream;
  70. this.encoder = encoder;
  71. this.decoder = charset.newDecoder();
  72. this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
  73. this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  74. }
  75. /**
  76. * Reads a line from the underlying input stream, tokenises it, and
  77. * requests that this reader's encoder encodes the message part of the
  78. * line, if any.
  79. *
  80. * @return A wrapped line tokenised per RFC1459, or null if the stream ends
  81. * @throws IOException If an IOException is encountered reading the
  82. * underlying stream
  83. */
  84. public ReadLine readLine() throws IOException {
  85. final byte[] line = new byte[LINE_LENGTH];
  86. int offset = 0;
  87. int paramOffset = -1;
  88. int chr = 0, lastChr = 0;
  89. boolean hasTags = false;
  90. boolean endOfTags = false;
  91. boolean hasV3Tags = false;
  92. boolean foundFirstSpace = false;
  93. while (offset < LINE_LENGTH && (chr = stream.read()) > -1) {
  94. if (chr == '\r') {
  95. continue;
  96. } else if (chr == '\n') {
  97. // End of the line
  98. break;
  99. }
  100. if (hasTags && !endOfTags) {
  101. // Tags end either at the first @ for non-v3 tags or space for v3
  102. if (offset > 0 && ((chr == '@' && !hasV3Tags) || chr == ' ')) {
  103. endOfTags = true;
  104. hasV3Tags = (chr == ' ');
  105. }
  106. // If we are still possibly looking at tags, and we find a non-numeric
  107. // character, then we probably have v3Tags
  108. if (!endOfTags && (chr < '0' || chr > '9')) {
  109. hasV3Tags = true;
  110. }
  111. } else if (offset == 0 && chr == '@') {
  112. hasTags = true;
  113. } else if (offset == 0) {
  114. endOfTags = true;
  115. }
  116. line[offset++] = (byte) chr;
  117. if (lastChr == ' ' && chr == ':' && paramOffset == -1) {
  118. // We've found the last param
  119. if (!hasV3Tags || foundFirstSpace) {
  120. paramOffset = offset;
  121. } else if (hasV3Tags) {
  122. foundFirstSpace = true;
  123. }
  124. }
  125. lastChr = chr;
  126. }
  127. if (chr == -1) {
  128. // Hit the end of the stream
  129. return null;
  130. }
  131. return processLine(line, offset, paramOffset);
  132. }
  133. /**
  134. * Processes the specified line into a wrapped {@link ReadLine} instance.
  135. *
  136. * @param line The line as read from the wire
  137. * @param length The length of the line in bytes
  138. * @param paramOffset The offset of the first byte of the trailing parameter
  139. * (i.e., the first byte following the ASCII sequence ' :'), or -1 if no
  140. * such parameter exists.
  141. * @return A corresponding {@link ReadLine} instance
  142. */
  143. private ReadLine processLine(final byte[] line, final int length, final int paramOffset) {
  144. try {
  145. final String firstPart = this.decoder.decode(ByteBuffer.wrap(line,
  146. 0, paramOffset == -1 ? length : paramOffset - 2)).toString();
  147. final String[] firstTokens = firstPart.split("[ ]+");
  148. final String[] tokens;
  149. if (paramOffset > -1) {
  150. final String source = getSource(firstTokens);
  151. final String destination = getDestination(firstTokens);
  152. final String lastPart = encoder.encode(source, destination,
  153. line, paramOffset, length - paramOffset);
  154. tokens = new String[firstTokens.length + 1];
  155. System.arraycopy(firstTokens, 0, tokens, 0, firstTokens.length);
  156. tokens[firstTokens.length] = lastPart;
  157. } else {
  158. tokens = firstTokens;
  159. }
  160. return new ReadLine(new String(line, 0, length), tokens);
  161. } catch (CharacterCodingException ex) {
  162. // Shouldn't happen, as we're replacing errors.
  163. return null;
  164. }
  165. }
  166. /**
  167. * Determines the 'source' of a line made up of the specified tokens. A
  168. * source is described by the first token if and only if that token starts
  169. * with a colon.
  170. *
  171. * @param tokens The tokens to extract a source from
  172. * @return The relevant source or null if none specified
  173. */
  174. private String getSource(final String... tokens) {
  175. if (tokens.length > 0 && tokens[0].length() > 1 && tokens[0].charAt(0) == ':') {
  176. return tokens[0].substring(1);
  177. }
  178. return null;
  179. }
  180. /**
  181. * Determines the 'destination' of a line made up of the specified tokens.
  182. * A destination exists only if a source exists
  183. * (see {@link #getSource(java.lang.String[])}), and is contained within
  184. * the third argument for non-numeric lines, and fourth for numerics.
  185. *
  186. * @param tokens The tokens to extract a destination from
  187. * @return The relevant destination or null if none specified
  188. */
  189. private String getDestination(final String... tokens) {
  190. if (tokens.length > 0 && tokens[0].length() >= 3 && tokens[0].charAt(0) == ':') {
  191. final int target = tokens[1].matches("^[0-9]+$") ? 3 : 2;
  192. if (tokens.length > target) {
  193. return tokens[target];
  194. }
  195. }
  196. return null;
  197. }
  198. @Override
  199. public void close() throws IOException {
  200. stream.close();
  201. }
  202. /**
  203. * Represents a line that has been read from the IRC server and encoded
  204. * appropriately.
  205. */
  206. public static class ReadLine {
  207. /** A representation of the read-line using a default encoding. */
  208. private final String line;
  209. /** The tokens found in the line, individually encoded as appropriate. */
  210. private final String[] tokens;
  211. /** The tags (if any) found in the line, individually encoded as appropriate. */
  212. private final Map<String,String> tags = new HashMap<>();
  213. /**
  214. * Creates a new instance of {@link ReadLine} with the specified line
  215. * and tokens.
  216. *
  217. * @param line A string representation of the line
  218. * @param lineTokens The tokens which make up the line
  219. */
  220. public ReadLine(final String line, final String... lineTokens) {
  221. this.line = line;
  222. // In the case where TSIRC and message tags are used, the TSIRC tag can appear in 1 of 2 places depending
  223. // on interpretation of the spec - Either right at the start of the line, or as part of the actual message.
  224. // EG:
  225. // @123@@tag=value :test ing
  226. // @tag=value @123@:test ing
  227. //
  228. // are both functionally equivalent.
  229. //
  230. // Look for old-style TSIRC timestamp first.
  231. // Then look for message tags.
  232. // Then look again for tsirc, as it may be after the message tags.
  233. this.tokens = checkTSIRC(checkMessageTags(checkTSIRC(lineTokens)));
  234. }
  235. /**
  236. * Look for TSIRC Timestamp.
  237. *
  238. * @param lineTokens Current line tokens
  239. * @return The line tokens after we have removed the TSIRC Timestamp if
  240. * there was one, else we return lineTokens as-is.
  241. */
  242. private String[] checkTSIRC(final String[] lineTokens) {
  243. String[] tokens = lineTokens;
  244. if (tokens.length > 0 && !tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  245. final int tsEnd = tokens[0].indexOf('@', 1);
  246. if (tsEnd > -1) {
  247. try {
  248. final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
  249. tags.put("tsirc date", tokens[0].substring(1, tsEnd));
  250. tokens[0] = tokens[0].substring(tsEnd + 1);
  251. } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
  252. }
  253. }
  254. return tokens;
  255. }
  256. /**
  257. * Look for Message-Tags
  258. *
  259. * @param lineTokens Current line tokens
  260. * @return The line tokens after we have removed the message-tags if
  261. * there was any, else we return lineTokens as-is.
  262. */
  263. private String[] checkMessageTags(final String[] lineTokens) {
  264. String[] tokens = lineTokens;
  265. if (tokens.length > 0 && !tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  266. final String[] lineTags = tokens[0].substring(1).split(";");
  267. for (final String keyVal : lineTags) {
  268. if (!keyVal.isEmpty()) {
  269. final String[] keyValue = keyVal.split("=", 2);
  270. tags.put(keyValue[0], keyValue.length > 1 ? keyValue[1] : "");
  271. }
  272. }
  273. tokens = new String[lineTokens.length - 1];
  274. System.arraycopy(lineTokens, 1, tokens, 0, lineTokens.length - 1);
  275. }
  276. return tokens;
  277. }
  278. /**
  279. * Retrieves a string representation of the line that has been read.
  280. * This may be encoded using a charset which is not appropriate for
  281. * displaying all of the line's contents, and is intended for debug
  282. * purposes only.
  283. *
  284. * @return A string representation of the line
  285. */
  286. public String getLine() {
  287. return line;
  288. }
  289. /**
  290. * Retrieves an array of tokens extracted from the specified line.
  291. * Each token may have a different encoding.
  292. *
  293. * @return The line's tokens
  294. */
  295. public String[] getTokens() {
  296. return tokens;
  297. }
  298. /**
  299. * Retrieves a map of tags extracted from the specified line.
  300. *
  301. * @return The line's tags
  302. */
  303. public Map<String,String> getTags() {
  304. return tags;
  305. }
  306. }
  307. }