Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

IRCReader.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  /*
   * Copyright (c) 2006-2017 DMDirc Developers
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
   * of this software and associated documentation files (the "Software"), to deal
   * in the Software without restriction, including without limitation the rights
   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   * copies of the Software, and to permit persons to whom the Software is
   * furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be included in
   * all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   */
  22. package com.dmdirc.parser.irc;
  23. import com.dmdirc.parser.interfaces.Encoder;
  24. import java.io.Closeable;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.nio.ByteBuffer;
  28. import java.nio.charset.CharacterCodingException;
  29. import java.nio.charset.Charset;
  30. import java.nio.charset.CharsetDecoder;
  31. import java.nio.charset.CodingErrorAction;
  32. import java.util.Arrays;
  33. import java.util.HashMap;
  34. import java.util.Map;
  35. /**
  36. * A {@link java.io.BufferedReader}-style reader that is aware of the IRC
  37. * protocol and can transcode text appropriately.
  38. *
  39. * @since 0.6.5
  40. */
  41. public class IRCReader implements Closeable {
  42. /** Maximum length for an IRC line in bytes. */
  43. private static final int LINE_LENGTH = 1024;
  44. /** The input stream to read input from. */
  45. private final InputStream stream;
  46. /** The encoder to use to encode lines. */
  47. private final Encoder encoder;
  48. /** Decoder to use for parts not handled by the encoder. */
  49. private final CharsetDecoder decoder;
  50. /**
  51. * Creates a new IRCReader which will read from the specified stream.
  52. * Protocol-level elements (e.g. channel and user names) will be encoded
  53. * using the system default charset.
  54. *
  55. * @param inputStream The stream to read input from
  56. * @param encoder The encoder to use to encode lines
  57. */
  58. public IRCReader(final InputStream inputStream, final Encoder encoder) {
  59. this(inputStream, encoder, Charset.defaultCharset());
  60. }
  61. /**
  62. * Creates a new IRCReader which will read from the specified stream.
  63. *
  64. * @param inputStream The stream to read input from
  65. * @param encoder The encoder to use to encode lines
  66. * @param charset The charset to use for protocol-level elements
  67. */
  68. public IRCReader(final InputStream inputStream, final Encoder encoder,
  69. final Charset charset) {
  70. this.stream = inputStream;
  71. this.encoder = encoder;
  72. this.decoder = charset.newDecoder();
  73. this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
  74. this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  75. }
  76. /**
  77. * Reads a line from the underlying input stream, tokenises it, and
  78. * requests that this reader's encoder encodes the message part of the
  79. * line, if any.
  80. *
  81. * @return A wrapped line tokenised per RFC1459, or null if the stream ends
  82. * @throws IOException If an IOException is encountered reading the
  83. * underlying stream
  84. */
  85. public ReadLine readLine() throws IOException {
  86. final byte[] line = new byte[LINE_LENGTH];
  87. int offset = 0;
  88. int paramOffset = -1;
  89. int chr = 0, lastChr = 0;
  90. boolean hasTags = false;
  91. boolean endOfTags = false;
  92. boolean hasV3Tags = false;
  93. boolean foundFirstSpace = false;
  94. while (offset < LINE_LENGTH && (chr = stream.read()) > -1) {
  95. if (chr == '\r') {
  96. continue;
  97. } else if (chr == '\n') {
  98. // End of the line
  99. break;
  100. }
  101. if (hasTags && !endOfTags) {
  102. // Tags end either at the first @ for non-v3 tags or space for v3
  103. if (offset > 0 && ((chr == '@' && !hasV3Tags) || chr == ' ')) {
  104. endOfTags = true;
  105. hasV3Tags = (chr == ' ');
  106. }
  107. // If we are still possibly looking at tags, and we find a non-numeric
  108. // character, then we probably have v3Tags
  109. if (!endOfTags && (chr < '0' || chr > '9')) {
  110. hasV3Tags = true;
  111. }
  112. } else if (offset == 0 && chr == '@') {
  113. hasTags = true;
  114. } else if (offset == 0) {
  115. endOfTags = true;
  116. }
  117. line[offset++] = (byte) chr;
  118. if (lastChr == ' ' && chr == ':' && paramOffset == -1) {
  119. // We've found the last param
  120. if (!hasV3Tags || foundFirstSpace) {
  121. paramOffset = offset;
  122. } else if (hasV3Tags) {
  123. foundFirstSpace = true;
  124. }
  125. }
  126. lastChr = chr;
  127. }
  128. if (chr == -1) {
  129. // Hit the end of the stream
  130. return null;
  131. }
  132. return processLine(line, offset, paramOffset);
  133. }
  134. /**
  135. * Processes the specified line into a wrapped {@link ReadLine} instance.
  136. *
  137. * @param line The line as read from the wire
  138. * @param length The length of the line in bytes
  139. * @param paramOffset The offset of the first byte of the trailing parameter
  140. * (i.e., the first byte following the ASCII sequence ' :'), or -1 if no
  141. * such parameter exists.
  142. * @return A corresponding {@link ReadLine} instance
  143. */
  144. private ReadLine processLine(final byte[] line, final int length, final int paramOffset) {
  145. try {
  146. final String firstPart = this.decoder.decode(ByteBuffer.wrap(line,
  147. 0, paramOffset == -1 ? length : paramOffset - 2)).toString();
  148. final String[] firstTokens = firstPart.split("[ ]+");
  149. final String[] tokens;
  150. if (paramOffset > -1) {
  151. final String source = getSource(firstTokens);
  152. final String destination = getDestination(firstTokens);
  153. final String lastPart = encoder.encode(source, destination,
  154. line, paramOffset, length - paramOffset);
  155. tokens = new String[firstTokens.length + 1];
  156. System.arraycopy(firstTokens, 0, tokens, 0, firstTokens.length);
  157. tokens[firstTokens.length] = lastPart;
  158. } else {
  159. tokens = firstTokens;
  160. }
  161. return new ReadLine(new String(line, 0, length), tokens);
  162. } catch (CharacterCodingException ex) {
  163. // Shouldn't happen, as we're replacing errors.
  164. return null;
  165. }
  166. }
  167. /**
  168. * Determines the 'source' of a line made up of the specified tokens. A
  169. * source is described by the first token if and only if that token starts
  170. * with a colon.
  171. *
  172. * @param tokens The tokens to extract a source from
  173. * @return The relevant source or null if none specified
  174. */
  175. private String getSource(final String... tokens) {
  176. if (tokens.length > 0 && tokens[0].length() > 1 && tokens[0].charAt(0) == ':') {
  177. return tokens[0].substring(1);
  178. }
  179. return null;
  180. }
  181. /**
  182. * Determines the 'destination' of a line made up of the specified tokens.
  183. * A destination exists only if a source exists
  184. * (see {@link #getSource(java.lang.String[])}), and is contained within
  185. * the third argument for non-numeric lines, and fourth for numerics.
  186. *
  187. * @param tokens The tokens to extract a destination from
  188. * @return The relevant destination or null if none specified
  189. */
  190. private String getDestination(final String... tokens) {
  191. if (tokens.length > 0 && tokens[0].length() >= 3 && tokens[0].charAt(0) == ':') {
  192. final int target = tokens[1].matches("^[0-9]+$") ? 3 : 2;
  193. if (tokens.length > target) {
  194. return tokens[target];
  195. }
  196. }
  197. return null;
  198. }
  199. @Override
  200. public void close() throws IOException {
  201. stream.close();
  202. }
  203. /**
  204. * Represents a line that has been read from the IRC server and encoded
  205. * appropriately.
  206. */
  207. public static class ReadLine {
  208. /** A representation of the read-line using a default encoding. */
  209. private final String line;
  210. /** The tokens found in the line, individually encoded as appropriate. */
  211. private final String[] tokens;
  212. /** The tags (if any) found in the line, individually encoded as appropriate. */
  213. private final Map<String,String> tags = new HashMap<>();
  214. /**
  215. * Creates a new instance of {@link ReadLine} with the specified line
  216. * and tokens.
  217. *
  218. * @param line A string representation of the line
  219. * @param lineTokens The tokens which make up the line
  220. */
  221. public ReadLine(final String line, final String... lineTokens) {
  222. this.line = line;
  223. String[] tokens = lineTokens;
  224. // In the case where TSIRC and message tags are used, the TSIRC tag can appear in 1 of 2 places depending
  225. // on interpretation of the spec - Either right at the start of the line, or as part of the actual message.
  226. // EG:
  227. // @123@@tag=value :test ing
  228. // @tag=value @123@:test ing
  229. //
  230. // are both functionally equivalent.
  231. // Look for old-style TSIRC timestamp first.
  232. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  233. final int tsEnd = tokens[0].indexOf('@', 1);
  234. if (tsEnd > -1) {
  235. try {
  236. final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
  237. tags.put("tsirc date", tokens[0].substring(1, tsEnd));
  238. tokens[0] = tokens[0].substring(tsEnd + 1);
  239. } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
  240. }
  241. }
  242. // Now look for message tags.
  243. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  244. final String[] lineTags = tokens[0].substring(1).split(";");
  245. for (final String keyVal : lineTags) {
  246. if (!keyVal.isEmpty()) {
  247. final String[] keyValue = keyVal.split("=", 2);
  248. tags.put(keyValue[0], keyValue.length > 1 ? keyValue[1] : "");
  249. }
  250. }
  251. tokens = new String[lineTokens.length - 1];
  252. System.arraycopy(lineTokens, 1, tokens, 0, lineTokens.length - 1);
  253. }
  254. // Look again for tsirc, as it may be after the message tags.
  255. if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
  256. final int tsEnd = tokens[0].indexOf('@', 1);
  257. if (tsEnd > -1) {
  258. try {
  259. final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
  260. tags.put("tsirc date", tokens[0].substring(1, tsEnd));
  261. tokens[0] = tokens[0].substring(tsEnd + 1);
  262. } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
  263. }
  264. }
  265. this.tokens = tokens;
  266. }
  267. /**
  268. * Retrieves a string representation of the line that has been read.
  269. * This may be encoded using a charset which is not appropriate for
  270. * displaying all of the line's contents, and is intended for debug
  271. * purposes only.
  272. *
  273. * @return A string representation of the line
  274. */
  275. public String getLine() {
  276. return line;
  277. }
  278. /**
  279. * Retrieves an array of tokens extracted from the specified line.
  280. * Each token may have a different encoding.
  281. *
  282. * @return The line's tokens
  283. */
  284. public String[] getTokens() {
  285. return tokens;
  286. }
  287. /**
  288. * Retrieves a map of tags extracted from the specified line.
  289. *
  290. * @return The line's tags
  291. */
  292. public Map<String,String> getTags() {
  293. return tags;
  294. }
  295. }
  296. }