/*
 * Copyright (c) 2006-2017 DMDirc Developers
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package com.dmdirc.parser.irc;

import com.dmdirc.parser.interfaces.Encoder;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

- /**
- * A {@link java.io.BufferedReader}-style reader that is aware of the IRC
- * protocol and can transcode text appropriately.
- *
- * @since 0.6.5
- */
- public class IRCReader implements Closeable {
-
- /** Maximum length for an IRC line in bytes. */
- private static final int LINE_LENGTH = 1024;
- /** The input stream to read input from. */
- private final InputStream stream;
- /** The encoder to use to encode lines. */
- private final Encoder encoder;
- /** Decoder to use for parts not handled by the encoder. */
- private final CharsetDecoder decoder;
-
- /**
- * Creates a new IRCReader which will read from the specified stream.
- * Protocol-level elements (e.g. channel and user names) will be encoded
- * using the system default charset.
- *
- * @param inputStream The stream to read input from
- * @param encoder The encoder to use to encode lines
- */
- public IRCReader(final InputStream inputStream, final Encoder encoder) {
- this(inputStream, encoder, Charset.defaultCharset());
- }
-
- /**
- * Creates a new IRCReader which will read from the specified stream.
- *
- * @param inputStream The stream to read input from
- * @param encoder The encoder to use to encode lines
- * @param charset The charset to use for protocol-level elements
- */
- public IRCReader(final InputStream inputStream, final Encoder encoder,
- final Charset charset) {
- this.stream = inputStream;
- this.encoder = encoder;
- this.decoder = charset.newDecoder();
- this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
- this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
- }
-
- /**
- * Reads a line from the underlying input stream, tokenises it, and
- * requests that this reader's encoder encodes the message part of the
- * line, if any.
- *
- * @return A wrapped line tokenised per RFC1459, or null if the stream ends
- * @throws IOException If an IOException is encountered reading the
- * underlying stream
- */
- public ReadLine readLine() throws IOException {
- final byte[] line = new byte[LINE_LENGTH];
- int offset = 0;
- int paramOffset = -1;
- int chr = 0, lastChr = 0;
-
- boolean hasTags = false;
- boolean endOfTags = false;
- boolean hasV3Tags = false;
- boolean foundFirstSpace = false;
-
- while (offset < LINE_LENGTH && (chr = stream.read()) > -1) {
- if (chr == '\r') {
- continue;
- } else if (chr == '\n') {
- // End of the line
- break;
- }
-
- if (hasTags && !endOfTags) {
- // Tags end either at the first @ for non-v3 tags or space for v3
- if (offset > 0 && ((chr == '@' && !hasV3Tags) || chr == ' ')) {
- endOfTags = true;
- hasV3Tags = (chr == ' ');
- }
- // If we are still possibly looking at tags, and we find a non-numeric
- // character, then we probably have v3Tags
- if (!endOfTags && (chr < '0' || chr > '9')) {
- hasV3Tags = true;
- }
- } else if (offset == 0 && chr == '@') {
- hasTags = true;
- } else if (offset == 0) {
- endOfTags = true;
- }
-
- line[offset++] = (byte) chr;
-
- if (lastChr == ' ' && chr == ':' && paramOffset == -1) {
- // We've found the last param
- if (!hasV3Tags || foundFirstSpace) {
- paramOffset = offset;
- } else if (hasV3Tags) {
- foundFirstSpace = true;
- }
- }
-
- lastChr = chr;
- }
-
- if (chr == -1) {
- // Hit the end of the stream
- return null;
- }
-
- return processLine(line, offset, paramOffset);
- }
-
- /**
- * Processes the specified line into a wrapped {@link ReadLine} instance.
- *
- * @param line The line as read from the wire
- * @param length The length of the line in bytes
- * @param paramOffset The offset of the first byte of the trailing parameter
- * (i.e., the first byte following the ASCII sequence ' :'), or -1 if no
- * such parameter exists.
- * @return A corresponding {@link ReadLine} instance
- */
- private ReadLine processLine(final byte[] line, final int length, final int paramOffset) {
- try {
- final String firstPart = this.decoder.decode(ByteBuffer.wrap(line,
- 0, paramOffset == -1 ? length : paramOffset - 2)).toString();
-
- final String[] firstTokens = firstPart.split("[ ]+");
-
- final String[] tokens;
- if (paramOffset > -1) {
- final String source = getSource(firstTokens);
- final String destination = getDestination(firstTokens);
-
- final String lastPart = encoder.encode(source, destination,
- line, paramOffset, length - paramOffset);
- tokens = new String[firstTokens.length + 1];
- System.arraycopy(firstTokens, 0, tokens, 0, firstTokens.length);
- tokens[firstTokens.length] = lastPart;
- } else {
- tokens = firstTokens;
- }
-
- return new ReadLine(new String(line, 0, length), tokens);
- } catch (CharacterCodingException ex) {
- // Shouldn't happen, as we're replacing errors.
- return null;
- }
- }
-
- /**
- * Determines the 'source' of a line made up of the specified tokens. A
- * source is described by the first token if and only if that token starts
- * with a colon.
- *
- * @param tokens The tokens to extract a source from
- * @return The relevant source or null if none specified
- */
- private String getSource(final String... tokens) {
- if (tokens.length > 0 && tokens[0].length() > 1 && tokens[0].charAt(0) == ':') {
- return tokens[0].substring(1);
- }
-
- return null;
- }
-
- /**
- * Determines the 'destination' of a line made up of the specified tokens.
- * A destination exists only if a source exists
- * (see {@link #getSource(java.lang.String[])}), and is contained within
- * the third argument for non-numeric lines, and fourth for numerics.
- *
- * @param tokens The tokens to extract a destination from
- * @return The relevant destination or null if none specified
- */
- private String getDestination(final String... tokens) {
- if (tokens.length > 0 && tokens[0].length() >= 3 && tokens[0].charAt(0) == ':') {
- final int target = tokens[1].matches("^[0-9]+$") ? 3 : 2;
-
- if (tokens.length > target) {
- return tokens[target];
- }
- }
-
- return null;
- }
-
- @Override
- public void close() throws IOException {
- stream.close();
- }
-
- /**
- * Represents a line that has been read from the IRC server and encoded
- * appropriately.
- */
- public static class ReadLine {
-
- /** A representation of the read-line using a default encoding. */
- private final String line;
- /** The tokens found in the line, individually encoded as appropriate. */
- private final String[] tokens;
- /** The tags (if any) found in the line, individually encoded as appropriate. */
- private final Map<String,String> tags = new HashMap<>();
-
- /**
- * Creates a new instance of {@link ReadLine} with the specified line
- * and tokens.
- *
- * @param line A string representation of the line
- * @param lineTokens The tokens which make up the line
- */
- public ReadLine(final String line, final String... lineTokens) {
- this.line = line;
-
- // In the case where TSIRC and message tags are used, the TSIRC tag can appear in 1 of 2 places depending
- // on interpretation of the spec - Either right at the start of the line, or as part of the actual message.
- // EG:
- // @123@@tag=value :test ing
- // @tag=value @123@:test ing
- //
- // are both functionally equivalent.
- //
- // Look for old-style TSIRC timestamp first.
- // Then look for message tags.
- // Then look again for tsirc, as it may be after the message tags.
- this.tokens = checkTSIRC(checkMessageTags(checkTSIRC(lineTokens)));
- }
-
- /**
- * Look for TSIRC Timestamp.
- *
- * @param lineTokens Current line tokens
- * @return The line tokens after we have removed the TSIRC Timestamp if
- * there was one, else we return lineTokens as-is.
- */
- private String[] checkTSIRC(final String[] lineTokens) {
- String[] tokens = lineTokens;
- if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
- final int tsEnd = tokens[0].indexOf('@', 1);
- if (tsEnd > -1) {
- try {
- final long ts = Long.parseLong(tokens[0].substring(1, tsEnd));
- tags.put("tsirc date", tokens[0].substring(1, tsEnd));
- tokens[0] = tokens[0].substring(tsEnd + 1);
- } catch (final NumberFormatException nfe) { /* Not a timestamp. */ }
- }
- }
-
- return tokens;
- }
-
- /**
- * Look for Message-Tags
- *
- * @param lineTokens Current line tokens
- * @return The line tokens after we have removed the message-tags if
- * there was any, else we return lineTokens as-is.
- */
- private String[] checkMessageTags(final String[] lineTokens) {
- String[] tokens = lineTokens;
- if (!tokens[0].isEmpty() && tokens[0].charAt(0) == '@') {
- final String[] lineTags = tokens[0].substring(1).split(";");
- for (final String keyVal : lineTags) {
- if (!keyVal.isEmpty()) {
- final String[] keyValue = keyVal.split("=", 2);
- tags.put(keyValue[0], keyValue.length > 1 ? keyValue[1] : "");
- }
- }
-
- tokens = new String[lineTokens.length - 1];
- System.arraycopy(lineTokens, 1, tokens, 0, lineTokens.length - 1);
- }
-
- return tokens;
- }
-
- /**
- * Retrieves a string representation of the line that has been read.
- * This may be encoded using a charset which is not appropriate for
- * displaying all of the line's contents, and is intended for debug
- * purposes only.
- *
- * @return A string representation of the line
- */
- public String getLine() {
- return line;
- }
-
- /**
- * Retrieves an array of tokens extracted from the specified line.
- * Each token may have a different encoding.
- *
- * @return The line's tokens
- */
- public String[] getTokens() {
- return tokens;
- }
-
- /**
- * Retrieves a map of tags extracted from the specified line.
- *
- * @return The line's tags
- */
- public Map<String,String> getTags() {
- return tags;
- }
- }
- }
|