Bläddra i källkod

Merge pull request #1152 from slingamn/utf8.1

fix #1151
tags/v2.2.0-rc1
Shivaram Lingamneni 4 år sedan
förälder
incheckning
ba68358c21
Inget konto är kopplat till bidragsgivarens mejladress
11 ändrade filer med 53 tillägg och 18 borttagningar
  1. 6
    1
      conventional.yaml
  2. 6
    1
      default.yaml
  3. 6
    1
      irc/client.go
  4. 5
    0
      irc/commands.go
  5. 1
    0
      irc/config.go
  6. 1
    0
      irc/errors.go
  7. 6
    0
      irc/handlers.go
  8. 6
    4
      irc/ircconn.go
  9. 3
    0
      irc/server.go
  10. 7
    11
      irc/socket.go
  11. 6
    0
      irc/strings.go

+ 6
- 1
conventional.yaml Visa fil

@@ -100,7 +100,7 @@ server:
100 100
 
101 101
     # casemapping controls what kinds of strings are permitted as identifiers (nicknames,
102 102
     # channel names, account names, etc.), and how they are normalized for case.
103
-    # with the recommended default of 'precis', utf-8 identifiers that are "sane"
103
+    # with the recommended default of 'precis', UTF8 identifiers that are "sane"
104 104
     # (according to RFC 8265) are allowed, and the server additionally tries to protect
105 105
     # against confusable characters ("homoglyph attacks").
106 106
     # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@@ -110,6 +110,11 @@ server:
110 110
     # already up and running is problematic).
111 111
     casemapping: "precis"
112 112
 
113
+    # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages
114
+    # (as in traditional IRC) or preemptively discards non-UTF8 messages (since
115
+    # they cannot be relayed to websocket clients).
116
+    enforce-utf8: true
117
+
113 118
     # whether to look up user hostnames with reverse DNS.
114 119
     # (disabling this will expose user IPs instead of hostnames;
115 120
     # to make IP/hostname information private, see the ip-cloaking section)

+ 6
- 1
default.yaml Visa fil

@@ -126,7 +126,7 @@ server:
126 126
 
127 127
     # casemapping controls what kinds of strings are permitted as identifiers (nicknames,
128 128
     # channel names, account names, etc.), and how they are normalized for case.
129
-    # with the recommended default of 'precis', utf-8 identifiers that are "sane"
129
+    # with the recommended default of 'precis', UTF8 identifiers that are "sane"
130 130
     # (according to RFC 8265) are allowed, and the server additionally tries to protect
131 131
     # against confusable characters ("homoglyph attacks").
132 132
     # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@@ -136,6 +136,11 @@ server:
136 136
     # already up and running is problematic).
137 137
     casemapping: "precis"
138 138
 
139
+    # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages
140
+    # (as in traditional IRC) or preemptively discards non-UTF8 messages (since
141
+    # they cannot be relayed to websocket clients).
142
+    enforce-utf8: true
143
+
139 144
     # whether to look up user hostnames with reverse DNS.
140 145
     # (disabling this will expose user IPs instead of hostnames;
141 146
     # to make IP/hostname information private, see the ip-cloaking section)

+ 6
- 1
irc/client.go Visa fil

@@ -615,8 +615,11 @@ func (client *Client) run(session *Session) {
615 615
 	firstLine := !isReattach
616 616
 
617 617
 	for {
618
+		var invalidUtf8 bool
618 619
 		line, err := session.socket.Read()
619
-		if err != nil {
620
+		if err == errInvalidUtf8 {
621
+			invalidUtf8 = true // handle as normal, including labeling
622
+		} else if err != nil {
620 623
 			quitMessage := "connection closed"
621 624
 			if err == errReadQ {
622 625
 				quitMessage = "readQ exceeded"
@@ -676,6 +679,8 @@ func (client *Client) run(session *Session) {
676 679
 		cmd, exists := Commands[msg.Command]
677 680
 		if !exists {
678 681
 			cmd = unknownCommand
682
+		} else if invalidUtf8 {
683
+			cmd = invalidUtf8Command
679 684
 		}
680 685
 
681 686
 		isExiting := cmd.Run(client.server, client, session, msg)

+ 5
- 0
irc/commands.go Visa fil

@@ -79,6 +79,11 @@ var unknownCommand = Command{
79 79
 	usablePreReg: true,
80 80
 }
81 81
 
82
+var invalidUtf8Command = Command{
83
+	handler:      invalidUtf8Handler,
84
+	usablePreReg: true,
85
+}
86
+
82 87
 // Commands holds all commands executable by a client connected to us.
83 88
 var Commands map[string]Command
84 89
 

+ 1
- 0
irc/config.go Visa fil

@@ -519,6 +519,7 @@ type Config struct {
519 519
 		supportedCaps *caps.Set
520 520
 		capValues     caps.Values
521 521
 		Casemapping   Casemapping
522
+		EnforceUtf8   bool   `yaml:"enforce-utf8"`
522 523
 		OutputPath    string `yaml:"output-path"`
523 524
 	}
524 525
 

+ 1
- 0
irc/errors.go Visa fil

@@ -66,6 +66,7 @@ var (
66 66
 	errCredsExternallyManaged         = errors.New("Credentials are externally managed and cannot be changed here")
67 67
 	errInvalidMultilineBatch          = errors.New("Invalid multiline batch")
68 68
 	errTimedOut                       = errors.New("Operation timed out")
69
+	errInvalidUtf8                    = errors.New("Message rejected for invalid utf8")
69 70
 )
70 71
 
71 72
 // Socket Errors

+ 6
- 0
irc/handlers.go Visa fil

@@ -2918,3 +2918,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage
2918 2918
 	rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command"))
2919 2919
 	return false
2920 2920
 }
2921
+
2922
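+// fake handler, run in place of the real command when the incoming line contained invalid UTF-8; replies with FAIL <command> INVALID_UTF8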
2923
+func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool {
2924
+	rb.Add(nil, server.name, "FAIL", utils.SafeErrorParam(msg.Command), "INVALID_UTF8", client.t("Message rejected for containing invalid UTF-8"))
2925
+	return false
2926
+}

+ 6
- 4
irc/ircconn.go Visa fil

@@ -76,7 +76,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) {
76 76
 	if isPrefix {
77 77
 		return nil, errReadQ
78 78
 	}
79
-	line = bytes.TrimSuffix(line, crlf)
79
+	if globalUtf8EnforcementSetting && !utf8.Valid(line) {
80
+		err = errInvalidUtf8
81
+	}
80 82
 	return
81 83
 }
82 84
 
@@ -101,9 +103,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn {
101 103
 
102 104
 func (wc IRCWSConn) WriteLine(buf []byte) (err error) {
103 105
 	buf = bytes.TrimSuffix(buf, crlf)
104
-	// there's not much we can do about this;
105
-	// silently drop the message
106
-	if !utf8.Valid(buf) {
106
+	if !globalUtf8EnforcementSetting && !utf8.Valid(buf) {
107
+		// there's not much we can do about this;
108
+		// silently drop the message
107 109
 		return nil
108 110
 	}
109 111
 	return wc.conn.WriteMessage(websocket.TextMessage, buf)

+ 3
- 0
irc/server.go Visa fil

@@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) {
487 487
 		server.name = config.Server.Name
488 488
 		server.nameCasefolded = config.Server.nameCasefolded
489 489
 		globalCasemappingSetting = config.Server.Casemapping
490
+		globalUtf8EnforcementSetting = config.Server.EnforceUtf8
490 491
 	} else {
491 492
 		// enforce configs that can't be changed after launch:
492 493
 		if server.name != config.Server.Name {
@@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) {
495 496
 			return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted")
496 497
 		} else if globalCasemappingSetting != config.Server.Casemapping {
497 498
 			return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted")
499
+		} else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 {
500
+			return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted")
498 501
 		} else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn {
499 502
 			return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted")
500 503
 		}

+ 7
- 11
irc/socket.go Visa fil

@@ -7,7 +7,6 @@ package irc
7 7
 import (
8 8
 	"errors"
9 9
 	"io"
10
-	"strings"
11 10
 	"sync"
12 11
 
13 12
 	"github.com/oragono/oragono/irc/utils"
@@ -59,27 +58,24 @@ func (socket *Socket) Close() {
59 58
 
60 59
 // Read returns a single IRC line from a Socket.
61 60
 func (socket *Socket) Read() (string, error) {
61
+	// immediately fail if Close() has been called, even if there's
62
+	// still data in a bufio.Reader or websocket buffer:
62 63
 	if socket.IsClosed() {
63 64
 		return "", io.EOF
64 65
 	}
65 66
 
66 67
 	lineBytes, err := socket.conn.ReadLine()
67
-
68
-	// convert bytes to string
69 68
 	line := string(lineBytes)
70 69
 
71
-	// read last message properly (such as ERROR/QUIT/etc), just fail next reads/writes
72 70
 	if err == io.EOF {
73 71
 		socket.Close()
72
+		// still process a final non-empty line (such as ERROR/QUIT/etc) normally; the socket is now closed, so subsequent reads/writes will fail
73
+		if line != "" {
74
+			err = nil
75
+		}
74 76
 	}
75 77
 
76
-	if err == io.EOF && strings.TrimSpace(line) != "" {
77
-		// don't do anything
78
-	} else if err != nil {
79
-		return "", err
80
-	}
81
-
82
-	return line, nil
78
+	return line, err
83 79
 }
84 80
 
85 81
 // Write sends the given string out of Socket. Requirements:

+ 6
- 0
irc/strings.go Visa fil

@@ -50,6 +50,12 @@ const (
50 50
 // this happens-before all IRC connections and all casefolding operations.
51 51
 var globalCasemappingSetting Casemapping = CasemappingPRECIS
52 52
 
53
+// XXX analogous (to globalCasemappingSetting) unsynchronized global variable controlling utf8 validation:
54
+// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459
55
+// octets) and invalid utf8 messages are silently dropped for websocket clients only.
56
+// if this is on, invalid utf8 inputs get a FAIL reply.
57
+var globalUtf8EnforcementSetting bool
58
+
53 59
 // Each pass of PRECIS casefolding is a composition of idempotent operations,
54 60
 // but not idempotent itself. Therefore, the spec says "do it four times and hope
55 61
 // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,

Laddar…
Avbryt
Spara