Browse Source

Merge pull request #564 from slingamn/confusables_again.1

more systematic fix for #562
tags/v1.1.0
Daniel Oaks 5 years ago
parent
commit
c65c7de736
No account linked to committer's email address
2 changed files with 9 additions and 39 deletions
  1. 1
    23
      irc/strings.go
  2. 8
    16
      irc/strings_test.go

+ 1
- 23
irc/strings.go View File

108
 	return lowered, err
108
 	return lowered, err
109
 }
109
 }
110
 
110
 
111
-// "boring" names are exempt from skeletonization.
112
-// this is because confusables.txt considers various pure ASCII alphanumeric
113
-// strings confusable: 0 and O, 1 and l, m and rn. IMO this causes more problems
114
-// than it solves.
115
-func isBoring(name string) bool {
116
-	for i := 0; i < len(name); i += 1 {
117
-		chr := name[i]
118
-		if (chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || (chr >= '0' && chr <= '9') {
119
-			continue // alphanumerics
120
-		}
121
-		switch chr {
122
-		case '$', '%', '^', '&', '(', ')', '{', '}', '[', ']', '<', '>', '=':
123
-			continue // benign printable ascii characters
124
-		default:
125
-			return false // potentially confusable ascii like | ' `, non-ascii
126
-		}
127
-	}
128
-	return true
129
-}
130
-
131
 // returns true if the given name is a valid ident, using a mix of Insp and
111
 // returns true if the given name is a valid ident, using a mix of Insp and
132
 // Chary's ident restrictions.
112
 // Chary's ident restrictions.
133
 func isIdent(name string) bool {
113
 func isIdent(name string) bool {
168
 	// same as PRECIS:
148
 	// same as PRECIS:
169
 	name = width.Fold.String(name)
149
 	name = width.Fold.String(name)
170
 
150
 
171
-	if !isBoring(name) {
172
-		name = confusables.Skeleton(name)
173
-	}
151
+	name = confusables.Skeleton(name)
174
 
152
 
175
 	// internationalized lowercasing for skeletons; this is much more lenient than
153
 	// internationalized lowercasing for skeletons; this is much more lenient than
176
 	// Casefold. In particular, skeletons are expected to mix scripts (which may
154
 	// Casefold. In particular, skeletons are expected to mix scripts (which may

+ 8
- 16
irc/strings_test.go View File

128
 	}
128
 	}
129
 }
129
 }
130
 
130
 
131
-func TestIsBoring(t *testing.T) {
132
-	assertBoring := func(str string, expected bool) {
133
-		if isBoring(str) != expected {
134
-			t.Errorf("expected [%s] to have boringness [%t], but got [%t]", str, expected, !expected)
135
-		}
136
-	}
137
-
138
-	assertBoring("warning", true)
139
-	assertBoring("phi|ip", false)
140
-	assertBoring("Νικηφόρος", false)
141
-}
142
-
143
 func TestIsIdent(t *testing.T) {
131
 func TestIsIdent(t *testing.T) {
144
 	assertIdent := func(str string, expected bool) {
132
 	assertIdent := func(str string, expected bool) {
145
 		if isIdent(str) != expected {
133
 		if isIdent(str) != expected {
165
 		return skel
153
 		return skel
166
 	}
154
 	}
167
 
155
 
168
-	if skeleton("warning") == skeleton("waming") {
169
-		t.Errorf("Oragono shouldn't consider rn confusable with m")
156
+	if skeleton("warning") != skeleton("waming") {
157
+		t.Errorf("i give up, Oragono should consider rn confusable with m")
170
 	}
158
 	}
171
 
159
 
172
 	if skeleton("Phi|ip") != "philip" {
160
 	if skeleton("Phi|ip") != "philip" {
173
 		t.Errorf("but we still consider pipe confusable with l")
161
 		t.Errorf("but we still consider pipe confusable with l")
174
 	}
162
 	}
175
 
163
 
176
-	if skeleton("ｓｍｔ") != "smt" {
164
+	if skeleton("ｓｍｔ") != skeleton("smt") {
177
 		t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
165
 		t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
178
 	}
166
 	}
179
 
167
 
181
 		t.Errorf("after skeletonizing, we should casefold")
169
 		t.Errorf("after skeletonizing, we should casefold")
182
 	}
170
 	}
183
 
171
 
184
-	if skeleton("smt") != "smt" {
172
+	if skeleton("smt") != skeleton("smt") {
185
 		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
173
 		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
186
 	}
174
 	}
187
 
175
 
189
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
177
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
190
 	}
178
 	}
191
 
179
 
180
+	if skeleton("еmily") != skeleton("emily") {
181
+		t.Errorf("we must protect against cyrillic homoglyph attacks")
182
+	}
183
+
192
 	if skeleton("РОТАТО") != "potato" {
184
 	if skeleton("РОТАТО") != "potato" {
193
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
185
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
194
 	}
186
 	}

Loading…
Cancel
Save