Browse Source

Merge pull request #564 from slingamn/confusables_again.1

more systematic fix for #562
tags/v1.1.0
Daniel Oaks 5 years ago
parent
commit
c65c7de736
No account linked to committer's email address
2 changed files with 9 additions and 39 deletions
  1. 1
    23
      irc/strings.go
  2. 8
    16
      irc/strings_test.go

+ 1
- 23
irc/strings.go View File

@@ -108,26 +108,6 @@ func CasefoldName(name string) (string, error) {
108 108
 	return lowered, err
109 109
 }
110 110
 
111
-// "boring" names are exempt from skeletonization.
112
-// this is because confusables.txt considers various pure ASCII alphanumeric
113
-// strings confusable: 0 and O, 1 and l, m and rn. IMO this causes more problems
114
-// than it solves.
115
-func isBoring(name string) bool {
116
-	for i := 0; i < len(name); i += 1 {
117
-		chr := name[i]
118
-		if (chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || (chr >= '0' && chr <= '9') {
119
-			continue // alphanumerics
120
-		}
121
-		switch chr {
122
-		case '$', '%', '^', '&', '(', ')', '{', '}', '[', ']', '<', '>', '=':
123
-			continue // benign printable ascii characters
124
-		default:
125
-			return false // potentially confusable ascii like | ' `, non-ascii
126
-		}
127
-	}
128
-	return true
129
-}
130
-
131 111
 // returns true if the given name is a valid ident, using a mix of Insp and
132 112
 // Chary's ident restrictions.
133 113
 func isIdent(name string) bool {
@@ -168,9 +148,7 @@ func Skeleton(name string) (string, error) {
168 148
 	// same as PRECIS:
169 149
 	name = width.Fold.String(name)
170 150
 
171
-	if !isBoring(name) {
172
-		name = confusables.Skeleton(name)
173
-	}
151
+	name = confusables.Skeleton(name)
174 152
 
175 153
 	// internationalized lowercasing for skeletons; this is much more lenient than
176 154
 	// Casefold. In particular, skeletons are expected to mix scripts (which may

+ 8
- 16
irc/strings_test.go View File

@@ -128,18 +128,6 @@ func TestCasefoldName(t *testing.T) {
128 128
 	}
129 129
 }
130 130
 
131
-func TestIsBoring(t *testing.T) {
132
-	assertBoring := func(str string, expected bool) {
133
-		if isBoring(str) != expected {
134
-			t.Errorf("expected [%s] to have boringness [%t], but got [%t]", str, expected, !expected)
135
-		}
136
-	}
137
-
138
-	assertBoring("warning", true)
139
-	assertBoring("phi|ip", false)
140
-	assertBoring("Νικηφόρος", false)
141
-}
142
-
143 131
 func TestIsIdent(t *testing.T) {
144 132
 	assertIdent := func(str string, expected bool) {
145 133
 		if isIdent(str) != expected {
@@ -165,15 +153,15 @@ func TestSkeleton(t *testing.T) {
165 153
 		return skel
166 154
 	}
167 155
 
168
-	if skeleton("warning") == skeleton("waming") {
169
-		t.Errorf("Oragono shouldn't consider rn confusable with m")
156
+	if skeleton("warning") != skeleton("waming") {
157
+		t.Errorf("i give up, Oragono should consider rn confusable with m")
170 158
 	}
171 159
 
172 160
 	if skeleton("Phi|ip") != "philip" {
173 161
 		t.Errorf("but we still consider pipe confusable with l")
174 162
 	}
175 163
 
176
-	if skeleton("ｓｍｔ") != "smt" {
164
+	if skeleton("ｓｍｔ") != skeleton("smt") {
177 165
 		t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
178 166
 	}
179 167
 
@@ -181,7 +169,7 @@ func TestSkeleton(t *testing.T) {
181 169
 		t.Errorf("after skeletonizing, we should casefold")
182 170
 	}
183 171
 
184
-	if skeleton("smｔ") != "smt" {
172
+	if skeleton("smｔ") != skeleton("smt") {
185 173
 		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
186 174
 	}
187 175
 
@@ -189,6 +177,10 @@ func TestSkeleton(t *testing.T) {
189 177
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
190 178
 	}
191 179
 
180
+	if skeleton("еmily") != skeleton("emily") {
181
+		t.Errorf("we must protect against cyrillic homoglyph attacks")
182
+	}
183
+
192 184
 	if skeleton("РОТАТО") != "potato" {
193 185
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
194 186
 	}

Loading…
Cancel
Save