Browse Source

fix an edge case in skeletonization

The ASCII 'm' skeletonizes to 'rn' (but is exempted by the isBoring check),
but the fullwidth 'ｍ' (U+FF4D) does not skeletonize to anything. The root cause
of this is the (still unexplained) patchiness of the skeleton mapping
for fullwidth -> standard-width Latin characters; the fix is to perform
width mapping first, before either skeletonization or the isBoring check.
tags/v1.1.0
Shivaram Lingamneni 5 years ago
parent
commit
be4d098945
2 changed files with 8 additions and 4 deletions
  1. 4
    4
      irc/strings.go
  2. 4
    0
      irc/strings_test.go

+ 4
- 4
irc/strings.go View File

163
 // from the original (unfolded) identifier and stored/tracked separately from the
163
 // from the original (unfolded) identifier and stored/tracked separately from the
164
 // casefolded identifier.
164
 // casefolded identifier.
165
 func Skeleton(name string) (string, error) {
165
 func Skeleton(name string) (string, error) {
166
-	if !isBoring(name) {
167
-		name = confusables.Skeleton(name)
168
-	}
169
-
170
 	// XXX the confusables table includes some, but not all, fullwidth->standard
166
 	// XXX the confusables table includes some, but not all, fullwidth->standard
171
 	// mappings for latin characters. do a pass of explicit width folding,
167
 	// mappings for latin characters. do a pass of explicit width folding,
172
 	// same as PRECIS:
168
 	// same as PRECIS:
173
 	name = width.Fold.String(name)
169
 	name = width.Fold.String(name)
174
 
170
 
171
+	if !isBoring(name) {
172
+		name = confusables.Skeleton(name)
173
+	}
174
+
175
 	// internationalized lowercasing for skeletons; this is much more lenient than
175
 	// internationalized lowercasing for skeletons; this is much more lenient than
176
 	// Casefold. In particular, skeletons are expected to mix scripts (which may
176
 	// Casefold. In particular, skeletons are expected to mix scripts (which may
177
 	// violate the bidi rule). We also don't care if they contain runes
177
 	// violate the bidi rule). We also don't care if they contain runes

+ 4
- 0
irc/strings_test.go View File

181
 		t.Errorf("after skeletonizing, we should casefold")
181
 		t.Errorf("after skeletonizing, we should casefold")
182
 	}
182
 	}
183
 
183
 
184
+	if skeleton("smt") != "smt" {
185
+		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
186
+	}
187
+
184
 	if skeleton("еvan") != "evan" {
188
 	if skeleton("еvan") != "evan" {
185
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
189
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
186
 	}
190
 	}

Loading…
Cancel
Save