|
@@ -9,8 +9,10 @@ import (
|
9
|
9
|
"strings"
|
10
|
10
|
|
11
|
11
|
"github.com/oragono/confusables"
|
|
12
|
+ "golang.org/x/text/cases"
|
|
13
|
+ "golang.org/x/text/language"
|
12
|
14
|
"golang.org/x/text/secure/precis"
|
13
|
|
- "golang.org/x/text/unicode/norm"
|
|
15
|
+ "golang.org/x/text/width"
|
14
|
16
|
)
|
15
|
17
|
|
16
|
18
|
const (
|
|
@@ -126,14 +128,6 @@ func isBoring(name string) bool {
|
126
|
128
|
return true
|
127
|
129
|
}
|
128
|
130
|
|
129
|
|
-var skeletonCasefolder = precis.NewIdentifier(precis.FoldWidth, precis.LowerCase(), precis.Norm(norm.NFC))
|
130
|
|
-
|
131
|
|
-// similar to Casefold, but exempts the bidi rule, because skeletons may
|
132
|
|
-// mix scripts strangely
|
133
|
|
-func casefoldSkeleton(str string) (string, error) {
|
134
|
|
- return iterateFolding(skeletonCasefolder, str)
|
135
|
|
-}
|
136
|
|
-
|
137
|
131
|
// Skeleton produces a canonicalized identifier that tries to catch
|
138
|
132
|
// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
|
139
|
133
|
// skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
|
|
@@ -146,5 +140,16 @@ func Skeleton(name string) (string, error) {
|
146
|
140
|
if !isBoring(name) {
|
147
|
141
|
name = confusables.Skeleton(name)
|
148
|
142
|
}
|
149
|
|
- return casefoldSkeleton(name)
|
|
143
|
+
|
|
144
|
+ // XXX the confusables table includes some, but not all, fullwidth->standard
|
|
145
|
+ // mappings for latin characters. do a pass of explicit width folding,
|
|
146
|
+ // same as PRECIS:
|
|
147
|
+ name = width.Fold.String(name)
|
|
148
|
+
|
|
149
|
+ // internationalized lowercasing for skeletons; this is much more lenient than
|
|
150
|
+ // Casefold. In particular, skeletons are expected to mix scripts (which may
|
|
151
|
+ // violate the bidi rule). We also don't care if they contain runes
|
|
152
|
+ // that are disallowed by PRECIS, because every identifier must independently
|
|
153
|
+ // pass PRECIS --- we are just further canonicalizing the skeleton.
|
|
154
|
+ return cases.Lower(language.Und).String(name), nil
|
150
|
155
|
}
|