ソースを参照

Merge pull request #340 from slingamn/skeletonfolding.2

more lenient casefolding for skeletons
tags/v1.0.0-rc1
Daniel Oaks 5年前
コミット
9f25a42c3d
コミッターのメールアドレスに関連付けられたアカウントが存在しません
2個のファイルの変更17行の追加10行の削除
  1. 15
    10
      irc/strings.go
  2. 2
    0
      irc/strings_test.go

+ 15
- 10
irc/strings.go ファイルの表示

@@ -9,8 +9,10 @@ import (
9 9
 	"strings"
10 10
 
11 11
 	"github.com/oragono/confusables"
12
+	"golang.org/x/text/cases"
13
+	"golang.org/x/text/language"
12 14
 	"golang.org/x/text/secure/precis"
13
-	"golang.org/x/text/unicode/norm"
15
+	"golang.org/x/text/width"
14 16
 )
15 17
 
16 18
 const (
@@ -126,14 +128,6 @@ func isBoring(name string) bool {
126 128
 	return true
127 129
 }
128 130
 
129
-var skeletonCasefolder = precis.NewIdentifier(precis.FoldWidth, precis.LowerCase(), precis.Norm(norm.NFC))
130
-
131
-// similar to Casefold, but exempts the bidi rule, because skeletons may
132
-// mix scripts strangely
133
-func casefoldSkeleton(str string) (string, error) {
134
-	return iterateFolding(skeletonCasefolder, str)
135
-}
136
-
137 131
 // Skeleton produces a canonicalized identifier that tries to catch
138 132
 // homoglyphic / confusable identifiers. It's a tweaked version of the TR39
139 133
 // skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
@@ -146,5 +140,16 @@ func Skeleton(name string) (string, error) {
146 140
 	if !isBoring(name) {
147 141
 		name = confusables.Skeleton(name)
148 142
 	}
149
-	return casefoldSkeleton(name)
143
+
144
+	// XXX the confusables table includes some, but not all, fullwidth->standard
145
+	// mappings for Latin characters. Do a pass of explicit width folding,
146
+	// same as PRECIS:
147
+	name = width.Fold.String(name)
148
+
149
+	// internationalized lowercasing for skeletons; this is much more lenient than
150
+	// Casefold. In particular, skeletons are expected to mix scripts (which may
151
+	// violate the bidi rule). We also don't care if they contain runes
152
+	// that are disallowed by PRECIS, because every identifier must independently
153
+	// pass PRECIS --- we are just further canonicalizing the skeleton.
154
+	return cases.Lower(language.Und).String(name), nil
150 155
 }

+ 2
- 0
irc/strings_test.go ファイルの表示

@@ -173,4 +173,6 @@ func TestSkeleton(t *testing.T) {
173 173
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
174 174
 	}
175 175
 
176
+	// should not raise an error:
177
+	skeleton("けらんぐ")
176 178
 }

読み込み中…
キャンセル
保存