You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

strings.go 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. // Copyright (c) 2012-2014 Jeremy Latt
  2. // Copyright (c) 2014-2015 Edmund Huber
  3. // Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net>
  4. // released under the MIT license
  5. package irc
  6. import (
  7. "fmt"
  8. "strings"
  9. "github.com/oragono/confusables"
  10. "golang.org/x/text/cases"
  11. "golang.org/x/text/language"
  12. "golang.org/x/text/secure/precis"
  13. "golang.org/x/text/width"
  14. )
  15. const (
  16. casemappingName = "rfc8265"
  17. )
  18. // Each pass of PRECIS casefolding is a composition of idempotent operations,
  19. // but not idempotent itself. Therefore, the spec says "do it four times and hope
  20. // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
  21. // which provides this functionality, but unfortunately it's not exposed publicly.
  22. func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
  23. str = oldStr
  24. // follow the stabilizing rules laid out here:
  25. // https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
  26. for i := 0; i < 4; i++ {
  27. str, err = profile.CompareKey(str)
  28. if err != nil {
  29. return "", err
  30. }
  31. if oldStr == str {
  32. break
  33. }
  34. oldStr = str
  35. }
  36. if oldStr != str {
  37. return "", errCouldNotStabilize
  38. }
  39. return str, nil
  40. }
  41. // Casefold returns a casefolded string, without doing any name or channel character checks.
  42. func Casefold(str string) (string, error) {
  43. return iterateFolding(precis.UsernameCaseMapped, str)
  44. }
  45. // CasefoldChannel returns a casefolded version of a channel name.
  46. func CasefoldChannel(name string) (string, error) {
  47. if len(name) == 0 {
  48. return "", errStringIsEmpty
  49. }
  50. // don't casefold the preceding #'s
  51. var start int
  52. for start = 0; start < len(name) && name[start] == '#'; start += 1 {
  53. }
  54. if start == 0 {
  55. // no preceding #'s
  56. return "", errInvalidCharacter
  57. }
  58. lowered, err := Casefold(name[start:])
  59. if err != nil {
  60. return "", err
  61. }
  62. // space can't be used
  63. // , is used as a separator
  64. // * is used in mask matching
  65. // ? is used in mask matching
  66. if strings.ContainsAny(lowered, " ,*?") {
  67. return "", errInvalidCharacter
  68. }
  69. return name[:start] + lowered, err
  70. }
  71. // CasefoldName returns a casefolded version of a nick/user name.
  72. func CasefoldName(name string) (string, error) {
  73. lowered, err := Casefold(name)
  74. if err != nil {
  75. return "", err
  76. } else if len(lowered) == 0 {
  77. return "", errStringIsEmpty
  78. }
  79. // space can't be used
  80. // , is used as a separator
  81. // * is used in mask matching
  82. // ? is used in mask matching
  83. // . denotes a server name
  84. // ! separates nickname from username
  85. // @ separates username from hostname
  86. // : means trailing
  87. // # is a channel prefix
  88. // ~&@%+ are channel membership prefixes
  89. // - I feel like disallowing
  90. if strings.ContainsAny(lowered, " ,*?.!@:") || strings.ContainsAny(string(lowered[0]), "#~&@%+-") {
  91. return "", errInvalidCharacter
  92. }
  93. return lowered, err
  94. }
  95. // returns true if the given name is a valid ident, using a mix of Insp and
  96. // Chary's ident restrictions.
  97. func isIdent(name string) bool {
  98. if len(name) < 1 {
  99. return false
  100. }
  101. for i := 0; i < len(name); i++ {
  102. chr := name[i]
  103. if (chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || (chr >= '0' && chr <= '9') {
  104. continue // alphanumerics
  105. }
  106. if i == 0 {
  107. return false // first char must be alnum
  108. }
  109. switch chr {
  110. case '[', '\\', ']', '^', '_', '{', '|', '}', '-', '.', '`':
  111. continue // allowed chars
  112. default:
  113. return false // disallowed chars
  114. }
  115. }
  116. return true
  117. }
  118. // Skeleton produces a canonicalized identifier that tries to catch
  119. // homoglyphic / confusable identifiers. It's a tweaked version of the TR39
  120. // skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
  121. // because casefolding first would lose some information about visual confusability.
  122. // This has the weird consequence that the skeleton is not a function of the
  123. // casefolded identifier --- therefore it must always be computed
  124. // from the original (unfolded) identifier and stored/tracked separately from the
  125. // casefolded identifier.
  126. func Skeleton(name string) (string, error) {
  127. // XXX the confusables table includes some, but not all, fullwidth->standard
  128. // mappings for latin characters. do a pass of explicit width folding,
  129. // same as PRECIS:
  130. name = width.Fold.String(name)
  131. name = confusables.SkeletonTweaked(name)
  132. // internationalized lowercasing for skeletons; this is much more lenient than
  133. // Casefold. In particular, skeletons are expected to mix scripts (which may
  134. // violate the bidi rule). We also don't care if they contain runes
  135. // that are disallowed by PRECIS, because every identifier must independently
  136. // pass PRECIS --- we are just further canonicalizing the skeleton.
  137. return cases.Lower(language.Und).String(name), nil
  138. }
  139. // maps a nickmask fragment to an expanded, casefolded wildcard:
  140. // Shivaram@good-fortune -> *!shivaram@good-fortune
  141. // EDMUND -> edmund!*@*
  142. func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
  143. var nick, user, host string
  144. bangIndex := strings.IndexByte(userhost, '!')
  145. strudelIndex := strings.IndexByte(userhost, '@')
  146. if bangIndex != -1 && bangIndex < strudelIndex {
  147. nick = userhost[:bangIndex]
  148. user = userhost[bangIndex+1 : strudelIndex]
  149. host = userhost[strudelIndex+1:]
  150. } else if bangIndex != -1 && strudelIndex == -1 {
  151. nick = userhost[:bangIndex]
  152. user = userhost[bangIndex+1:]
  153. } else if bangIndex != -1 && strudelIndex < bangIndex {
  154. // @ before !, fail
  155. return "", errNicknameInvalid
  156. } else if bangIndex == -1 && strudelIndex != -1 {
  157. user = userhost[:strudelIndex]
  158. host = userhost[strudelIndex+1:]
  159. } else if bangIndex == -1 && strudelIndex == -1 {
  160. nick = userhost
  161. } else {
  162. // shouldn't be possible
  163. return "", errInvalidParams
  164. }
  165. if nick == "" {
  166. nick = "*"
  167. }
  168. if nick != "*" {
  169. // XXX wildcards are not accepted with most unicode nicks,
  170. // because the * character breaks casefolding
  171. nick, err = Casefold(nick)
  172. if err != nil {
  173. return "", err
  174. }
  175. }
  176. if user == "" {
  177. user = "*"
  178. }
  179. if user != "*" {
  180. user = strings.ToLower(user)
  181. }
  182. if host == "" {
  183. host = "*"
  184. }
  185. if host != "*" {
  186. host = strings.ToLower(host)
  187. }
  188. return fmt.Sprintf("%s!%s@%s", nick, user, host), nil
  189. }