You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

strings.go 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. // Copyright (c) 2012-2014 Jeremy Latt
  2. // Copyright (c) 2014-2015 Edmund Huber
  3. // Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net>
  4. // released under the MIT license
  5. package irc
  6. import (
  7. "fmt"
  8. "regexp"
  9. "strings"
  10. "github.com/oragono/confusables"
  11. "golang.org/x/text/cases"
  12. "golang.org/x/text/secure/precis"
  13. "golang.org/x/text/unicode/norm"
  14. "golang.org/x/text/width"
  15. )
  // precisUTF8MappingToken is the fixed token "rfc8265".
  // NOTE(review): presumably the name advertised for the PRECIS-based UTF-8
  // casemapping; its consumers are not visible in this file — confirm.
  const precisUTF8MappingToken = "rfc8265"
  // permissiveCharsRegex revives the old ergonomadic nickname regex. It matches
  // strings (including the empty string) that consist solely of Unicode letters,
  // numbers, punctuation, and symbols; foldPermissive uses it to validate input
  // in permissive mode.
  var permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
  // Casemapping selects how identifiers are casefolded/validated and how
  // confusable identifiers are detected.
  type Casemapping uint

  const (
  	// CasemappingPRECIS ("precis") is the default and the zero value.
  	//   - casefolding/validation: PRECIS plus ircd restrictions (like no *)
  	//   - confusables detection: standard skeleton algorithm
  	CasemappingPRECIS Casemapping = iota

  	// CasemappingASCII ("ascii") is the traditional ircd behavior.
  	//   - casefolding/validation: input must be pure ASCII and follow ircd
  	//     restrictions; ASCII lowercasing
  	//   - confusables detection: none
  	CasemappingASCII

  	// CasemappingPermissive ("permissive") is an insecure mode.
  	//   - casefolding/validation: arbitrary unicodes that follow ircd
  	//     restrictions; unicode casefolding
  	//   - confusables detection: standard skeleton algorithm (which may be
  	//     ineffective over the larger set of permitted identifiers)
  	CasemappingPermissive
  )
  40. // XXX this is a global variable without explicit synchronization.
  41. // it gets set during the initial Server.applyConfig and cannot be changed by rehash:
  42. // this happens-before all IRC connections and all casefolding operations.
  43. var globalCasemappingSetting Casemapping = CasemappingPRECIS
  44. // Each pass of PRECIS casefolding is a composition of idempotent operations,
  45. // but not idempotent itself. Therefore, the spec says "do it four times and hope
  46. // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
  47. // which provides this functionality, but unfortunately it's not exposed publicly.
  48. func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
  49. str = oldStr
  50. // follow the stabilizing rules laid out here:
  51. // https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
  52. for i := 0; i < 4; i++ {
  53. str, err = profile.CompareKey(str)
  54. if err != nil {
  55. return "", err
  56. }
  57. if oldStr == str {
  58. break
  59. }
  60. oldStr = str
  61. }
  62. if oldStr != str {
  63. return "", errCouldNotStabilize
  64. }
  65. return str, nil
  66. }
  67. // Casefold returns a casefolded string, without doing any name or channel character checks.
  68. func Casefold(str string) (string, error) {
  69. switch globalCasemappingSetting {
  70. default:
  71. return iterateFolding(precis.UsernameCaseMapped, str)
  72. case CasemappingASCII:
  73. return foldASCII(str)
  74. case CasemappingPermissive:
  75. return foldPermissive(str)
  76. }
  77. }
  78. // CasefoldChannel returns a casefolded version of a channel name.
  79. func CasefoldChannel(name string) (string, error) {
  80. if len(name) == 0 {
  81. return "", errStringIsEmpty
  82. }
  83. // don't casefold the preceding #'s
  84. var start int
  85. for start = 0; start < len(name) && name[start] == '#'; start += 1 {
  86. }
  87. if start == 0 {
  88. // no preceding #'s
  89. return "", errInvalidCharacter
  90. }
  91. lowered, err := Casefold(name[start:])
  92. if err != nil {
  93. return "", err
  94. }
  95. // space can't be used
  96. // , is used as a separator
  97. // * is used in mask matching
  98. // ? is used in mask matching
  99. if strings.ContainsAny(lowered, " ,*?") {
  100. return "", errInvalidCharacter
  101. }
  102. return name[:start] + lowered, err
  103. }
  104. // CasefoldName returns a casefolded version of a nick/user name.
  105. func CasefoldName(name string) (string, error) {
  106. lowered, err := Casefold(name)
  107. if err != nil {
  108. return "", err
  109. } else if len(lowered) == 0 {
  110. return "", errStringIsEmpty
  111. }
  112. // space can't be used
  113. // , is used as a separator
  114. // * is used in mask matching
  115. // ? is used in mask matching
  116. // . denotes a server name
  117. // ! separates nickname from username
  118. // @ separates username from hostname
  119. // : means trailing
  120. // # is a channel prefix
  121. // ~&@%+ are channel membership prefixes
  122. // - I feel like disallowing
  123. if strings.ContainsAny(lowered, " ,*?.!@:") || strings.ContainsAny(string(lowered[0]), "#~&@%+-") {
  124. return "", errInvalidCharacter
  125. }
  126. return lowered, err
  127. }
  // isIdent reports whether name is a valid ident, using a mix of Insp and
  // Chary's ident restrictions: it must be non-empty, its first byte must be an
  // ASCII letter or digit, and every later byte must be an ASCII letter, digit,
  // or one of [ \ ] ^ _ { | } - . `
  func isIdent(name string) bool {
  	if name == "" {
  		return false
  	}

  	for pos, c := range []byte(name) {
  		switch {
  		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
  			// alphanumerics are fine at any position
  		case pos == 0:
  			// first char must be alnum
  			return false
  		default:
  			switch c {
  			case '[', '\\', ']', '^', '_', '{', '|', '}', '-', '.', '`':
  				// allowed chars
  			default:
  				// disallowed chars
  				return false
  			}
  		}
  	}

  	return true
  }
  151. // Skeleton produces a canonicalized identifier that tries to catch
  152. // homoglyphic / confusable identifiers. It's a tweaked version of the TR39
  153. // skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
  154. // because casefolding first would lose some information about visual confusability.
  155. // This has the weird consequence that the skeleton is not a function of the
  156. // casefolded identifier --- therefore it must always be computed
  157. // from the original (unfolded) identifier and stored/tracked separately from the
  158. // casefolded identifier.
  159. func Skeleton(name string) (string, error) {
  160. switch globalCasemappingSetting {
  161. default:
  162. return realSkeleton(name)
  163. case CasemappingASCII:
  164. // identity function is fine because we independently case-normalize in Casefold
  165. return name, nil
  166. }
  167. }
  168. func realSkeleton(name string) (string, error) {
  169. // XXX the confusables table includes some, but not all, fullwidth->standard
  170. // mappings for latin characters. do a pass of explicit width folding,
  171. // same as PRECIS:
  172. name = width.Fold.String(name)
  173. name = confusables.SkeletonTweaked(name)
  174. // internationalized lowercasing for skeletons; this is much more lenient than
  175. // Casefold. In particular, skeletons are expected to mix scripts (which may
  176. // violate the bidi rule). We also don't care if they contain runes
  177. // that are disallowed by PRECIS, because every identifier must independently
  178. // pass PRECIS --- we are just further canonicalizing the skeleton.
  179. return cases.Fold().String(name), nil
  180. }
  181. // maps a nickmask fragment to an expanded, casefolded wildcard:
  182. // Shivaram@good-fortune -> *!shivaram@good-fortune
  183. // EDMUND -> edmund!*@*
  184. func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
  185. var nick, user, host string
  186. bangIndex := strings.IndexByte(userhost, '!')
  187. strudelIndex := strings.IndexByte(userhost, '@')
  188. if bangIndex != -1 && bangIndex < strudelIndex {
  189. nick = userhost[:bangIndex]
  190. user = userhost[bangIndex+1 : strudelIndex]
  191. host = userhost[strudelIndex+1:]
  192. } else if bangIndex != -1 && strudelIndex == -1 {
  193. nick = userhost[:bangIndex]
  194. user = userhost[bangIndex+1:]
  195. } else if bangIndex != -1 && strudelIndex < bangIndex {
  196. // @ before !, fail
  197. return "", errNicknameInvalid
  198. } else if bangIndex == -1 && strudelIndex != -1 {
  199. user = userhost[:strudelIndex]
  200. host = userhost[strudelIndex+1:]
  201. } else if bangIndex == -1 && strudelIndex == -1 {
  202. nick = userhost
  203. } else {
  204. // shouldn't be possible
  205. return "", errInvalidParams
  206. }
  207. if nick == "" {
  208. nick = "*"
  209. }
  210. if nick != "*" {
  211. // XXX wildcards are not accepted with most unicode nicks,
  212. // because the * character breaks casefolding
  213. nick, err = Casefold(nick)
  214. if err != nil {
  215. return "", err
  216. }
  217. }
  218. if user == "" {
  219. user = "*"
  220. }
  221. if user != "*" {
  222. user = strings.ToLower(user)
  223. }
  224. if host == "" {
  225. host = "*"
  226. }
  227. if host != "*" {
  228. host = strings.ToLower(host)
  229. }
  230. return fmt.Sprintf("%s!%s@%s", nick, user, host), nil
  231. }
  232. func foldASCII(str string) (result string, err error) {
  233. if !IsPrintableASCII(str) {
  234. return "", errInvalidCharacter
  235. }
  236. return strings.ToLower(str), nil
  237. }
  // IsPrintableASCII reports whether every byte of str lies in the range
  // ' ' (0x20) through '~' (0x7E) inclusive. The empty string is accepted.
  func IsPrintableASCII(str string) bool {
  	for _, b := range []byte(str) {
  		// allow space here because it's technically printable;
  		// it will be disallowed later by CasefoldName/CasefoldChannel
  		if b < ' ' || '~' < b {
  			return false
  		}
  	}
  	return true
  }
  249. func foldPermissive(str string) (result string, err error) {
  250. if !permissiveCharsRegex.MatchString(str) {
  251. return "", errInvalidCharacter
  252. }
  253. // YOLO
  254. str = norm.NFD.String(str)
  255. str = cases.Fold().String(str)
  256. str = norm.NFD.String(str)
  257. return str, nil
  258. }