You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

strings.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. // Copyright (c) 2012-2014 Jeremy Latt
  2. // Copyright (c) 2014-2015 Edmund Huber
  3. // Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net>
  4. // released under the MIT license
  5. package irc
  6. import (
  7. "fmt"
  8. "regexp"
  9. "strings"
  10. "github.com/ergochat/confusables"
  11. "golang.org/x/text/cases"
  12. "golang.org/x/text/secure/precis"
  13. "golang.org/x/text/unicode/norm"
  14. "golang.org/x/text/width"
  15. "github.com/ergochat/ergo/irc/utils"
  16. )
  const (
  	// precisUTF8MappingToken is the fixed token "rfc8265".
  	// NOTE(review): its consumer is outside this chunk; presumably it names the
  	// PRECIS UTF-8 mapping that is advertised to clients — confirm at the use site.
  	precisUTF8MappingToken = "rfc8265"

  	// protocolBreakingNameCharacters is the set of characters that can never
  	// appear in a nick or account name, because each one already carries a
  	// meaning on the wire:
  	//
  	//	' ' cannot be used at all
  	//	',' is a separator
  	//	'*' and '?' are mask-matching wildcards
  	//	'.' marks a server name
  	//	'!' separates nickname from username
  	//	'@' separates username from hostname
  	//	':' introduces a trailing parameter
  	protocolBreakingNameCharacters = " ,*?.!@:"

  	// disfavoredNameCharacters were found to be problematic (issue #1436).
  	// They are rejected for newly created nicks and account names, while
  	// names that were registered earlier remain allowed.
  	disfavoredNameCharacters = `<>'";#`
  )
  // permissiveCharsRegex revives the old ergonomadic nickname pattern. It accepts
  // any (possibly empty) string made up solely of Unicode letters, numbers,
  // punctuation and symbols, and is what permissive mode uses for validation.
  var permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
  // Casemapping selects the casefolding / validation scheme applied to names.
  type Casemapping uint

  const (
  	// CasemappingPRECIS ("precis") is the default and the zero value.
  	//   - casefolding/validation: PRECIS plus ircd restrictions (e.g. no '*')
  	//   - confusables detection: standard skeleton algorithm
  	CasemappingPRECIS Casemapping = iota

  	// CasemappingASCII ("ascii") is the traditional ircd behavior.
  	//   - casefolding/validation: pure ASCII only, ircd restrictions, ASCII lowercasing
  	//   - confusables detection: none
  	CasemappingASCII

  	// CasemappingPermissive ("permissive") is an insecure mode.
  	//   - casefolding/validation: arbitrary unicode that follows the ircd
  	//     restrictions, with unicode casefolding
  	//   - confusables detection: standard skeleton algorithm, which may be
  	//     ineffective over the larger set of permitted identifiers
  	CasemappingPermissive

  	// CasemappingRFC1459 is the legacy "rfc1459" mapping described at
  	// https://modern.ircdocs.horse/#casemapping-parameter
  	CasemappingRFC1459

  	// CasemappingRFC1459Strict is the legacy "rfc1459-strict" mapping described at
  	// https://modern.ircdocs.horse/#casemapping-parameter
  	CasemappingRFC1459Strict
  )
  58. // XXX this is a global variable without explicit synchronization.
  59. // it gets set during the initial Server.applyConfig and cannot be changed by rehash:
  60. // this happens-before all IRC connections and all casefolding operations.
  61. var globalCasemappingSetting Casemapping = CasemappingPRECIS
  // globalUtf8EnforcementSetting toggles UTF-8 validation of incoming messages.
  //
  // XXX like the casemapping setting, this is an unsynchronized global.
  //   - off: traditional IRC behavior; any valid RFC1459 octets are relayed, and
  //     invalid UTF-8 messages are silently dropped for websocket clients only
  //   - on: invalid UTF-8 input gets a FAIL reply
  var globalUtf8EnforcementSetting = false
  67. // Each pass of PRECIS casefolding is a composition of idempotent operations,
  68. // but not idempotent itself. Therefore, the spec says "do it four times and hope
  69. // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
  70. // which provides this functionality, but unfortunately it's not exposed publicly.
  71. func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
  72. str = oldStr
  73. // follow the stabilizing rules laid out here:
  74. // https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
  75. for i := 0; i < 4; i++ {
  76. str, err = profile.CompareKey(str)
  77. if err != nil {
  78. return "", err
  79. }
  80. if oldStr == str {
  81. break
  82. }
  83. oldStr = str
  84. }
  85. if oldStr != str {
  86. return "", errCouldNotStabilize
  87. }
  88. return str, nil
  89. }
  90. // Casefold returns a casefolded string, without doing any name or channel character checks.
  91. func Casefold(str string) (string, error) {
  92. return casefoldWithSetting(str, globalCasemappingSetting)
  93. }
  94. func casefoldWithSetting(str string, setting Casemapping) (string, error) {
  95. switch setting {
  96. default:
  97. return iterateFolding(precis.UsernameCaseMapped, str)
  98. case CasemappingASCII:
  99. return foldASCII(str)
  100. case CasemappingPermissive:
  101. return foldPermissive(str)
  102. case CasemappingRFC1459:
  103. return foldRFC1459(str, false)
  104. case CasemappingRFC1459Strict:
  105. return foldRFC1459(str, true)
  106. }
  107. }
  108. // CasefoldChannel returns a casefolded version of a channel name.
  109. func CasefoldChannel(name string) (string, error) {
  110. if len(name) == 0 {
  111. return "", errStringIsEmpty
  112. }
  113. // don't casefold the preceding #'s
  114. var start int
  115. for start = 0; start < len(name) && name[start] == '#'; start += 1 {
  116. }
  117. if start == 0 {
  118. // no preceding #'s
  119. return "", errInvalidCharacter
  120. }
  121. lowered, err := Casefold(name[start:])
  122. if err != nil {
  123. return "", err
  124. }
  125. // space can't be used
  126. // , is used as a separator
  127. // * is used in mask matching
  128. // ? is used in mask matching
  129. if strings.ContainsAny(lowered, " ,*?") {
  130. return "", errInvalidCharacter
  131. }
  132. return name[:start] + lowered, err
  133. }
  134. // CasefoldName returns a casefolded version of a nick/user name.
  135. func CasefoldName(name string) (string, error) {
  136. lowered, err := Casefold(name)
  137. if err != nil {
  138. return "", err
  139. } else if len(lowered) == 0 {
  140. return "", errStringIsEmpty
  141. }
  142. // # is a channel prefix
  143. // ~&@%+ are channel membership prefixes
  144. // - I feel like disallowing
  145. if strings.ContainsAny(lowered, protocolBreakingNameCharacters) || strings.ContainsAny(string(lowered[0]), "#~&@%+-") {
  146. return "", errInvalidCharacter
  147. }
  148. return lowered, err
  149. }
  150. // CasefoldTarget returns a casefolded version of an IRC target, i.e.
  151. // it determines whether the target is a channel name or nickname and
  152. // applies the appropriate casefolding rules.
  153. func CasefoldTarget(name string) (string, error) {
  154. if strings.HasPrefix(name, "#") {
  155. return CasefoldChannel(name)
  156. } else {
  157. return CasefoldName(name)
  158. }
  159. }
  // isIdent reports whether name is a valid ident, using a mix of the Insp and
  // Chary ident restrictions: it must be non-empty, begin with an ASCII letter
  // or digit, and otherwise consist only of ASCII letters, digits, and the
  // characters [ \ ] ^ _ { | } - . `
  func isIdent(name string) bool {
  	if name == "" {
  		return false
  	}
  	for pos := 0; pos < len(name); pos++ {
  		switch c := name[pos]; {
  		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
  			// alphanumerics are fine in any position
  		case pos == 0:
  			// the first character must be alphanumeric
  			return false
  		case strings.IndexByte("[\\]^_{|}-.`", c) >= 0:
  			// permitted punctuation after the first character
  		default:
  			return false
  		}
  	}
  	return true
  }
  183. // Skeleton produces a canonicalized identifier that tries to catch
  184. // homoglyphic / confusable identifiers. It's a tweaked version of the TR39
  185. // skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
  186. // because casefolding first would lose some information about visual confusability.
  187. // This has the weird consequence that the skeleton is not a function of the
  188. // casefolded identifier --- therefore it must always be computed
  189. // from the original (unfolded) identifier and stored/tracked separately from the
  190. // casefolded identifier.
  191. func Skeleton(name string) (string, error) {
  192. switch globalCasemappingSetting {
  193. default:
  194. return realSkeleton(name)
  195. case CasemappingASCII, CasemappingRFC1459, CasemappingRFC1459Strict:
  196. // identity function is fine because we independently case-normalize in Casefold
  197. return name, nil
  198. }
  199. }
  200. func realSkeleton(name string) (string, error) {
  201. // XXX the confusables table includes some, but not all, fullwidth->standard
  202. // mappings for latin characters. do a pass of explicit width folding,
  203. // same as PRECIS:
  204. name = width.Fold.String(name)
  205. name = confusables.SkeletonTweaked(name)
  206. // internationalized lowercasing for skeletons; this is much more lenient than
  207. // Casefold. In particular, skeletons are expected to mix scripts (which may
  208. // violate the bidi rule). We also don't care if they contain runes
  209. // that are disallowed by PRECIS, because every identifier must independently
  210. // pass PRECIS --- we are just further canonicalizing the skeleton.
  211. return cases.Fold().String(name), nil
  212. }
  213. // maps a nickmask fragment to an expanded, casefolded wildcard:
  214. // Shivaram@good-fortune -> *!shivaram@good-fortune
  215. // EDMUND -> edmund!*@*
  216. func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
  217. userhost = strings.TrimSpace(userhost)
  218. var nick, user, host string
  219. bangIndex := strings.IndexByte(userhost, '!')
  220. strudelIndex := strings.IndexByte(userhost, '@')
  221. if bangIndex != -1 && bangIndex < strudelIndex {
  222. nick = userhost[:bangIndex]
  223. user = userhost[bangIndex+1 : strudelIndex]
  224. host = userhost[strudelIndex+1:]
  225. } else if bangIndex != -1 && strudelIndex == -1 {
  226. nick = userhost[:bangIndex]
  227. user = userhost[bangIndex+1:]
  228. } else if bangIndex != -1 && strudelIndex < bangIndex {
  229. // @ before !, fail
  230. return "", errNicknameInvalid
  231. } else if bangIndex == -1 && strudelIndex != -1 {
  232. user = userhost[:strudelIndex]
  233. host = userhost[strudelIndex+1:]
  234. } else if bangIndex == -1 && strudelIndex == -1 {
  235. nick = userhost
  236. } else {
  237. // shouldn't be possible
  238. return "", errInvalidParams
  239. }
  240. if nick == "" {
  241. nick = "*"
  242. }
  243. if nick != "*" {
  244. // XXX wildcards are not accepted with most unicode nicks,
  245. // because the * character breaks casefolding
  246. nick, err = Casefold(nick)
  247. if err != nil {
  248. return "", err
  249. }
  250. }
  251. if user == "" {
  252. user = "*"
  253. }
  254. if user != "*" {
  255. user = strings.ToLower(user)
  256. }
  257. if host == "" {
  258. host = "*"
  259. }
  260. if host != "*" {
  261. host = strings.ToLower(host)
  262. }
  263. expanded = fmt.Sprintf("%s!%s@%s", nick, user, host)
  264. if utils.SafeErrorParam(expanded) != expanded {
  265. err = errInvalidCharacter
  266. }
  267. return
  268. }
  269. func foldASCII(str string) (result string, err error) {
  270. if !IsPrintableASCII(str) {
  271. return "", errInvalidCharacter
  272. }
  273. return strings.ToLower(str), nil
  274. }
  // rfc1459Replacer performs the extra "lowercasing" of the rfc1459 casemapping:
  // [ ] \ ~ become { } | ^ respectively.
  var rfc1459Replacer = strings.NewReplacer("[", "{", "]", "}", "\\", "|", "~", "^")

  // rfc1459StrictReplacer is the rfc1459-strict variant: the same as
  // rfc1459Replacer, except that ~ is left alone.
  var rfc1459StrictReplacer = strings.NewReplacer("[", "{", "]", "}", "\\", "|")
  279. func foldRFC1459(str string, strict bool) (result string, err error) {
  280. asciiFold, err := foldASCII(str)
  281. if err != nil {
  282. return "", err
  283. }
  284. replacer := rfc1459Replacer
  285. if strict {
  286. replacer = rfc1459StrictReplacer
  287. }
  288. return replacer.Replace(asciiFold), nil
  289. }
  // IsPrintableASCII reports whether every byte of str lies in the printable
  // ASCII range, from ' ' through '~' inclusive. The empty string qualifies.
  //
  // Space is accepted here because it is technically printable; it gets
  // disallowed later by CasefoldName / CasefoldChannel.
  func IsPrintableASCII(str string) bool {
  	for _, b := range []byte(str) {
  		if !(' ' <= b && b <= '~') {
  			return false
  		}
  	}
  	return true
  }
  301. func foldPermissive(str string) (result string, err error) {
  302. if !permissiveCharsRegex.MatchString(str) {
  303. return "", errInvalidCharacter
  304. }
  305. // YOLO
  306. str = norm.NFD.String(str)
  307. str = cases.Fold().String(str)
  308. str = norm.NFD.String(str)
  309. return str, nil
  310. }
  // NUHToNick reduces a nick!user@host string to its nick portion, i.e.
  // everything before the first '!': `alice!~u@host` becomes `alice`.
  // A string without '!' is returned unchanged.
  func NUHToNick(nuh string) (nick string) {
  	bang := strings.IndexByte(nuh, '!')
  	if bang < 0 {
  		return nuh
  	}
  	return nuh[:bang]
  }
  317. }