You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

unicode.go 1.2KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. // Copyright (c) 2021 Shivaram Lingamneni
  2. // Released under the MIT License
  3. package ircutils
  4. import (
  5. "strings"
  6. "unicode"
  7. "unicode/utf8"
  8. )
  9. // truncate a message, taking care not to make valid UTF8 into invalid UTF8
  10. func TruncateUTF8Safe(message string, byteLimit int) (result string) {
  11. if len(message) <= byteLimit {
  12. return message
  13. }
  14. message = message[:byteLimit]
  15. for i := 0; i < (utf8.UTFMax - 1); i++ {
  16. r, n := utf8.DecodeLastRuneInString(message)
  17. if r == utf8.RuneError && n <= 1 {
  18. message = message[:len(message)-1]
  19. } else {
  20. break
  21. }
  22. }
  23. return message
  24. }
  25. // Sanitizes human-readable text to make it safe for IRC;
  26. // assumes UTF-8 and uses the replacement character where
  27. // applicable.
  28. func SanitizeText(message string, byteLimit int) (result string) {
  29. var buf strings.Builder
  30. for _, r := range message {
  31. if r == '\x00' || r == '\r' {
  32. continue
  33. } else if r == '\n' {
  34. if buf.Len()+2 <= byteLimit {
  35. buf.WriteString(" ")
  36. continue
  37. } else {
  38. break
  39. }
  40. } else if unicode.IsSpace(r) {
  41. if buf.Len()+1 <= byteLimit {
  42. buf.WriteString(" ")
  43. } else {
  44. break
  45. }
  46. } else {
  47. rLen := utf8.RuneLen(r)
  48. if buf.Len()+rLen <= byteLimit {
  49. buf.WriteRune(r)
  50. } else {
  51. break
  52. }
  53. }
  54. }
  55. return buf.String()
  56. }