You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tweaks.go 2.0KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. package confusables
  2. // these are overrides for the standard confusables table:
  3. // a mapping to "" means "don't map", a mapping to a replacement means
  4. // "replace with this", no entry means "defer to the standard table"
  5. var tweaksMap = map[rune]string{
  6. // ASCII-to-ASCII mapping that we are removing:
  7. 0x6d: "", // m -> rn
  8. // these characters are confusable with m, hence the official table
  9. // maps them to rn (`grep "LATIN SMALL LETTER R, LATIN SMALL LETTER N" confusables.txt`)
  10. 0x118E3: "m", // 118E3 ; 0072 006E ; MA # ( 𑣣 → rn ) WARANG CITI DIGIT THREE → LATIN SMALL LETTER R, LATIN SMALL LETTER N
  11. 0x11700: "m", // 11700 ; 0072 006E ; MA # ( 𑜀 → rn ) AHOM LETTER KA → LATIN SMALL LETTER R, LATIN SMALL LETTER N
  12. // the table thinks this is confusable with m̦ but I think it's confusable with m:
  13. 0x0271: "m", // 0271 ; 0072 006E 0326 ; MA # ( ɱ → rn̦ ) LATIN SMALL LETTER M WITH HOOK → LATIN SMALL LETTER R, LATIN SMALL LETTER N, COMBINING COMMA BELOW # →m̡→
  14. /*
  15. // ASCII-to-ASCII mapping that we are removing:
  16. 0x49: "", // I -> l
  17. // these characters are confusable with I, hence the official table
  18. // maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
  19. 0x0399: "I", // 0399 ; 006C ; MA # ( Ι → l ) GREEK CAPITAL LETTER IOTA → LATIN SMALL LETTER L #
  20. 0x0406: "I", // 0406 ; 006C ; MA # ( І → l ) CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I → LATIN SMALL LETTER L #
  21. 0x04C0: "I", // 04C0 ; 006C ; MA # ( Ӏ → l ) CYRILLIC LETTER PALOCHKA → LATIN SMALL LETTER L #
  22. // ASCII-to-ASCII mapping that we are removing:
  23. 0x31: "", // 1 -> l
  24. // these characters are confusable with 1, hence the official table
  25. // maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
  26. // [nothing yet]
  27. // ASCII-to-ASCII mapping that we are removing:
  28. 0x30: "", // 0 -> O
  29. // these characters are confusable with 0, hence the official table
  30. // maps them to O (`grep "LATIN CAPITAL LETTER O\>" confusables.txt`)
  31. // [nothing yet]
  32. */
  33. }