userids.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. package gomatrix
  2. import (
  3. "bytes"
  4. "encoding/hex"
  5. "fmt"
  6. "strings"
  7. )
  8. const lowerhex = "0123456789abcdef"
  9. // encode the given byte using quoted-printable encoding (e.g "=2f")
  10. // and writes it to the buffer
  11. // See https://golang.org/src/mime/quotedprintable/writer.go
  12. func encode(buf *bytes.Buffer, b byte) {
  13. buf.WriteByte('=')
  14. buf.WriteByte(lowerhex[b>>4])
  15. buf.WriteByte(lowerhex[b&0x0f])
  16. }
  17. // escape the given alpha character and writes it to the buffer
  18. func escape(buf *bytes.Buffer, b byte) {
  19. buf.WriteByte('_')
  20. if b == '_' {
  21. buf.WriteByte('_') // another _
  22. } else {
  23. buf.WriteByte(b + 0x20) // ASCII shift A-Z to a-z
  24. }
  25. }
  26. func shouldEncode(b byte) bool {
  27. return b != '-' && b != '.' && b != '_' && !(b >= '0' && b <= '9') && !(b >= 'a' && b <= 'z') && !(b >= 'A' && b <= 'Z')
  28. }
  29. func shouldEscape(b byte) bool {
  30. return (b >= 'A' && b <= 'Z') || b == '_'
  31. }
  32. func isValidByte(b byte) bool {
  33. return isValidEscapedChar(b) || (b >= '0' && b <= '9') || b == '.' || b == '=' || b == '-'
  34. }
  35. func isValidEscapedChar(b byte) bool {
  36. return b == '_' || (b >= 'a' && b <= 'z')
  37. }
  38. // EncodeUserLocalpart encodes the given string into Matrix-compliant user ID localpart form.
  39. // See http://matrix.org/docs/spec/intro.html#mapping-from-other-character-sets
  40. //
  41. // This returns a string with only the characters "a-z0-9._=-". The uppercase range A-Z
  42. // are encoded using leading underscores ("_"). Characters outside the aforementioned ranges
  43. // (including literal underscores ("_") and equals ("=")) are encoded as UTF8 code points (NOT NCRs)
  44. // and converted to lower-case hex with a leading "=". For example:
  45. // Alph@Bet_50up => _alph=40_bet=5f50up
  46. func EncodeUserLocalpart(str string) string {
  47. strBytes := []byte(str)
  48. var outputBuffer bytes.Buffer
  49. for _, b := range strBytes {
  50. if shouldEncode(b) {
  51. encode(&outputBuffer, b)
  52. } else if shouldEscape(b) {
  53. escape(&outputBuffer, b)
  54. } else {
  55. outputBuffer.WriteByte(b)
  56. }
  57. }
  58. return outputBuffer.String()
  59. }
  60. // DecodeUserLocalpart decodes the given string back into the original input string.
  61. // Returns an error if the given string is not a valid user ID localpart encoding.
  62. // See http://matrix.org/docs/spec/intro.html#mapping-from-other-character-sets
  63. //
  64. // This decodes quoted-printable bytes back into UTF8, and unescapes casing. For
  65. // example:
  66. // _alph=40_bet=5f50up => Alph@Bet_50up
  67. // Returns an error if the input string contains characters outside the
  68. // range "a-z0-9._=-", has an invalid quote-printable byte (e.g. not hex), or has
  69. // an invalid _ escaped byte (e.g. "_5").
  70. func DecodeUserLocalpart(str string) (string, error) {
  71. strBytes := []byte(str)
  72. var outputBuffer bytes.Buffer
  73. for i := 0; i < len(strBytes); i++ {
  74. b := strBytes[i]
  75. if !isValidByte(b) {
  76. return "", fmt.Errorf("Byte pos %d: Invalid byte", i)
  77. }
  78. if b == '_' { // next byte is a-z and should be upper-case or is another _ and should be a literal _
  79. if i+1 >= len(strBytes) {
  80. return "", fmt.Errorf("Byte pos %d: expected _[a-z_] encoding but ran out of string", i)
  81. }
  82. if !isValidEscapedChar(strBytes[i+1]) { // invalid escaping
  83. return "", fmt.Errorf("Byte pos %d: expected _[a-z_] encoding", i)
  84. }
  85. if strBytes[i+1] == '_' {
  86. outputBuffer.WriteByte('_')
  87. } else {
  88. outputBuffer.WriteByte(strBytes[i+1] - 0x20) // ASCII shift a-z to A-Z
  89. }
  90. i++ // skip next byte since we just handled it
  91. } else if b == '=' { // next 2 bytes are hex and should be buffered ready to be read as utf8
  92. if i+2 >= len(strBytes) {
  93. return "", fmt.Errorf("Byte pos: %d: expected quote-printable encoding but ran out of string", i)
  94. }
  95. dst := make([]byte, 1)
  96. _, err := hex.Decode(dst, strBytes[i+1:i+3])
  97. if err != nil {
  98. return "", err
  99. }
  100. outputBuffer.WriteByte(dst[0])
  101. i += 2 // skip next 2 bytes since we just handled it
  102. } else { // pass through
  103. outputBuffer.WriteByte(b)
  104. }
  105. }
  106. return outputBuffer.String(), nil
  107. }
  108. // ExtractUserLocalpart extracts the localpart portion of a user ID.
  109. // See http://matrix.org/docs/spec/intro.html#user-identifiers
  110. func ExtractUserLocalpart(userID string) (string, error) {
  111. if len(userID) == 0 || userID[0] != '@' {
  112. return "", fmt.Errorf("%s is not a valid user id", userID)
  113. }
  114. return strings.TrimPrefix(
  115. strings.SplitN(userID, ":", 2)[0], // @foo:bar:8448 => [ "@foo", "bar:8448" ]
  116. "@", // remove "@" prefix
  117. ), nil
  118. }