formatter.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. # mautrix-signal - A Matrix-Signal puppeting bridge
  2. # Copyright (C) 2020 Tulir Asokan
  3. #
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Affero General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU Affero General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. from typing import List, Tuple, cast
  17. from html import escape
  18. import struct
  19. from mautrix.types import Format, MessageType, TextMessageEventContent
  20. from mautrix.util.formatter import EntityString, EntityType, MarkdownString
  21. from mautrix.util.formatter import MatrixParser as BaseMatrixParser
  22. from mautrix.util.formatter import SimpleEntity
  23. from mausignald.types import Address, Mention, MessageData
  24. from . import puppet as pu
  25. from . import user as u
  26. # Helper methods from https://github.com/LonamiWebs/Telethon/blob/master/telethon/helpers.py
  27. # I don't know if this is how Signal actually calculates lengths, but it seems
  28. # to work better than plain len()
  29. def add_surrogate(text: str) -> str:
  30. return "".join(
  31. "".join(chr(y) for y in struct.unpack("<HH", x.encode("utf-16le")))
  32. if (0x10000 <= ord(x) <= 0x10FFFF)
  33. else x
  34. for x in text
  35. )
  36. def del_surrogate(text: str) -> str:
  37. return text.encode("utf-16", "surrogatepass").decode("utf-16")
  38. async def signal_to_matrix(message: MessageData) -> TextMessageEventContent:
  39. content = TextMessageEventContent(msgtype=MessageType.TEXT, body=message.body)
  40. surrogated_text = add_surrogate(message.body)
  41. if message.mentions:
  42. text_chunks = []
  43. html_chunks = []
  44. last_offset = 0
  45. for mention in message.mentions:
  46. before = surrogated_text[last_offset : mention.start]
  47. last_offset = mention.start + mention.length
  48. text_chunks.append(before)
  49. html_chunks.append(escape(before))
  50. puppet = await pu.Puppet.get_by_address(Address(uuid=mention.uuid))
  51. name = add_surrogate(puppet.name or puppet.mxid)
  52. text_chunks.append(name)
  53. html_chunks.append(f'<a href="https://matrix.to/#/{puppet.mxid}">{name}</a>')
  54. end = surrogated_text[last_offset:]
  55. text_chunks.append(end)
  56. html_chunks.append(escape(end))
  57. content.body = del_surrogate("".join(text_chunks))
  58. content.format = Format.HTML
  59. content.formatted_body = del_surrogate("".join(html_chunks))
  60. return content
  61. # TODO this has a lot of duplication with mautrix-facebook, maybe move to mautrix-python
  62. class SignalFormatString(EntityString[SimpleEntity, EntityType], MarkdownString):
  63. def format(self, entity_type: EntityType, **kwargs) -> "SignalFormatString":
  64. prefix = suffix = ""
  65. if entity_type == EntityType.USER_MENTION:
  66. self.entities.append(
  67. SimpleEntity(
  68. type=entity_type,
  69. offset=0,
  70. length=len(self.text),
  71. extra_info={"user_id": kwargs["user_id"]},
  72. )
  73. )
  74. return self
  75. elif entity_type == EntityType.BOLD:
  76. prefix = suffix = "**"
  77. elif entity_type == EntityType.ITALIC:
  78. prefix = suffix = "_"
  79. elif entity_type == EntityType.STRIKETHROUGH:
  80. prefix = suffix = "~~"
  81. elif entity_type == EntityType.URL:
  82. if kwargs["url"] != self.text:
  83. suffix = f" ({kwargs['url']})"
  84. elif entity_type == EntityType.PREFORMATTED:
  85. prefix = f"```{kwargs['language']}\n"
  86. suffix = "\n```"
  87. elif entity_type == EntityType.INLINE_CODE:
  88. prefix = suffix = "`"
  89. elif entity_type == EntityType.BLOCKQUOTE:
  90. children = self.trim().split("\n")
  91. children = [child.prepend("> ") for child in children]
  92. return self.join(children, "\n")
  93. elif entity_type == EntityType.HEADER:
  94. prefix = "#" * kwargs["size"] + " "
  95. else:
  96. return self
  97. self._offset_entities(len(prefix))
  98. self.text = f"{prefix}{self.text}{suffix}"
  99. return self
  100. class MatrixParser(BaseMatrixParser[SignalFormatString]):
  101. fs = SignalFormatString
  102. @classmethod
  103. def parse(cls, data: str) -> SignalFormatString:
  104. return cast(SignalFormatString, super().parse(data))
  105. async def matrix_to_signal(content: TextMessageEventContent) -> Tuple[str, List[Mention]]:
  106. if content.msgtype == MessageType.EMOTE:
  107. content.body = f"/me {content.body}"
  108. if content.formatted_body:
  109. content.formatted_body = f"/me {content.formatted_body}"
  110. mentions = []
  111. if content.format == Format.HTML and content.formatted_body:
  112. parsed = MatrixParser.parse(add_surrogate(content.formatted_body))
  113. text = del_surrogate(parsed.text)
  114. for mention in parsed.entities:
  115. mxid = mention.extra_info["user_id"]
  116. user = await u.User.get_by_mxid(mxid, create=False)
  117. if user and user.uuid:
  118. uuid = user.uuid
  119. else:
  120. puppet = await pu.Puppet.get_by_mxid(mxid, create=False)
  121. if puppet:
  122. uuid = puppet.uuid
  123. else:
  124. continue
  125. mentions.append(Mention(uuid=uuid, start=mention.offset, length=mention.length))
  126. else:
  127. text = content.body
  128. return text, mentions