formatter.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. # mautrix-signal - A Matrix-Signal puppeting bridge
  2. # Copyright (C) 2020 Tulir Asokan
  3. #
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Affero General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU Affero General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. from typing import List, Tuple, cast
  17. from html import escape
  18. import struct
  19. from mautrix.types import Format, MessageType, TextMessageEventContent
  20. from mautrix.util.formatter import (
  21. EntityString,
  22. EntityType,
  23. MarkdownString,
  24. MatrixParser as BaseMatrixParser,
  25. SimpleEntity,
  26. )
  27. from mausignald.types import Address, Mention, MessageData
  28. from . import puppet as pu, user as u
  29. # Helper methods from https://github.com/LonamiWebs/Telethon/blob/master/telethon/helpers.py
  30. # I don't know if this is how Signal actually calculates lengths, but it seems
  31. # to work better than plain len()
  32. def add_surrogate(text: str) -> str:
  33. return "".join(
  34. "".join(chr(y) for y in struct.unpack("<HH", x.encode("utf-16le")))
  35. if (0x10000 <= ord(x) <= 0x10FFFF)
  36. else x
  37. for x in text
  38. )
  39. def del_surrogate(text: str) -> str:
  40. return text.encode("utf-16", "surrogatepass").decode("utf-16")
  41. async def signal_to_matrix(message: MessageData) -> TextMessageEventContent:
  42. content = TextMessageEventContent(msgtype=MessageType.TEXT, body=message.body)
  43. surrogated_text = add_surrogate(message.body)
  44. if message.mentions:
  45. text_chunks = []
  46. html_chunks = []
  47. last_offset = 0
  48. for mention in message.mentions:
  49. before = surrogated_text[last_offset : mention.start]
  50. last_offset = mention.start + mention.length
  51. text_chunks.append(before)
  52. html_chunks.append(escape(before))
  53. puppet = await pu.Puppet.get_by_address(Address(uuid=mention.uuid))
  54. name = add_surrogate(puppet.name or puppet.mxid)
  55. text_chunks.append(name)
  56. html_chunks.append(f'<a href="https://matrix.to/#/{puppet.mxid}">{name}</a>')
  57. end = surrogated_text[last_offset:]
  58. text_chunks.append(end)
  59. html_chunks.append(escape(end))
  60. content.body = del_surrogate("".join(text_chunks))
  61. content.format = Format.HTML
  62. content.formatted_body = del_surrogate("".join(html_chunks))
  63. return content
  64. # TODO this has a lot of duplication with mautrix-facebook, maybe move to mautrix-python
  65. class SignalFormatString(EntityString[SimpleEntity, EntityType], MarkdownString):
  66. def format(self, entity_type: EntityType, **kwargs) -> "SignalFormatString":
  67. prefix = suffix = ""
  68. if entity_type == EntityType.USER_MENTION:
  69. self.entities.append(
  70. SimpleEntity(
  71. type=entity_type,
  72. offset=0,
  73. length=len(self.text),
  74. extra_info={"user_id": kwargs["user_id"]},
  75. )
  76. )
  77. return self
  78. elif entity_type == EntityType.BOLD:
  79. prefix = suffix = "**"
  80. elif entity_type == EntityType.ITALIC:
  81. prefix = suffix = "_"
  82. elif entity_type == EntityType.STRIKETHROUGH:
  83. prefix = suffix = "~~"
  84. elif entity_type == EntityType.URL:
  85. if kwargs["url"] != self.text:
  86. suffix = f" ({kwargs['url']})"
  87. elif entity_type == EntityType.PREFORMATTED:
  88. prefix = f"```{kwargs['language']}\n"
  89. suffix = "\n```"
  90. elif entity_type == EntityType.INLINE_CODE:
  91. prefix = suffix = "`"
  92. elif entity_type == EntityType.BLOCKQUOTE:
  93. children = self.trim().split("\n")
  94. children = [child.prepend("> ") for child in children]
  95. return self.join(children, "\n")
  96. elif entity_type == EntityType.HEADER:
  97. prefix = "#" * kwargs["size"] + " "
  98. else:
  99. return self
  100. self._offset_entities(len(prefix))
  101. self.text = f"{prefix}{self.text}{suffix}"
  102. return self
  103. class MatrixParser(BaseMatrixParser[SignalFormatString]):
  104. fs = SignalFormatString
  105. @classmethod
  106. def parse(cls, data: str) -> SignalFormatString:
  107. return cast(SignalFormatString, super().parse(data))
  108. async def matrix_to_signal(content: TextMessageEventContent) -> Tuple[str, List[Mention]]:
  109. if content.msgtype == MessageType.EMOTE:
  110. content.body = f"/me {content.body}"
  111. if content.formatted_body:
  112. content.formatted_body = f"/me {content.formatted_body}"
  113. mentions = []
  114. if content.format == Format.HTML and content.formatted_body:
  115. parsed = MatrixParser.parse(add_surrogate(content.formatted_body))
  116. text = del_surrogate(parsed.text)
  117. for mention in parsed.entities:
  118. mxid = mention.extra_info["user_id"]
  119. user = await u.User.get_by_mxid(mxid, create=False)
  120. if user and user.uuid:
  121. uuid = user.uuid
  122. else:
  123. puppet = await pu.Puppet.get_by_mxid(mxid, create=False)
  124. if puppet:
  125. uuid = puppet.uuid
  126. else:
  127. continue
  128. mentions.append(Mention(uuid=uuid, start=mention.offset, length=mention.length))
  129. else:
  130. text = content.body
  131. return text, mentions