123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- # mautrix-signal - A Matrix-Signal puppeting bridge
- # Copyright (C) 2021 Tulir Asokan
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- from __future__ import annotations
- from typing import cast
- import html
- import struct
- from mausignald.types import Address, Mention, MessageData
- from mautrix.types import Format, MessageType, TextMessageEventContent, UserID
- from mautrix.util.formatter import (
- EntityString,
- EntityType,
- MarkdownString,
- MatrixParser as BaseMatrixParser,
- SemiAbstractEntity,
- )
- from . import puppet as pu, user as u
- # Helper methods from https://github.com/LonamiWebs/Telethon/blob/master/telethon/helpers.py
- # I don't know if this is how Signal actually calculates lengths,
- # but it seems to work better than plain len()
def add_surrogate(text: str) -> str:
    """Expand every character above the BMP in *text* into a UTF-16 surrogate pair.

    Characters in the range U+10000..U+10FFFF are replaced by the two 16-bit
    code units of their little-endian UTF-16 encoding, each turned back into a
    one-character string. All other characters are passed through unchanged.
    """
    pieces = []
    for char in text:
        if 0x10000 <= ord(char) <= 0x10FFFF:
            # Four UTF-16LE bytes -> two unsigned 16-bit surrogate values.
            high, low = struct.unpack("<HH", char.encode("utf-16le"))
            pieces.append(chr(high) + chr(low))
        else:
            pieces.append(char)
    return "".join(pieces)
def del_surrogate(text: str) -> str:
    """Collapse UTF-16 surrogate pairs in *text* back into single characters.

    The string is encoded as UTF-16 with the ``surrogatepass`` error handler
    (so surrogate code points are written out as-is) and then decoded as
    regular UTF-16, which merges each pair into one character.
    """
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")
async def signal_to_matrix(message: MessageData) -> TextMessageEventContent:
    """Convert a Signal message into Matrix text message content.

    Without mentions the result is a plain ``m.text`` event whose body is the
    Signal body. With mentions, every mentioned range of the body is replaced
    by the mentioned puppet's name (or Matrix ID when it has no name) in the
    plain body, and by a ``matrix.to`` link in the HTML ``formatted_body``.

    Args:
        message: The Signal message; ``body`` and ``mentions`` are read.

    Returns:
        The Matrix event content, with ``format``/``formatted_body`` set only
        when the message contains mentions.
    """
    content = TextMessageEventContent(msgtype=MessageType.TEXT, body=message.body)
    if not message.mentions:
        return content

    # Mention start/length are applied to the surrogate-expanded text, so the
    # body is expanded before slicing and collapsed again at the end.
    surrogated_text = add_surrogate(message.body)
    text_chunks = []
    html_chunks = []
    last_offset = 0
    # The slicing below walks the text left to right, so process the mentions
    # in order of position regardless of the order they arrived in.
    for mention in sorted(message.mentions, key=lambda item: item.start):
        before = surrogated_text[last_offset : mention.start]
        last_offset = mention.start + mention.length

        text_chunks.append(before)
        html_chunks.append(html.escape(before))
        puppet = await pu.Puppet.get_by_uuid(mention.uuid)
        name = add_surrogate(puppet.name or puppet.mxid)
        text_chunks.append(name)
        # The display name is user-controlled: escape it before embedding it
        # in HTML, exactly like the surrounding message text.
        html_chunks.append(
            f'<a href="https://matrix.to/#/{puppet.mxid}">{html.escape(name)}</a>'
        )
    end = surrogated_text[last_offset:]
    text_chunks.append(end)
    html_chunks.append(html.escape(end))

    content.body = del_surrogate("".join(text_chunks))
    content.format = Format.HTML
    # Raw newlines are collapsed when HTML is rendered; use explicit breaks so
    # multi-line messages keep their line structure in the formatted body.
    content.formatted_body = del_surrogate("".join(html_chunks)).replace("\n", "<br/>")
    return content
class MentionEntity(Mention, SemiAbstractEntity):
    """A Signal mention that doubles as a formatter entity.

    The entity ``offset`` is an alias for the mention's ``start`` field, so
    moving the entity updates the mention position directly.
    """

    @property
    def offset(self) -> int:
        """Entity offset; reads the mention's ``start`` field."""
        return self.start

    @offset.setter
    def offset(self, value: int) -> None:
        self.start = value

    def copy(self) -> MentionEntity:
        """Return a new ``MentionEntity`` with the same uuid, start and length."""
        fields = {
            "uuid": self.uuid,
            "length": self.length,
            "start": self.start,
        }
        return MentionEntity(**fields)
- # TODO this has a lot of duplication with mautrix-facebook, maybe move to mautrix-python
class SignalFormatString(EntityString[MentionEntity, EntityType], MarkdownString):
    """Format string that renders Matrix formatting as Markdown-like plain text.

    User mentions are the only formatting kept as entities; every other
    supported entity type is expressed by wrapping or prefixing the text.
    """

    def format(self, entity_type: EntityType, **kwargs) -> SignalFormatString:
        """Apply *entity_type* formatting to this string.

        Keyword arguments read, depending on the entity type: ``uuid`` (user
        mention), ``url`` (URL), ``language`` (preformatted) and ``size``
        (header). Unsupported entity types leave the string untouched.
        """
        # Mentions don't change the text: record an entity spanning all of it.
        if entity_type == EntityType.USER_MENTION:
            mention = MentionEntity(uuid=kwargs["uuid"], start=0, length=len(self.text))
            self.entities.append(mention)
            return self

        # Block quotes are rebuilt line by line rather than wrapped.
        if entity_type == EntityType.BLOCKQUOTE:
            quoted_lines = [line.prepend("> ") for line in self.trim().split("\n")]
            return self.join(quoted_lines, "\n")

        # Entity types that put the same marker on both sides of the text.
        symmetric_markers = {
            EntityType.BOLD: "**",
            EntityType.ITALIC: "_",
            EntityType.STRIKETHROUGH: "~~",
            EntityType.INLINE_CODE: "`",
        }
        marker = symmetric_markers.get(entity_type)
        if marker is not None:
            opening = closing = marker
        elif entity_type == EntityType.URL:
            opening = ""
            # Only append the target when it differs from the visible text.
            closing = "" if kwargs["url"] == self.text else f" ({kwargs['url']})"
        elif entity_type == EntityType.PREFORMATTED:
            opening = f"```{kwargs['language']}\n"
            closing = "\n```"
        elif entity_type == EntityType.HEADER:
            opening = "#" * kwargs["size"] + " "
            closing = ""
        else:
            return self

        # Existing entities must move right by the length of the inserted prefix.
        self._offset_entities(len(opening))
        self.text = f"{opening}{self.text}{closing}"
        return self
class MatrixParser(BaseMatrixParser[SignalFormatString]):
    """Matrix HTML parser that produces ``SignalFormatString`` results."""

    fs = SignalFormatString

    async def user_pill_to_fstring(
        self, msg: SignalFormatString, user_id: UserID
    ) -> SignalFormatString:
        """Turn a Matrix user pill into a Signal mention when a UUID is known.

        The Matrix ID is first looked up as a bridge user with a UUID, then as
        a puppet. If neither lookup yields a match, *msg* is returned as-is.
        """
        user = await u.User.get_by_mxid(user_id, create=False)
        if user and user.uuid:
            return msg.format(self.e.USER_MENTION, uuid=user.uuid)

        puppet = await pu.Puppet.get_by_mxid(user_id, create=False)
        if not puppet:
            return msg
        return msg.format(self.e.USER_MENTION, uuid=puppet.uuid)

    async def parse(self, data: str) -> SignalFormatString:
        """Parse *data* with the base parser, typed as ``SignalFormatString``."""
        parsed = await super().parse(data)
        return cast(SignalFormatString, parsed)
async def matrix_to_signal(content: TextMessageEventContent) -> tuple[str, list[Mention]]:
    """Convert Matrix text message content into Signal text plus mentions.

    Emotes are prefixed with ``/me ``. When the content carries an HTML
    ``formatted_body`` it is parsed with ``MatrixParser`` and the resulting
    text and entities are returned; otherwise the plain body is returned with
    no mentions.

    The passed-in ``content`` is not modified, so calling this more than once
    on the same event does not stack ``/me `` prefixes.

    Args:
        content: The Matrix message content to convert.

    Returns:
        A ``(text, mentions)`` tuple.
    """
    # Work on local copies instead of rewriting the caller's event content.
    body = content.body
    formatted_body = content.formatted_body
    if content.msgtype == MessageType.EMOTE:
        body = f"/me {body}"
        if formatted_body:
            formatted_body = f"/me {formatted_body}"

    if content.format == Format.HTML and formatted_body:
        # Parse on surrogate-expanded text so entity offsets and lengths are
        # computed on the expanded form, then collapse the text again.
        parsed = await MatrixParser().parse(add_surrogate(formatted_body))
        return del_surrogate(parsed.text), parsed.entities
    return body, []
|