formatter.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. // mautrix-discord - A Matrix-Discord puppeting bridge.
  2. // Copyright (C) 2023 Tulir Asokan
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package main
  17. import (
  18. "fmt"
  19. "regexp"
  20. "strings"
  21. "github.com/yuin/goldmark"
  22. "github.com/yuin/goldmark/extension"
  23. "github.com/yuin/goldmark/parser"
  24. "github.com/yuin/goldmark/util"
  25. "maunium.net/go/mautrix/event"
  26. "maunium.net/go/mautrix/format"
  27. "maunium.net/go/mautrix/format/mdext"
  28. "maunium.net/go/mautrix/id"
  29. "maunium.net/go/mautrix/util/variationselector"
  30. )
  31. // escapeFixer is a hacky partial fix for the difference in escaping markdown, used with escapeReplacement
  32. //
  33. // Discord allows escaping with just one backslash, e.g. \__a__,
  34. // but standard markdown requires both to be escaped (\_\_a__)
  35. var escapeFixer = regexp.MustCompile(`\\(__[^_]|\*\*[^*])`)
  36. func escapeReplacement(s string) string {
  37. return s[:2] + `\` + s[2:]
  38. }
  39. // indentableParagraphParser is the default paragraph parser with CanAcceptIndentedLine.
  40. // Used when disabling CodeBlockParser (as disabling it without a replacement will make indented blocks disappear).
  41. type indentableParagraphParser struct {
  42. parser.BlockParser
  43. }
  44. var defaultIndentableParagraphParser = &indentableParagraphParser{BlockParser: parser.NewParagraphParser()}
  45. func (b *indentableParagraphParser) CanAcceptIndentedLine() bool {
  46. return true
  47. }
  48. var discordRenderer = goldmark.New(
  49. goldmark.WithParser(mdext.ParserWithoutFeatures(
  50. parser.NewListParser(), parser.NewListItemParser(), parser.NewHTMLBlockParser(), parser.NewRawHTMLParser(),
  51. parser.NewSetextHeadingParser(), parser.NewATXHeadingParser(), parser.NewThematicBreakParser(),
  52. parser.NewLinkParser(), parser.NewCodeBlockParser(),
  53. )),
  54. goldmark.WithParserOptions(parser.WithBlockParsers(util.Prioritized(defaultIndentableParagraphParser, 500))),
  55. format.HTMLOptions,
  56. goldmark.WithExtensions(extension.Strikethrough, mdext.SimpleSpoiler, mdext.DiscordUnderline, ExtDiscordEveryone, ExtDiscordTag),
  57. )
  58. func (portal *Portal) renderDiscordMarkdownOnlyHTML(text string) string {
  59. text = escapeFixer.ReplaceAllStringFunc(text, escapeReplacement)
  60. var buf strings.Builder
  61. ctx := parser.NewContext()
  62. ctx.Set(parserContextPortal, portal)
  63. err := discordRenderer.Convert([]byte(text), &buf, parser.WithContext(ctx))
  64. if err != nil {
  65. panic(fmt.Errorf("markdown parser errored: %w", err))
  66. }
  67. return format.UnwrapSingleParagraph(buf.String())
  68. }
  69. const formatterContextUserKey = "fi.mau.discord.user"
  70. const formatterContextPortalKey = "fi.mau.discord.portal"
  71. func pillConverter(displayname, mxid, eventID string, ctx format.Context) string {
  72. if len(mxid) == 0 {
  73. return displayname
  74. }
  75. user := ctx.ReturnData[formatterContextUserKey].(*User)
  76. if mxid[0] == '#' {
  77. alias, err := user.bridge.Bot.ResolveAlias(id.RoomAlias(mxid))
  78. if err != nil {
  79. return displayname
  80. }
  81. mxid = alias.RoomID.String()
  82. }
  83. if mxid[0] == '!' {
  84. portal := user.bridge.GetPortalByMXID(id.RoomID(mxid))
  85. if portal != nil {
  86. if eventID == "" {
  87. //currentPortal := ctx[formatterContextPortalKey].(*Portal)
  88. return fmt.Sprintf("<#%s>", portal.Key.ChannelID)
  89. //if currentPortal.GuildID == portal.GuildID {
  90. //} else if portal.GuildID != "" {
  91. // return fmt.Sprintf("<#%s:%s:%s>", portal.Key.ChannelID, portal.GuildID, portal.Name)
  92. //} else {
  93. // // TODO is mentioning private channels possible at all?
  94. //}
  95. } else if msg := user.bridge.DB.Message.GetByMXID(portal.Key, id.EventID(eventID)); msg != nil {
  96. guildID := portal.GuildID
  97. if guildID == "" {
  98. guildID = "@me"
  99. }
  100. return fmt.Sprintf("https://discord.com/channels/%s/%s/%s", guildID, msg.DiscordProtoChannelID(), msg.DiscordID)
  101. }
  102. }
  103. } else if mxid[0] == '@' {
  104. parsedID, ok := user.bridge.ParsePuppetMXID(id.UserID(mxid))
  105. if ok {
  106. return fmt.Sprintf("<@%s>", parsedID)
  107. }
  108. mentionedUser := user.bridge.GetUserByMXID(id.UserID(mxid))
  109. if mentionedUser != nil && mentionedUser.DiscordID != "" {
  110. return fmt.Sprintf("<@%s>", mentionedUser.DiscordID)
  111. }
  112. }
  113. return displayname
  114. }
  115. // Discord links start with http:// or https://, contain at least two characters afterwards,
  116. // don't contain < or whitespace anywhere, and don't end with "'),.:;]
  117. //
  118. // Zero-width whitespace is mostly in the Format category and is allowed, except \uFEFF isn't for some reason
  119. var discordLinkRegex = regexp.MustCompile(`https?://[^<\p{Zs}\x{feff}]*[^"'),.:;\]\p{Zs}\x{feff}]`)
  120. var discordMarkdownEscaper = strings.NewReplacer(
  121. `\`, `\\`,
  122. `_`, `\_`,
  123. `*`, `\*`,
  124. `~`, `\~`,
  125. "`", "\\`",
  126. `|`, `\|`,
  127. `<`, `\<`,
  128. )
  129. func escapeDiscordMarkdown(s string) string {
  130. submatches := discordLinkRegex.FindAllStringIndex(s, -1)
  131. if submatches == nil {
  132. return discordMarkdownEscaper.Replace(s)
  133. }
  134. var builder strings.Builder
  135. offset := 0
  136. for _, match := range submatches {
  137. start := match[0]
  138. end := match[1]
  139. builder.WriteString(discordMarkdownEscaper.Replace(s[offset:start]))
  140. builder.WriteString(s[start:end])
  141. offset = end
  142. }
  143. builder.WriteString(discordMarkdownEscaper.Replace(s[offset:]))
  144. return builder.String()
  145. }
  146. var matrixHTMLParser = &format.HTMLParser{
  147. TabsToSpaces: 4,
  148. Newline: "\n",
  149. HorizontalLine: "\n---\n",
  150. ItalicConverter: func(s string, ctx format.Context) string {
  151. return fmt.Sprintf("*%s*", s)
  152. },
  153. UnderlineConverter: func(s string, ctx format.Context) string {
  154. return fmt.Sprintf("__%s__", s)
  155. },
  156. TextConverter: func(s string, ctx format.Context) string {
  157. if ctx.TagStack.Has("pre") || ctx.TagStack.Has("code") {
  158. // If we're in a code block, don't escape markdown
  159. return s
  160. }
  161. return escapeDiscordMarkdown(s)
  162. },
  163. SpoilerConverter: func(text, reason string, ctx format.Context) string {
  164. if reason != "" {
  165. return fmt.Sprintf("(%s) ||%s||", reason, text)
  166. }
  167. return fmt.Sprintf("||%s||", text)
  168. },
  169. }
  170. func init() {
  171. matrixHTMLParser.PillConverter = pillConverter
  172. }
  173. func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventContent) string {
  174. if content.Format == event.FormatHTML && len(content.FormattedBody) > 0 {
  175. ctx := format.NewContext()
  176. ctx.ReturnData[formatterContextUserKey] = user
  177. ctx.ReturnData[formatterContextPortalKey] = portal
  178. return variationselector.Remove(matrixHTMLParser.Parse(content.FormattedBody, ctx))
  179. } else {
  180. return variationselector.Remove(escapeDiscordMarkdown(content.Body))
  181. }
  182. }