formatter.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. // mautrix-discord - A Matrix-Discord puppeting bridge.
  2. // Copyright (C) 2023 Tulir Asokan
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package main
  17. import (
  18. "fmt"
  19. "regexp"
  20. "strings"
  21. "github.com/bwmarrin/discordgo"
  22. "github.com/yuin/goldmark"
  23. "github.com/yuin/goldmark/extension"
  24. "github.com/yuin/goldmark/parser"
  25. "github.com/yuin/goldmark/util"
  26. "go.mau.fi/util/variationselector"
  27. "golang.org/x/exp/slices"
  28. "maunium.net/go/mautrix/event"
  29. "maunium.net/go/mautrix/format"
  30. "maunium.net/go/mautrix/format/mdext"
  31. "maunium.net/go/mautrix/id"
  32. )
  // escapeFixer matches a backslash followed by a doubled markdown delimiter (`__` or `**`)
  // and one further character that is not that delimiter. It is a hacky, partial workaround
  // and is meant to be applied together with escapeReplacement.
  //
  // Background: Discord accepts a single backslash as an escape for the whole delimiter
  // (e.g. \__a__), while standard markdown needs both delimiter characters escaped (\_\_a__).
  var escapeFixer = regexp.MustCompile(`\\(__[^_]|\*\*[^*])`)
  // escapeReplacement rewrites one escapeFixer match by inserting a backslash after its first
  // two bytes, so that `\__a` becomes `\_\_a` and `\**a` becomes `\*\*a`.
  //
  // s must be at least two bytes long; shorter input panics on the slice expressions.
  func escapeReplacement(s string) string {
  	const splitAt = 2
  	head, tail := s[:splitAt], s[splitAt:]
  	return head + `\` + tail
  }
  41. // indentableParagraphParser is the default paragraph parser with CanAcceptIndentedLine.
  42. // Used when disabling CodeBlockParser (as disabling it without a replacement will make indented blocks disappear).
  43. type indentableParagraphParser struct {
  44. parser.BlockParser
  45. }
  46. var defaultIndentableParagraphParser = &indentableParagraphParser{BlockParser: parser.NewParagraphParser()}
  47. func (b *indentableParagraphParser) CanAcceptIndentedLine() bool {
  48. return true
  49. }
  50. var removeFeaturesExceptLinks = []any{
  51. parser.NewListParser(), parser.NewListItemParser(), parser.NewHTMLBlockParser(), parser.NewRawHTMLParser(),
  52. parser.NewSetextHeadingParser(), parser.NewThematicBreakParser(),
  53. parser.NewCodeBlockParser(),
  54. }
  55. var removeFeaturesAndLinks = append(removeFeaturesExceptLinks, parser.NewLinkParser())
  56. var fixIndentedParagraphs = goldmark.WithParserOptions(parser.WithBlockParsers(util.Prioritized(defaultIndentableParagraphParser, 500)))
  57. var discordExtensions = goldmark.WithExtensions(extension.Strikethrough, mdext.SimpleSpoiler, mdext.DiscordUnderline, ExtDiscordEveryone, ExtDiscordTag)
  58. var discordRenderer = goldmark.New(
  59. goldmark.WithParser(mdext.ParserWithoutFeatures(removeFeaturesAndLinks...)),
  60. fixIndentedParagraphs, format.HTMLOptions, discordExtensions,
  61. )
  62. var discordRendererWithInlineLinks = goldmark.New(
  63. goldmark.WithParser(mdext.ParserWithoutFeatures(removeFeaturesExceptLinks...)),
  64. fixIndentedParagraphs, format.HTMLOptions, discordExtensions,
  65. )
  66. func (portal *Portal) renderDiscordMarkdownOnlyHTML(text string, allowInlineLinks bool) string {
  67. text = escapeFixer.ReplaceAllStringFunc(text, escapeReplacement)
  68. var buf strings.Builder
  69. ctx := parser.NewContext()
  70. ctx.Set(parserContextPortal, portal)
  71. renderer := discordRenderer
  72. if allowInlineLinks {
  73. renderer = discordRendererWithInlineLinks
  74. }
  75. err := renderer.Convert([]byte(text), &buf, parser.WithContext(ctx))
  76. if err != nil {
  77. panic(fmt.Errorf("markdown parser errored: %w", err))
  78. }
  79. return format.UnwrapSingleParagraph(buf.String())
  80. }
  // Keys used in format.Context.ReturnData while converting Matrix HTML for Discord.
  const (
  	// formatterContextPortalKey holds the *Portal the message is being converted for.
  	formatterContextPortalKey = "fi.mau.discord.portal"
  	// formatterContextAllowedMentionsKey holds the *discordgo.MessageAllowedMentions that
  	// collects the Discord users mentioned in the output.
  	formatterContextAllowedMentionsKey = "fi.mau.discord.allowed_mentions"
  	// formatterContextInputAllowedMentionsKey holds the []id.UserID taken from the Matrix
  	// event's mentions, when the event has any.
  	formatterContextInputAllowedMentionsKey = "fi.mau.discord.input_allowed_mentions"
  )
  // appendIfNotContains returns arr unchanged when newItem is already one of its elements,
  // and otherwise returns arr with newItem appended.
  func appendIfNotContains(arr []string, newItem string) []string {
  	present := false
  	for i := 0; i < len(arr) && !present; i++ {
  		present = arr[i] == newItem
  	}
  	if present {
  		return arr
  	}
  	return append(arr, newItem)
  }
  92. func (br *DiscordBridge) pillConverter(displayname, mxid, eventID string, ctx format.Context) string {
  93. if len(mxid) == 0 {
  94. return displayname
  95. }
  96. if mxid[0] == '#' {
  97. alias, err := br.Bot.ResolveAlias(id.RoomAlias(mxid))
  98. if err != nil {
  99. return displayname
  100. }
  101. mxid = alias.RoomID.String()
  102. }
  103. if mxid[0] == '!' {
  104. portal := br.GetPortalByMXID(id.RoomID(mxid))
  105. if portal != nil {
  106. if eventID == "" {
  107. //currentPortal := ctx[formatterContextPortalKey].(*Portal)
  108. return fmt.Sprintf("<#%s>", portal.Key.ChannelID)
  109. //if currentPortal.GuildID == portal.GuildID {
  110. //} else if portal.GuildID != "" {
  111. // return fmt.Sprintf("<#%s:%s:%s>", portal.Key.ChannelID, portal.GuildID, portal.Name)
  112. //} else {
  113. // // TODO is mentioning private channels possible at all?
  114. //}
  115. } else if msg := br.DB.Message.GetByMXID(portal.Key, id.EventID(eventID)); msg != nil {
  116. guildID := portal.GuildID
  117. if guildID == "" {
  118. guildID = "@me"
  119. }
  120. return fmt.Sprintf("https://discord.com/channels/%s/%s/%s", guildID, msg.DiscordProtoChannelID(), msg.DiscordID)
  121. }
  122. }
  123. } else if mxid[0] == '@' {
  124. allowedMentions, _ := ctx.ReturnData[formatterContextInputAllowedMentionsKey].([]id.UserID)
  125. if allowedMentions != nil && !slices.Contains(allowedMentions, id.UserID(mxid)) {
  126. return displayname
  127. }
  128. mentions := ctx.ReturnData[formatterContextAllowedMentionsKey].(*discordgo.MessageAllowedMentions)
  129. parsedID, ok := br.ParsePuppetMXID(id.UserID(mxid))
  130. if ok {
  131. mentions.Users = appendIfNotContains(mentions.Users, parsedID)
  132. return fmt.Sprintf("<@%s>", parsedID)
  133. }
  134. mentionedUser := br.GetUserByMXID(id.UserID(mxid))
  135. if mentionedUser != nil && mentionedUser.DiscordID != "" {
  136. mentions.Users = appendIfNotContains(mentions.Users, mentionedUser.DiscordID)
  137. return fmt.Sprintf("<@%s>", mentionedUser.DiscordID)
  138. }
  139. }
  140. return displayname
  141. }
  // discordLinkPattern describes what is treated as an auto-linked URL in Discord messages.
  //
  // Discord links start with http:// or https://, contain at least two characters afterwards,
  // don't contain < or whitespace anywhere, and don't end with "'),.:;]
  //
  // The pattern enforces this with one-or-more non-'<', non-whitespace characters followed by
  // a final character that is additionally none of the listed punctuation marks.
  //
  // Zero-width whitespace is mostly in the Format category and is allowed, except \uFEFF isn't for some reason
  const discordLinkPattern = `https?://[^<\p{Zs}\x{feff}]+[^<"'),.:;\]\p{Zs}\x{feff}]`

  var (
  	// discordLinkRegex finds links anywhere inside a string.
  	discordLinkRegex = regexp.MustCompile(discordLinkPattern)
  	// discordLinkRegexFull only matches when the whole string is a single link.
  	discordLinkRegexFull = regexp.MustCompile("^" + discordLinkPattern + "$")
  )
  // discordMarkdownEscaper prefixes every character that it treats as Discord markdown syntax
  // with a backslash. The backslash itself comes first in the list, followed by
  // _ * ~ ` | < and #.
  var discordMarkdownEscaper = func() *strings.Replacer {
  	special := []string{`\`, `_`, `*`, `~`, "`", `|`, `<`, `#`}
  	pairs := make([]string, 0, 2*len(special))
  	for _, ch := range special {
  		pairs = append(pairs, ch, `\`+ch)
  	}
  	return strings.NewReplacer(pairs...)
  }()
  159. func escapeDiscordMarkdown(s string) string {
  160. submatches := discordLinkRegex.FindAllStringIndex(s, -1)
  161. if submatches == nil {
  162. return discordMarkdownEscaper.Replace(s)
  163. }
  164. var builder strings.Builder
  165. offset := 0
  166. for _, match := range submatches {
  167. start := match[0]
  168. end := match[1]
  169. builder.WriteString(discordMarkdownEscaper.Replace(s[offset:start]))
  170. builder.WriteString(s[start:end])
  171. offset = end
  172. }
  173. builder.WriteString(discordMarkdownEscaper.Replace(s[offset:]))
  174. return builder.String()
  175. }
  176. var matrixHTMLParser = &format.HTMLParser{
  177. TabsToSpaces: 4,
  178. Newline: "\n",
  179. HorizontalLine: "\n---\n",
  180. ItalicConverter: func(s string, ctx format.Context) string {
  181. return fmt.Sprintf("*%s*", s)
  182. },
  183. UnderlineConverter: func(s string, ctx format.Context) string {
  184. return fmt.Sprintf("__%s__", s)
  185. },
  186. TextConverter: func(s string, ctx format.Context) string {
  187. if ctx.TagStack.Has("pre") || ctx.TagStack.Has("code") {
  188. // If we're in a code block, don't escape markdown
  189. return s
  190. }
  191. return escapeDiscordMarkdown(s)
  192. },
  193. SpoilerConverter: func(text, reason string, ctx format.Context) string {
  194. if reason != "" {
  195. return fmt.Sprintf("(%s) ||%s||", reason, text)
  196. }
  197. return fmt.Sprintf("||%s||", text)
  198. },
  199. LinkConverter: func(text, href string, ctx format.Context) string {
  200. if text == href {
  201. return text
  202. } else if !discordLinkRegexFull.MatchString(href) {
  203. return fmt.Sprintf("%s (%s)", escapeDiscordMarkdown(text), escapeDiscordMarkdown(href))
  204. }
  205. return fmt.Sprintf("[%s](%s)", escapeDiscordMarkdown(text), href)
  206. },
  207. }
  208. func (portal *Portal) parseMatrixHTML(content *event.MessageEventContent) (string, *discordgo.MessageAllowedMentions) {
  209. allowedMentions := &discordgo.MessageAllowedMentions{
  210. Parse: []discordgo.AllowedMentionType{},
  211. Users: []string{},
  212. RepliedUser: true,
  213. }
  214. if content.Format == event.FormatHTML && len(content.FormattedBody) > 0 {
  215. ctx := format.NewContext()
  216. ctx.ReturnData[formatterContextPortalKey] = portal
  217. ctx.ReturnData[formatterContextAllowedMentionsKey] = allowedMentions
  218. if content.Mentions != nil {
  219. ctx.ReturnData[formatterContextInputAllowedMentionsKey] = content.Mentions.UserIDs
  220. }
  221. return variationselector.FullyQualify(matrixHTMLParser.Parse(content.FormattedBody, ctx)), allowedMentions
  222. } else {
  223. return variationselector.FullyQualify(escapeDiscordMarkdown(content.Body)), allowedMentions
  224. }
  225. }