formatting.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. // mautrix-whatsapp - A Matrix-WhatsApp puppeting bridge.
  2. // Copyright (C) 2023 Tulir Asokan
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package main
  17. import (
  18. "fmt"
  19. "html"
  20. "regexp"
  21. "sort"
  22. "strings"
  23. "go.mau.fi/whatsmeow/types"
  24. "golang.org/x/exp/slices"
  25. "maunium.net/go/mautrix/event"
  26. "maunium.net/go/mautrix/format"
  27. "maunium.net/go/mautrix/id"
  28. )
  29. var italicRegex = regexp.MustCompile("([\\s>~*]|^)_(.+?)_([^a-zA-Z\\d]|$)")
  30. var boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)")
  31. var strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)")
  32. var codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```")
  33. var inlineURLRegex = regexp.MustCompile(`\[(.+?)]\((.+?)\)`)
  34. const mentionedJIDsContextKey = "fi.mau.whatsapp.mentioned_jids"
  35. const allowedMentionsContextKey = "fi.mau.whatsapp.allowed_mentions"
  36. type Formatter struct {
  37. bridge *WABridge
  38. matrixHTMLParser *format.HTMLParser
  39. waReplString map[*regexp.Regexp]string
  40. waReplFunc map[*regexp.Regexp]func(string) string
  41. waReplFuncText map[*regexp.Regexp]func(string) string
  42. }
  43. func NewFormatter(bridge *WABridge) *Formatter {
  44. formatter := &Formatter{
  45. bridge: bridge,
  46. matrixHTMLParser: &format.HTMLParser{
  47. TabsToSpaces: 4,
  48. Newline: "\n",
  49. PillConverter: func(displayname, mxid, eventID string, ctx format.Context) string {
  50. allowedMentions, _ := ctx.ReturnData[allowedMentionsContextKey].(map[types.JID]bool)
  51. if mxid[0] == '@' {
  52. var jid types.JID
  53. if puppet := bridge.GetPuppetByMXID(id.UserID(mxid)); puppet != nil {
  54. jid = puppet.JID
  55. } else if user := bridge.GetUserByMXIDIfExists(id.UserID(mxid)); user != nil {
  56. jid = user.JID.ToNonAD()
  57. }
  58. if !jid.IsEmpty() && (allowedMentions == nil || allowedMentions[jid]) {
  59. if allowedMentions == nil {
  60. jids, ok := ctx.ReturnData[mentionedJIDsContextKey].([]string)
  61. if !ok {
  62. ctx.ReturnData[mentionedJIDsContextKey] = []string{jid.String()}
  63. } else {
  64. ctx.ReturnData[mentionedJIDsContextKey] = append(jids, jid.String())
  65. }
  66. }
  67. return "@" + jid.User
  68. }
  69. }
  70. return displayname
  71. },
  72. BoldConverter: func(text string, _ format.Context) string { return fmt.Sprintf("*%s*", text) },
  73. ItalicConverter: func(text string, _ format.Context) string { return fmt.Sprintf("_%s_", text) },
  74. StrikethroughConverter: func(text string, _ format.Context) string { return fmt.Sprintf("~%s~", text) },
  75. MonospaceConverter: func(text string, _ format.Context) string { return fmt.Sprintf("```%s```", text) },
  76. MonospaceBlockConverter: func(text, language string, _ format.Context) string { return fmt.Sprintf("```%s```", text) },
  77. },
  78. waReplString: map[*regexp.Regexp]string{
  79. italicRegex: "$1<em>$2</em>$3",
  80. boldRegex: "$1<strong>$2</strong>$3",
  81. strikethroughRegex: "$1<del>$2</del>$3",
  82. },
  83. }
  84. formatter.waReplFunc = map[*regexp.Regexp]func(string) string{
  85. codeBlockRegex: func(str string) string {
  86. str = str[3 : len(str)-3]
  87. if strings.ContainsRune(str, '\n') {
  88. return fmt.Sprintf("<pre><code>%s</code></pre>", str)
  89. }
  90. return fmt.Sprintf("<code>%s</code>", str)
  91. },
  92. }
  93. formatter.waReplFuncText = map[*regexp.Regexp]func(string) string{}
  94. return formatter
  95. }
  96. func (formatter *Formatter) getMatrixInfoByJID(roomID id.RoomID, jid types.JID) (mxid id.UserID, displayname string) {
  97. if puppet := formatter.bridge.GetPuppetByJID(jid); puppet != nil {
  98. mxid = puppet.MXID
  99. displayname = puppet.Displayname
  100. }
  101. if user := formatter.bridge.GetUserByJID(jid); user != nil {
  102. mxid = user.MXID
  103. member := formatter.bridge.StateStore.GetMember(roomID, user.MXID)
  104. if len(member.Displayname) > 0 {
  105. displayname = member.Displayname
  106. }
  107. }
  108. return
  109. }
  110. func (formatter *Formatter) ParseWhatsApp(roomID id.RoomID, content *event.MessageEventContent, mentionedJIDs []string, allowInlineURL, forceHTML bool) {
  111. output := html.EscapeString(content.Body)
  112. for regex, replacement := range formatter.waReplString {
  113. output = regex.ReplaceAllString(output, replacement)
  114. }
  115. for regex, replacer := range formatter.waReplFunc {
  116. output = regex.ReplaceAllStringFunc(output, replacer)
  117. }
  118. if allowInlineURL {
  119. output = inlineURLRegex.ReplaceAllStringFunc(output, func(s string) string {
  120. groups := inlineURLRegex.FindStringSubmatch(s)
  121. return fmt.Sprintf(`<a href="%s">%s</a>`, groups[2], groups[1])
  122. })
  123. }
  124. alreadyMentioned := make(map[id.UserID]struct{})
  125. content.Mentions = &event.Mentions{}
  126. for _, rawJID := range mentionedJIDs {
  127. jid, err := types.ParseJID(rawJID)
  128. if err != nil {
  129. continue
  130. } else if jid.Server == types.LegacyUserServer {
  131. jid.Server = types.DefaultUserServer
  132. }
  133. mxid, displayname := formatter.getMatrixInfoByJID(roomID, jid)
  134. number := "@" + jid.User
  135. output = strings.ReplaceAll(output, number, fmt.Sprintf(`<a href="https://matrix.to/#/%s">%s</a>`, mxid, displayname))
  136. content.Body = strings.ReplaceAll(content.Body, number, displayname)
  137. if _, ok := alreadyMentioned[mxid]; !ok {
  138. alreadyMentioned[mxid] = struct{}{}
  139. content.Mentions.UserIDs = append(content.Mentions.UserIDs, mxid)
  140. }
  141. }
  142. if output != content.Body || forceHTML {
  143. output = strings.ReplaceAll(output, "\n", "<br/>")
  144. content.FormattedBody = output
  145. content.Format = event.FormatHTML
  146. for regex, replacer := range formatter.waReplFuncText {
  147. content.Body = regex.ReplaceAllStringFunc(content.Body, replacer)
  148. }
  149. }
  150. }
  151. func (formatter *Formatter) ParseMatrix(html string, mentions *event.Mentions) (string, []string) {
  152. ctx := format.NewContext()
  153. var mentionedJIDs []string
  154. if mentions != nil {
  155. var allowedMentions = make(map[types.JID]bool)
  156. mentionedJIDs = make([]string, 0, len(mentions.UserIDs))
  157. for _, userID := range mentions.UserIDs {
  158. var jid types.JID
  159. if puppet := formatter.bridge.GetPuppetByMXID(userID); puppet != nil {
  160. jid = puppet.JID
  161. mentionedJIDs = append(mentionedJIDs, puppet.JID.String())
  162. } else if user := formatter.bridge.GetUserByMXIDIfExists(userID); user != nil {
  163. jid = user.JID.ToNonAD()
  164. }
  165. if !jid.IsEmpty() && !allowedMentions[jid] {
  166. allowedMentions[jid] = true
  167. mentionedJIDs = append(mentionedJIDs, jid.String())
  168. }
  169. }
  170. ctx.ReturnData[allowedMentionsContextKey] = allowedMentions
  171. }
  172. result := formatter.matrixHTMLParser.Parse(html, ctx)
  173. if mentions == nil {
  174. mentionedJIDs, _ = ctx.ReturnData[mentionedJIDsContextKey].([]string)
  175. sort.Strings(mentionedJIDs)
  176. mentionedJIDs = slices.Compact(mentionedJIDs)
  177. }
  178. return result, mentionedJIDs
  179. }
  180. func (formatter *Formatter) ParseMatrixWithoutMentions(html string) string {
  181. ctx := format.NewContext()
  182. ctx.ReturnData[allowedMentionsContextKey] = map[types.JID]struct{}{}
  183. return formatter.matrixHTMLParser.Parse(html, ctx)
  184. }