formatting.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  // mautrix-whatsapp - A Matrix-WhatsApp puppeting bridge.
  // Copyright (C) 2021 Tulir Asokan
  //
  // This program is free software: you can redistribute it and/or modify
  // it under the terms of the GNU Affero General Public License as published by
  // the Free Software Foundation, either version 3 of the License, or
  // (at your option) any later version.
  //
  // This program is distributed in the hope that it will be useful,
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  // GNU Affero General Public License for more details.
  //
  // You should have received a copy of the GNU Affero General Public License
  // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package main
  17. import (
  18. "fmt"
  19. "html"
  20. "regexp"
  21. "strings"
  22. "go.mau.fi/whatsmeow/types"
  23. "maunium.net/go/mautrix/event"
  24. "maunium.net/go/mautrix/format"
  25. "maunium.net/go/mautrix/id"
  26. )
  // Patterns used to recognise WhatsApp-style markup in message text.
  //
  // The three inline-style patterns share one shape: group 1 is the character
  // before the opening delimiter (or start of text), group 2 is the delimited
  // content, and group 3 is the character after the closing delimiter (or end of
  // text), which must not be an ASCII letter or digit.
  var (
  	// italicRegex matches _text_ preceded by whitespace, '>', '~', '*' or start of text.
  	italicRegex = regexp.MustCompile("([\\s>~*]|^)_(.+?)_([^a-zA-Z\\d]|$)")
  	// boldRegex matches *text* preceded by whitespace, '>', '_', '~' or start of text.
  	boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)")
  	// strikethroughRegex matches ~text~ preceded by whitespace, '>', '_', '*' or start of text.
  	strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)")
  	// codeBlockRegex matches the shortest run of one or more characters
  	// (newlines included) enclosed in triple backticks.
  	codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```")
  	// inlineURLRegex matches [label](target); group 1 is the label, group 2 the target.
  	inlineURLRegex = regexp.MustCompile(`\[(.+?)]\((.+?)\)`)
  )

  // mentionedJIDsContextKey is the format.Context key under which the JIDs
  // mentioned in a parsed Matrix message are collected as a []string.
  const mentionedJIDsContextKey = "net.maunium.whatsapp.mentioned_jids"
  33. type Formatter struct {
  34. bridge *WABridge
  35. matrixHTMLParser *format.HTMLParser
  36. waReplString map[*regexp.Regexp]string
  37. waReplFunc map[*regexp.Regexp]func(string) string
  38. waReplFuncText map[*regexp.Regexp]func(string) string
  39. }
  40. func NewFormatter(bridge *WABridge) *Formatter {
  41. formatter := &Formatter{
  42. bridge: bridge,
  43. matrixHTMLParser: &format.HTMLParser{
  44. TabsToSpaces: 4,
  45. Newline: "\n",
  46. PillConverter: func(displayname, mxid, eventID string, ctx format.Context) string {
  47. if mxid[0] == '@' {
  48. puppet := bridge.GetPuppetByMXID(id.UserID(mxid))
  49. if puppet != nil {
  50. jids, ok := ctx[mentionedJIDsContextKey].([]string)
  51. if !ok {
  52. ctx[mentionedJIDsContextKey] = []string{puppet.JID.String()}
  53. } else {
  54. ctx[mentionedJIDsContextKey] = append(jids, puppet.JID.String())
  55. }
  56. return "@" + puppet.JID.User
  57. }
  58. }
  59. return mxid
  60. },
  61. BoldConverter: func(text string, _ format.Context) string { return fmt.Sprintf("*%s*", text) },
  62. ItalicConverter: func(text string, _ format.Context) string { return fmt.Sprintf("_%s_", text) },
  63. StrikethroughConverter: func(text string, _ format.Context) string { return fmt.Sprintf("~%s~", text) },
  64. MonospaceConverter: func(text string, _ format.Context) string { return fmt.Sprintf("```%s```", text) },
  65. MonospaceBlockConverter: func(text, language string, _ format.Context) string { return fmt.Sprintf("```%s```", text) },
  66. },
  67. waReplString: map[*regexp.Regexp]string{
  68. italicRegex: "$1<em>$2</em>$3",
  69. boldRegex: "$1<strong>$2</strong>$3",
  70. strikethroughRegex: "$1<del>$2</del>$3",
  71. },
  72. }
  73. formatter.waReplFunc = map[*regexp.Regexp]func(string) string{
  74. codeBlockRegex: func(str string) string {
  75. str = str[3 : len(str)-3]
  76. if strings.ContainsRune(str, '\n') {
  77. return fmt.Sprintf("<pre><code>%s</code></pre>", str)
  78. }
  79. return fmt.Sprintf("<code>%s</code>", str)
  80. },
  81. }
  82. formatter.waReplFuncText = map[*regexp.Regexp]func(string) string{}
  83. return formatter
  84. }
  85. func (formatter *Formatter) getMatrixInfoByJID(roomID id.RoomID, jid types.JID) (mxid id.UserID, displayname string) {
  86. if puppet := formatter.bridge.GetPuppetByJID(jid); puppet != nil {
  87. mxid = puppet.MXID
  88. displayname = puppet.Displayname
  89. }
  90. if user := formatter.bridge.GetUserByJID(jid); user != nil {
  91. mxid = user.MXID
  92. member := formatter.bridge.StateStore.GetMember(roomID, user.MXID)
  93. if len(member.Displayname) > 0 {
  94. displayname = member.Displayname
  95. }
  96. }
  97. return
  98. }
  99. func (formatter *Formatter) ParseWhatsApp(roomID id.RoomID, content *event.MessageEventContent, mentionedJIDs []string, allowInlineURL, forceHTML bool) {
  100. output := html.EscapeString(content.Body)
  101. for regex, replacement := range formatter.waReplString {
  102. output = regex.ReplaceAllString(output, replacement)
  103. }
  104. for regex, replacer := range formatter.waReplFunc {
  105. output = regex.ReplaceAllStringFunc(output, replacer)
  106. }
  107. if allowInlineURL {
  108. output = inlineURLRegex.ReplaceAllStringFunc(output, func(s string) string {
  109. groups := inlineURLRegex.FindStringSubmatch(s)
  110. return fmt.Sprintf(`<a href="%s">%s</a>`, groups[2], groups[1])
  111. })
  112. }
  113. for _, rawJID := range mentionedJIDs {
  114. jid, err := types.ParseJID(rawJID)
  115. if err != nil {
  116. continue
  117. } else if jid.Server == types.LegacyUserServer {
  118. jid.Server = types.DefaultUserServer
  119. }
  120. mxid, displayname := formatter.getMatrixInfoByJID(roomID, jid)
  121. number := "@" + jid.User
  122. output = strings.ReplaceAll(output, number, fmt.Sprintf(`<a href="https://matrix.to/#/%s">%s</a>`, mxid, displayname))
  123. content.Body = strings.ReplaceAll(content.Body, number, displayname)
  124. }
  125. if output != content.Body || forceHTML {
  126. output = strings.ReplaceAll(output, "\n", "<br/>")
  127. content.FormattedBody = output
  128. content.Format = event.FormatHTML
  129. for regex, replacer := range formatter.waReplFuncText {
  130. content.Body = regex.ReplaceAllStringFunc(content.Body, replacer)
  131. }
  132. }
  133. }
  134. func (formatter *Formatter) ParseMatrix(html string) (string, []string) {
  135. ctx := make(format.Context)
  136. result := formatter.matrixHTMLParser.Parse(html, ctx)
  137. mentionedJIDs, _ := ctx[mentionedJIDsContextKey].([]string)
  138. return result, mentionedJIDs
  139. }