Переглянути джерело

Improve Discord markdown escaper. Fixes #14

Tulir Asokan 2 роки тому
батько
коміт
0b84527eab
3 змінених файлів з 96 додано та 12 видалено
  1. 36 12
      formatter.go
  2. 57 0
      formatter_test.go
  3. 3 0
      go.mod

+ 36 - 12
formatter.go

@@ -91,6 +91,40 @@ func pillConverter(displayname, mxid, eventID string, ctx format.Context) string
 	return displayname
 }
 
+// discordLinkRegex matches the parts of a message that Discord auto-links, so
+// that they can be passed through without markdown escaping.
+//
+// Discord links start with http:// or https://, contain at least two characters afterwards,
+// don't contain < or whitespace anywhere, and don't end with "'),.:;]
+// The pattern is slightly more lenient than that description: it accepts a
+// single character after the scheme, and only forbids < before the final character.
+//
+// "Whitespace" covers the ASCII control whitespace (\t \n \v \f \r), the Unicode
+// space separators (Zs), the line/paragraph separators U+2028 and U+2029, and
+// U+FEFF (presumably mirroring what \s matches in Discord's own parser).
+// Other zero-width whitespace is mostly in the Format category and is allowed.
+//
+// The control characters have to be listed explicitly: a negated character class
+// in Go's regexp also matches newlines, so without them a link would swallow the
+// following lines of a message and leave their markdown unescaped.
+var discordLinkRegex = regexp.MustCompile(`https?://[^<\t\n\v\f\r\p{Zs}\x{2028}\x{2029}\x{feff}]*[^"'),.:;\]\t\n\v\f\r\p{Zs}\x{2028}\x{2029}\x{feff}]`)
+
+// discordMarkdownEscaper prefixes every character that is special in Discord
+// markdown (including the backslash itself) with a backslash.
+var discordMarkdownEscaper = func() *strings.Replacer {
+	// Each character listed here is replaced by a backslash followed by itself.
+	const specialChars = "\\_*~`|<"
+	oldnew := make([]string, 0, 2*len(specialChars))
+	for _, char := range specialChars {
+		oldnew = append(oldnew, string(char), `\`+string(char))
+	}
+	return strings.NewReplacer(oldnew...)
+}()
+
+// escapeDiscordMarkdown escapes Discord markdown in s while leaving every
+// substring matched by discordLinkRegex untouched.
+func escapeDiscordMarkdown(s string) string {
+	links := discordLinkRegex.FindAllStringIndex(s, -1)
+	if links == nil {
+		// No links at all: the whole string can be escaped in one go.
+		return discordMarkdownEscaper.Replace(s)
+	}
+	var out strings.Builder
+	cursor := 0
+	for _, link := range links {
+		linkStart, linkEnd := link[0], link[1]
+		// Escape the plain text leading up to the link, then copy the link verbatim.
+		out.WriteString(discordMarkdownEscaper.Replace(s[cursor:linkStart]))
+		out.WriteString(s[linkStart:linkEnd])
+		cursor = linkEnd
+	}
+	// Escape whatever plain text follows the last link.
+	out.WriteString(discordMarkdownEscaper.Replace(s[cursor:]))
+	return out.String()
+}
+
 var matrixHTMLParser = &format.HTMLParser{
 	TabsToSpaces:   4,
 	Newline:        "\n",
@@ -102,7 +136,7 @@ var matrixHTMLParser = &format.HTMLParser{
 		return fmt.Sprintf("__%s__", s)
 	},
 	TextConverter: func(s string, context format.Context) string {
-		return discordMarkdownEscaper.Replace(s)
+		return escapeDiscordMarkdown(s)
 	},
 	SpoilerConverter: func(text, reason string, ctx format.Context) string {
 		if reason != "" {
@@ -116,16 +150,6 @@ func init() {
 	matrixHTMLParser.PillConverter = pillConverter
 }
 
-var discordMarkdownEscaper = strings.NewReplacer(
-	`\`, `\\`,
-	`_`, `\_`,
-	`*`, `\*`,
-	`~`, `\~`,
-	"`", "\\`",
-	`|`, `\|`,
-	`<`, `\<`,
-)
-
 func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventContent) string {
 	if content.Format == event.FormatHTML && len(content.FormattedBody) > 0 {
 		return matrixHTMLParser.Parse(content.FormattedBody, format.Context{
@@ -133,6 +157,6 @@ func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventCon
 			formatterContextPortalKey: portal,
 		})
 	} else {
-		return discordMarkdownEscaper.Replace(content.Body)
+		return escapeDiscordMarkdown(content.Body)
 	}
 }

+ 57 - 0
formatter_test.go

@@ -0,0 +1,57 @@
+// mautrix-discord - A Matrix-Discord puppeting bridge.
+// Copyright (C) 2022 Tulir Asokan
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+package main
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestEscapeDiscordMarkdown checks that escapeDiscordMarkdown escapes markdown
+// control characters in plain text and leaves the contents of links alone.
+func TestEscapeDiscordMarkdown(t *testing.T) {
+	// Each case is: subtest name, input, expected output.
+	cases := []struct {
+		desc string
+		in   string
+		want string
+	}{
+		{"Simple text", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit.", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit."},
+		{"Backslash", `foo\bar`, `foo\\bar`},
+		{"Underscore", `foo_bar`, `foo\_bar`},
+		{"Asterisk", `foo*bar`, `foo\*bar`},
+		{"Tilde", `foo~bar`, `foo\~bar`},
+		{"Backtick", "foo`bar", "foo\\`bar"},
+		{"Forward tick", `foo´bar`, `foo´bar`},
+		{"Pipe", `foo|bar`, `foo\|bar`},
+		{"Less than", `foo<bar`, `foo\<bar`},
+		{"Greater than", `foo>bar`, `foo>bar`},
+		{"Multiple things", `\_*~|`, `\\\_\*\~\|`},
+		{"URL", `https://example.com/foo_bar`, `https://example.com/foo_bar`},
+		{"Multiple URLs", `hello_world https://example.com/foo_bar *testing* https://a_b_c/*def*`, `hello\_world https://example.com/foo_bar \*testing\* https://a_b_c/*def*`},
+		{"URL ends with no-break zero-width space", "https://example.com\ufefffoo_bar", "https://example.com\ufefffoo\\_bar"},
+		{"URL ends with less than", `https://example.com<foo_bar`, `https://example.com<foo\_bar`},
+		{"Short URL", `https://_`, `https://_`},
+		{"Insecure URL", `http://example.com/foo_bar`, `http://example.com/foo_bar`},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := escapeDiscordMarkdown(tc.in)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}

+ 3 - 0
go.mod

@@ -9,14 +9,17 @@ require (
 	github.com/lib/pq v1.10.6
 	github.com/mattn/go-sqlite3 v1.14.15
 	github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
+	github.com/stretchr/testify v1.8.0
 	github.com/yuin/goldmark v1.4.12
 	maunium.net/go/maulogger/v2 v2.3.2
 	maunium.net/go/mautrix v0.12.0
 )
 
 require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/mattn/go-colorable v0.1.12 // indirect
 	github.com/mattn/go-isatty v0.0.14 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rs/zerolog v1.27.0 // indirect
 	github.com/tidwall/gjson v1.14.1 // indirect
 	github.com/tidwall/match v1.1.1 // indirect