metrics.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. // mautrix-whatsapp - A Matrix-WhatsApp puppeting bridge.
  2. // Copyright (C) 2020 Tulir Asokan
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package main
  17. import (
  18. "context"
  19. "net/http"
  20. "runtime/debug"
  21. "time"
  22. "github.com/prometheus/client_golang/prometheus"
  23. "github.com/prometheus/client_golang/prometheus/promauto"
  24. "github.com/prometheus/client_golang/prometheus/promhttp"
  25. log "maunium.net/go/maulogger/v2"
  26. "maunium.net/go/mautrix/event"
  27. "maunium.net/go/mautrix/id"
  28. "maunium.net/go/mautrix-whatsapp/database"
  29. "maunium.net/go/mautrix-whatsapp/types"
  30. )
  31. type MetricsHandler struct {
  32. db *database.Database
  33. server *http.Server
  34. log log.Logger
  35. running bool
  36. ctx context.Context
  37. stopRecorder func()
  38. messageHandling *prometheus.HistogramVec
  39. countCollection prometheus.Histogram
  40. disconnections *prometheus.CounterVec
  41. puppetCount prometheus.Gauge
  42. userCount prometheus.Gauge
  43. messageCount prometheus.Gauge
  44. portalCount *prometheus.GaugeVec
  45. encryptedGroupCount prometheus.Gauge
  46. encryptedPrivateCount prometheus.Gauge
  47. unencryptedGroupCount prometheus.Gauge
  48. unencryptedPrivateCount prometheus.Gauge
  49. connected prometheus.Gauge
  50. connectedState map[types.WhatsAppID]bool
  51. loggedIn prometheus.Gauge
  52. loggedInState map[types.WhatsAppID]bool
  53. }
  54. func NewMetricsHandler(address string, log log.Logger, db *database.Database) *MetricsHandler {
  55. portalCount := promauto.NewGaugeVec(prometheus.GaugeOpts{
  56. Name: "whatsapp_portals_total",
  57. Help: "Number of portal rooms on Matrix",
  58. }, []string{"type", "encrypted"})
  59. return &MetricsHandler{
  60. db: db,
  61. server: &http.Server{Addr: address, Handler: promhttp.Handler()},
  62. log: log,
  63. running: false,
  64. messageHandling: promauto.NewHistogramVec(prometheus.HistogramOpts{
  65. Name: "matrix_event",
  66. Help: "Time spent processing Matrix events",
  67. }, []string{"event_type"}),
  68. countCollection: promauto.NewHistogram(prometheus.HistogramOpts{
  69. Name: "whatsapp_count_collection",
  70. Help: "Time spent collecting the whatsapp_*_total metrics",
  71. }),
  72. disconnections: promauto.NewCounterVec(prometheus.CounterOpts{
  73. Name: "whatsapp_disconnections",
  74. Help: "Number of times a Matrix user has been disconnected from WhatsApp",
  75. }, []string{"user_id"}),
  76. puppetCount: promauto.NewGauge(prometheus.GaugeOpts{
  77. Name: "whatsapp_puppets_total",
  78. Help: "Number of WhatsApp users bridged into Matrix",
  79. }),
  80. userCount: promauto.NewGauge(prometheus.GaugeOpts{
  81. Name: "whatsapp_users_total",
  82. Help: "Number of Matrix users using the bridge",
  83. }),
  84. messageCount: promauto.NewGauge(prometheus.GaugeOpts{
  85. Name: "whatsapp_messages_total",
  86. Help: "Number of messages bridged",
  87. }),
  88. portalCount: portalCount,
  89. encryptedGroupCount: portalCount.With(prometheus.Labels{"type": "group", "encrypted": "true"}),
  90. encryptedPrivateCount: portalCount.With(prometheus.Labels{"type": "private", "encrypted": "true"}),
  91. unencryptedGroupCount: portalCount.With(prometheus.Labels{"type": "group", "encrypted": "false"}),
  92. unencryptedPrivateCount: portalCount.With(prometheus.Labels{"type": "private", "encrypted": "false"}),
  93. loggedIn: promauto.NewGauge(prometheus.GaugeOpts{
  94. Name: "bridge_logged_in",
  95. Help: "Users logged into the bridge",
  96. }),
  97. loggedInState: make(map[types.WhatsAppID]bool),
  98. connected: promauto.NewGauge(prometheus.GaugeOpts{
  99. Name: "bridge_connected",
  100. Help: "Bridge users connected to WhatsApp",
  101. }),
  102. connectedState: make(map[types.WhatsAppID]bool),
  103. }
  104. }
  // noop does nothing. TrackEvent hands it back as the "finish" callback when
  // metrics are not running, so callers can always invoke the result.
  func noop() {
  }
  106. func (mh *MetricsHandler) TrackEvent(eventType event.Type) func() {
  107. if !mh.running {
  108. return noop
  109. }
  110. start := time.Now()
  111. return func() {
  112. duration := time.Now().Sub(start)
  113. mh.messageHandling.
  114. With(prometheus.Labels{"event_type": eventType.Type}).
  115. Observe(duration.Seconds())
  116. }
  117. }
  118. func (mh *MetricsHandler) TrackDisconnection(userID id.UserID) {
  119. if !mh.running {
  120. return
  121. }
  122. mh.disconnections.With(prometheus.Labels{"user_id": string(userID)}).Inc()
  123. }
  124. func (mh *MetricsHandler) TrackLoginState(jid types.WhatsAppID, loggedIn bool) {
  125. if !mh.running {
  126. return
  127. }
  128. currentVal, ok := mh.loggedInState[jid]
  129. if !ok || currentVal != loggedIn {
  130. mh.loggedInState[jid] = loggedIn
  131. if loggedIn {
  132. mh.loggedIn.Inc()
  133. } else {
  134. mh.loggedIn.Dec()
  135. }
  136. }
  137. }
  138. func (mh *MetricsHandler) TrackConnectionState(jid types.WhatsAppID, connected bool) {
  139. if !mh.running {
  140. return
  141. }
  142. currentVal, ok := mh.connectedState[jid]
  143. if !ok || currentVal != connected {
  144. mh.connectedState[jid] = connected
  145. if connected {
  146. mh.connected.Inc()
  147. } else {
  148. mh.connected.Dec()
  149. }
  150. }
  151. }
  152. func (mh *MetricsHandler) updateStats() {
  153. start := time.Now()
  154. var puppetCount int
  155. err := mh.db.QueryRowContext(mh.ctx, "SELECT COUNT(*) FROM puppet").Scan(&puppetCount)
  156. if err != nil {
  157. mh.log.Warnln("Failed to scan number of puppets:", err)
  158. } else {
  159. mh.puppetCount.Set(float64(puppetCount))
  160. }
  161. var userCount int
  162. err = mh.db.QueryRowContext(mh.ctx, `SELECT COUNT(*) FROM "user"`).Scan(&userCount)
  163. if err != nil {
  164. mh.log.Warnln("Failed to scan number of users:", err)
  165. } else {
  166. mh.userCount.Set(float64(userCount))
  167. }
  168. var messageCount int
  169. err = mh.db.QueryRowContext(mh.ctx, "SELECT COUNT(*) FROM message").Scan(&messageCount)
  170. if err != nil {
  171. mh.log.Warnln("Failed to scan number of messages:", err)
  172. } else {
  173. mh.messageCount.Set(float64(messageCount))
  174. }
  175. var encryptedGroupCount, encryptedPrivateCount, unencryptedGroupCount, unencryptedPrivateCount int
  176. err = mh.db.QueryRowContext(mh.ctx, `
  177. SELECT
  178. COUNT(CASE WHEN jid LIKE '%@g.us' AND encrypted THEN 1 END) AS encrypted_group_portals,
  179. COUNT(CASE WHEN jid LIKE '%@s.whatsapp.net' AND encrypted THEN 1 END) AS encrypted_private_portals,
  180. COUNT(CASE WHEN jid LIKE '%@g.us' AND NOT encrypted THEN 1 END) AS unencrypted_group_portals,
  181. COUNT(CASE WHEN jid LIKE '%@s.whatsapp.net' AND NOT encrypted THEN 1 END) AS unencrypted_private_portals
  182. FROM portal WHERE mxid<>''
  183. `).Scan(&encryptedGroupCount, &encryptedPrivateCount, &unencryptedGroupCount, &unencryptedPrivateCount)
  184. if err != nil {
  185. mh.log.Warnln("Failed to scan number of portals:", err)
  186. } else {
  187. mh.encryptedGroupCount.Set(float64(encryptedGroupCount))
  188. mh.encryptedPrivateCount.Set(float64(encryptedPrivateCount))
  189. mh.unencryptedGroupCount.Set(float64(unencryptedGroupCount))
  190. mh.unencryptedPrivateCount.Set(float64(encryptedPrivateCount))
  191. }
  192. mh.countCollection.Observe(time.Now().Sub(start).Seconds())
  193. }
  194. func (mh *MetricsHandler) startUpdatingStats() {
  195. defer func() {
  196. err := recover()
  197. if err != nil {
  198. mh.log.Fatalfln("Panic in metric updater: %v\n%s", err, string(debug.Stack()))
  199. }
  200. }()
  201. ticker := time.Tick(10 * time.Second)
  202. for {
  203. mh.updateStats()
  204. select {
  205. case <-mh.ctx.Done():
  206. return
  207. case <-ticker:
  208. }
  209. }
  210. }
  211. func (mh *MetricsHandler) Start() {
  212. mh.running = true
  213. mh.ctx, mh.stopRecorder = context.WithCancel(context.Background())
  214. go mh.startUpdatingStats()
  215. err := mh.server.ListenAndServe()
  216. mh.running = false
  217. if err != nil && err != http.ErrServerClosed {
  218. mh.log.Fatalln("Error in metrics listener:", err)
  219. }
  220. }
  221. func (mh *MetricsHandler) Stop() {
  222. if !mh.running {
  223. return
  224. }
  225. mh.stopRecorder()
  226. err := mh.server.Close()
  227. if err != nil {
  228. mh.log.Errorln("Error closing metrics listener:", err)
  229. }
  230. }