user.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. # mautrix-instagram - A Matrix-Instagram puppeting bridge.
  2. # Copyright (C) 2023 Tulir Asokan
  3. #
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Affero General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU Affero General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. from __future__ import annotations
  17. from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Awaitable, Callable, cast
  18. from datetime import datetime, timedelta
  19. from functools import partial
  20. import asyncio
  21. import logging
  22. import time
  23. from mauigpapi import AndroidAPI, AndroidMQTT, AndroidState
  24. from mauigpapi.errors import (
  25. IGChallengeError,
  26. IGCheckpointError,
  27. IGConsentRequiredError,
  28. IGNotLoggedInError,
  29. IGRateLimitError,
  30. IGUnknownError,
  31. IGUserIDNotFoundError,
  32. IrisSubscribeError,
  33. MQTTConnectionUnauthorized,
  34. MQTTNotConnected,
  35. MQTTNotLoggedIn,
  36. MQTTReconnectionError,
  37. )
  38. from mauigpapi.mqtt import (
  39. Connect,
  40. Disconnect,
  41. GraphQLSubscription,
  42. NewSequenceID,
  43. ProxyUpdate,
  44. SkywalkerSubscription,
  45. )
  46. from mauigpapi.types import (
  47. ActivityIndicatorData,
  48. CurrentUser,
  49. MessageSyncEvent,
  50. Operation,
  51. RealtimeDirectEvent,
  52. Thread,
  53. ThreadRemoveEvent,
  54. ThreadSyncEvent,
  55. TypingStatus,
  56. )
  57. from mauigpapi.types.direct_inbox import DMInbox, DMInboxResponse
  58. from mautrix.appservice import AppService
  59. from mautrix.bridge import BaseUser, async_getter_lock
  60. from mautrix.types import EventID, MessageType, RoomID, TextMessageEventContent, UserID
  61. from mautrix.util import background_task
  62. from mautrix.util.bridge_state import BridgeState, BridgeStateEvent
  63. from mautrix.util.logging import TraceLogger
  64. from mautrix.util.opt_prometheus import Gauge, Summary, async_time
  65. from mautrix.util.proxy import RETRYABLE_PROXY_EXCEPTIONS, ProxyHandler
  66. from mautrix.util.simple_lock import SimpleLock
  67. from . import portal as po, puppet as pu
  68. from .config import Config
  69. from .db import Backfill, Message as DBMessage, Portal as DBPortal, User as DBUser
  70. if TYPE_CHECKING:
  71. from .__main__ import InstagramBridge
# Metrics created through mautrix.util.opt_prometheus. The Summary objects are named after
# the event handlers they describe; the Gauge objects are updated through
# ``User._track_metric`` when a user logs in or connects.
METRIC_MESSAGE = Summary("bridge_on_message", "calls to handle_message")
METRIC_THREAD_SYNC = Summary("bridge_on_thread_sync", "calls to handle_thread_sync")
METRIC_RTD = Summary("bridge_on_rtd", "calls to handle_rtd")
METRIC_LOGGED_IN = Gauge("bridge_logged_in", "Users logged into the bridge")
METRIC_CONNECTED = Gauge("bridge_connected", "Bridged users connected to Instagram")

# Register the human-readable text for the bridge state error codes used by this bridge.
# NOTE(review): "{message}" looks like a placeholder filled in from the bridge state's
# message field by mautrix - confirm against mautrix.util.bridge_state.
BridgeState.human_readable_errors.update(
    {
        "ig-connection-error": "Instagram disconnected unexpectedly",
        "ig-refresh-connection-error": "Reconnecting failed again after refresh: {message}",
        "ig-connection-fatal-error": "Instagram disconnected unexpectedly",
        "ig-auth-error": "Authentication error from Instagram: {message}, please login again to continue",
        "ig-checkpoint": "Instagram checkpoint error. Please check the Instagram website.",
        "ig-consent-required": "Instagram requires a consent update. Please check the Instagram website.",
        "ig-checkpoint-locked": "Instagram checkpoint error. Please check the Instagram website.",
        "ig-rate-limit": "Got Instagram ratelimit error, waiting a few minutes before retrying...",
        # This code is registered without any human-readable text.
        "ig-disconnected": None,
        "logged-out": "You've been logged out of instagram, please login again to continue",
    }
)
class User(DBUser, BaseUser):
    # A bridge user: combines the database model (DBUser) with mautrix's BaseUser and holds
    # the Instagram HTTP (AndroidAPI) and MQTT (AndroidMQTT) clients for that user.

    # Base logger; per-user "http" and "mqtt" child loggers are derived from it.
    ig_base_log: TraceLogger = logging.getLogger("mau.instagram")
    _activity_indicator_ids: dict[str, int] = {}
    # Class-level lookup tables shared by all instances.
    by_mxid: dict[UserID, User] = {}
    by_igpk: dict[int, User] = {}

    # The next three are assigned on the class by init_cls().
    config: Config
    az: AppService
    loop: asyncio.AbstractEventLoop

    # Both clients are None until connect() succeeds.
    client: AndroidAPI | None
    mqtt: AndroidMQTT | None

    # Background tasks and the lock used to serialize thread syncs.
    _listen_task: asyncio.Task | None = None
    _sync_lock: SimpleLock
    _backfill_loop_task: asyncio.Task | None
    _thread_sync_task: asyncio.Task | None
    _seq_id_save_task: asyncio.Task | None

    permission_level: str
    username: str | None

    # Locks guarding notice room creation and the ordering of bridge notices.
    _notice_room_lock: asyncio.Lock
    _notice_send_lock: asyncio.Lock

    _is_logged_in: bool
    _is_connected: bool
    shutdown: bool
    remote_typing_status: TypingStatus | None
  114. def __init__(
  115. self,
  116. mxid: UserID,
  117. igpk: int | None = None,
  118. state: AndroidState | None = None,
  119. notice_room: RoomID | None = None,
  120. seq_id: int | None = None,
  121. snapshot_at_ms: int | None = None,
  122. oldest_cursor: str | None = None,
  123. total_backfilled_portals: int | None = None,
  124. thread_sync_completed: bool = False,
  125. ) -> None:
  126. super().__init__(
  127. mxid=mxid,
  128. igpk=igpk,
  129. state=state,
  130. notice_room=notice_room,
  131. seq_id=seq_id,
  132. snapshot_at_ms=snapshot_at_ms,
  133. oldest_cursor=oldest_cursor,
  134. total_backfilled_portals=total_backfilled_portals,
  135. thread_sync_completed=thread_sync_completed,
  136. )
  137. BaseUser.__init__(self)
  138. self._notice_room_lock = asyncio.Lock()
  139. self._notice_send_lock = asyncio.Lock()
  140. perms = self.config.get_permissions(mxid)
  141. self.relay_whitelisted, self.is_whitelisted, self.is_admin, self.permission_level = perms
  142. self.client = None
  143. self.mqtt = None
  144. self.username = None
  145. self._is_logged_in = False
  146. self._is_connected = False
  147. self._is_refreshing = False
  148. self.shutdown = False
  149. self._sync_lock = SimpleLock(
  150. "Waiting for thread sync to finish before handling %s", log=self.log
  151. )
  152. self._listen_task = None
  153. self._thread_sync_task = None
  154. self._backfill_loop_task = None
  155. self.remote_typing_status = None
  156. self._seq_id_save_task = None
  157. self.proxy_handler = ProxyHandler(
  158. api_url=self.config["bridge.get_proxy_api_url"],
  159. )
    @classmethod
    def init_cls(cls, bridge: "InstagramBridge") -> AsyncIterable[Awaitable[None]]:
        """
        Store bridge-wide objects on the class and prepare connecting all logged-in users.

        :param bridge: The bridge instance whose ``config``, ``az`` and ``loop`` are copied
            onto the class.
        :return: An async iterable yielding one ``try_connect()`` awaitable for each user
            produced by ``cls.all_logged_in()``. The awaitables are not awaited here.
        """
        cls.bridge = bridge
        cls.config = bridge.config
        cls.az = bridge.az
        cls.loop = bridge.loop
        return (user.try_connect() async for user in cls.all_logged_in())
  167. # region Connection management
  168. async def is_logged_in(self) -> bool:
  169. return bool(self.client) and self._is_logged_in
  170. async def get_puppet(self) -> pu.Puppet | None:
  171. if not self.igpk:
  172. return None
  173. return await pu.Puppet.get_by_pk(self.igpk)
  174. async def get_portal_with(self, puppet: pu.Puppet, create: bool = True) -> po.Portal | None:
  175. if not self.igpk:
  176. return None
  177. portal = await po.Portal.find_private_chat(self.igpk, puppet.pk)
  178. if portal:
  179. return portal
  180. if create:
  181. # TODO add error handling somewhere
  182. thread = await self.client.create_group_thread([puppet.pk])
  183. portal = await po.Portal.get_by_thread(thread, self.igpk)
  184. await portal.update_info(thread, self)
  185. return portal
  186. return None
  187. async def try_connect(self) -> None:
  188. try:
  189. await self.connect()
  190. except Exception as e:
  191. self.log.exception("Error while connecting to Instagram")
  192. await self.push_bridge_state(
  193. BridgeStateEvent.UNKNOWN_ERROR, info={"python_error": str(e)}
  194. )
  195. @property
  196. def api_log(self) -> TraceLogger:
  197. return self.ig_base_log.getChild("http").getChild(self.mxid)
  198. @property
  199. def is_connected(self) -> bool:
  200. return bool(self.client) and bool(self.mqtt) and self._is_connected
  201. async def ensure_connected(self, max_wait_seconds: int = 5) -> None:
  202. sleep_interval = 0.1
  203. max_attempts = max_wait_seconds / sleep_interval
  204. attempts = 0
  205. while True:
  206. if self.is_connected:
  207. return
  208. attempts += 1
  209. if attempts > max_attempts:
  210. raise Exception("You're not connected to instagram")
  211. await asyncio.sleep(sleep_interval)
    async def connect(self, user: CurrentUser | None = None) -> None:
        """
        Create the HTTP API client and the MQTT client for this user.

        Without stored state, a ``BAD_CREDENTIALS`` bridge state is pushed and nothing else
        happens. Otherwise the current user info is fetched (unless ``user`` was passed in),
        the MQTT event handlers are registered, ``self.update()`` is called, and the puppet
        sync and :meth:`_post_connect` are scheduled on the event loop.

        :param user: Already fetched info about the logged-in Instagram user. When omitted,
            it is requested with ``client.current_user()``.
        :raises IGCheckpointError: Re-raised after logging the error body.
        """
        if not self.state:
            await self.push_bridge_state(
                BridgeStateEvent.BAD_CREDENTIALS,
                error="logged-out",
                info={"cnd_action": "reauth"},
            )
            return
        client = AndroidAPI(
            self.state,
            log=self.api_log,
            proxy_handler=self.proxy_handler,
            on_proxy_update=self.on_proxy_update,
        )

        if not user:
            try:
                resp = await client.current_user()
                user = resp.user
            except IGNotLoggedInError as e:
                self.log.warning(f"Failed to connect to Instagram: {e}, logging out")
                await self.logout(error=e)
                return
            except IGCheckpointError as e:
                self.log.debug("Checkpoint error content: %s", e.body)
                raise
            except (IGChallengeError, IGConsentRequiredError) as e:
                # self.client is not set yet, so the local client is passed explicitly.
                await self._handle_checkpoint(e, on="connect", client=client)
                return
        # Only store the client once the user info is known.
        self.client = client
        self._is_logged_in = True
        self.igpk = user.pk
        self.username = user.username
        await self.push_bridge_state(BridgeStateEvent.CONNECTING)
        self._track_metric(METRIC_LOGGED_IN, True)
        self.by_igpk[self.igpk] = self

        self.mqtt = AndroidMQTT(
            self.state,
            log=self.ig_base_log.getChild("mqtt").getChild(self.mxid),
            proxy_handler=self.proxy_handler,
        )
        # Route MQTT events to the handler methods of this user.
        self.mqtt.add_event_handler(Connect, self.on_connect)
        self.mqtt.add_event_handler(Disconnect, self.on_disconnect)
        self.mqtt.add_event_handler(NewSequenceID, self.update_seq_id)
        self.mqtt.add_event_handler(MessageSyncEvent, self.handle_message)
        self.mqtt.add_event_handler(ThreadSyncEvent, self.handle_thread_sync)
        self.mqtt.add_event_handler(ThreadRemoveEvent, self.handle_thread_remove)
        self.mqtt.add_event_handler(RealtimeDirectEvent, self.handle_rtd)
        self.mqtt.add_event_handler(ProxyUpdate, self.on_proxy_update)

        await self.update()

        # Both of these run in the background; connect() does not wait for them.
        self.loop.create_task(self._try_sync_puppet(user))
        self.loop.create_task(self._post_connect())
    async def _post_connect(self):
        """
        Start the background work that follows a successful :meth:`connect`.

        Starts the backfill request loop if it is not already running, then either runs a
        sync (when no ``seq_id`` is stored) or starts listening directly, and finally
        (re)starts the thread backfill task when backfilling is enabled.
        """
        # Backfill requests are handled synchronously so as not to overload the homeserver.
        # Users can configure their backfill stages to be more or less aggressive with backfilling
        # to try and avoid getting banned.
        if not self._backfill_loop_task or self._backfill_loop_task.done():
            self._backfill_loop_task = asyncio.create_task(self._handle_backfill_requests_loop())

        if not self.seq_id:
            await self._try_sync()
        else:
            # NOTE(review): this branch is taken when a seq_id is already stored; the
            # message still mentions resync_on_startup, which is not checked here.
            self.log.debug("Connecting to MQTT directly as resync_on_startup is false")
            self.start_listen()

        if self.config["bridge.backfill.enable"]:
            # Only one thread sync task is kept alive at a time.
            if self._thread_sync_task and not self._thread_sync_task.done():
                self.log.warning("Cancelling existing background thread sync task")
                self._thread_sync_task.cancel()
            self._thread_sync_task = asyncio.create_task(self.backfill_threads())
    async def _handle_backfill_requests_loop(self) -> None:
        """
        Process queued backfill requests for this user one at a time.

        Returns immediately unless both ``bridge.backfill.enable`` and
        ``bridge.backfill.msc2716`` are set. The loop ends when the user gets logged out or
        hits a challenge; any other error puts the request on a 60 second cooldown and the
        loop continues.
        """
        if not self.config["bridge.backfill.enable"] or not self.config["bridge.backfill.msc2716"]:
            return

        while True:
            # Don't start a backfill while a thread sync holds the sync lock.
            await self._sync_lock.wait("backfill request")
            req = await Backfill.get_next(self.mxid)
            if not req:
                # Nothing queued; poll again in 30 seconds.
                await asyncio.sleep(30)
                continue
            self.log.info("Backfill request %s", req)
            try:
                portal = await po.Portal.get_by_thread_id(
                    req.portal_thread_id, receiver=req.portal_receiver
                )
                await req.mark_dispatched()
                await portal.backfill(self, req)
                await req.mark_done()
            except IGNotLoggedInError as e:
                self.log.exception("User got logged out during backfill loop")
                await self.logout(error=e)
                break
            except (IGChallengeError, IGConsentRequiredError) as e:
                self.log.exception("User got a challenge during backfill loop")
                await self._handle_checkpoint(e, on="backfill")
                break
            except Exception as e:
                self.log.exception("Failed to backfill portal %s: %s", req.portal_thread_id, e)
                # Don't try again to backfill this portal for a minute.
                await req.set_cooldown_timeout(60)
        # Reached only through the break statements above.
        self._backfill_loop_task = None
    async def on_connect(self, evt: Connect) -> None:
        """
        Handle the MQTT ``Connect`` event.

        Marks the user as connected, updates the connected metric, sends a bridge notice and
        pushes the ``CONNECTED`` bridge state.

        :param evt: The connect event (not inspected).
        """
        self.log.debug("Connected to Instagram")
        self._track_metric(METRIC_CONNECTED, True)
        self._is_connected = True
        await self.send_bridge_notice("Connected to Instagram")
        await self.push_bridge_state(BridgeStateEvent.CONNECTED)
    async def on_disconnect(self, evt: Disconnect) -> None:
        """
        Handle the MQTT ``Disconnect`` event by clearing the connected flag and metric.

        :param evt: The disconnect event (not inspected).
        """
        self.log.debug("Disconnected from Instagram")
        self._track_metric(METRIC_CONNECTED, False)
        self._is_connected = False
    async def on_proxy_update(self, evt: ProxyUpdate | None = None) -> None:
        """
        Re-apply proxy settings on whichever clients currently exist.

        Used both as the ``on_proxy_update`` callback of the API client and as the MQTT
        ``ProxyUpdate`` event handler.

        :param evt: The proxy update event, if called as an event handler (not inspected).
        """
        if self.client:
            # Rebuild the HTTP session, passing the cookie jar from the stored state.
            self.client.setup_http(self.state.cookies.jar)
        if self.mqtt:
            self.mqtt.setup_proxy()
  324. # TODO this stuff could probably be moved to mautrix-python
  325. async def get_notice_room(self) -> RoomID:
  326. if not self.notice_room:
  327. async with self._notice_room_lock:
  328. # If someone already created the room while this call was waiting,
  329. # don't make a new room
  330. if self.notice_room:
  331. return self.notice_room
  332. creation_content = {}
  333. if not self.config["bridge.federate_rooms"]:
  334. creation_content["m.federate"] = False
  335. self.notice_room = await self.az.intent.create_room(
  336. is_direct=True,
  337. invitees=[self.mxid],
  338. topic="Instagram bridge notices",
  339. creation_content=creation_content,
  340. )
  341. await self.update()
  342. return self.notice_room
  343. async def fill_bridge_state(self, state: BridgeState) -> None:
  344. await super().fill_bridge_state(state)
  345. if not state.remote_id:
  346. if self.igpk:
  347. state.remote_id = str(self.igpk)
  348. else:
  349. try:
  350. state.remote_id = self.state.user_id
  351. except IGUserIDNotFoundError:
  352. state.remote_id = None
  353. if self.username:
  354. state.remote_name = f"@{self.username}"
  355. async def get_bridge_states(self) -> list[BridgeState]:
  356. if not self.state:
  357. return []
  358. state = BridgeState(state_event=BridgeStateEvent.UNKNOWN_ERROR)
  359. if self.is_connected:
  360. state.state_event = BridgeStateEvent.CONNECTED
  361. elif self._is_refreshing or self.mqtt:
  362. state.state_event = BridgeStateEvent.TRANSIENT_DISCONNECT
  363. return [state]
  364. async def send_bridge_notice(
  365. self,
  366. text: str,
  367. edit: EventID | None = None,
  368. state_event: BridgeStateEvent | None = None,
  369. important: bool = False,
  370. error_code: str | None = None,
  371. error_message: str | None = None,
  372. info: dict | None = None,
  373. ) -> EventID | None:
  374. if state_event:
  375. await self.push_bridge_state(
  376. state_event,
  377. error=error_code,
  378. message=error_message if error_code else text,
  379. info=info,
  380. )
  381. if self.config["bridge.disable_bridge_notices"]:
  382. return None
  383. if not important and not self.config["bridge.unimportant_bridge_notices"]:
  384. self.log.debug("Not sending unimportant bridge notice: %s", text)
  385. return None
  386. event_id = None
  387. try:
  388. self.log.debug("Sending bridge notice: %s", text)
  389. content = TextMessageEventContent(
  390. body=text, msgtype=(MessageType.TEXT if important else MessageType.NOTICE)
  391. )
  392. if edit:
  393. content.set_edit(edit)
  394. # This is locked to prevent notices going out in the wrong order
  395. async with self._notice_send_lock:
  396. event_id = await self.az.intent.send_message(await self.get_notice_room(), content)
  397. except Exception:
  398. self.log.warning("Failed to send bridge notice", exc_info=True)
  399. return edit or event_id
  400. async def _try_sync_puppet(self, user_info: CurrentUser) -> None:
  401. puppet = await pu.Puppet.get_by_pk(self.igpk)
  402. try:
  403. await puppet.update_info(user_info, self)
  404. except Exception:
  405. self.log.exception("Failed to update own puppet info")
  406. try:
  407. if puppet.custom_mxid != self.mxid and puppet.can_auto_login(self.mxid):
  408. self.log.info("Automatically enabling custom puppet")
  409. await puppet.switch_mxid(access_token="auto", mxid=self.mxid)
  410. except Exception:
  411. self.log.exception("Failed to automatically enable custom puppet")
  412. async def _try_sync(self) -> None:
  413. try:
  414. await self.sync()
  415. except Exception as e:
  416. self.log.exception("Exception while syncing")
  417. if isinstance(e, IGCheckpointError):
  418. self.log.debug("Checkpoint error content: %s", e.body)
  419. await self.push_bridge_state(
  420. BridgeStateEvent.UNKNOWN_ERROR, info={"python_error": str(e)}
  421. )
  422. async def get_direct_chats(self) -> dict[UserID, list[RoomID]]:
  423. return {
  424. pu.Puppet.get_mxid_from_id(portal.other_user_pk): [portal.mxid]
  425. for portal in await DBPortal.find_private_chats_of(self.igpk)
  426. if portal.mxid
  427. }
    async def refresh(self, resync: bool = True) -> None:
        """
        Stop listening, reset the pigeon session ID and start up again.

        ``_is_refreshing`` is set for the whole duration of the call.

        :param resync: When true, :meth:`sync` is retried until it succeeds, with a growing
            delay between attempts. When false, listening is simply restarted.
        """
        self._is_refreshing = True
        try:
            await self.stop_listen()
            self.state.reset_pigeon_session_id()
            if resync:
                retry_count = 0
                minutes = 1
                while True:
                    try:
                        await self.sync()
                        return
                    except Exception as e:
                        # The delay starts growing by a minute per failure once four
                        # failures have already happened, and stops growing at 10 minutes.
                        if retry_count >= 4 and minutes < 10:
                            minutes += 1
                        retry_count += 1
                        s = "s" if minutes != 1 else ""
                        self.log.exception(
                            f"Error while syncing for refresh, retrying in {minutes} minute{s}"
                        )
                        if isinstance(e, IGCheckpointError):
                            self.log.debug("Checkpoint error content: %s", e.body)
                        await self.push_bridge_state(
                            BridgeStateEvent.UNKNOWN_ERROR,
                            error="unknown-error",
                            message="An unknown error occurred while connecting to Instagram",
                            info={"python_error": str(e)},
                        )
                        await asyncio.sleep(minutes * 60)
            else:
                self.start_listen()
        finally:
            self._is_refreshing = False
    async def _handle_checkpoint(
        self,
        e: IGChallengeError | IGConsentRequiredError,
        on: str,
        client: AndroidAPI | None = None,
    ) -> None:
        """
        React to a challenge or consent-required error by dropping the clients and pushing a
        ``BAD_CREDENTIALS`` bridge state.

        :param e: The error that was raised by the API.
        :param on: Short description of what was being done when the error happened; only
            used in the log message.
        :param client: API client to reset the challenge with. Falls back to
            ``self.client`` as it was before being cleared.
        """
        self.log.warning(f"Got checkpoint error on {on}: {e.body.serialize()}")
        # Keep a reference to the client for the challenge reset before clearing it.
        client = client or self.client
        self.client = None
        self.mqtt = None
        if isinstance(e, IGConsentRequiredError):
            await self.push_bridge_state(
                BridgeStateEvent.BAD_CREDENTIALS,
                error="ig-consent-required",
                info=e.body.serialize(),
            )
            return

        error_code = "ig-checkpoint"
        try:
            resp = await client.challenge_reset()
            info = {
                "challenge_context": (
                    resp.challenge_context.serialize() if resp.challenge_context_str else None
                ),
                "step_name": resp.step_name,
                "step_data": resp.step_data.serialize() if resp.step_data else None,
                "user_id": resp.user_id,
                "action": resp.action,
                "status": resp.status,
                "challenge": e.body.challenge.serialize() if e.body.challenge else None,
            }
            self.log.debug(f"Challenge state: {resp.serialize()}")
            if resp.challenge_context.challenge_type_enum == "HACKED_LOCK":
                error_code = "ig-checkpoint-locked"
        except Exception:
            # Any failure above (including after info was built) falls back to the
            # minimal info that is available from the original error.
            self.log.exception("Error resetting challenge state")
            info = {"challenge": e.body.challenge.serialize() if e.body.challenge else None}
        await self.push_bridge_state(BridgeStateEvent.BAD_CREDENTIALS, error=error_code, info=info)
    async def _sync_thread(self, thread: Thread) -> bool:
        """
        Sync a specific thread. Returns whether the thread had messages after the last message in
        the database before the sync.

        When ``bridge.backfill.enable_initial`` is off, only the Matrix room is created or
        updated and True is returned.

        :param thread: The thread as returned by the inbox, including its newest items.
        """
        self.log.debug(f"Syncing thread {thread.thread_id}")

        # Messages to bridge forward; narrowed below when something is already bridged.
        forward_messages = thread.items

        assert self.client
        portal = await po.Portal.get_by_thread(thread, self.igpk)
        assert portal

        # Create or update the Matrix room
        if not portal.mxid:
            await portal.create_matrix_room(self, thread)
        else:
            await portal.update_matrix_room(self, thread)

        if not self.config["bridge.backfill.enable_initial"]:
            return True

        last_message = await DBMessage.get_last(portal.mxid)
        cursor = thread.oldest_cursor
        if last_message:
            original_number_of_messages = len(thread.items)
            # Keep only messages newer than the last one stored in the database.
            new_messages = [
                m for m in thread.items if last_message.ig_timestamp_ms < m.timestamp_ms
            ]
            forward_messages = new_messages

            portal.log.debug(
                f"{len(new_messages)}/{original_number_of_messages} messages are after most recent"
                " message."
            )

            # Fetch more messages until we get back to messages that have been bridged already.
            # A page where every message is new means older pages may contain new ones too.
            while len(new_messages) > 0 and len(new_messages) == original_number_of_messages:
                await asyncio.sleep(self.config["bridge.backfill.incremental.page_delay"])

                portal.log.debug("Fetching more messages for forward backfill")
                resp = await self.client.get_thread(portal.thread_id, cursor=cursor)
                if len(resp.thread.items) == 0:
                    break
                original_number_of_messages = len(resp.thread.items)
                new_messages = [
                    m for m in resp.thread.items if last_message.ig_timestamp_ms < m.timestamp_ms
                ]
                # The fetched page goes in front of what was collected so far.
                forward_messages = new_messages + forward_messages
                cursor = resp.thread.oldest_cursor
                portal.log.debug(
                    f"{len(new_messages)}/{original_number_of_messages} messages are after most "
                    "recent message."
                )
        elif not portal.first_event_id:
            self.log.debug(
                f"Skipping backfilling {portal.thread_id} as the first event ID is not known"
            )
            return False

        if forward_messages:
            portal.cursor = cursor
            await portal.update()

            # Mark as read if the thread's read_state is 0, or if a positive unread threshold
            # is configured and the first collected message is older than that many hours.
            mark_read = thread.read_state == 0 or (
                (hours := self.config["bridge.backfill.unread_hours_threshold"]) > 0
                and (
                    datetime.fromtimestamp(forward_messages[0].timestamp_ms / 1000)
                    < datetime.now() - timedelta(hours=hours)
                )
            )
            # The collected list is reversed before being handed to the portal.
            base_insertion_event_id = await portal.backfill_message_page(
                self,
                list(reversed(forward_messages)),
                forward=True,
                last_message=last_message,
                mark_read=mark_read,
            )
            if (
                not self.bridge.homeserver_software.is_hungry
                and self.config["bridge.backfill.msc2716"]
            ):
                await portal.send_post_backfill_dummy(
                    forward_messages[0].timestamp, base_insertion_event_id=base_insertion_event_id
                )
            if (
                mark_read
                and not self.bridge.homeserver_software.is_hungry
                and (puppet := await self.get_puppet())
            ):
                # Re-read the last message, as the backfill above may have added new ones.
                last_message = await DBMessage.get_last(portal.mxid)
                if last_message:
                    await puppet.intent_for(portal).mark_read(portal.mxid, last_message.mxid)

            await portal._update_read_receipts(thread.last_seen_at)

        if self.config["bridge.backfill.msc2716"]:
            await portal.enqueue_immediate_backfill(self, 1)
        return len(forward_messages) > 0
    async def sync(self, increment_total_backfilled_portals: bool = False) -> None:
        """
        Run :meth:`_sync` through :meth:`run_with_sync_lock`.

        :param increment_total_backfilled_portals: Passed through to :meth:`_sync`.
        """
        await self.run_with_sync_lock(partial(self._sync, increment_total_backfilled_portals))
    async def _sync(self, increment_total_backfilled_portals: bool = False) -> None:
        """
        Fetch the inbox, store the new sequence ID and sync the most recent threads.

        Rate limit errors are retried forever with a delay that grows by two minutes per
        attempt. Being logged out or hitting a challenge ends the sync without an error.

        :param increment_total_backfilled_portals: Whether each synced thread counts
            towards ``total_backfilled_portals``.
        :raises IGCheckpointError: Re-raised after logging the error body.
        """
        if not self._listen_task:
            self.state.reset_pigeon_session_id()
        sleep_minutes = 2
        while True:
            try:
                resp = await self.client.get_inbox()
                break
            except IGNotLoggedInError as e:
                self.log.exception("Got not logged in error while syncing")
                await self.logout(error=e)
                return
            except IGRateLimitError as e:
                self.log.error(
                    "Got ratelimit error while trying to get inbox (%s), retrying in %d minutes",
                    e.body,
                    sleep_minutes,
                )
                await self.push_bridge_state(
                    BridgeStateEvent.TRANSIENT_DISCONNECT, error="ig-rate-limit"
                )
                await asyncio.sleep(sleep_minutes * 60)
                sleep_minutes += 2
            except IGCheckpointError as e:
                self.log.debug("Checkpoint error content: %s", e.body)
                raise
            except (IGChallengeError, IGConsentRequiredError) as e:
                await self._handle_checkpoint(e, on="sync")
                return

        self.seq_id = resp.seq_id
        self.snapshot_at_ms = resp.snapshot_at_ms
        await self.save_seq_id()

        # Start listening before syncing threads, unless a listen task already exists.
        if not self._listen_task:
            self.start_listen(is_after_sync=True)

        sync_count = min(
            self.config["bridge.backfill.max_conversations"],
            self.config["bridge.max_startup_thread_sync_count"],
        )
        self.log.debug(f"Fetching {sync_count} threads, 20 at a time...")

        # Zero means nothing to sync; a negative count means no limit.
        local_limit: int | None = sync_count
        if sync_count == 0:
            return
        elif sync_count < 0:
            local_limit = None

        await self._sync_threads_with_delay(
            self.client.iter_inbox(
                self._update_seq_id_and_cursor, start_at=resp, local_limit=local_limit
            ),
            stop_when_threads_have_no_messages_to_backfill=True,
            increment_total_backfilled_portals=increment_total_backfilled_portals,
            local_limit=local_limit,
        )

        # Failing to update the direct chat list must not fail the whole sync.
        try:
            await self.update_direct_chats()
        except Exception:
            self.log.exception("Error updating direct chat list")
    async def backfill_threads(self):
        """
        Run :meth:`_backfill_threads` through :meth:`run_with_sync_lock`.

        Any exception is logged and swallowed; :meth:`_post_connect` runs this as a
        background task.
        """
        try:
            await self.run_with_sync_lock(self._backfill_threads)
        except Exception:
            self.log.exception("Error in thread backfill loop")
    async def _backfill_threads(self):
        """
        Continue syncing older threads from the inbox, up to the configured maximum.

        Does nothing when backfilling is disabled, when the number of already backfilled
        portals has reached ``bridge.backfill.max_conversations``, or when the thread sync
        was already marked as completed.
        """
        assert self.client
        if not self.config["bridge.backfill.enable"]:
            return

        max_conversations = self.config["bridge.backfill.max_conversations"] or 0
        # A negative max_conversations skips this check, i.e. it means "no limit".
        if 0 <= max_conversations <= (self.total_backfilled_portals or 0):
            self.log.info("Backfill max_conversations count reached, not syncing any more portals")
            return
        elif self.thread_sync_completed:
            self.log.debug("Thread backfill is marked as completed, not syncing more portals")
            return
        # Number of threads that may still be synced, or None for no limit.
        local_limit = (
            max_conversations - (self.total_backfilled_portals or 0)
            if max_conversations >= 0
            else None
        )

        start_at = None
        if self.oldest_cursor:
            # Placeholder response with no threads that only carries the stored cursor.
            # NOTE(review): presumably makes iter_inbox resume from that cursor - confirm.
            start_at = DMInboxResponse(
                status="",
                seq_id=self.seq_id,
                snapshot_at_ms=0,
                pending_requests_total=0,
                has_pending_top_requests=False,
                viewer=None,
                inbox=DMInbox(
                    threads=[],
                    has_older=True,
                    unseen_count=0,
                    unseen_count_ts=0,
                    blended_inbox_enabled=False,
                    oldest_cursor=self.oldest_cursor,
                ),
            )
        backoff = self.config.get("bridge.backfill.backoff.thread_list", 300)
        await self._sync_threads_with_delay(
            self.client.iter_inbox(
                self._update_seq_id_and_cursor,
                start_at=start_at,
                local_limit=local_limit,
                rate_limit_exceeded_backoff=backoff,
            ),
            increment_total_backfilled_portals=True,
            local_limit=local_limit,
        )
        await self.update_direct_chats()
  695. def _update_seq_id_and_cursor(self, seq_id: int, cursor: str | None):
  696. self.seq_id = seq_id
  697. if cursor:
  698. self.oldest_cursor = cursor
  699. async def _sync_threads_with_delay(
  700. self,
  701. threads: AsyncIterable[Thread],
  702. increment_total_backfilled_portals: bool = False,
  703. stop_when_threads_have_no_messages_to_backfill: bool = False,
  704. local_limit: int | None = None,
  705. ):
  706. sync_delay = self.config["bridge.backfill.min_sync_thread_delay"]
  707. last_thread_sync_ts = 0.0
  708. found_thread_count = 0
  709. async for thread in threads:
  710. found_thread_count += 1
  711. now = time.monotonic()
  712. if now < last_thread_sync_ts + sync_delay:
  713. delay = last_thread_sync_ts + sync_delay - now
  714. self.log.debug("Thread sync is happening too quickly. Waiting for %ds", delay)
  715. await asyncio.sleep(delay)
  716. last_thread_sync_ts = time.monotonic()
  717. had_new_messages = await self._sync_thread(thread)
  718. if not had_new_messages and stop_when_threads_have_no_messages_to_backfill:
  719. self.log.debug("Got to threads with no new messages. Stopping sync.")
  720. return
  721. if increment_total_backfilled_portals:
  722. self.total_backfilled_portals = (self.total_backfilled_portals or 0) + 1
  723. await self.update()
  724. if local_limit is None or found_thread_count < local_limit:
  725. if local_limit is None:
  726. self.log.info(
  727. "Reached end of thread list with no limit, marking thread sync as completed"
  728. )
  729. else:
  730. self.log.info(
  731. f"Reached end of thread list (got {found_thread_count} with "
  732. f"limit {local_limit}), marking thread sync as completed"
  733. )
  734. self.thread_sync_completed = True
  735. await self.update()
  736. async def run_with_sync_lock(self, func: Callable[[], Awaitable]):
  737. with self._sync_lock:
  738. retry_count = 0
  739. while retry_count < 5:
  740. try:
  741. retry_count += 1
  742. await func()
  743. # The sync was successful. Exit the loop.
  744. return
  745. except IGNotLoggedInError as e:
  746. await self.logout(error=e)
  747. return
  748. except Exception:
  749. self.log.exception(
  750. "Failed to sync threads. Waiting 30 seconds before retrying sync."
  751. )
  752. await asyncio.sleep(30)
  753. # If we get here, it means that the sync has failed five times. If this happens, most
  754. # likely something very bad has happened.
  755. self.log.error("Failed to sync threads five times. Will not retry.")
  756. def start_listen(self, is_after_sync: bool = False) -> None:
  757. self.shutdown = False
  758. task = self._listen(
  759. seq_id=self.seq_id, snapshot_at_ms=self.snapshot_at_ms, is_after_sync=is_after_sync
  760. )
  761. self._listen_task = self.loop.create_task(task)
  762. async def delayed_start_listen(self, sleep: int) -> None:
  763. await asyncio.sleep(sleep)
  764. if self.is_connected:
  765. self.log.debug(
  766. "Already reconnected before delay after MQTT reconnection error finished"
  767. )
  768. else:
  769. self.log.debug("Reconnecting after MQTT connection error")
  770. self.start_listen()
  771. async def fetch_user_and_reconnect(self, sleep_first: int | None = None) -> None:
  772. if sleep_first:
  773. await asyncio.sleep(sleep_first)
  774. if self.is_connected:
  775. self.log.debug("Canceling user fetch, already reconnected")
  776. return
  777. self.log.debug("Refetching current user after disconnection")
  778. errors = 0
  779. while True:
  780. try:
  781. resp = await self.client.current_user()
  782. except RETRYABLE_PROXY_EXCEPTIONS as e:
  783. # These are retried by the client up to 10 times, but we actually want to retry
  784. # these indefinitely so we capture them here again and retry.
  785. self.log.warning(
  786. f"Proxy error fetching user from Instagram: {e}, retrying in 1 minute",
  787. )
  788. await asyncio.sleep(60)
  789. except IGNotLoggedInError as e:
  790. self.log.warning(f"Failed to reconnect to Instagram: {e}, logging out")
  791. await self.logout(error=e)
  792. return
  793. except (IGChallengeError, IGConsentRequiredError) as e:
  794. await self._handle_checkpoint(e, on="reconnect")
  795. return
  796. except IGUnknownError as e:
  797. if "non-JSON body" not in e:
  798. raise
  799. errors += 1
  800. if errors > 10:
  801. raise
  802. self.log.warning(
  803. "Non-JSON body while trying to check user for reconnection, retrying in 10s"
  804. )
  805. await asyncio.sleep(10)
  806. except Exception as e:
  807. self.log.exception("Error while reconnecting to Instagram")
  808. if isinstance(e, IGCheckpointError):
  809. self.log.debug("Checkpoint error content: %s", e.body)
  810. await self.push_bridge_state(
  811. BridgeStateEvent.UNKNOWN_ERROR, info={"python_error": str(e)}
  812. )
  813. return
  814. else:
  815. self.log.debug(f"Confirmed current user {resp.user.pk}")
  816. self.start_listen()
  817. return
  818. async def _listen(self, seq_id: int, snapshot_at_ms: int, is_after_sync: bool) -> None:
  819. try:
  820. await self.mqtt.listen(
  821. graphql_subs={
  822. GraphQLSubscription.app_presence(),
  823. GraphQLSubscription.direct_typing(self.state.user_id),
  824. GraphQLSubscription.direct_status(),
  825. },
  826. skywalker_subs={
  827. SkywalkerSubscription.direct_sub(self.state.user_id),
  828. SkywalkerSubscription.live_sub(self.state.user_id),
  829. },
  830. seq_id=seq_id,
  831. snapshot_at_ms=snapshot_at_ms,
  832. )
  833. except IrisSubscribeError as e:
  834. if is_after_sync:
  835. self.log.exception("Got IrisSubscribeError right after refresh")
  836. await self.send_bridge_notice(
  837. f"Reconnecting failed again after refresh: {e}",
  838. important=True,
  839. state_event=BridgeStateEvent.UNKNOWN_ERROR,
  840. error_code="ig-refresh-connection-error",
  841. error_message=str(e),
  842. info={"python_error": str(e)},
  843. )
  844. else:
  845. self.log.warning(f"Got IrisSubscribeError {e}, refreshing...")
  846. background_task.create(self.refresh())
  847. except MQTTReconnectionError as e:
  848. self.log.warning(
  849. f"Unexpected connection error: {e}, reconnecting in 1 minute", exc_info=True
  850. )
  851. await self.send_bridge_notice(
  852. f"Error in listener: {e}",
  853. important=True,
  854. state_event=BridgeStateEvent.TRANSIENT_DISCONNECT,
  855. error_code="ig-connection-error-socket",
  856. )
  857. self.mqtt.disconnect()
  858. background_task.create(self.delayed_start_listen(sleep=60))
  859. except (MQTTNotConnected, MQTTNotLoggedIn, MQTTConnectionUnauthorized) as e:
  860. self.log.warning(f"Unexpected connection error: {e}, checking auth and reconnecting")
  861. await self.send_bridge_notice(
  862. f"Error in listener: {e}",
  863. important=True,
  864. state_event=BridgeStateEvent.TRANSIENT_DISCONNECT,
  865. error_code="ig-connection-error-maybe-auth",
  866. )
  867. self.mqtt.disconnect()
  868. background_task.create(self.fetch_user_and_reconnect())
  869. except Exception as e:
  870. self.log.exception("Fatal error in listener, reconnecting in 5 minutes")
  871. await self.send_bridge_notice(
  872. "Fatal error in listener (see logs for more info)",
  873. state_event=BridgeStateEvent.UNKNOWN_ERROR,
  874. important=True,
  875. error_code="ig-unknown-connection-error",
  876. info={"python_error": str(e)},
  877. )
  878. self.mqtt.disconnect()
  879. background_task.create(self.fetch_user_and_reconnect(sleep_first=300))
  880. else:
  881. if not self.shutdown:
  882. await self.send_bridge_notice(
  883. "Instagram connection closed without error",
  884. state_event=BridgeStateEvent.UNKNOWN_ERROR,
  885. error_code="ig-disconnected",
  886. )
  887. finally:
  888. self._listen_task = None
  889. self._is_connected = False
  890. self._track_metric(METRIC_CONNECTED, False)
  891. async def stop_listen(self) -> None:
  892. if self.mqtt:
  893. self.shutdown = True
  894. self.mqtt.disconnect()
  895. if self._listen_task:
  896. await self._listen_task
  897. self.shutdown = False
  898. self._track_metric(METRIC_CONNECTED, False)
  899. self._is_connected = False
  900. await self.update()
  901. def stop_backfill_tasks(self) -> None:
  902. if self._backfill_loop_task:
  903. self._backfill_loop_task.cancel()
  904. self._backfill_loop_task = None
  905. if self._thread_sync_task:
  906. self._thread_sync_task.cancel()
  907. self._thread_sync_task = None
  908. async def logout(self, error: IGNotLoggedInError | None = None) -> None:
  909. await self.stop_listen()
  910. self.stop_backfill_tasks()
  911. if self.client and error is None:
  912. try:
  913. await self.client.logout(one_tap_app_login=False)
  914. except Exception:
  915. self.log.debug("Exception logging out", exc_info=True)
  916. if self.mqtt:
  917. self.mqtt.disconnect()
  918. self._track_metric(METRIC_CONNECTED, False)
  919. self._track_metric(METRIC_LOGGED_IN, False)
  920. if error is None:
  921. await self.push_bridge_state(BridgeStateEvent.LOGGED_OUT)
  922. puppet = await pu.Puppet.get_by_pk(self.igpk, create=False)
  923. if puppet and puppet.is_real_user:
  924. await puppet.switch_mxid(None, None)
  925. try:
  926. del self.by_igpk[self.igpk]
  927. except KeyError:
  928. pass
  929. self.igpk = None
  930. else:
  931. self.log.debug("Auth error body: %s", error.body.serialize())
  932. await self.send_bridge_notice(
  933. f"You have been logged out of Instagram: {error.proper_message}",
  934. important=True,
  935. state_event=BridgeStateEvent.BAD_CREDENTIALS,
  936. error_code="ig-auth-error",
  937. error_message=error.proper_message,
  938. info={"cnd_action": "reauth"},
  939. )
  940. self.client = None
  941. self.mqtt = None
  942. self.state = None
  943. self.seq_id = None
  944. self.snapshot_at_ms = None
  945. self.thread_sync_completed = False
  946. self._is_logged_in = False
  947. await self.update()
  948. # endregion
  949. # region Event handlers
  950. async def _save_seq_id_after_sleep(self) -> None:
  951. await asyncio.sleep(120)
  952. self._seq_id_save_task = None
  953. self.log.trace("Saving sequence ID %d/%d", self.seq_id, self.snapshot_at_ms)
  954. try:
  955. await self.save_seq_id()
  956. except Exception:
  957. self.log.exception("Error saving sequence ID")
  958. async def update_seq_id(self, evt: NewSequenceID) -> None:
  959. self.seq_id = evt.seq_id
  960. self.snapshot_at_ms = evt.snapshot_at_ms
  961. if not self._seq_id_save_task or self._seq_id_save_task.done():
  962. self.log.trace("Starting seq id save task (%d/%d)", evt.seq_id, evt.snapshot_at_ms)
  963. self._seq_id_save_task = asyncio.create_task(self._save_seq_id_after_sleep())
  964. else:
  965. self.log.trace("Not starting seq id save task (%d/%d)", evt.seq_id, evt.snapshot_at_ms)
  966. @async_time(METRIC_MESSAGE)
  967. async def handle_message(self, evt: MessageSyncEvent) -> None:
  968. portal = await po.Portal.get_by_thread_id(evt.message.thread_id, receiver=self.igpk)
  969. if not portal or not portal.mxid:
  970. self.log.debug("Got message in thread with no portal, getting info...")
  971. resp = await self.client.get_thread(evt.message.thread_id)
  972. portal = await po.Portal.get_by_thread(resp.thread, self.igpk)
  973. self.log.debug("Got info for unknown portal, creating room")
  974. await portal.create_matrix_room(self, resp.thread)
  975. if not portal.mxid:
  976. self.log.warning(
  977. "Room creation appears to have failed, "
  978. f"dropping message in {evt.message.thread_id}"
  979. )
  980. return
  981. self.log.trace(f"Received message sync event {evt.message}")
  982. if evt.message.new_reaction:
  983. await portal.handle_instagram_reaction(
  984. evt.message, remove=evt.message.op == Operation.REMOVE
  985. )
  986. return
  987. sender = await pu.Puppet.get_by_pk(evt.message.user_id) if evt.message.user_id else None
  988. if evt.message.op == Operation.ADD:
  989. if not sender:
  990. # I don't think we care about adds with no sender
  991. return
  992. await portal.handle_instagram_item(self, sender, evt.message)
  993. elif evt.message.op == Operation.REMOVE:
  994. # Removes don't have a sender, only the message sender can unsend messages anyway
  995. await portal.handle_instagram_remove(evt.message.item_id)
  996. elif evt.message.op == Operation.REPLACE:
  997. await portal.handle_instagram_update(evt.message)
  998. @async_time(METRIC_THREAD_SYNC)
  999. async def handle_thread_sync(self, evt: ThreadSyncEvent) -> None:
  1000. self.log.trace("Thread sync event content: %s", evt)
  1001. portal = await po.Portal.get_by_thread(evt, receiver=self.igpk)
  1002. if portal.mxid:
  1003. self.log.debug("Got thread sync event for %s with existing portal", portal.thread_id)
  1004. await portal.update_matrix_room(self, evt)
  1005. elif evt.is_group:
  1006. self.log.debug(
  1007. "Got thread sync event for group %s without existing portal, creating room",
  1008. portal.thread_id,
  1009. )
  1010. await portal.create_matrix_room(self, evt)
  1011. else:
  1012. self.log.debug(
  1013. "Got thread sync event for DM %s without existing portal, ignoring",
  1014. portal.thread_id,
  1015. )
  1016. async def handle_thread_remove(self, evt: ThreadRemoveEvent) -> None:
  1017. self.log.debug("Got thread remove event: %s", evt.serialize())
  1018. @async_time(METRIC_RTD)
  1019. async def handle_rtd(self, evt: RealtimeDirectEvent) -> None:
  1020. if not isinstance(evt.value, ActivityIndicatorData):
  1021. return
  1022. now = int(time.time() * 1000)
  1023. date = evt.value.timestamp_ms
  1024. expiry = date + evt.value.ttl
  1025. if expiry < now:
  1026. return
  1027. if evt.activity_indicator_id in self._activity_indicator_ids:
  1028. return
  1029. # TODO clear expired items from this dict
  1030. self._activity_indicator_ids[evt.activity_indicator_id] = expiry
  1031. puppet = await pu.Puppet.get_by_pk(int(evt.value.sender_id))
  1032. portal = await po.Portal.get_by_thread_id(evt.thread_id, receiver=self.igpk)
  1033. if not puppet or not portal or not portal.mxid:
  1034. return
  1035. is_typing = evt.value.activity_status != TypingStatus.OFF
  1036. if puppet.pk == self.igpk:
  1037. self.remote_typing_status = TypingStatus.TEXT if is_typing else TypingStatus.OFF
  1038. await puppet.intent_for(portal).set_typing(portal.mxid, timeout=evt.value.ttl)
  1039. # endregion
  1040. # region Database getters
  1041. def _add_to_cache(self) -> None:
  1042. self.by_mxid[self.mxid] = self
  1043. if self.igpk:
  1044. self.by_igpk[self.igpk] = self
  1045. @classmethod
  1046. @async_getter_lock
  1047. async def get_by_mxid(cls, mxid: UserID, *, create: bool = True) -> User | None:
  1048. # Never allow ghosts to be users
  1049. if pu.Puppet.get_id_from_mxid(mxid):
  1050. return None
  1051. try:
  1052. return cls.by_mxid[mxid]
  1053. except KeyError:
  1054. pass
  1055. user = cast(cls, await super().get_by_mxid(mxid))
  1056. if user is not None:
  1057. user._add_to_cache()
  1058. return user
  1059. if create:
  1060. user = cls(mxid)
  1061. await user.insert()
  1062. user._add_to_cache()
  1063. return user
  1064. return None
  1065. @classmethod
  1066. @async_getter_lock
  1067. async def get_by_igpk(cls, igpk: int) -> User | None:
  1068. try:
  1069. return cls.by_igpk[igpk]
  1070. except KeyError:
  1071. pass
  1072. user = cast(cls, await super().get_by_igpk(igpk))
  1073. if user is not None:
  1074. user._add_to_cache()
  1075. return user
  1076. return None
  1077. @classmethod
  1078. async def all_logged_in(cls) -> AsyncGenerator[User, None]:
  1079. users = await super().all_logged_in()
  1080. user: cls
  1081. for index, user in enumerate(users):
  1082. try:
  1083. yield cls.by_mxid[user.mxid]
  1084. except KeyError:
  1085. user._add_to_cache()
  1086. yield user
  1087. # endregion