rip_modulplaner_frontend_data.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. #!/usr/bin/env python3
  2. import os
  3. import json
  4. import logging
  5. from typing import Optional, List, Dict, Any
  6. from pathlib import Path
  7. from argparse import ArgumentParser
  8. import requests
  9. from config import (
  10. FRONTEND_RIPPER_BASE_FILES,
  11. FRONTEND_RIPPER_BASE_URL_DEFAULT,
  12. FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
  13. FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
  14. REQUESTS_TIMEOUT,
  15. )
# Module-level logger shared by every function in this script.
logger = logging.getLogger("modulplaner-backend.rip_frontend_data")
  17. def download_file(url: str, local_path: Path) -> bool:
  18. """
  19. Downloads a file URL and returns wether it went successfully.
  20. """
  21. try:
  22. response = requests.get(url, timeout=REQUESTS_TIMEOUT)
  23. response.raise_for_status()
  24. os.makedirs(os.path.dirname(local_path), exist_ok=True)
  25. with open(local_path, "wb") as f:
  26. f.write(response.content)
  27. logger.info("Downloaded: %s", local_path)
  28. return True
  29. except requests.exceptions.HTTPError as e:
  30. if e.response.status_code == 404:
  31. logger.warning("File not found (403): %s", url)
  32. else:
  33. logger.error("Failed to download %s: %s", url, e)
  34. return False
  35. except Exception as e:
  36. logger.error("Error downloading %s: %s", url, e)
  37. return False
  38. def get_semester_versions(
  39. base_url: str, output_dir: Path
  40. ) -> Optional[List[Dict[str, Any]]]:
  41. """
  42. Downloads and parses the semester-versions.json file.
  43. """
  44. logger.info("Fetching semester list...")
  45. if not download_file(
  46. f"{base_url}/{FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE}",
  47. output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
  48. ):
  49. logger.error("Could not download semester-versions.json. Exiting.")
  50. return None
  51. try:
  52. with open(
  53. output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE, "r", encoding="utf-8"
  54. ) as f:
  55. return json.load(f)
  56. except json.JSONDecodeError:
  57. logger.error("Error parsing semester-versions.json")
  58. return None
  59. def process_semester(semester: str, base_url: str, output_dir: Path) -> None:
  60. """
  61. Downloads files associated with a specific semester.
  62. """
  63. logger.info("Processing Semester: %s", semester)
  64. semester_level_files = ["blockclasses.json", "config.json"]
  65. for s_file in semester_level_files:
  66. download_file(f"{base_url}/{semester}/{s_file}", output_dir / semester / s_file)
  67. config_path = output_dir / semester / "config.json"
  68. if config_path.exists():
  69. try:
  70. with open(config_path, "r", encoding="utf-8") as f:
  71. config_data = json.load(f)
  72. blockclass_file = config_data.get("blockclass_file")
  73. if blockclass_file:
  74. download_file(
  75. f"{base_url}/{semester}/{blockclass_file}",
  76. output_dir / semester / blockclass_file,
  77. )
  78. except (json.JSONDecodeError, OSError) as e:
  79. logger.error("Error reading config.json for %s: %s", semester, e)
  80. def process_version(
  81. semester: str, version: str, base_url: str, output_dir: Path
  82. ) -> None:
  83. """
  84. Downloads files associated with a specific version of a semester.
  85. """
  86. version_level_files = ["classes.json", "config.json", "klassen.pdf"]
  87. for v_file in version_level_files:
  88. download_file(
  89. f"{base_url}/{semester}/{version}/{v_file}",
  90. output_dir / semester / version / v_file,
  91. )
  92. def main():
  93. parser = ArgumentParser(
  94. description="Rips all data files from a live modulplaner-frontend server."
  95. )
  96. parser.add_argument(
  97. "--base-url",
  98. help="Base URL for the data",
  99. default=FRONTEND_RIPPER_BASE_URL_DEFAULT,
  100. )
  101. parser.add_argument(
  102. "--output-dir",
  103. help="Output directory for downloaded files",
  104. default=FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
  105. )
  106. parser.add_argument(
  107. "--log-level",
  108. help="Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
  109. default="INFO",
  110. type=str.upper,
  111. choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
  112. )
  113. args = parser.parse_args()
  114. logging.basicConfig(level=args.log_level)
  115. base_url = args.base_url
  116. output_dir = Path(args.output_dir)
  117. for filename in FRONTEND_RIPPER_BASE_FILES:
  118. download_file(f"{base_url}/{filename}", output_dir / filename)
  119. semester_data = get_semester_versions(base_url, output_dir)
  120. if semester_data is None:
  121. return
  122. for item in semester_data:
  123. semester = item.get("semester")
  124. versions = item.get("versions", [])
  125. if not semester:
  126. continue
  127. process_semester(semester, base_url, output_dir)
  128. for version in versions:
  129. process_version(semester, version, base_url, output_dir)
# Run the ripper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()