rip_modulplaner_frontend_data.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. #!/usr/bin/env python3
  2. import os
  3. import json
  4. import logging
  5. from typing import Optional, List, Dict, Any
  6. from pathlib import Path
  7. from argparse import ArgumentParser
  8. import requests
  9. from config import (
  10. FRONTEND_RIPPER_BASE_FILES,
  11. FRONTEND_RIPPER_BASE_URL_DEFAULT,
  12. FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
  13. FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
  14. REQUESTS_TIMEOUT,
  15. )
  16. def download_file(url: str, local_path: Path) -> bool:
  17. """
  18. Downloads a file URL and returns wether it went successfully.
  19. """
  20. try:
  21. response = requests.get(url, timeout=REQUESTS_TIMEOUT)
  22. response.raise_for_status()
  23. os.makedirs(os.path.dirname(local_path), exist_ok=True)
  24. with open(local_path, "wb") as f:
  25. f.write(response.content)
  26. logging.info("Downloaded: %s", local_path)
  27. return True
  28. except requests.exceptions.HTTPError as e:
  29. if e.response.status_code == 404:
  30. logging.warning("File not found (404): %s", url)
  31. else:
  32. logging.error("Failed to download %s: %s", url, e)
  33. return False
  34. except Exception as e:
  35. logging.error("Error downloading %s: %s", url, e)
  36. return False
  37. def get_semester_versions(
  38. base_url: str, output_dir: Path
  39. ) -> Optional[List[Dict[str, Any]]]:
  40. """
  41. Downloads and parses the semester-versions.json file.
  42. """
  43. logging.info("Fetching semester list...")
  44. if not download_file(
  45. f"{base_url}/{FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE}",
  46. output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
  47. ):
  48. logging.error("Could not download semester-versions.json. Exiting.")
  49. return None
  50. try:
  51. with open(
  52. output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE, "r", encoding="utf-8"
  53. ) as f:
  54. return json.load(f)
  55. except json.JSONDecodeError:
  56. logging.error("Error parsing semester-versions.json")
  57. return None
  58. def process_semester(semester: str, base_url: str, output_dir: Path) -> None:
  59. """
  60. Downloads files associated with a specific semester.
  61. """
  62. logging.info("Processing Semester: %s", semester)
  63. semester_level_files = ["blockclasses.json", "config.json"]
  64. for s_file in semester_level_files:
  65. download_file(f"{base_url}/{semester}/{s_file}", output_dir / semester / s_file)
  66. config_path = output_dir / semester / "config.json"
  67. if config_path.exists():
  68. try:
  69. with open(config_path, "r", encoding="utf-8") as f:
  70. config_data = json.load(f)
  71. blockclass_file = config_data.get("blockclass_file")
  72. if blockclass_file:
  73. download_file(
  74. f"{base_url}/{semester}/{blockclass_file}",
  75. output_dir / semester / blockclass_file,
  76. )
  77. except (json.JSONDecodeError, OSError) as e:
  78. logging.error("Error reading config.json for %s: %s", semester, e)
  79. def process_version(
  80. semester: str, version: str, base_url: str, output_dir: Path
  81. ) -> None:
  82. """
  83. Downloads files associated with a specific version of a semester.
  84. """
  85. version_level_files = ["classes.json", "config.json", "klassen.pdf"]
  86. for v_file in version_level_files:
  87. download_file(
  88. f"{base_url}/{semester}/{version}/{v_file}",
  89. output_dir / semester / version / v_file,
  90. )
  91. def main():
  92. parser = ArgumentParser(
  93. description="Rips all data files from a live modulplaner-frontend server."
  94. )
  95. parser.add_argument(
  96. "--base-url",
  97. help="Base URL for the data",
  98. default=FRONTEND_RIPPER_BASE_URL_DEFAULT,
  99. )
  100. parser.add_argument(
  101. "--output-dir",
  102. help="Output directory for downloaded files",
  103. default=FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
  104. )
  105. args = parser.parse_args()
  106. logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
  107. base_url = args.base_url
  108. output_dir = Path(args.output_dir)
  109. for filename in FRONTEND_RIPPER_BASE_FILES:
  110. download_file(f"{base_url}/{filename}", output_dir / filename)
  111. semester_data = get_semester_versions(base_url, output_dir)
  112. if semester_data is None:
  113. return
  114. for item in semester_data:
  115. semester = item.get("semester")
  116. versions = item.get("versions", [])
  117. if not semester:
  118. continue
  119. process_semester(semester, base_url, output_dir)
  120. for version in versions:
  121. process_version(semester, version, base_url, output_dir)
  122. if __name__ == "__main__":
  123. main()