| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- #!/usr/bin/env python3
- import os
- import json
- import logging
- from typing import Optional, List, Dict, Any
- from pathlib import Path
- from argparse import ArgumentParser
- import requests
- from config import (
- FRONTEND_RIPPER_BASE_FILES,
- FRONTEND_RIPPER_BASE_URL_DEFAULT,
- FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
- FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
- REQUESTS_TIMEOUT,
- )
def download_file(url: str, local_path: Path) -> bool:
    """
    Download ``url`` and store the response body at ``local_path``.

    Missing parent directories of ``local_path`` are created before writing.
    Failures are logged instead of raised so that callers can carry on with
    the remaining files.

    Args:
        url: Address of the file to fetch.
        local_path: Destination file on disk.

    Returns:
        True if the file was fetched and written, False otherwise.
    """
    destination = Path(local_path)
    try:
        response = requests.get(url, timeout=REQUESTS_TIMEOUT)
        response.raise_for_status()
        # Path.parent is "." for a bare filename, which mkdir() accepts.
        # os.makedirs(os.path.dirname(...)) would be handed "" in that case
        # and raise, turning a successful fetch into a reported failure.
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(response.content)
    except requests.exceptions.HTTPError as e:
        # A 404 is only logged as a warning; any other HTTP status is
        # logged as an error.
        if e.response is not None and e.response.status_code == 404:
            logging.warning("File not found (404): %s", url)
        else:
            logging.error("Failed to download %s: %s", url, e)
        return False
    except Exception as e:
        # Deliberately broad: this is a best-effort downloader, and a single
        # network or filesystem failure must not abort the whole run.
        logging.error("Error downloading %s: %s", url, e)
        return False

    logging.info("Downloaded: %s", local_path)
    return True
def get_semester_versions(
    base_url: str, output_dir: Path
) -> Optional[List[Dict[str, Any]]]:
    """
    Download and parse the semester versions file.

    The file named by ``FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE`` is fetched
    from ``base_url`` into ``output_dir`` and then loaded as JSON.

    Args:
        base_url: Base URL the file is fetched from.
        output_dir: Directory the downloaded file is stored in.

    Returns:
        The parsed JSON list, or None if the download failed, the file could
        not be read or parsed, or its top-level value is not a list.
    """
    logging.info("Fetching semester list...")
    versions_path = output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE
    if not download_file(
        f"{base_url}/{FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE}", versions_path
    ):
        logging.error("Could not download semester-versions.json. Exiting.")
        return None

    try:
        with open(versions_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except OSError as e:
        # The file may be unreadable even though the download reported
        # success; report it instead of crashing.
        logging.error("Error reading semester-versions.json: %s", e)
        return None
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        logging.error("Error parsing semester-versions.json: %s", e)
        return None

    # Callers iterate over the result, so anything but a list is unusable.
    if not isinstance(data, list):
        logging.error(
            "Error parsing semester-versions.json: expected a list, got %s",
            type(data).__name__,
        )
        return None
    return data
def process_semester(semester: str, base_url: str, output_dir: Path) -> None:
    """
    Download the files that belong to one semester.

    Fetches ``blockclasses.json`` and ``config.json`` for the semester. If
    the downloaded ``config.json`` names a ``blockclass_file``, that file is
    fetched as well.

    Args:
        semester: Semester identifier, used as URL and directory component.
        base_url: Base URL the files are fetched from.
        output_dir: Root directory for downloaded files.
    """
    logging.info("Processing Semester: %s", semester)
    semester_url = f"{base_url}/{semester}"
    semester_dir = output_dir / semester

    semester_level_files = ["blockclasses.json", "config.json"]
    for s_file in semester_level_files:
        download_file(f"{semester_url}/{s_file}", semester_dir / s_file)

    config_path = semester_dir / "config.json"
    if not config_path.exists():
        return

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        logging.error("Error reading config.json for %s: %s", semester, e)
        return

    # config.json comes from the remote server; do not assume its shape.
    if not isinstance(config_data, dict):
        logging.error(
            "Error reading config.json for %s: %s",
            semester,
            "top-level value is not an object",
        )
        return

    blockclass_file = config_data.get("blockclass_file")
    if not blockclass_file:
        return
    if not isinstance(blockclass_file, str):
        logging.error(
            "Ignoring non-string blockclass_file for %s: %r",
            semester,
            blockclass_file,
        )
        return

    # The name is server-controlled and is joined into a local path below.
    # An absolute path or a ".." component would place the download outside
    # the semester directory, so refuse those.
    relative = Path(blockclass_file)
    if relative.anchor or ".." in relative.parts:
        logging.error(
            "Ignoring unsafe blockclass_file for %s: %r",
            semester,
            blockclass_file,
        )
        return

    download_file(
        f"{semester_url}/{blockclass_file}",
        semester_dir / blockclass_file,
    )
def process_version(
    semester: str, version: str, base_url: str, output_dir: Path
) -> None:
    """
    Fetch the per-version files of one semester version.

    Each file is requested from ``{base_url}/{semester}/{version}/`` and
    stored under ``output_dir / semester / version``.
    """
    for name in ("classes.json", "config.json", "klassen.pdf"):
        source_url = f"{base_url}/{semester}/{version}/{name}"
        target_path = output_dir / semester / version / name
        download_file(source_url, target_path)
def main() -> None:
    """
    Command-line entry point: download all data files from a frontend server.

    Parses ``--base-url`` and ``--output-dir``, downloads the base files
    listed in ``FRONTEND_RIPPER_BASE_FILES``, then walks the semester list
    and downloads the files of every semester and each of its versions.
    """
    parser = ArgumentParser(
        description="Rips all data files from a live modulplaner-frontend server."
    )
    parser.add_argument(
        "--base-url",
        help="Base URL for the data",
        default=FRONTEND_RIPPER_BASE_URL_DEFAULT,
    )
    parser.add_argument(
        "--output-dir",
        help="Output directory for downloaded files",
        default=FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    # URLs are built as f"{base_url}/...", so a trailing slash on the given
    # base URL would otherwise yield "//" in every request.
    base_url = args.base_url.rstrip("/")
    output_dir = Path(args.output_dir)

    for filename in FRONTEND_RIPPER_BASE_FILES:
        download_file(f"{base_url}/{filename}", output_dir / filename)

    semester_data = get_semester_versions(base_url, output_dir)
    if semester_data is None:
        return

    for item in semester_data:
        # The list comes from the remote server; skip entries that are not
        # objects instead of crashing on .get().
        if not isinstance(item, dict):
            logging.warning("Skipping malformed semester entry: %r", item)
            continue
        semester = item.get("semester")
        if not semester:
            continue
        # "versions": null would make .get("versions", []) return None.
        versions = item.get("versions") or []
        process_semester(semester, base_url, output_dir)
        for version in versions:
            process_version(semester, version, base_url, output_dir)


if __name__ == "__main__":
    main()
|