parse_class_pdf.py 930 B

12345678910111213141516171819202122232425262728293031323334353637
  1. #!/usr/bin/env python3
  2. import logging
  3. from parse import (
  4. extract_data_from_class_pdf,
  5. get_modules_for_class_json,
  6. get_modules_json,
  7. ClassPdfExtractionPageData,
  8. ClassJsonModule,
  9. )
  10. from config import CLASS_PDF_INPUT_FILE, CLASSES_JSON_OUTPUT_FILE
  11. def main() -> None:
  12. logging.basicConfig(level=logging.DEBUG)
  13. extraction_data: list[ClassPdfExtractionPageData] = extract_data_from_class_pdf(
  14. CLASS_PDF_INPUT_FILE
  15. )
  16. parsed_modules: list[ClassJsonModule] = [
  17. module
  18. for data in extraction_data
  19. for module in get_modules_for_class_json(
  20. data.raw_extracted_modules,
  21. data.page_metadata.class_name,
  22. data.page_metadata.degree_program,
  23. )
  24. ]
  25. json: str = get_modules_json(parsed_modules)
  26. with open(CLASSES_JSON_OUTPUT_FILE, "w", encoding="utf-8") as f:
  27. f.write(json)
  28. if __name__ == "__main__":
  29. main()