parse_modules.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. from typing import List
  2. import logging
  3. from pydantic import TypeAdapter
  4. from config import LECTURER_SHORTHAND_SIZE
  5. from .models import (
  6. RawExtractedModule,
  7. ClassJsonModule,
  8. ParsedModuleCellTextData,
  9. DegreeProgram,
  10. TeachingType,
  11. Weekday,
  12. )
  13. def get_modules_for_class_json(
  14. modules: list[RawExtractedModule], class_name: str, degree_program: DegreeProgram
  15. ) -> list[ClassJsonModule]:
  16. output_modules: list[ClassJsonModule] = []
  17. for input_module in modules:
  18. parsed_data: ParsedModuleCellTextData = parse_module_cell_text(
  19. input_module.text, class_name, degree_program
  20. )
  21. output_modules.append(
  22. ClassJsonModule(
  23. weekday=input_module.weekday,
  24. module_shorthand=parsed_data.module_shorthand, # pyright: ignore
  25. start_seconds=input_module.start_seconds, # pyright: ignore
  26. end_seconds=input_module.end_seconds, # pyright: ignore
  27. degree_program=parsed_data.degree_program, # pyright: ignore
  28. class_name=class_name, # pyright: ignore
  29. rooms=parsed_data.rooms,
  30. pages=[input_module.source_page_number],
  31. part_of_other_classes=parsed_data.part_of_other_classes,
  32. teaching_type=parsed_data.teaching_type,
  33. lecturer_shorthands=parsed_data.lecturer_shortnames, # pyright: ignore
  34. id=get_id(
  35. class_name,
  36. parsed_data.module_shorthand,
  37. input_module.weekday,
  38. input_module.start_seconds,
  39. input_module.end_seconds,
  40. ),
  41. )
  42. )
  43. return output_modules
  44. def get_modules_json(modules: List[ClassJsonModule]) -> str:
  45. """
  46. Serializes a list of ClassJsonModule objects into a formatted JSON string.
  47. """
  48. adapter = TypeAdapter(List[ClassJsonModule])
  49. return adapter.dump_json(modules, by_alias=True).decode("utf-8")
  50. def parse_mixed_degree_programs(
  51. degree_program: DegreeProgram, module_shorthand: str
  52. ) -> DegreeProgram:
  53. if degree_program == DegreeProgram.MIXED_BWL_GSW_KOMM:
  54. if module_shorthand in ["bplan", "lean"]:
  55. return DegreeProgram.KONTEXT_BWL
  56. if module_shorthand in ["wisa", "aua"]:
  57. return DegreeProgram.KONTEXT_KOMM
  58. return DegreeProgram.KONTEXT_GSW
  59. return degree_program
  60. def parse_module_cell_text(
  61. text: str, class_name: str, degree_program: DegreeProgram
  62. ) -> ParsedModuleCellTextData:
  63. lines = text.split("\n")
  64. logging.debug("Parsing module cell text: \n%s", text)
  65. if len(lines) != 3 and len(lines) != 2:
  66. raise RuntimeError("Invalid Number of Lines in the cell text.")
  67. if len(lines) == 3:
  68. rooms = get_rooms(lines[2])
  69. teaching_type = get_teaching_type(lines[2])
  70. else:
  71. rooms = []
  72. teaching_type = TeachingType.ON_SITE
  73. module_shorthand = get_module_shorthand(lines[0], class_name)
  74. return ParsedModuleCellTextData(
  75. module_shorthand=module_shorthand,
  76. degree_program=parse_mixed_degree_programs(degree_program, module_shorthand),
  77. class_name=class_name,
  78. rooms=rooms,
  79. part_of_other_classes=[],
  80. teaching_type=teaching_type,
  81. lecturer_shortnames=get_lecturer_shortnames(lines[1]),
  82. )
  83. def get_lecturer_shortnames(second_line: str) -> list[str]:
  84. lecturer_shorthands: list[str] = []
  85. words = second_line.split(" ")
  86. for word in words:
  87. if len(word) == LECTURER_SHORTHAND_SIZE:
  88. lecturer_shorthands.append(word)
  89. return lecturer_shorthands
  90. def get_module_shorthand(first_line: str, class_name: str) -> str:
  91. words = first_line.split(" ")
  92. if len(words) < 1:
  93. raise RuntimeError("Cannot extract module shorthand")
  94. word = words[0]
  95. if len(words) == 1:
  96. for i in reversed(range(len(class_name) + 1)):
  97. if word.endswith(class_name[0:i]):
  98. word = word[: word.rfind(class_name[0:i])]
  99. break
  100. if len(word) == 0:
  101. raise RuntimeError("Module shorthand cannot be empty")
  102. return word
  103. def get_id(
  104. class_name: str,
  105. module_shorthand: str,
  106. weekday: Weekday,
  107. start_seconds: int,
  108. end_seconds: int,
  109. ) -> str:
  110. return (
  111. f"{class_name}-{module_shorthand}-{weekday.index}-{start_seconds}-{end_seconds}"
  112. )
  113. def get_teaching_type(third_line: str) -> TeachingType:
  114. if "Online" in third_line:
  115. return TeachingType.ONLINE
  116. return TeachingType.ON_SITE
  117. def get_rooms(third_line: str) -> list[str]:
  118. if "DSMixe" in third_line:
  119. return []
  120. words = third_line.split(" ")
  121. return words