models.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. from enum import Enum, unique
  2. from dataclasses import dataclass
  3. from typing import Annotated
  4. from pydantic import BaseModel, PlainSerializer, Field, ConfigDict
  5. @dataclass
  6. class XLevel:
  7. x1: float
  8. x2: float
  9. @dataclass
  10. class YLevel:
  11. y1: float
  12. y2: float
  13. @dataclass
  14. class HorizontalLine:
  15. x1: float
  16. x2: float
  17. y: float
  18. @dataclass
  19. class Area:
  20. x1: float
  21. y1: float
  22. x2: float
  23. y2: float
  24. @unique
  25. class Weekday(Enum):
  26. MONTAG = ("Montag", 0)
  27. DIENSTAG = ("Dienstag", 1)
  28. MITTWOCH = ("Mittwoch", 2)
  29. DONNERSTAG = ("Donnerstag", 3)
  30. FREITAG = ("Freitag", 4)
  31. SAMSTAG = ("Samstag", 5)
  32. SONNTAG = ("Sonntag", 6)
  33. def __init__(self, display_name, index):
  34. self.display_name = display_name
  35. self.index = index
  36. @unique
  37. class SemesterType(Enum):
  38. HS = "Herbstsemester"
  39. FS = "Frühlingssemester"
  40. @dataclass(frozen=True)
  41. class Semester:
  42. yyyy: int
  43. semester_type: SemesterType
  44. @dataclass
  45. class Date:
  46. yyyy: int
  47. mm: int
  48. dd: int
  49. @dataclass
  50. class Time:
  51. hh: int
  52. mm: int
  53. @dataclass
  54. class ExportTimestamp:
  55. date: Date
  56. time: Time
  57. @unique
  58. class DegreeProgram(Enum):
  59. DATASCIENCE = "Data Science"
  60. ELEK_U_INFO = "Elektro- und Informationstechnik"
  61. ENER_U_UMWELT = "Energie- und Umwelttechnik"
  62. ICOMPETENCE = "iCompetence"
  63. INFORMATIK = "Informatik"
  64. KONTEXT_BWL = "Kontext BWL"
  65. KONTEXT_ENGLISCH = "Kontext Englisch"
  66. KONTEXT_GSW = "Kontext GSW"
  67. KONTEXT_KOMM = "Kontext Kommunikation"
  68. MIXED_BWL_GSW_KOMM = "Mixed BWL, GSW, Kommunikation"
  69. MASCHINENBAU = "Maschinenbau"
  70. SYSTEMTECHNIK = "Systemtechnik"
  71. WIRTSCHAFT_ING = "Wirtschaftsingenieurwesen"
  72. AGNOSTIC = "agnostic"
  73. @dataclass
  74. class PageMetadata:
  75. semester: Semester
  76. export_timestamp: ExportTimestamp
  77. class_name: str
  78. degree_program: DegreeProgram
  79. @dataclass
  80. class UnmergedTimeEntries:
  81. cells: list[Area]
  82. horizontal_lines: list[HorizontalLine]
  83. @dataclass(frozen=True)
  84. class TimeSlot:
  85. start_time: str
  86. end_time: str
  87. def start_seconds(self) -> int:
  88. hours, minutes = map(int, self.start_time.split(":"))
  89. return hours * 3600 + minutes * 60
  90. def end_seconds(self) -> int:
  91. hours, minutes = map(int, self.end_time.split(":"))
  92. return hours * 3600 + minutes * 60
  93. @dataclass
  94. class RawExtractedModule:
  95. weekday: Weekday
  96. start_seconds: int
  97. end_seconds: int
  98. text: str
  99. source_page_number: int
  100. @unique
  101. class TeachingType(Enum):
  102. ON_SITE = "on_site"
  103. ONLINE = "online"
  104. HYBRID = "hybrid"
  105. BLOCK = "blockmodule"
  106. @dataclass
  107. class RawLecturer:
  108. """
  109. Basic representation of an extracted lecturer from a pdf that needs to be parsed.
  110. """
  111. shorthand: str
  112. firstname: str
  113. surname: str
  114. class Lecturer(BaseModel):
  115. """
  116. JSON-serializable representation of a parsed lecturer ready to be exported.
  117. """
  118. short: str
  119. surname: str
  120. firstname: str
  121. # tells pydantic to to use the index field for the special Weekday Enum
  122. CustomWeekday = Annotated[Weekday, PlainSerializer(lambda v: v.index, return_type=int)]
  123. @dataclass
  124. class ParsedModuleCellTextData:
  125. module_shorthand: str
  126. degree_program: DegreeProgram
  127. class_name: str
  128. rooms: list[str]
  129. part_of_other_classes: list[str]
  130. teaching_type: TeachingType
  131. lecturer_shortnames: list[str]
  132. class ClassJsonModule(BaseModel):
  133. model_config = ConfigDict(populate_by_name=True)
  134. weekday: CustomWeekday
  135. module_shorthand: str = Field(..., alias="name")
  136. start_seconds: int = Field(..., alias="from")
  137. end_seconds: int = Field(..., alias="to")
  138. degree_program: DegreeProgram = Field(..., alias="degree_prg")
  139. class_name: str = Field(..., alias="class")
  140. rooms: list[str]
  141. pages: list[int]
  142. part_of_other_classes: list[str]
  143. id: str
  144. teaching_type: TeachingType
  145. lecturer_shorthands: list[str] = Field(..., alias="teachers")
  146. @dataclass
  147. class ClassPdfExtractionPageData:
  148. raw_extracted_modules: list[RawExtractedModule]
  149. page_metadata: PageMetadata
  150. @dataclass
  151. class StartsWithMatch:
  152. shorthand_found: str
  153. num_of_matches: int