models.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. from enum import Enum, unique
  2. from typing import Annotated, Any
  3. from pydantic import BaseModel, PlainSerializer, Field, ConfigDict, BeforeValidator
  4. class XLevel(BaseModel):
  5. x1: float
  6. x2: float
  7. class YLevel(BaseModel):
  8. y1: float
  9. y2: float
  10. class HorizontalLine(BaseModel):
  11. x1: float
  12. x2: float
  13. y: float
  14. class Area(BaseModel):
  15. x1: float
  16. y1: float
  17. x2: float
  18. y2: float
  19. @unique
  20. class Weekday(Enum):
  21. MONTAG = ("Montag", 0)
  22. DIENSTAG = ("Dienstag", 1)
  23. MITTWOCH = ("Mittwoch", 2)
  24. DONNERSTAG = ("Donnerstag", 3)
  25. FREITAG = ("Freitag", 4)
  26. SAMSTAG = ("Samstag", 5)
  27. SONNTAG = ("Sonntag", 6)
  28. def __init__(self, display_name, index):
  29. self.display_name = display_name
  30. self.index = index
  31. @unique
  32. class SemesterType(Enum):
  33. HS = "Herbstsemester"
  34. FS = "Frühlingssemester"
  35. class Semester(BaseModel):
  36. model_config = ConfigDict(frozen=True)
  37. yyyy: int
  38. semester_type: SemesterType
  39. class Date(BaseModel):
  40. yyyy: int
  41. mm: int
  42. dd: int
  43. class Time(BaseModel):
  44. hh: int
  45. mm: int
  46. class ExportTimestamp(BaseModel):
  47. date: Date
  48. time: Time
  49. @unique
  50. class DegreeProgram(Enum):
  51. DATASCIENCE = "Data Science"
  52. ELEK_U_INFO = "Elektro- und Informationstechnik"
  53. ENER_U_UMWELT = "Energie- und Umwelttechnik"
  54. ICOMPETENCE = "iCompetence"
  55. INFORMATIK = "Informatik"
  56. KONTEXT_BWL = "Kontext BWL"
  57. KONTEXT_ENGLISCH = "Kontext Englisch"
  58. KONTEXT_GSW = "Kontext GSW"
  59. KONTEXT_KOMM = "Kontext Kommunikation"
  60. MIXED_BWL_GSW_KOMM = "Mixed BWL, GSW, Kommunikation"
  61. MASCHINENBAU = "Maschinenbau"
  62. SYSTEMTECHNIK = "Systemtechnik"
  63. WIRTSCHAFT_ING = "Wirtschaftsingenieurwesen"
  64. AGNOSTIC = "SG-???"
  65. class PageMetadata(BaseModel):
  66. semester: Semester
  67. export_timestamp: ExportTimestamp
  68. class_name: str
  69. degree_program: DegreeProgram
  70. class UnmergedTimeEntries(BaseModel):
  71. cells: list[Area]
  72. horizontal_lines: list[HorizontalLine]
  73. class TimeSlot(BaseModel):
  74. model_config = ConfigDict(frozen=True)
  75. start_time: str
  76. end_time: str
  77. def start_seconds(self) -> int:
  78. hours, minutes = map(int, self.start_time.split(":"))
  79. return hours * 3600 + minutes * 60
  80. def end_seconds(self) -> int:
  81. hours, minutes = map(int, self.end_time.split(":"))
  82. return hours * 3600 + minutes * 60
  83. def to_tuple_if_list(v: Any) -> Any:
  84. if isinstance(v, list):
  85. return tuple(v)
  86. return v
  87. # needed for pydantic to correctly parse the custom Weekday Enum
  88. TolerantWeekday = Annotated[Weekday, BeforeValidator(to_tuple_if_list)]
  89. class RawExtractedModule(BaseModel):
  90. weekday: TolerantWeekday
  91. start_seconds: int
  92. end_seconds: int
  93. text: str
  94. source_page_number: int
  95. @unique
  96. class TeachingType(Enum):
  97. ON_SITE = "on_site"
  98. ONLINE = "online"
  99. HYBRID = "hybrid"
  100. BLOCK = "blockmodule"
  101. class RawLecturer(BaseModel):
  102. """
  103. Basic representation of an extracted lecturer from a pdf that needs to be parsed.
  104. """
  105. shorthand: str
  106. firstname: str
  107. surname: str
  108. class Lecturer(BaseModel):
  109. """
  110. JSON-serializable representation of a parsed lecturer ready to be exported.
  111. """
  112. short: str
  113. surname: str
  114. firstname: str
  115. # tells pydantic to to use the index field for the special Weekday Enum
  116. CustomWeekday = Annotated[Weekday, PlainSerializer(lambda v: v.index, return_type=int)]
  117. class ParsedModuleCellTextData(BaseModel):
  118. module_shorthand: str
  119. degree_program: DegreeProgram
  120. class_name: str
  121. rooms: list[str]
  122. part_of_other_classes: list[str]
  123. teaching_type: TeachingType
  124. lecturer_shorthands: list[str]
  125. class ClassJsonModule(BaseModel):
  126. model_config = ConfigDict(populate_by_name=True)
  127. weekday: CustomWeekday
  128. module_shorthand: str = Field(..., alias="name")
  129. start_seconds: int = Field(..., alias="from")
  130. end_seconds: int = Field(..., alias="to")
  131. degree_program: DegreeProgram = Field(..., alias="degree_prg")
  132. class_name: str = Field(..., alias="class")
  133. rooms: list[str]
  134. pages: list[int]
  135. part_of_other_classes: list[str]
  136. id: str
  137. teaching_type: TeachingType
  138. lecturer_shorthands: list[str] = Field(..., alias="teachers")
  139. class ClassPdfExtractionPageData(BaseModel):
  140. raw_extracted_modules: list[RawExtractedModule]
  141. page_metadata: PageMetadata
  142. class StartsWithMatch(BaseModel):
  143. shorthand_found: str
  144. num_of_matches: int