| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- from enum import Enum, unique
- from dataclasses import dataclass
- from typing import Annotated
- from pydantic import BaseModel, PlainSerializer, Field, ConfigDict
- @dataclass
- class XLevel:
- x1: float
- x2: float
- @dataclass
- class YLevel:
- y1: float
- y2: float
- @dataclass
- class HorizontalLine:
- x1: float
- x2: float
- y: float
- @dataclass
- class Area:
- x1: float
- y1: float
- x2: float
- y2: float
- @unique
- class Weekday(Enum):
- MONTAG = ("Montag", 0)
- DIENSTAG = ("Dienstag", 1)
- MITTWOCH = ("Mittwoch", 2)
- DONNERSTAG = ("Donnerstag", 3)
- FREITAG = ("Freitag", 4)
- SAMSTAG = ("Samstag", 5)
- SONNTAG = ("Sonntag", 6)
- def __init__(self, display_name, index):
- self.display_name = display_name
- self.index = index
- @unique
- class SemesterType(Enum):
- HS = "Herbstsemester"
- FS = "Frühlingssemester"
- @dataclass(frozen=True)
- class Semester:
- yyyy: int
- semester_type: SemesterType
- @dataclass
- class Date:
- yyyy: int
- mm: int
- dd: int
- @dataclass
- class Time:
- hh: int
- mm: int
- @dataclass
- class ExportTimestamp:
- date: Date
- time: Time
- @unique
- class DegreeProgram(Enum):
- DATASCIENCE = "Data Science"
- ELEK_U_INFO = "Elektro- und Informationstechnik"
- ENER_U_UMWELT = "Energie- und Umwelttechnik"
- ICOMPETENCE = "iCompetence"
- INFORMATIK = "Informatik"
- KONTEXT_BWL = "Kontext BWL"
- KONTEXT_ENGLISCH = "Kontext Englisch"
- KONTEXT_GSW = "Kontext GSW"
- KONTEXT_KOMM = "Kontext Kommunikation"
- MIXED_BWL_GSW_KOMM = "Mixed BWL, GSW, Kommunikation"
- MASCHINENBAU = "Maschinenbau"
- SYSTEMTECHNIK = "Systemtechnik"
- WIRTSCHAFT_ING = "Wirtschaftsingenieurwesen"
- AGNOSTIC = "SG-???"
- @dataclass
- class PageMetadata:
- semester: Semester
- export_timestamp: ExportTimestamp
- class_name: str
- degree_program: DegreeProgram
- @dataclass
- class UnmergedTimeEntries:
- cells: list[Area]
- horizontal_lines: list[HorizontalLine]
- @dataclass(frozen=True)
- class TimeSlot:
- start_time: str
- end_time: str
- def start_seconds(self) -> int:
- hours, minutes = map(int, self.start_time.split(":"))
- return hours * 3600 + minutes * 60
- def end_seconds(self) -> int:
- hours, minutes = map(int, self.end_time.split(":"))
- return hours * 3600 + minutes * 60
- @dataclass
- class RawExtractedModule:
- weekday: Weekday
- start_seconds: int
- end_seconds: int
- text: str
- source_page_number: int
- @unique
- class TeachingType(Enum):
- ON_SITE = "on_site"
- ONLINE = "online"
- HYBRID = "hybrid"
- BLOCK = "blockmodule"
- @dataclass
- class RawLecturer:
- """
- Basic representation of an extracted lecturer from a pdf that needs to be parsed.
- """
- shorthand: str
- firstname: str
- surname: str
- class Lecturer(BaseModel):
- """
- JSON-serializable representation of a parsed lecturer ready to be exported.
- """
- short: str
- surname: str
- firstname: str
- # tells pydantic to to use the index field for the special Weekday Enum
- CustomWeekday = Annotated[Weekday, PlainSerializer(lambda v: v.index, return_type=int)]
- @dataclass
- class ParsedModuleCellTextData:
- module_shorthand: str
- degree_program: DegreeProgram
- class_name: str
- rooms: list[str]
- part_of_other_classes: list[str]
- teaching_type: TeachingType
- lecturer_shortnames: list[str]
- class ClassJsonModule(BaseModel):
- model_config = ConfigDict(populate_by_name=True)
- weekday: CustomWeekday
- module_shorthand: str = Field(..., alias="name")
- start_seconds: int = Field(..., alias="from")
- end_seconds: int = Field(..., alias="to")
- degree_program: DegreeProgram = Field(..., alias="degree_prg")
- class_name: str = Field(..., alias="class")
- rooms: list[str]
- pages: list[int]
- part_of_other_classes: list[str]
- id: str
- teaching_type: TeachingType
- lecturer_shorthands: list[str] = Field(..., alias="teachers")
- @dataclass
- class ClassPdfExtractionPageData:
- raw_extracted_modules: list[RawExtractedModule]
- page_metadata: PageMetadata
- @dataclass
- class StartsWithMatch:
- shorthand_found: str
- num_of_matches: int
|