noah
/
modulplaner-backend


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
							from typing import List
import logging

from pydantic import TypeAdapter

from config import LECTURER_SHORTHAND_SIZE

from .models import (
    RawExtractedModule,
    ClassJsonModule,
    ParsedModuleCellTextData,
    DegreeProgram,
    TeachingType,
    Weekday,
)


def get_modules_for_class_json(
    modules: list[RawExtractedModule],
    class_name: str,
    degree_program: DegreeProgram,
    valid_lecturer_shorthands: list[str] | None = None,
) -> list[ClassJsonModule]:
    """Convert raw extracted modules into ``ClassJsonModule`` entries.

    The text of every raw module is parsed with ``parse_module_cell_text``.
    The parsed fields are combined with the weekday, time span and source
    page taken from the raw module, and an id is derived via ``get_id``.

    Args:
        modules: Raw modules to convert.
        class_name: Class name stored on every result and used in its id.
        degree_program: Degree program forwarded to the cell-text parser.
        valid_lecturer_shorthands: Optional list of known lecturer
            shorthands, forwarded to the cell-text parser.

    Returns:
        One ``ClassJsonModule`` per input module, in input order.
    """

    def build(raw: RawExtractedModule) -> ClassJsonModule:
        # Parse the cell text first; most output fields come from it.
        cell = parse_module_cell_text(
            raw.text, class_name, degree_program, valid_lecturer_shorthands
        )
        return ClassJsonModule(
            weekday=raw.weekday,
            module_shorthand=cell.module_shorthand,  # pyright: ignore
            start_seconds=raw.start_seconds,  # pyright: ignore
            end_seconds=raw.end_seconds,  # pyright: ignore
            degree_program=cell.degree_program,  # pyright: ignore
            class_name=class_name,  # pyright: ignore
            rooms=cell.rooms,
            # Each raw module contributes exactly its own source page.
            pages=[raw.source_page_number],
            part_of_other_classes=cell.part_of_other_classes,
            teaching_type=cell.teaching_type,
            lecturer_shorthands=cell.lecturer_shortnames,  # pyright: ignore
            id=get_id(
                class_name,
                cell.module_shorthand,
                raw.weekday,
                raw.start_seconds,
                raw.end_seconds,
            ),
        )

    return [build(raw) for raw in modules]


def deduplicate_modules(modules: list[ClassJsonModule]) -> list[ClassJsonModule]:
    """Collapse modules that share an ``id`` into a single entry.

    The first module seen for an id is kept. For every later module with the
    same id, its pages are merged into the kept module's ``pages`` (duplicates
    removed, sorted ascending). The kept module is modified in place.

    Args:
        modules: Modules to de-duplicate.

    Returns:
        The kept modules, ordered by first appearance of their id.
    """
    kept_by_id: dict[str, ClassJsonModule] = {}
    for candidate in modules:
        kept = kept_by_id.get(candidate.id)
        if kept is None:
            kept_by_id[candidate.id] = candidate
            continue
        # Same id seen before: only the page list of the kept entry grows.
        merged_pages = set(kept.pages + candidate.pages)
        kept.pages = sorted(merged_pages)
    return [*kept_by_id.values()]


def get_modules_json(modules: List[ClassJsonModule]) -> str:
    """
    Serialize a list of ClassJsonModule objects into a JSON string.

    The list is dumped through a pydantic ``TypeAdapter`` using field
    aliases, and the resulting bytes are decoded as UTF-8.
    """
    encoded = TypeAdapter(List[ClassJsonModule]).dump_json(modules, by_alias=True)
    return encoded.decode("utf-8")


def parse_mixed_degree_programs(
    degree_program: DegreeProgram, module_shorthand: str
) -> DegreeProgram:
    """Resolve the mixed BWL/GSW/KOMM program to a concrete context program.

    Any degree program other than ``DegreeProgram.MIXED_BWL_GSW_KOMM`` is
    returned unchanged. For the mixed program the module shorthand decides:
    "bplan" and "lean" map to ``KONTEXT_BWL``, "wisa" and "aua" map to
    ``KONTEXT_KOMM``, and everything else maps to ``KONTEXT_GSW``.

    Args:
        degree_program: Degree program to resolve.
        module_shorthand: Shorthand of the module being classified.

    Returns:
        The resolved degree program.
    """
    if degree_program != DegreeProgram.MIXED_BWL_GSW_KOMM:
        return degree_program

    if module_shorthand in ("bplan", "lean"):
        return DegreeProgram.KONTEXT_BWL
    if module_shorthand in ("wisa", "aua"):
        return DegreeProgram.KONTEXT_KOMM
    # Fallback for every other shorthand of the mixed program.
    return DegreeProgram.KONTEXT_GSW


def parse_module_cell_text(
    text: str,
    class_name: str,
    degree_program: DegreeProgram,
    valid_lecturer_shorthands: list[str] | None = None,
) -> ParsedModuleCellTextData:
    """Parse the text of one module cell into its structured fields.

    The text must consist of two or three newline-separated lines: the first
    yields the module shorthand, the second the lecturer shorthands, and the
    optional third the rooms and teaching type. Without a third line the
    rooms are empty and the teaching type is ``TeachingType.ON_SITE``.

    Args:
        text: Raw cell text.
        class_name: Class name, stored on the result and used when
            extracting the module shorthand.
        degree_program: Degree program, resolved through
            ``parse_mixed_degree_programs``.
        valid_lecturer_shorthands: Optional list of known lecturer
            shorthands.

    Returns:
        The parsed data; ``part_of_other_classes`` is always an empty list.

    Raises:
        RuntimeError: If the text does not have exactly two or three lines
            (or if the module shorthand cannot be extracted).
    """
    cell_lines = text.split("\n")
    logging.debug("Parsing module cell text: \n%s", text)

    line_count = len(cell_lines)
    if line_count not in (2, 3):
        raise RuntimeError("Invalid Number of Lines in the cell text.")

    has_third_line = line_count == 3
    rooms = get_rooms(cell_lines[2]) if has_third_line else []
    teaching_type = (
        get_teaching_type(cell_lines[2]) if has_third_line else TeachingType.ON_SITE
    )

    module_shorthand = get_module_shorthand(cell_lines[0], class_name)
    resolved_program = parse_mixed_degree_programs(degree_program, module_shorthand)
    lecturers = get_lecturer_shortnames(cell_lines[1], valid_lecturer_shorthands)

    return ParsedModuleCellTextData(
        module_shorthand=module_shorthand,
        degree_program=resolved_program,
        class_name=class_name,
        rooms=rooms,
        part_of_other_classes=[],
        teaching_type=teaching_type,
        lecturer_shortnames=lecturers,
    )


def get_lecturer_shortnames(
    second_line: str, valid_lecturer_shorthands: list[str] | None = None
) -> list[str]:
    lecturer_shorthands: list[str] = []
    words = second_line.split(" ")
    if valid_lecturer_shorthands is None:
        for word in words:
            if len(word) == LECTURER_SHORTHAND_SIZE:
                lecturer_shorthands.append(word)
    else:
        for word in words:
            if word in valid_lecturer_shorthands or (
                len(word) == LECTURER_SHORTHAND_SIZE and shorthand.startswith(word)
                for shorthand in valid_lecturer_shorthands
            ):
                lecturer_shorthands.append(word)

    return lecturer_shorthands


def get_module_shorthand(first_line: str, class_name: str) -> str:
    """Extract the module shorthand from the first line of a module cell.

    The shorthand is the first space-separated token of the line. When the
    line consists of a single token, the longest prefix of ``class_name``
    that the token ends with is stripped from the end of the token.

    Args:
        first_line: First line of the cell text.
        class_name: Class name whose (possibly truncated) prefix may be
            attached to the end of the token.

    Returns:
        The non-empty module shorthand.

    Raises:
        RuntimeError: If the resulting shorthand is empty, e.g. for an empty
            line, a line starting with a space, or a single token made up
            entirely of a class-name prefix.
    """
    # str.split(" ") always returns at least one element, so the first token
    # can be taken without an emptiness check on the token list.
    tokens = first_line.split(" ")
    shorthand = tokens[0]

    if len(tokens) == 1:
        # Try the longest class-name prefix first. The empty prefix is
        # skipped because stripping it would leave the token unchanged.
        for prefix_length in range(len(class_name), 0, -1):
            if shorthand.endswith(class_name[:prefix_length]):
                shorthand = shorthand[:-prefix_length]
                break

    if not shorthand:
        raise RuntimeError("Module shorthand cannot be empty")
    return shorthand


def get_id(
    class_name: str,
    module_shorthand: str,
    weekday: Weekday,
    start_seconds: int,
    end_seconds: int,
) -> str:
    """Build the id string of a module.

    The id joins, with hyphens, the class name, the module shorthand, the
    ``index`` attribute of the weekday, and the start and end seconds.

    Args:
        class_name: Class the module belongs to.
        module_shorthand: Shorthand of the module.
        weekday: Weekday of the module; its ``index`` attribute is used.
        start_seconds: Start of the module in seconds.
        end_seconds: End of the module in seconds.

    Returns:
        The hyphen-separated id.
    """
    module_part = f"{class_name}-{module_shorthand}"
    time_part = f"{start_seconds}-{end_seconds}"
    return f"{module_part}-{weekday.index}-{time_part}"


def get_teaching_type(third_line: str) -> TeachingType:
    """Return the teaching type indicated by the third line of a module cell.

    Args:
        third_line: Third line of the cell text.

    Returns:
        ``TeachingType.ONLINE`` if the line contains the substring "Online"
        (case-sensitive), otherwise ``TeachingType.ON_SITE``.
    """
    is_online = "Online" in third_line
    return TeachingType.ONLINE if is_online else TeachingType.ON_SITE


def get_rooms(third_line: str) -> list[str]:
    """Extract the room names from the third line of a module cell.

    Args:
        third_line: Third line of the cell text.

    Returns:
        The space-separated tokens of the line, or an empty list when the
        line contains the marker "DSMixe". Empty tokens are dropped, so an
        empty line or consecutive spaces never produce an empty room name.
    """
    # NOTE(review): "DSMixe" is treated as "no rooms"; presumably a truncated
    # marker from the source timetable - confirm its meaning.
    if "DSMixe" in third_line:
        return []

    # split(" ") yields "" for an empty line and between consecutive spaces;
    # those are not room names.
    return [token for token in third_line.split(" ") if token]