3 luni în urmă · 796f234ade
--- a/extract_lecturer_shorthands_pdf.py
+++ b/extract_lecturer_shorthands_pdf.py
@@ -14,6 +14,8 @@ from config import (
 
				 )
			
 
				 from parse import RawLecturer, Lecturer
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.extract_lecturer_shorthands_pdf")
			
 
				+
			
 
				 
			
 
				 def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
			
 
				     lecturers: list[RawLecturer] = []
			
@@ -34,7 +36,7 @@ def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
 
				             # even if it drifts slightly left.
			
 
				             sep_x_1 = nachname_rects[0]["x0"] - 2
			
 
				             sep_x_2 = vorname_rects[0]["x0"] - 2
			
 
				-            logging.debug(
			
 
				+            logger.debug(
			
 
				                 "calculated separators: %d (Nachname), %d (Vorname)", sep_x_1, sep_x_2
			
 
				             )
			
 
				         else:
			
@@ -57,7 +59,7 @@ def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
 
				 
			
 
				             # guard against empty lines list if page has no lines
			
 
				             if not lines_y1:
			
 
				-                logging.warning("First page has no lines")
			
 
				+                logger.warning("First page has no lines")
			
 
				                 crop_box = (0, 0, page.width, page.height)
			
 
				             else:
			
 
				                 crop_box = (0, min_line_y1, page.width, max_line_y1)
			
@@ -78,7 +80,7 @@ def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
 
				 
			
 
				             for row_index, row in enumerate(table.rows):
			
 
				                 if row is None:
			
 
				-                    logging.debug("None table row found")
			
 
				+                    logger.debug("None table row found")
			
 
				                     continue
			
 
				 
			
 
				                 valid_cells = [cell for cell in row.cells if cell is not None]
			
@@ -93,7 +95,7 @@ def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
 
				 
			
 
				                 row_bbox = (row_left, row_top, row_right, row_bottom)
			
 
				 
			
 
				-                logging.debug("row %d dimensions: %s", row_index, row_bbox)
			
 
				+                logger.debug("row %d dimensions: %s", row_index, row_bbox)
			
 
				 
			
 
				                 # column 1: From start of row -> Nachname separator
			
 
				                 col1_bbox = (row_left, row_top, sep_x_1, row_bottom)
			
@@ -102,19 +104,23 @@ def extract_rows_from_lecturer_shorthand_pdf(input_file) -> list[RawLecturer]:
 
				                 # column 3: From Vorname separator -> End of row
			
 
				                 col3_bbox = (sep_x_2, row_top, row_right, row_bottom)
			
 
				 
			
 
				-                logging.debug("col 1 bbox: %s", col1_bbox)
			
 
				-                logging.debug("col 2 bbox: %s", col2_bbox)
			
 
				-                logging.debug("col 3 bbox: %s", col3_bbox)
			
 
				+                logger.debug("col 1 bbox: %s", col1_bbox)
			
 
				+                logger.debug("col 2 bbox: %s", col2_bbox)
			
 
				+                logger.debug("col 3 bbox: %s", col3_bbox)
			
 
				 
			
 
				                 row_text: str = cropped_page.crop(row_bbox).extract_text()
			
 
				-                logging.debug("row text: %s", row_text)
			
 
				+                logger.debug("row text: %s", row_text)
			
 
				                 col1_text = cropped_page.crop(col1_bbox).extract_text()
			
 
				-                logging.debug("col 1 text: %s", col1_text)
			
 
				+                logger.debug("col 1 text: %s", col1_text)
			
 
				                 col2_text = cropped_page.crop(col2_bbox).extract_text()
			
 
				-                logging.debug("col 2 text: %s", col2_text)
			
 
				+                logger.debug("col 2 text: %s", col2_text)
			
 
				                 col3_text = cropped_page.crop(col3_bbox).extract_text()
			
 
				-                logging.debug("col 3 text: %s", col3_text)
			
 
				-                lecturers.append(RawLecturer(col1_text, col3_text, col2_text))
			
 
				+                logger.debug("col 3 text: %s", col3_text)
			
 
				+                lecturers.append(
			
 
				+                    RawLecturer(
			
 
				+                        shorthand=col1_text, firstname=col3_text, surname=col2_text
			
 
				+                    )
			
 
				+                )
			
 
				 
			
 
				     return lecturers
			
 
				 
			
@@ -147,7 +153,7 @@ def parse_lecturers(raw_lecturers: list[RawLecturer]) -> list[Lecturer]:
 
				     lecturers: list[Lecturer] = []
			
 
				     for raw_lecturer in raw_lecturers:
			
 
				         if is_table_header_row(raw_lecturer) or is_vak_example_row(raw_lecturer):
			
 
				-            logging.debug("skipping raw lecturer: %s", raw_lecturer)
			
 
				+            logger.debug("skipping raw lecturer: %s", raw_lecturer)
			
 
				         else:
			
 
				             new_lecturer: Lecturer = Lecturer(
			
 
				                 short=raw_lecturer.shorthand,
			
@@ -155,7 +161,7 @@ def parse_lecturers(raw_lecturers: list[RawLecturer]) -> list[Lecturer]:
 
				                 firstname=raw_lecturer.firstname,
			
 
				             )
			
 
				             if new_lecturer in lecturers:
			
 
				-                logging.debug("skipped over duplicate lecturer: %s", new_lecturer)
			
 
				+                logger.debug("skipped over duplicate lecturer: %s", new_lecturer)
			
 
				             else:
			
 
				                 lecturers.append(new_lecturer)
			
 
				     return lecturers
			
@@ -175,9 +181,16 @@ def main() -> None:
 
				         help="Path to the output JSON file",
			
 
				         default=LECTURER_SHORTHAND_JSON_OUTPUT_FILE,
			
 
				     )
			
 
				+    parser.add_argument(
			
 
				+        "--log-level",
			
 
				+        help="Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
			
 
				+        default="INFO",
			
 
				+        type=str.upper,
			
 
				+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
			
 
				+    )
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				-    logging.basicConfig(level=logging.INFO)
			
 
				+    logging.basicConfig(level=args.log_level)
			
 
				 
			
 
				     raw_lecturers: list[RawLecturer] = extract_rows_from_lecturer_shorthand_pdf(
			
 
				         args.input
			
--- a/generate_classes_json.py
+++ b/generate_classes_json.py
@@ -17,6 +17,8 @@ from parse import (
 
				 
			
 
				 from config import CLASS_TIMETABLE_PDF_INPUT_FILE, CLASSES_JSON_OUTPUT_FILE
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend")
			
 
				+
			
 
				 
			
 
				 def get_valid_lecturers(file_path: str) -> list[str]:
			
 
				     """
			
@@ -24,18 +26,18 @@ def get_valid_lecturers(file_path: str) -> list[str]:
 
				     """
			
 
				     valid_lecturers: list[str] = []
			
 
				     try:
			
 
				-        logging.warning("reading lecturers file: '%s'", file_path)
			
 
				+        logger.warning("reading lecturers file: '%s'", file_path)
			
 
				         with open(file_path, "r", encoding="utf-8") as f:
			
 
				             data = json.load(f)
			
 
				             if isinstance(data, list):
			
 
				                 for entry in data:
			
 
				                     if isinstance(entry, dict) and "short" in entry:
			
 
				                         valid_lecturers.append(entry["short"])
			
 
				-        logging.info(
			
 
				+        logger.info(
			
 
				             "Loaded %d valid lecturers from %s", len(valid_lecturers), file_path
			
 
				         )
			
 
				     except Exception as e:
			
 
				-        logging.error("Failed to load valid lecturers from '%s': %s", file_path, e)
			
 
				+        logger.error("Failed to load valid lecturers from '%s': %s", file_path, e)
			
 
				     return valid_lecturers
			
 
				 
			
 
				 
			
@@ -76,11 +78,18 @@ def main() -> None:
 
				         type=int,
			
 
				         default=1,
			
 
				     )
			
 
				+    parser.add_argument(
			
 
				+        "--log-level",
			
 
				+        help="Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
			
 
				+        default="INFO",
			
 
				+        type=str.upper,
			
 
				+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
			
 
				+    )
			
 
				 
			
 
				     args = parser.parse_args()
			
 
				     lecturers_file = args.lecturers
			
 
				 
			
 
				-    logging.basicConfig(level=logging.INFO)
			
 
				+    logging.basicConfig(level=args.log_level)
			
 
				 
			
 
				     valid_lecturer_shorthands: list[str] | None = None
			
 
				     if lecturers_file:
			
@@ -89,7 +98,7 @@ def main() -> None:
 
				     extraction_data: list[ClassPdfExtractionPageData]
			
 
				 
			
 
				     if args.load_intermediate:
			
 
				-        logging.info("Loading intermediate data from %s", args.load_intermediate)
			
 
				+        logger.info("Loading intermediate data from %s", args.load_intermediate)
			
 
				         with open(args.load_intermediate, "r", encoding="utf-8") as f:
			
 
				             extraction_data = TypeAdapter(
			
 
				                 list[ClassPdfExtractionPageData]
			
@@ -97,7 +106,7 @@ def main() -> None:
 
				     else:
			
 
				         extraction_data = extract_data_from_class_pdf(args.input, num_of_jobs=args.jobs)
			
 
				         if args.save_intermediate:
			
 
				-            logging.info("Saving intermediate data to %s", args.save_intermediate)
			
 
				+            logger.info("Saving intermediate data to %s", args.save_intermediate)
			
 
				             with open(args.save_intermediate, "w", encoding="utf-8") as f:
			
 
				                 f.write(
			
 
				                     TypeAdapter(list[ClassPdfExtractionPageData])
			
--- a/parse/above_table_text.py
+++ b/parse/above_table_text.py
@@ -11,6 +11,8 @@ from .models import (
 
				     Time,
			
 
				 )
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.above_table_text")
			
 
				+
			
 
				 
			
 
				 def parse_above_table_text(
			
 
				     txt: str, previous_page_metadata: list[PageMetadata]
			
@@ -76,13 +78,13 @@ def get_class_name(third_line: str) -> str:
 
				 def get_degree_program(
			
 
				     third_line: str, class_name: str, previous_page_metadata: list[PageMetadata]
			
 
				 ) -> DegreeProgram:
			
 
				-    logging.debug("class_name: '%s'", class_name)
			
 
				+    logger.debug("class_name: '%s'", class_name)
			
 
				     if "Kontext BWL" and "Kommunikation" and "GSW" in third_line:
			
 
				         return DegreeProgram.MIXED_BWL_GSW_KOMM
			
 
				     for degree_program in DegreeProgram:
			
 
				         if degree_program.value in third_line:
			
 
				             return degree_program
			
 
				-    logging.warning("Using heuristics to guess the degree_program in %s", third_line)
			
 
				+    logger.warning("Using heuristics to guess the degree_program in %s", third_line)
			
 
				     try:
			
 
				         for page_metadata in previous_page_metadata:
			
 
				             if page_metadata.class_name == class_name[:-1]:
			
--- a/parse/img.py
+++ b/parse/img.py
@@ -5,6 +5,8 @@ from pdfplumber.page import Page
 
				 
			
 
				 from .models import Area
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.img")
			
 
				+
			
 
				 
			
 
				 def is_mostly_white_area(page: Page, area: Area) -> bool:
			
 
				     """
			
@@ -31,6 +33,6 @@ def is_mostly_white_area(page: Page, area: Area) -> bool:
 
				 
			
 
				     total_pixels = arr.shape[0] * arr.shape[1]
			
 
				     whitish_percentage = is_whitish.sum() / total_pixels
			
 
				-    logging.debug("whitish: %.2f%%", whitish_percentage * 100)
			
 
				+    logger.debug("whitish: %.2f%%", whitish_percentage * 100)
			
 
				 
			
 
				     return whitish_percentage > 0.9
			
--- a/parse/parse_modules.py
+++ b/parse/parse_modules.py
@@ -1,5 +1,5 @@
 
				-from typing import List
			
 
				 import logging
			
 
				+from typing import List
			
 
				 
			
 
				 from pydantic import TypeAdapter
			
 
				 
			
@@ -16,6 +16,8 @@ from .models import (
 
				     ClassPdfExtractionPageData,
			
 
				 )
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.parse_modules")
			
 
				+
			
 
				 
			
 
				 def get_modules_for_class_json(
			
 
				     modules: list[RawExtractedModule],
			
@@ -110,7 +112,7 @@ def parse_module_class_pdf_cell_text(
 
				     Parse a single Class Timetable PDF module cell text.
			
 
				     """
			
 
				     lines = text.split("\n")
			
 
				-    logging.debug("Parsing module cell text: \n%s", text)
			
 
				+    logger.debug("Parsing module cell text: \n%s", text)
			
 
				     if len(lines) != 3 and len(lines) != 2:
			
 
				         raise RuntimeError("Invalid Number of Lines in the cell text.")
			
 
				     if len(lines) == 3:
			
@@ -149,7 +151,7 @@ def get_lecturer_shorthands(
 
				             if len(word) == LECTURER_SHORTHAND_SIZE:
			
 
				                 lecturer_shorthands.append(word)
			
 
				             else:
			
 
				-                logging.warning("Could not get Lecturer Shorthand from word: %s", word)
			
 
				+                logger.warning("Could not get Lecturer Shorthand from word: %s", word)
			
 
				     else:
			
 
				         for word in words:
			
 
				             exact_starts_with_match = matches_startswith(
			
@@ -168,7 +170,7 @@ def get_lecturer_shorthands(
 
				                     minus_last_char_starts_with_match.shorthand_found
			
 
				                 )
			
 
				             else:
			
 
				-                logging.warning("Could not get Lecturer Shorthand from word: %s", word)
			
 
				+                logger.warning("Could not get Lecturer Shorthand from word: %s", word)
			
 
				     return lecturer_shorthands
			
 
				 
			
 
				 
			
@@ -213,13 +215,13 @@ def get_module_shorthand(
 
				                     f"cut off class name part '{class_name_part}'"
			
 
				                     + f" of class name '{class_name}' in line '{first_line}'"
			
 
				                 )
			
 
				-                logging.debug(debug_msg)
			
 
				+                logger.debug(debug_msg)
			
 
				                 break
			
 
				 
			
 
				         for foreign_class_name in all_class_names:
			
 
				             if word.endswith(foreign_class_name):
			
 
				                 word = word[: word.rfind(foreign_class_name)]
			
 
				-                logging.debug(
			
 
				+                logger.debug(
			
 
				                     "cut off class name '%s' in line '%s'",
			
 
				                     foreign_class_name,
			
 
				                     first_line,
			
--- a/parse/table_extraction.py
+++ b/parse/table_extraction.py
@@ -2,9 +2,9 @@ import logging
 
				 from multiprocessing import Pool
			
 
				 from pathlib import Path
			
 
				 
			
 
				+import pdfplumber
			
 
				 from pdfplumber.page import Page
			
 
				 from pdfplumber.table import Table
			
 
				-import pdfplumber
			
 
				 
			
 
				 from config import (
			
 
				     CLASS_TIMETABLE_PDF_TABLE_SETTINGS,
			
@@ -33,6 +33,9 @@ from .geometry import (
 
				 )
			
 
				 from .img import is_mostly_white_area
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.table_extraction")
			
 
				+
			
 
				+
			
 
				 allowed_time_slots: list[TimeSlot] = [
			
 
				     TimeSlot(start_time=timeslot_tuple[0], end_time=timeslot_tuple[1])
			
 
				     for timeslot_tuple in ALLOWED_TIMESLOTS
			
@@ -75,7 +78,7 @@ def merge_vertically_spanning_cells(
 
				     current_area = initial_area
			
 
				 
			
 
				     while True:
			
 
				-        logging.debug(
			
 
				+        logger.debug(
			
 
				             "Searching for bottom boundary of area: %s on %s", current_area, weekday
			
 
				         )
			
 
				 
			
@@ -84,12 +87,12 @@ def merge_vertically_spanning_cells(
 
				             is_line_at_bottom(current_area, line, tolerance=20)
			
 
				             for line in horizontal_lines
			
 
				         ):
			
 
				-            logging.debug("Bottom boundary found: horizontal line")
			
 
				+            logger.debug("Bottom boundary found: horizontal line")
			
 
				             return current_area
			
 
				 
			
 
				         # case 2: reached the bottom of the timetable?
			
 
				         if is_vertical_match(current_area.y2, highest_y):
			
 
				-            logging.debug("Bottom boundary found: highest y level")
			
 
				+            logger.debug("Bottom boundary found: highest y level")
			
 
				             return current_area
			
 
				 
			
 
				         # case 3: find and merge with the next cell below
			
@@ -100,7 +103,7 @@ def merge_vertically_spanning_cells(
 
				             )
			
 
				 
			
 
				         next_cell = remaining_cells.pop(next_cell_index)
			
 
				-        logging.debug("Vertically merging with cell below: %s", next_cell)
			
 
				+        logger.debug("Vertically merging with cell below: %s", next_cell)
			
 
				 
			
 
				         current_area = Area(
			
 
				             x1=current_area.x1,
			
@@ -129,7 +132,7 @@ def get_modules_from_weekday(
 
				         initial_area = cells.pop(0)
			
 
				 
			
 
				         if is_mostly_white_area(page, initial_area):
			
 
				-            logging.debug("mostly white cell skipped")
			
 
				+            logger.debug("mostly white cell skipped")
			
 
				             continue
			
 
				 
			
 
				         merged_area: Area = merge_vertically_spanning_cells(
			
@@ -178,7 +181,7 @@ def get_highest_y_level(timeslot_y_levels, page_number) -> float:
 
				     try:
			
 
				         highest_y_level = timeslot_y_levels[allowed_time_slots[-1]].y2
			
 
				     except KeyError as e:
			
 
				-        logging.debug("timeslot_y_levels on page %d %s", page_number, timeslot_y_levels)
			
 
				+        logger.debug("timeslot_y_levels on page %d %s", page_number, timeslot_y_levels)
			
 
				         raise RuntimeError("Could not get YLevel for latest TimeSlot") from e
			
 
				     return highest_y_level
			
 
				 
			
@@ -198,7 +201,7 @@ def get_usable_table_index(found_tables: list) -> int:
 
				         x0, top, x1, bottom = table.bbox
			
 
				         width = x1 - x0
			
 
				         height = bottom - top
			
 
				-        logging.debug(
			
 
				+        logger.debug(
			
 
				             "table num %d: width: %d, height: %d",
			
 
				             index + 1,
			
 
				             width,
			
@@ -301,11 +304,11 @@ def collect_timeslot_y_levels_of_row(
 
				     Returns:
			
 
				         int for the current expected `TimeSlot` index
			
 
				     """
			
 
				-    logging.debug("row: %d, col: %d", collection_data.row_index, 0)
			
 
				+    logger.debug("row: %d, col: %d", collection_data.row_index, 0)
			
 
				     row = collection_data.table.rows[collection_data.row_index]
			
 
				     cell = row.cells[0]
			
 
				     if cell is None:
			
 
				-        logging.warning("None Table cell found, not collecting YLevel of Row")
			
 
				+        logger.warning("None Table cell found, not collecting YLevel of Row")
			
 
				         return collection_data.expected_timeslot_index
			
 
				     cell_text = collection_data.page.crop(
			
 
				         (cell[0], cell[1], cell[2], cell[3])
			
@@ -315,7 +318,7 @@ def collect_timeslot_y_levels_of_row(
 
				         target_timeslot.start_time in cell_text
			
 
				         and target_timeslot.end_time in cell_text
			
 
				     ):
			
 
				-        logging.warning("Unexpected TimeSlot found: '%s'", cell_text)
			
 
				+        logger.warning("Unexpected TimeSlot found: '%s'", cell_text)
			
 
				         return collection_data.expected_timeslot_index
			
 
				     if target_timeslot == collection_data.last_timeslot:
			
 
				         for weekday in Weekday:
			
@@ -337,19 +340,19 @@ def collect_weekday_areas(weekday_areas, page, row, row_index) -> None:
 
				     """
			
 
				     empty_start_found = False
			
 
				     for column_index, cell in enumerate(row.cells):
			
 
				-        logging.debug("row: %d, col: %d", row_index, column_index)
			
 
				-        logging.debug(cell)
			
 
				+        logger.debug("row: %d, col: %d", row_index, column_index)
			
 
				+        logger.debug(cell)
			
 
				         if cell is None:
			
 
				-            logging.debug("None Table Cell Found")
			
 
				+            logger.debug("None Table Cell Found")
			
 
				         else:
			
 
				             cell_text = page.crop((cell[0], cell[1], cell[2], cell[3])).extract_text()
			
 
				             if not empty_start_found and len(cell_text) == 0:
			
 
				-                logging.debug("empty start found")
			
 
				+                logger.debug("empty start found")
			
 
				                 empty_start_found = True
			
 
				 
			
 
				             weekday_enum: Weekday | None = get_weekday_from_text(cell_text)
			
 
				             if weekday_enum:
			
 
				-                logging.debug("Weekday %s found", cell_text)
			
 
				+                logger.debug("Weekday %s found", cell_text)
			
 
				                 weekday_areas[weekday_enum] = Area(
			
 
				                     x1=cell[0], y1=cell[3], x2=cell[2], y2=0
			
 
				                 )
			
@@ -362,7 +365,7 @@ def get_last_timeslot(time_slots: list[TimeSlot]) -> TimeSlot:
 
				     if len(time_slots) == 0:
			
 
				         raise RuntimeError("Cannot get the latest timeslot from an empty list")
			
 
				     last_timeslot = time_slots[-1]
			
 
				-    logging.debug("last timeslot found: %s", last_timeslot)
			
 
				+    logger.debug("last timeslot found: %s", last_timeslot)
			
 
				 
			
 
				     return last_timeslot
			
 
				 
			
@@ -382,7 +385,7 @@ def select_main_table(page: Page, page_index: int) -> Table:
 
				     Selects the main table on the PDF Page. This should be the timetable.
			
 
				     """
			
 
				     found_tables = page.find_tables(CLASS_TIMETABLE_PDF_TABLE_SETTINGS)
			
 
				-    logging.debug(
			
 
				+    logger.debug(
			
 
				         "amount of tables found on page %d: %d",
			
 
				         page_index + 1,
			
 
				         len(found_tables),
			
@@ -406,15 +409,15 @@ def collected_unmerged_time_entries_by_weekday(
 
				             cells=[], horizontal_lines=[]
			
 
				         )
			
 
				         target_area: Area = weekday_areas[weekday]
			
 
				-        logging.debug("target_area: %s", target_area)
			
 
				+        logger.debug("target_area: %s", target_area)
			
 
				 
			
 
				         for row_index, row in enumerate(table.rows):
			
 
				             for column_index, cell in enumerate(row.cells):
			
 
				                 if cell is None:
			
 
				-                    logging.debug("None table cell found")
			
 
				+                    logger.debug("None table cell found")
			
 
				                     continue
			
 
				-                logging.debug("row: %d, col: %d", row_index, column_index)
			
 
				-                logging.debug("cell: %s", cell)
			
 
				+                logger.debug("row: %d, col: %d", row_index, column_index)
			
 
				+                logger.debug("cell: %s", cell)
			
 
				                 if (
			
 
				                     target_area.x1 <= cell[0]
			
 
				                     and target_area.y1 <= cell[1]
			
@@ -424,7 +427,7 @@ def collected_unmerged_time_entries_by_weekday(
 
				                     unmerged_time_entries_by_weekday[weekday].cells.append(
			
 
				                         Area(x1=cell[0], y1=cell[1], x2=cell[2], y2=cell[3])
			
 
				                     )
			
 
				-                    logging.debug("%s cell found", weekday)
			
 
				+                    logger.debug("%s cell found", weekday)
			
 
				 
			
 
				         collect_horizontal_lines(
			
 
				             unmerged_time_entries_by_weekday, page, target_area, weekday
			
@@ -454,7 +457,7 @@ def collect_horizontal_lines(
 
				             continue
			
 
				 
			
 
				         if target_area.x1 <= line_x1 and target_area.x2 >= line_x2:
			
 
				-            logging.debug("%s timeslot seperator line found", weekday)
			
 
				+            logger.debug("%s timeslot seperator line found", weekday)
			
 
				             unmerged_time_entries_by_weekday[weekday].horizontal_lines.append(
			
 
				                 HorizontalLine(x1=line_x1, x2=line_x2, y=line_bottom)
			
 
				             )
			
@@ -467,10 +470,10 @@ def extract_data_from_class_pdf(
 
				     Extracts all data from the specified Class Timetable PDF filename.
			
 
				     Can run via multiple jobs.
			
 
				     """
			
 
				-    logging.info("Starting extraction with %d jobs", num_of_jobs)
			
 
				+    logger.info("Starting extraction with %d jobs", num_of_jobs)
			
 
				 
			
 
				     num_pages: int = get_number_of_pdf_pages(input_filename)
			
 
				-    logging.info("Found %d pages to process", num_pages)
			
 
				+    logger.info("Found %d pages to process", num_pages)
			
 
				 
			
 
				     processed_pages: list[RawClassPdfExtractionPageData] = process_pages_in_parallel(
			
 
				         num_of_jobs, input_filename, num_pages
			
@@ -531,7 +534,7 @@ def get_above_table_text(page: Page, table_y1: float) -> str:
 
				     upper_region = page.crop((0, 0, page.width, table_y1))
			
 
				     text_above_table = upper_region.extract_text()
			
 
				 
			
 
				-    logging.debug("Text found above the table:")
			
 
				-    logging.debug(text_above_table)
			
 
				+    logger.debug("Text found above the table:")
			
 
				+    logger.debug(text_above_table)
			
 
				 
			
 
				     return text_above_table
			
--- a/rip_modulplaner_frontend_data.py
+++ b/rip_modulplaner_frontend_data.py
@@ -17,6 +17,8 @@ from config import (
 
				     REQUESTS_TIMEOUT,
			
 
				 )
			
 
				 
			
 
				+logger = logging.getLogger("modulplaner-backend.rip_frontend_data")
			
 
				+
			
 
				 
			
 
				 def download_file(url: str, local_path: Path) -> bool:
			
 
				     """
			
@@ -30,16 +32,16 @@ def download_file(url: str, local_path: Path) -> bool:
 
				 
			
 
				         with open(local_path, "wb") as f:
			
 
				             f.write(response.content)
			
 
				-        logging.info("Downloaded: %s", local_path)
			
 
				+        logger.info("Downloaded: %s", local_path)
			
 
				         return True
			
 
				     except requests.exceptions.HTTPError as e:
			
 
				         if e.response.status_code == 404:
			
 
				-            logging.warning("File not found (404): %s", url)
			
 
				+            logger.warning("File not found (404): %s", url)
			
 
				         else:
			
 
				-            logging.error("Failed to download %s: %s", url, e)
			
 
				+            logger.error("Failed to download %s: %s", url, e)
			
 
				         return False
			
 
				     except Exception as e:
			
 
				-        logging.error("Error downloading %s: %s", url, e)
			
 
				+        logger.error("Error downloading %s: %s", url, e)
			
 
				         return False
			
 
				 
			
 
				 
			
@@ -49,12 +51,12 @@ def get_semester_versions(
 
				     """
			
 
				     Downloads and parses the semester-versions.json file.
			
 
				     """
			
 
				-    logging.info("Fetching semester list...")
			
 
				+    logger.info("Fetching semester list...")
			
 
				     if not download_file(
			
 
				         f"{base_url}/{FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE}",
			
 
				         output_dir / FRONTEND_RIPPER_SEMESTER_VERSIONS_FILE,
			
 
				     ):
			
 
				-        logging.error("Could not download semester-versions.json. Exiting.")
			
 
				+        logger.error("Could not download semester-versions.json. Exiting.")
			
 
				         return None
			
 
				 
			
 
				     try:
			
@@ -63,7 +65,7 @@ def get_semester_versions(
 
				         ) as f:
			
 
				             return json.load(f)
			
 
				     except json.JSONDecodeError:
			
 
				-        logging.error("Error parsing semester-versions.json")
			
 
				+        logger.error("Error parsing semester-versions.json")
			
 
				         return None
			
 
				 
			
 
				 
			
@@ -71,7 +73,7 @@ def process_semester(semester: str, base_url: str, output_dir: Path) -> None:
 
				     """
			
 
				     Downloads files associated with a specific semester.
			
 
				     """
			
 
				-    logging.info("Processing Semester: %s", semester)
			
 
				+    logger.info("Processing Semester: %s", semester)
			
 
				 
			
 
				     semester_level_files = ["blockclasses.json", "config.json"]
			
 
				     for s_file in semester_level_files:
			
@@ -89,7 +91,7 @@ def process_semester(semester: str, base_url: str, output_dir: Path) -> None:
 
				                         output_dir / semester / blockclass_file,
			
 
				                     )
			
 
				         except (json.JSONDecodeError, OSError) as e:
			
 
				-            logging.error("Error reading config.json for %s: %s", semester, e)
			
 
				+            logger.error("Error reading config.json for %s: %s", semester, e)
			
 
				 
			
 
				 
			
 
				 def process_version(
			
@@ -121,9 +123,16 @@ def main():
 
				         help="Output directory for downloaded files",
			
 
				         default=FRONTEND_RIPPER_OUTPUT_DIR_DEFAULT,
			
 
				     )
			
 
				+    parser.add_argument(
			
 
				+        "--log-level",
			
 
				+        help="Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
			
 
				+        default="INFO",
			
 
				+        type=str.upper,
			
 
				+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
			
 
				+    )
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
			
 
				+    logging.basicConfig(level=args.log_level)
			
 
				 
			
 
				     base_url = args.base_url
			
 
				     output_dir = Path(args.output_dir)