img.py 1.0 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. import logging
  2. import numpy
  3. from pdfplumber.page import Page
  4. from .models import Area
  5. logger = logging.getLogger("modulplaner-backend.img")
  6. def is_mostly_white_area(page: Page, area: Area) -> bool:
  7. """
  8. Checks wether an Area can be considered mostly white.
  9. Intended for detecting empty timetable cells.
  10. """
  11. img = (
  12. page.crop((area.x1, area.y1, area.x2, area.y2))
  13. .to_image(resolution=150)
  14. .original.convert("RGB")
  15. )
  16. arr = numpy.array(img)
  17. total_pixels = arr.shape[0] * arr.shape[1]
  18. r = arr[:, :, 0].astype(int)
  19. g = arr[:, :, 1].astype(int)
  20. b = arr[:, :, 2].astype(int)
  21. min_rgb = numpy.minimum(numpy.minimum(r, g), b)
  22. max_rgb = numpy.maximum(numpy.maximum(r, g), b)
  23. channel_spread = max_rgb - min_rgb
  24. is_whitish = (min_rgb >= 250) & (channel_spread <= 25)
  25. total_pixels = arr.shape[0] * arr.shape[1]
  26. whitish_percentage = is_whitish.sum() / total_pixels
  27. logger.debug("whitish: %.2f%%", whitish_percentage * 100)
  28. return whitish_percentage > 0.9