from pathlib import Path

from jinja2 import Environment
from pdfminer.high_level import extract_text


class Menus:
    """Menu data pulled out of a PDF file.

    Building an instance immediately reads the PDF and fills in ``month``
    and ``days`` (see :meth:`extract_cells`).
    """

    def __init__(self, menu_pdf_file: Path):
        """Remember the PDF path and parse it right away.

        Args:
            menu_pdf_file: Path of the PDF handed to ``extract_text``.
        """
        self.menu_pdf_file = menu_pdf_file
        self.month = ""
        self.days = []
        self.extract_cells()

    def extract_cells(self) -> None:
        """Extract the PDF text, clean it up and split it into cells.

        The extracted text is walked line by line; each line is classified
        through :class:`Line` and either dropped, trimmed, merged with the
        line before it, or kept unchanged. The cleaned text is then split
        on blank lines: the first cell is stored in ``self.month`` and the
        cells at indices 6 to 10 are stored in ``self.days``.
        """
        # Only the page selected by ``page_numbers=[0]`` is extracted.
        raw_text = extract_text(self.menu_pdf_file, page_numbers=[0])

        # Each chunk is "<kept text>\n"; they are concatenated once at the end.
        chunks = []
        # True right after a glitch line; the next blank line is then dropped.
        after_title = False

        for raw_line in raw_text.splitlines():
            line = Line(raw_line)

            if line.is_glitch():
                # Single-character line: discard it entirely.
                after_title = True
                continue

            if line.starts_with_glitch():
                # Drop the first two characters and keep the remainder.
                chunks.append(line.text[2:] + "\n")
                after_title = True
                continue

            if line.is_part_of_previous_line():
                # Glue onto the previous line by removing the newline that
                # currently ends the accumulated text (if there is any text).
                if chunks:
                    chunks[-1] = chunks[-1].removesuffix("\n")
                chunks.append(line.text + "\n")
            elif not (line.text == "" and after_title):
                # Ordinary line. A blank line directly after a glitch line is
                # the one case that is swallowed instead of kept.
                chunks.append(line.text + "\n")

            after_title = False

        cells = "".join(chunks).split("\n\n")
        self.month = cells[0]
        # NOTE(review): indices 6..10 presumably hold the per-day cells of
        # the PDF layout -- confirm against a sample menu.
        self.days = cells[6:11]


class Line:
    """One line of extracted PDF text plus the predicates used to clean it."""

    def __init__(self, text: str):
        """Wrap ``text``, a single line without its line terminator."""
        self.text = text

    def is_glitch(self) -> bool:
        """Return True when the line is exactly one character long."""
        return len(self.text) == 1

    def starts_with_glitch(self) -> bool:
        """Return True when the line's second character is a space.

        Lines shorter than two characters never match.
        """
        # The slice is "" for short lines, so no explicit length check.
        return self.text[1:2] == " "

    def is_part_of_previous_line(self) -> bool:
        """Return True when the line is non-empty and starts in lowercase."""
        # ``"".islower()`` is False, which covers the empty line.
        return self.text[:1].islower()


class MenuMessageFormatter:
    """Renders the menu message from a Jinja2 template."""

    def __init__(self, env: Environment):
        """Keep the Jinja2 environment used to look up templates."""
        self.env = env

    def create_message(self, menus: Menus, is_update: bool) -> str:
        """Render the ``message.txt`` template.

        Args:
            menus: Passed to the template as ``menus``.
            is_update: Passed to the template as ``is_update``.

        Returns:
            The rendered template text.
        """
        return self.env.get_template("message.txt").render(
            menus=menus, is_update=is_update
        )