#!/usr/bin/env python3
"""Generate PDF annotation notes from CSV data.

Reconstructed from the patch "feat: generate PDF annotation notes from CSV",
which creates this module at ``app/annotate.py``.

Each CSV row describes one note: the PDF it belongs to (``file``), the 1-based
``page``, the box geometry (``x``, ``y``, ``w``, ``h``) and the ``note_text``.
Optional columns read by this module are ``border_width``, ``font_size``,
``author`` and ``category``.  Input PDFs are read from ``INPUT_DIR`` and the
annotated copies are written to ``OUTPUT_DIR``.
"""

import argparse
from pathlib import Path

import fitz  # PyMuPDF
import pandas as pd


# The module lives one directory below the project root (app/annotate.py),
# so parents[1] is the project root.
BASE_DIR = Path(__file__).resolve().parents[1]
INPUT_DIR = BASE_DIR / "input"
OUTPUT_DIR = BASE_DIR / "output"
DATA_DIR = BASE_DIR / "data"

# Columns that must hold a value for a row to be drawable.
_POSITION_COLUMNS = ("page", "x", "y", "w", "h")


def _cell(row, key, default):
    """Return ``row[key]``, or *default* when the cell is absent or blank.

    pandas represents an empty CSV cell as NaN, which is truthy, so the usual
    ``value or default`` idiom does not catch it.  This helper treats a missing
    column, ``None``, NaN and whitespace-only strings as "not provided".
    """
    value = row.get(key, default)
    if value is None or pd.isna(value):
        return default
    if isinstance(value, str) and not value.strip():
        return default
    return value


def add_textbox(page, row):
    """Add one boxed note to *page* from a CSV *row*.

    Two annotations are created (described by the original author as a
    Bluebeam-friendly note):

    - Square annotation: white fill, blue border
    - FreeText annotation: red text, inset inside the square

    Args:
        page: PyMuPDF page to annotate.
        row: Mapping-like record (a pandas Series in practice) with the
            required keys ``x``, ``y``, ``w``, ``h`` and ``note_text`` and the
            optional keys ``border_width``, ``font_size``, ``author`` and
            ``category``.  Blank optional cells fall back to their defaults.
    """
    x = float(row["x"])
    y = float(row["y"])
    w = float(row["w"])
    h = float(row["h"])

    rect = fitz.Rect(x, y, x + w, y + h)

    # A blank note cell becomes an empty note rather than the text "nan".
    text = str(_cell(row, "note_text", ""))
    author = str(_cell(row, "author", "PDF Annotation Merge"))
    category = str(_cell(row, "category", "Drafter Note"))

    # The trailing "or" keeps the original rule that a value of 0 also falls
    # back to the default.
    border_width = float(_cell(row, "border_width", 1.5)) or 1.5
    font_size = float(_cell(row, "font_size", 10)) or 10.0

    box = page.add_rect_annot(rect)
    box.set_colors(
        stroke=(0, 0, 1),  # blue border
        fill=(1, 1, 1),  # white fill
    )
    box.set_border(width=border_width)
    box.set_info({
        "title": author,
        "subject": category + " Box",
        "content": text,
    })
    box.update(opacity=1)

    # Keep the text clear of the border.
    inset = 6
    text_rect = fitz.Rect(x + inset, y + inset, x + w - inset, y + h - inset)

    note = page.add_freetext_annot(
        text_rect,
        text,
        fontsize=font_size,
        fontname="helv",
        text_color=(1, 0, 0),  # red text
        fill_color=None,
        align=fitz.TEXT_ALIGN_LEFT,
    )

    note.set_info({
        "title": author,
        "subject": category,
        "content": text,
    })
    note.update()


def generate_annotations(csv_path):
    """Write an annotated copy of every PDF referenced in *csv_path*.

    Rows are grouped by their ``file`` column.  For each file the PDF is opened
    from ``INPUT_DIR``, every valid row is drawn with :func:`add_textbox`, and
    the result is saved to ``OUTPUT_DIR`` as ``<stem>_annotated<suffix>``.
    Missing input files, rows with blank page/geometry cells and out-of-range
    page numbers are reported with a ``SKIP:`` line and do not stop the run.

    Args:
        csv_path: Path to the annotations CSV.

    Raises:
        ValueError: If the CSV lacks any of the required columns.
    """
    df = pd.read_csv(csv_path)

    required = {"file", "page", "x", "y", "w", "h", "note_text"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for filename, group in df.groupby("file"):
        relative = Path(str(filename))
        input_pdf = INPUT_DIR / relative

        # Build the output name from stem/suffix so only the real extension is
        # touched (str.replace would rewrite every ".pdf" substring and miss
        # ".PDF").
        suffix = relative.suffix or ".pdf"
        output_pdf = OUTPUT_DIR / relative.with_name(
            f"{relative.stem}_annotated{suffix}"
        )

        if not input_pdf.exists():
            print(f"SKIP: missing input file: {input_pdf}")
            continue

        doc = fitz.open(input_pdf)
        try:
            for index, row in group.iterrows():
                # Blank cells arrive as NaN and cannot be turned into a page
                # number or a rectangle.
                if any(pd.isna(row[column]) for column in _POSITION_COLUMNS):
                    print(
                        f"SKIP: blank page/x/y/w/h in CSV data row "
                        f"{index + 1} for {filename}"
                    )
                    continue

                # CSV pages are 1-based; PyMuPDF pages are 0-based.
                page_number = int(row["page"]) - 1

                if page_number < 0 or page_number >= len(doc):
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue

                add_textbox(doc[page_number], row)

            # The "file" column may contain a sub-directory.
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            doc.save(output_pdf, garbage=4, deflate=True)
        finally:
            # Release the document even when annotating or saving fails.
            doc.close()

        print(f"WROTE: {output_pdf}")


def main():
    """Parse the command line and generate annotations for the given CSV.

    The single optional positional argument is the CSV path; it defaults to
    ``annotations.csv`` inside ``DATA_DIR``.
    """
    parser = argparse.ArgumentParser(
        description="Generate PDF annotations from CSV data."
    )
    parser.add_argument(
        "csv",
        nargs="?",
        default=str(DATA_DIR / "annotations.csv"),
        help="Path to annotations CSV file",
    )
    args = parser.parse_args()

    generate_annotations(Path(args.csv))


if __name__ == "__main__":
    main()