feat: generate PDF annotation notes from CSV

2026-05-01 20:56:36 -05:00 · 2026-05-01 20:56:36 -05:00 · 50d88ad09b
parent 2885406409
commit 50d88ad09b
1 changed files with 115 additions and 0 deletions
--- a/app/annotate.py
+++ b/app/annotate.py
@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import argparse
+import pandas as pd
+import fitz  # PyMuPDF
+
+
+# Project root: two levels up from this file (this file lives in <root>/app/).
+BASE_DIR = Path(__file__).resolve().parents[1]
+# Source PDFs are read from here.
+INPUT_DIR = BASE_DIR.joinpath("input")
+# Annotated PDFs are written here.
+OUTPUT_DIR = BASE_DIR.joinpath("output")
+# Default location of the annotations CSV.
+DATA_DIR = BASE_DIR.joinpath("data")
+
+
+def _cell(row, key, default):
+    """Return ``row[key]``, or ``default`` when the cell is absent or blank.
+
+    pandas represents an empty CSV cell as NaN, which is truthy and is not
+    replaced by ``row.get(key, default)``. This helper treats NaN, ``None``
+    and blank strings the same as a missing column.
+    """
+    value = row.get(key, default)
+    if value is None or pd.isna(value):
+        return default
+    if isinstance(value, str) and not value.strip():
+        return default
+    return value
+
+
+def add_textbox(page, row):
+    """
+    Adds a Bluebeam-friendly note to ``page``:
+    - Square annotation: white fill, blue border
+    - FreeText annotation: red text
+
+    ``row`` is a mapping-like record (a pandas Series in this file) with the
+    required keys ``x``, ``y``, ``w``, ``h`` and ``note_text``, plus the
+    optional keys ``author``, ``category``, ``border_width`` and
+    ``font_size``. Optional cells that are missing or blank fall back to
+    their defaults; a blank ``note_text`` produces an empty note instead of
+    the literal text "nan".
+
+    Raises whatever ``float()`` raises when a geometry or size cell is not
+    numeric.
+    """
+
+    x = float(row["x"])
+    y = float(row["y"])
+    w = float(row["w"])
+    h = float(row["h"])
+
+    rect = fitz.Rect(x, y, x + w, y + h)
+    text = str(_cell(row, "note_text", ""))
+    author = str(_cell(row, "author", "PDF Annotation Merge"))
+    category = str(_cell(row, "category", "Drafter Note"))
+    # ``or`` keeps the original rule that an explicit 0 also means "default".
+    border_width = float(_cell(row, "border_width", 1.5) or 1.5)
+    font_size = float(_cell(row, "font_size", 10) or 10)
+
+    box = page.add_rect_annot(rect)
+    box.set_colors(
+        stroke=(0, 0, 1),  # blue border
+        fill=(1, 1, 1),    # white fill
+    )
+    box.set_border(width=border_width)
+    box.set_info({
+        "title": author,
+        "subject": category + " Box",
+        "content": text,
+    })
+    box.update(opacity=1)
+
+    inset = 6
+    if min(w, h) <= 2 * inset:
+        # The standard padding would leave an empty or inverted text
+        # rectangle, so shrink it in proportion to the box.
+        inset = max(min(w, h), 0) / 4
+    text_rect = fitz.Rect(x + inset, y + inset, x + w - inset, y + h - inset)
+
+    note = page.add_freetext_annot(
+        text_rect,
+        text,
+        fontsize=font_size,
+        fontname="helv",
+        text_color=(1, 0, 0),  # red text
+        fill_color=None,
+        align=fitz.TEXT_ALIGN_LEFT,
+    )
+
+    note.set_info({
+        "title": author,
+        "subject": category,
+        "content": text,
+    })
+    note.update()
+
+
+def generate_annotations(csv_path):
+    """Annotate the PDFs listed in a CSV file and write copies to OUTPUT_DIR.
+
+    The CSV must contain the columns ``file``, ``page``, ``x``, ``y``, ``w``,
+    ``h`` and ``note_text``. Rows are grouped by ``file``; each input PDF is
+    read from INPUT_DIR, every row is added with ``add_textbox``, and the
+    result is saved as ``<stem>_annotated<suffix>`` under OUTPUT_DIR.
+    ``page`` is 1-based.
+
+    Missing input files and rows with an invalid page are reported with a
+    ``SKIP:`` message and skipped.
+
+    Raises:
+        ValueError: if the CSV lacks any required column.
+    """
+    df = pd.read_csv(csv_path)
+
+    required = {"file", "page", "x", "y", "w", "h", "note_text"}
+    missing = required - set(df.columns)
+    if missing:
+        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")
+
+    # Create the output folder only after the CSV has been validated.
+    OUTPUT_DIR.mkdir(exist_ok=True)
+
+    for filename, group in df.groupby("file"):
+        # str() guards against pandas parsing a numeric-looking name as a number.
+        source_name = Path(str(filename))
+        # Insert the marker before the real suffix only, whatever its case
+        # (str.replace would miss ".PDF" and rewrite every ".pdf" in the name).
+        annotated_name = f"{source_name.stem}_annotated{source_name.suffix}"
+        input_pdf = INPUT_DIR / source_name
+        output_pdf = OUTPUT_DIR / source_name.with_name(annotated_name)
+
+        if not input_pdf.exists():
+            print(f"SKIP: missing input file: {input_pdf}")
+            continue
+
+        # The context manager closes the document even if a row or save fails.
+        with fitz.open(input_pdf) as doc:
+            for _, row in group.iterrows():
+                try:
+                    page_number = int(row["page"]) - 1
+                except (TypeError, ValueError, OverflowError):
+                    # Blank (NaN), non-numeric or infinite page cell.
+                    page_number = -1
+
+                if page_number < 0 or page_number >= len(doc):
+                    print(f"SKIP: invalid page {row['page']} for {filename}")
+                    continue
+
+                add_textbox(doc[page_number], row)
+
+            # ``file`` may include a sub-folder; make sure it exists in the output.
+            output_pdf.parent.mkdir(parents=True, exist_ok=True)
+            doc.save(output_pdf, garbage=4, deflate=True)
+
+        print(f"WROTE: {output_pdf}")
+
+
+def main():
+    """Command-line entry point: parse the CSV path and generate annotations.
+
+    The positional ``csv`` argument is optional and defaults to
+    ``annotations.csv`` inside DATA_DIR.
+    """
+    default_csv = DATA_DIR / "annotations.csv"
+
+    cli = argparse.ArgumentParser(description="Generate PDF annotations from CSV data.")
+    cli.add_argument("csv", nargs="?", default=str(default_csv), help="Path to annotations CSV file")
+    options = cli.parse_args()
+
+    csv_file = Path(options.csv)
+    generate_annotations(csv_file)
+
+
+if __name__ == "__main__":
+    main()