From b49e4b732a6327acdc941cdb1171c071a85c4b65 Mon Sep 17 00:00:00 2001
From: McElwain <sean.mcelwain@outlook.com>
Date: Fri, 1 May 2026 21:54:15 -0500
Subject: [PATCH] feat: generate colored PDF highlight annotations

---
 app/highlight.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 app/highlight.py

diff --git a/app/highlight.py b/app/highlight.py
new file mode 100644
index 0000000..17465c2
--- /dev/null
+++ b/app/highlight.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import argparse
+import pandas as pd
+import fitz
+
+BASE_DIR = Path(__file__).resolve().parents[1]  # directory one level above this file's folder
+INPUT_DIR = BASE_DIR / "input"  # source PDFs are looked up here
+OUTPUT_DIR = BASE_DIR / "output"  # highlighted copies are saved here
+DATA_DIR = BASE_DIR / "data"  # holds the default highlights.csv
+
+COLORS = {  # color name -> tuple passed as the annotation stroke color
+    "green": (0, 1, 0),
+    "red": (1, 0, 0),
+    "blue": (0, 0, 1),
+    "yellow": (1, 1, 0),
+}
+
+def add_highlight(page, row):
+    x = float(row["x"])
+    y = float(row["y"])
+    w = float(row["w"])
+    h = float(row["h"])
+    color_name = str(row.get("color", "green")).lower().strip()
+    color = COLORS.get(color_name, COLORS["green"])
+
+    rect = fitz.Rect(x, y, x + w, y + h)
+
+    annot = page.add_highlight_annot(rect)
+    annot.set_colors(stroke=color)
+    annot.set_info({
+        "title": str(row.get("author", "PDF Annotation Merge")),
+        "subject": str(row.get("category", f"{color_name.title()} Highlight")),
+        "content": str(row.get("note_text", "")),
+    })
+    annot.update(opacity=float(row.get("opacity", 0.35) or 0.35))
+
+def generate_highlights(csv_path):
+    df = pd.read_csv(csv_path)
+    OUTPUT_DIR.mkdir(exist_ok=True)
+
+    required = {"file", "page", "x", "y", "w", "h", "color"}
+    missing = required - set(df.columns)
+    if missing:
+        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")
+
+    for filename, group in df.groupby("file"):
+        input_pdf = INPUT_DIR / filename
+        output_pdf = OUTPUT_DIR / filename.replace(".pdf", "_highlighted.pdf")
+
+        if not input_pdf.exists():
+            print(f"SKIP: missing input file: {input_pdf}")
+            continue
+
+        doc = fitz.open(input_pdf)
+
+        for _, row in group.iterrows():
+            page_number = int(row["page"]) - 1
+            if page_number < 0 or page_number >= len(doc):
+                print(f"SKIP: invalid page {row['page']} for {filename}")
+                continue
+            add_highlight(doc[page_number], row)
+
+        doc.save(output_pdf, garbage=4, deflate=True)
+        doc.close()
+        print(f"WROTE: {output_pdf}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate PDF highlight annotations from CSV.")
+    parser.add_argument(
+        "-c",
+        "--csv",
+        default=str(DATA_DIR / "highlights.csv"),
+        help="Path to highlights CSV file. Default: data/highlights.csv",
+    )
+    args = parser.parse_args()
+    generate_highlights(Path(args.csv))
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()