# pdf-annotation-merge/app/annotate.py

#!/usr/bin/env python3

from pathlib import Path
import argparse
import pandas as pd
import fitz  # PyMuPDF


# Project root: the directory one level above the folder containing this file.
BASE_DIR = Path(__file__).resolve().parents[1]

# Working folders, all resolved relative to the project root.
INPUT_DIR = BASE_DIR.joinpath("input")    # source PDFs are read from here
OUTPUT_DIR = BASE_DIR.joinpath("output")  # annotated PDFs are written here
DATA_DIR = BASE_DIR.joinpath("data")      # holds the default annotations CSV


def _row_value(row, key, default):
    """Return ``row.get(key)``, or *default* when the cell is absent or empty.

    ``pandas.read_csv`` loads blank cells as NaN. NaN is truthy, so a plain
    ``value or default`` check lets it through; this helper treats None and
    NaN as "not provided".
    """
    value = row.get(key, default)
    if value is None or pd.isna(value):
        return default
    return value


def add_textbox(page, row):
    """
    Adds a Bluebeam-friendly note:
    - Square annotation: white fill, blue border
    - FreeText annotation: red text

    Args:
        page: PyMuPDF page that receives the two annotations.
        row: Mapping-like record (e.g. a pandas Series) with the required
            keys ``x``, ``y``, ``w``, ``h`` and ``note_text``, and the
            optional keys ``border_width``, ``font_size``, ``author`` and
            ``category``. Optional keys that are absent or blank fall back
            to their defaults; a blank ``note_text`` yields an empty note.

    Raises:
        KeyError: If a required key is missing from ``row``.
        ValueError: If a numeric field cannot be converted to ``float``.
    """

    x = float(row["x"])
    y = float(row["y"])
    w = float(row["w"])
    h = float(row["h"])

    rect = fitz.Rect(x, y, x + w, y + h)

    # A blank note cell is NaN; write an empty note instead of the text "nan".
    raw_text = row["note_text"]
    text = "" if pd.isna(raw_text) else str(raw_text)
    # The CSV stores line breaks as the two characters "\n"; expand them.
    text = text.replace("\\n", "\n")

    # ``or default`` keeps the original behaviour of treating 0 as "unset".
    border_width = float(_row_value(row, "border_width", 1.5) or 1.5)
    font_size = float(_row_value(row, "font_size", 10) or 10)

    author = str(_row_value(row, "author", "PDF Annotation Merge"))
    category = str(_row_value(row, "category", "Drafter Note"))

    box = page.add_rect_annot(rect)
    box.set_colors(
        stroke=(0, 0, 1),  # blue border
        fill=(1, 1, 1),    # white fill
    )
    box.set_border(width=border_width)
    box.set_info({
        "title": author,
        "subject": category + " Box",
        "content": text,
    })
    box.update(opacity=1)

    # Keep the text clear of the border by shrinking the rectangle on all sides.
    # NOTE(review): boxes with w or h <= 2 * inset produce an empty/inverted
    # text rectangle — confirm how PyMuPDF reacts to that input.
    inset = 6
    text_rect = fitz.Rect(x + inset, y + inset, x + w - inset, y + h - inset)

    note = page.add_freetext_annot(
        text_rect,
        text,
        fontsize=font_size,
        fontname="helv",
        text_color=(1, 0, 0),  # red text
        fill_color=None,
        align=fitz.TEXT_ALIGN_LEFT,
    )

    note.set_info({
        "title": author,
        "subject": category,
        "content": text,
    })
    note.update()


def generate_annotations(csv_path):
    """Annotate every PDF referenced in the CSV and write the results.

    Rows are grouped by their ``file`` column. Each group's PDF is opened
    from ``INPUT_DIR``, one text box is added per row via ``add_textbox``,
    and the result is saved to ``OUTPUT_DIR`` with ``_annotated`` appended
    to the file stem. Missing input files and rows with an unusable page
    number are reported on stdout and skipped.

    Args:
        csv_path: Path to a CSV with the columns ``file``, ``page``
            (1-based), ``x``, ``y``, ``w``, ``h`` and ``note_text``.

    Raises:
        ValueError: If the CSV lacks any required column. This is checked
            before any directory or file is created.
    """
    df = pd.read_csv(csv_path)

    # Validate first so that a malformed CSV causes no filesystem changes.
    required = {"file", "page", "x", "y", "w", "h", "note_text"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for filename, group in df.groupby("file"):
        relative = Path(str(filename))
        input_pdf = INPUT_DIR / relative
        # Insert the marker before the real suffix only; str.replace would
        # rewrite every ".pdf" in the name and miss upper-case ".PDF".
        output_pdf = OUTPUT_DIR / relative.with_name(
            f"{relative.stem}_annotated{relative.suffix}"
        )

        if not input_pdf.exists():
            print(f"SKIP: missing input file: {input_pdf}")
            continue

        doc = fitz.open(input_pdf)
        try:
            for _, row in group.iterrows():
                # Blank or non-numeric page cells are skipped like
                # out-of-range pages instead of aborting the whole run.
                try:
                    page_number = int(row["page"]) - 1
                except (TypeError, ValueError):
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue

                if page_number < 0 or page_number >= len(doc):
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue

                add_textbox(doc[page_number], row)

            # "file" values may include sub-folders; mirror them on output.
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            doc.save(output_pdf, garbage=4, deflate=True)
        finally:
            # Release the document even when annotating or saving fails.
            doc.close()

        print(f"WROTE: {output_pdf}")


def main():
    """Command-line entry point.

    Reads the optional ``-c`` / ``--csv`` argument (defaulting to
    ``annotations.csv`` inside ``DATA_DIR``) and passes it, as a ``Path``,
    to ``generate_annotations``.
    """
    default_csv = DATA_DIR / "annotations.csv"

    cli = argparse.ArgumentParser(description="Generate PDF annotations from CSV data.")
    cli.add_argument(
        "-c",
        "--csv",
        help="Path to annotations CSV file. Default: data/annotations.csv",
        default=str(default_csv),
    )

    options = cli.parse_args()
    csv_location = Path(options.csv)
    generate_annotations(csv_location)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()