feat: generate PDF annotation notes from CSV
This commit is contained in:
parent
2885406409
commit
50d88ad09b
|
|
@ -0,0 +1,115 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import fitz # PyMuPDF
|
||||
|
||||
|
||||
# Project root: the directory one level above the folder containing this script.
BASE_DIR = Path(__file__).resolve().parents[1]

# Fixed sub-folders of the project root.
INPUT_DIR = BASE_DIR.joinpath("input")    # source PDFs are looked up here
OUTPUT_DIR = BASE_DIR.joinpath("output")  # annotated PDFs are written here
DATA_DIR = BASE_DIR.joinpath("data")      # holds the default annotations CSV
|
||||
|
||||
|
||||
def _cell(row, key, default):
    """Return ``row[key]``, or *default* when the column is absent or the cell is blank.

    pandas represents an empty CSV cell as NaN.  NaN is truthy, so the plain
    ``row.get(key, default) or default`` idiom does not fall back for blank
    cells; this helper checks for missing values explicitly.
    """
    value = row.get(key, default)
    if value is None or pd.isna(value):
        return default
    return value


def add_textbox(page, row):
    """
    Add a Bluebeam-friendly note to *page*:
    - Square annotation: white fill, blue border
    - FreeText annotation: red text, inset 6 units inside the square

    Parameters:
        page: PyMuPDF page that receives the two annotations.
        row: mapping-like record (e.g. a pandas Series) supporting ``row[key]``
            and ``row.get(key, default)``.  Required keys: ``x``, ``y``, ``w``,
            ``h`` (box origin and size) and ``note_text``.  Optional keys:
            ``border_width`` (default 1.5), ``font_size`` (default 10),
            ``author`` (default "PDF Annotation Merge") and ``category``
            (default "Drafter Note").  Blank optional cells use the default;
            a blank ``note_text`` cell produces an empty note rather than
            the literal text "nan".

    Raises:
        KeyError: if a required key is absent from *row*.
        ValueError: if a numeric field cannot be converted to ``float``.
    """
    x = float(row["x"])
    y = float(row["y"])
    w = float(row["w"])
    h = float(row["h"])
    rect = fitz.Rect(x, y, x + w, y + h)

    # note_text is required (KeyError if the column is absent), but a blank
    # cell arrives as NaN and must not be rendered as the string "nan".
    raw_text = row["note_text"]
    text = "" if raw_text is None or pd.isna(raw_text) else str(raw_text)

    author = str(_cell(row, "author", "PDF Annotation Merge"))
    category = str(_cell(row, "category", "Drafter Note"))
    # The trailing "or" keeps the original rule that 0 / "" also mean "use the default".
    border_width = float(_cell(row, "border_width", 1.5) or 1.5)
    font_size = float(_cell(row, "font_size", 10) or 10)

    # Background box.
    box = page.add_rect_annot(rect)
    box.set_colors(
        stroke=(0, 0, 1),  # blue border
        fill=(1, 1, 1),  # white fill
    )
    box.set_border(width=border_width)
    box.set_info({
        "title": author,
        "subject": category + " Box",
        "content": text,
    })
    box.update(opacity=1)

    # Text annotation, kept clear of the border by a fixed inset.
    inset = 6
    text_rect = fitz.Rect(x + inset, y + inset, x + w - inset, y + h - inset)

    note = page.add_freetext_annot(
        text_rect,
        text,
        fontsize=font_size,
        fontname="helv",
        text_color=(1, 0, 0),  # red text
        fill_color=None,
        align=fitz.TEXT_ALIGN_LEFT,
    )
    note.set_info({
        "title": author,
        "subject": category,
        "content": text,
    })
    note.update()
|
||||
|
||||
|
||||
def generate_annotations(csv_path):
    """Write annotated copies of the PDFs listed in an annotations CSV.

    The CSV is read with pandas and grouped by its ``file`` column.  For each
    file name, the PDF is opened from ``INPUT_DIR``, ``add_textbox`` is called
    once per row on the 1-based ``page`` it names, and the result is saved to
    ``OUTPUT_DIR`` as ``<stem>_annotated<suffix>``.  Files that are not present
    and rows whose page is blank, non-numeric or out of range are reported with
    a ``SKIP:`` message and ignored.

    Parameters:
        csv_path: path of the CSV file; must contain the columns ``file``,
            ``page``, ``x``, ``y``, ``w``, ``h`` and ``note_text``.

    Raises:
        ValueError: if any required column is missing.  This is checked before
            the output directory is created, so an invalid CSV has no side
            effects on disk.
    """
    df = pd.read_csv(csv_path)

    required = {"file", "page", "x", "y", "w", "h", "note_text"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for filename, group in df.groupby("file"):
        # str() guards against a "file" column that pandas parsed as numbers.
        relative = Path(str(filename))
        input_pdf = INPUT_DIR / relative

        if not input_pdf.is_file():
            print(f"SKIP: missing input file: {input_pdf}")
            continue

        # Build the name from stem + suffix so "A.PDF" or "a.pdf.v2.pdf" get
        # exactly one "_annotated" marker placed before the real extension.
        output_pdf = OUTPUT_DIR / relative.with_name(
            f"{relative.stem}_annotated{relative.suffix}"
        )
        output_pdf.parent.mkdir(parents=True, exist_ok=True)

        # The context manager closes the document even if a row raises.
        with fitz.open(input_pdf) as doc:
            for _, row in group.iterrows():
                try:
                    page_number = int(row["page"]) - 1
                except (TypeError, ValueError):
                    # Blank (NaN) or non-numeric page cell.
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue

                if page_number < 0 or page_number >= len(doc):
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue

                add_textbox(doc[page_number], row)

            doc.save(output_pdf, garbage=4, deflate=True)

        print(f"WROTE: {output_pdf}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Accepts one optional positional argument, the path of the annotations CSV
    (defaulting to ``annotations.csv`` inside ``DATA_DIR``), and passes it to
    ``generate_annotations`` as a ``Path``.
    """
    default_csv = str(DATA_DIR / "annotations.csv")

    cli = argparse.ArgumentParser(description="Generate PDF annotations from CSV data.")
    cli.add_argument("csv", nargs="?", default=default_csv, help="Path to annotations CSV file")

    options = cli.parse_args()
    csv_file = Path(options.csv)
    generate_annotations(csv_file)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue