82 lines
2.4 KiB
Python
82 lines
2.4 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from pathlib import Path
|
|
import argparse
|
|
import pandas as pd
|
|
import fitz
|
|
|
|
# Directory layout, resolved relative to this script so the tool behaves the
# same from any working directory. BASE_DIR is the parent of the directory
# that contains this file.
BASE_DIR = Path(__file__).resolve().parents[1]
INPUT_DIR = BASE_DIR / "input"    # source PDFs are looked up here
OUTPUT_DIR = BASE_DIR / "output"  # highlighted PDFs are written here
DATA_DIR = BASE_DIR / "data"      # default location of the highlights CSV

# Supported highlight colour names mapped to RGB triples (components 0 or 1).
COLORS = {
    "green": (0, 1, 0),
    "red": (1, 0, 0),
    "blue": (0, 0, 1),
    "yellow": (1, 1, 0),
}
|
|
|
|
def add_highlight(page, row):
    """Add one highlight annotation to *page* as described by *row*.

    Args:
        page: PyMuPDF page object that receives the annotation.
        row: Mapping-like record (for example a pandas ``Series``) with the
            required keys ``x``, ``y``, ``w`` and ``h`` and the optional keys
            ``color``, ``author``, ``category``, ``note_text`` and
            ``opacity``.

    Raises:
        KeyError: If one of the geometry keys is missing.
        ValueError: If a geometry or opacity value cannot be converted to
            ``float``.
    """

    def field(key, default):
        # Empty CSV cells arrive as NaN/None. Treat them like a missing key
        # so they do not end up as the literal text "nan" in the annotation.
        value = row.get(key, default)
        return default if pd.isna(value) else value

    x = float(row["x"])
    y = float(row["y"])
    w = float(row["w"])
    h = float(row["h"])

    color_name = str(field("color", "green")).lower().strip()
    if color_name not in COLORS:
        # Unknown or empty colour: fall back to green for the drawn colour
        # AND the default subject label, so the two cannot disagree.
        color_name = "green"
    color = COLORS[color_name]

    # The CSV stores a top-left corner plus width/height.
    rect = fitz.Rect(x, y, x + w, y + h)

    annot = page.add_highlight_annot(rect)
    annot.set_colors(stroke=color)
    annot.set_info({
        "title": str(field("author", "PDF Annotation Merge")),
        "subject": str(field("category", f"{color_name.title()} Highlight")),
        "content": str(field("note_text", "")),
    })
    # ``or 0.35`` keeps the original rule that a zero/blank opacity means
    # "use the default"; NaN is already mapped to the default by field().
    annot.update(opacity=float(field("opacity", 0.35) or 0.35))
|
|
|
|
def generate_highlights(csv_path):
    """Create highlighted copies of the PDFs listed in a highlights CSV.

    Rows are grouped by their ``file`` column. Each referenced PDF is read
    from ``INPUT_DIR``, every row adds one highlight via ``add_highlight``,
    and the result is saved under ``OUTPUT_DIR`` with ``_highlighted``
    appended to the file stem. Missing input files and invalid page numbers
    are reported on stdout and skipped.

    Args:
        csv_path: Path of the CSV file. It must provide the columns
            ``file``, ``page`` (1-based), ``x``, ``y``, ``w``, ``h`` and
            ``color``.

    Raises:
        ValueError: If any required column is missing from the CSV.
    """
    df = pd.read_csv(csv_path)

    # Validate before touching the filesystem so a malformed CSV has no
    # side effects.
    required = {"file", "page", "x", "y", "w", "h", "color"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"CSV is missing required columns: {sorted(missing)}")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for filename, group in df.groupby("file"):
        # str() guards against non-string group keys (e.g. numeric names).
        relative = Path(str(filename))
        input_pdf = INPUT_DIR / relative

        if not input_pdf.is_file():
            print(f"SKIP: missing input file: {input_pdf}")
            continue

        # Build the name from stem + suffix: str.replace() would rewrite
        # every ".pdf" occurrence and miss upper-case or absent extensions.
        output_pdf = OUTPUT_DIR / relative.with_name(
            f"{relative.stem}_highlighted{relative.suffix}"
        )
        # ``file`` may contain sub-directories; mirror them in the output.
        output_pdf.parent.mkdir(parents=True, exist_ok=True)

        doc = fitz.open(input_pdf)
        try:
            for _, row in group.iterrows():
                try:
                    page_number = int(row["page"]) - 1
                except (TypeError, ValueError, OverflowError):
                    # Blank or non-numeric page cell: handle it like any
                    # other invalid page instead of aborting the whole run.
                    page_number = -1
                if not 0 <= page_number < len(doc):
                    print(f"SKIP: invalid page {row['page']} for {filename}")
                    continue
                add_highlight(doc[page_number], row)

            doc.save(output_pdf, garbage=4, deflate=True)
        finally:
            # Always release the document, even if annotating or saving fails.
            doc.close()
        print(f"WROTE: {output_pdf}")
|
|
|
|
def main(argv=None):
    """Parse command-line options and generate the highlighted PDFs.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            ``main()`` callers are unaffected.
    """
    parser = argparse.ArgumentParser(
        description="Generate PDF highlight annotations from CSV."
    )
    parser.add_argument(
        "-c",
        "--csv",
        default=str(DATA_DIR / "highlights.csv"),
        help="Path to highlights CSV file. Default: data/highlights.csv",
    )
    args = parser.parse_args(argv)
    generate_highlights(Path(args.csv))
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|