From 8c6e86279928fe4b16dab721b8eaa85579db190e Mon Sep 17 00:00:00 2001 From: McElwain Date: Sat, 9 May 2026 15:45:22 -0500 Subject: [PATCH] Add debug overlay replica preview and save mode --- app/logic/document_outputs.py | 33 ++++++++++++++++--- app/routes/documents.py | 49 +++++++++++++++++++++++++++++ app/templates/documents/detail.html | 18 +++++++++++ 3 files changed, 96 insertions(+), 4 deletions(-) diff --git a/app/logic/document_outputs.py b/app/logic/document_outputs.py index 53fb746..9f047f7 100644 --- a/app/logic/document_outputs.py +++ b/app/logic/document_outputs.py @@ -1006,7 +1006,7 @@ def _render_replica_pdf_from_layout( page_layout = pages.get(page_num, {"lines": []}) render_entries = [] - if mode == "scan_backed" and (page_layout.get("words") or []): + if mode in {"scan_backed", "debug_overlay"} and (page_layout.get("words") or []): render_entries = _build_word_entries_for_page(page_layout, page_h) else: render_entries = page_layout.get("lines", []) or [] @@ -1017,12 +1017,35 @@ def _render_replica_pdf_from_layout( continue text_obj = c.beginText() - text_obj.setTextRenderMode(3 if mode == "scan_backed" else 0) + if mode == "scan_backed": + text_obj.setTextRenderMode(3) + else: + text_obj.setTextRenderMode(0) + text_obj.setFont(line.get("font_family_guess") or "Helvetica", float(line.get("font_size_guess") or 10)) text_obj.setTextOrigin(float(line["pdf_x"]), float(line["pdf_y"]) + 1) + + if mode == "debug_overlay": + c.setStrokeColorRGB(1, 0, 0) + c.setFillColorRGB(1, 0, 0) + else: + c.setStrokeColorRGB(0, 0, 0) + c.setFillColorRGB(0, 0, 0) + text_obj.textLine(text_line) c.drawText(text_obj) + if mode == "debug_overlay": + bbox = line.get("bbox_source") + if bbox and isinstance(bbox, (list, tuple)) and len(bbox) == 4: + try: + left, top, right, bottom = [float(v) for v in bbox] + c.setStrokeColorRGB(1, 0, 0) + c.setLineWidth(0.4) + c.rect(left, page_h - bottom, max(0.5, right - left), max(0.5, bottom - top), stroke=1, fill=0) + except Exception: + 
pass + c.showPage() if c is None: @@ -1035,7 +1058,7 @@ def _render_replica_pdf_from_layout( def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: str) -> None: - if mode not in {"clean", "scan_backed"}: + if mode not in {"clean", "scan_backed", "debug_overlay"}: raise ValueError(f"Unsupported replica mode: {mode}") current_file, _, _, _, _ = _get_replica_source_context(document) @@ -1047,8 +1070,10 @@ def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: s if mode == "clean": out_path = out_path.with_name(f"{stem}_replica_clean{suffix}") - else: + elif mode == "scan_backed": out_path = out_path.with_name(f"{stem}_replica_scan_backed{suffix}") + else: + out_path = out_path.with_name(f"{stem}_replica_debug_overlay{suffix}") out_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/app/routes/documents.py b/app/routes/documents.py index 9ec0ec2..1f40b71 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -1249,6 +1249,51 @@ def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""), return RedirectResponse(url=f"/documents/{document.document_id}?success=saved_replica_pdf_scan_backed&tab=ocr-review", status_code=303)
+
+@router.post("/{document_id}/save-replica-pdf-debug-overlay", response_class=RedirectResponse)
+def save_replica_pdf_debug_overlay(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
+    """Save the debug-overlay replica PDF, then redirect back to the document page.
+
+    Responds with a 303 redirect in every handled case. Storage, path, and save
+    failures are signalled through the ``error`` query parameter of the detail
+    page; an unknown ``document_id`` redirects to the document list instead.
+    """
+    if not _storage_available():
+        return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
+
+    eager_loads = (
+        selectinload(Document.text_versions),
+        selectinload(Document.naming_fields),
+        selectinload(Document.replica_review_states),
+        selectinload(Document.replica_outputs),
+        selectinload(Document.analysis_versions),
+    )
+    query = db.query(Document).options(*eager_loads)
+    document = query.filter(Document.document_id == document_id).first()
+    if document is None:
+        # No matching row: there is no detail page to return to.
+        return RedirectResponse(url="/documents/", status_code=303)
+
+    try:
+        target_path = _resolve_document_output_path(document, output_path)
+        save_replica_pdf(db, document, target_path, mode="debug_overlay")
+    except Exception as exc:
+        # A ValueError mentioning "invalid_output_path" gets its own error code and
+        # no traceback; every other failure is printed and reported generically.
+        if isinstance(exc, ValueError) and "invalid_output_path" in str(exc):
+            return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
+        traceback.print_exc()
+        return RedirectResponse(
+            url=f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab=ocr-review",
+            status_code=303,
+        )
+
+    return RedirectResponse(
+        url=f"/documents/{document.document_id}?success=saved_replica_pdf_debug_overlay&tab=ocr-review&viewer_source=replica_debug_overlay",
+        status_code=303,
+    )
+
+
 @router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse) def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)): return RedirectResponse( @@ -1702,10 +1747,12 @@ def document_detail(document_id: str, request: Request, queue: str | None = None replica_clean_output = _get_latest_replica_output(document, "clean") replica_scan_backed_output = _get_latest_replica_output(document, "scan_backed") + replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay") scan_path = document.current_path replica_path = replica_clean_output.file_path if replica_clean_output and replica_clean_output.file_path else None replica_scan_backed_path = replica_scan_backed_output.file_path if replica_scan_backed_output and replica_scan_backed_output.file_path else None + replica_debug_overlay_path = replica_debug_overlay_output.file_path if replica_debug_overlay_output and replica_debug_overlay_output.file_path else None effective_viewer_source = viewer_source or "scan" preview_path = scan_path @@ -1714,6 +1761,8 @@ def document_detail(document_id: str, 
request: Request, queue: str | None = None preview_path = replica_path elif effective_viewer_source == "replica_scan_backed" and replica_scan_backed_path: preview_path = replica_scan_backed_path + elif effective_viewer_source == "replica_debug_overlay" and replica_debug_overlay_path: + preview_path = replica_debug_overlay_path else: effective_viewer_source = "scan" preview_path = scan_path diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html index b156aa2..a1ca3b4 100644 --- a/app/templates/documents/detail.html +++ b/app/templates/documents/detail.html @@ -186,6 +186,9 @@ document.addEventListener("DOMContentLoaded", () => {
+
+ +
@@ -208,6 +211,18 @@ document.addEventListener("DOMContentLoaded", () => { +{% if success == "saved_replica_pdf_debug_overlay" %} +
+ Debug overlay PDF saved. +
+{% endif %} + +{% if error == "save_replica_pdf_debug_overlay_failed" %} +
+ Could not save debug overlay PDF. +
+{% endif %} + {% if success == "saved_replica_pdf_scan_backed_fallback" %}
Clean replica could not be generated for this document, so a scan-backed replica was created instead. @@ -253,6 +268,9 @@ document.addEventListener("DOMContentLoaded", () => { {% if replica_scan_backed_output %} Replica (Scan-backed) {% endif %} + {% if replica_debug_overlay_output %} + Replica (Debug) + {% endif %}