Add debug overlay replica preview and save mode

This commit is contained in:
Sean McElwain 2026-05-09 15:45:22 -05:00
parent 76352b8a54
commit 8c6e862799
3 changed files with 96 additions and 4 deletions

View File

@ -1006,7 +1006,7 @@ def _render_replica_pdf_from_layout(
page_layout = pages.get(page_num, {"lines": []})
render_entries = []
if mode == "scan_backed" and (page_layout.get("words") or []):
if mode in {"scan_backed", "debug_overlay"} and (page_layout.get("words") or []):
render_entries = _build_word_entries_for_page(page_layout, page_h)
else:
render_entries = page_layout.get("lines", []) or []
@ -1017,12 +1017,35 @@ def _render_replica_pdf_from_layout(
continue
text_obj = c.beginText()
text_obj.setTextRenderMode(3 if mode == "scan_backed" else 0)
if mode == "scan_backed":
text_obj.setTextRenderMode(3)
else:
text_obj.setTextRenderMode(0)
text_obj.setFont(line.get("font_family_guess") or "Helvetica", float(line.get("font_size_guess") or 10))
text_obj.setTextOrigin(float(line["pdf_x"]), float(line["pdf_y"]) + 1)
if mode == "debug_overlay":
c.setStrokeColorRGB(1, 0, 0)
c.setFillColorRGB(1, 0, 0)
else:
c.setStrokeColorRGB(0, 0, 0)
c.setFillColorRGB(0, 0, 0)
text_obj.textLine(text_line)
c.drawText(text_obj)
if mode == "debug_overlay":
bbox = line.get("bbox_source")
if bbox and isinstance(bbox, (list, tuple)) and len(bbox) == 4:
try:
left, top, right, bottom = [float(v) for v in bbox]
c.setStrokeColorRGB(1, 0, 0)
c.setLineWidth(0.4)
c.rect(left, page_h - bottom, max(0.5, right - left), max(0.5, bottom - top), stroke=1, fill=0)
except Exception:
pass
c.showPage()
if c is None:
@ -1035,7 +1058,7 @@ def _render_replica_pdf_from_layout(
def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: str) -> None:
if mode not in {"clean", "scan_backed"}:
if mode not in {"clean", "scan_backed", "debug_overlay"}:
raise ValueError(f"Unsupported replica mode: {mode}")
current_file, _, _, _, _ = _get_replica_source_context(document)
@ -1047,8 +1070,10 @@ def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: s
if mode == "clean":
out_path = out_path.with_name(f"{stem}_replica_clean{suffix}")
else:
elif mode == "scan_backed":
out_path = out_path.with_name(f"{stem}_replica_scan_backed{suffix}")
else:
out_path = out_path.with_name(f"{stem}_replica_debug_overlay{suffix}")
out_path.parent.mkdir(parents=True, exist_ok=True)

View File

@ -1249,6 +1249,51 @@ def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""),
return RedirectResponse(url=f"/documents/{document.document_id}?success=saved_replica_pdf_scan_backed&tab=ocr-review", status_code=303)
@router.post("/{document_id}/save-replica-pdf-debug-overlay", response_class=RedirectResponse)
def save_replica_pdf_debug_overlay(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
    """Save a replica PDF in ``debug_overlay`` mode and redirect to the document page.

    Every outcome is a 303 redirect:

    * storage not available -> document page with ``error=storage_unavailable``
    * no document with this id -> the documents index
    * ``ValueError`` whose message contains ``invalid_output_path`` ->
      document page with ``error=invalid_output_path``
    * any other exception -> traceback is printed and the document page is
      shown with ``error=save_replica_pdf_debug_overlay_failed``
    * success -> document page on the ``ocr-review`` tab with the viewer
      source set to ``replica_debug_overlay``
    """
    if not _storage_available():
        return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)

    # Relationships requested up front via selectinload for the loaded document.
    eager_loads = (
        selectinload(Document.text_versions),
        selectinload(Document.naming_fields),
        selectinload(Document.replica_review_states),
        selectinload(Document.replica_outputs),
        selectinload(Document.analysis_versions),
    )
    lookup = db.query(Document).options(*eager_loads)
    document = lookup.filter(Document.document_id == document_id).first()
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    try:
        output_path_obj = _resolve_document_output_path(document, output_path)
        save_replica_pdf(db, document, output_path_obj, mode="debug_overlay")
    except Exception as exc:
        # An invalid output path gets its own error code and no traceback;
        # every other failure is reported as a generic save failure.
        if isinstance(exc, ValueError) and "invalid_output_path" in str(exc):
            return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
        traceback.print_exc()
        failure_url = f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab=ocr-review"
        return RedirectResponse(url=failure_url, status_code=303)

    success_url = f"/documents/{document.document_id}?success=saved_replica_pdf_debug_overlay&tab=ocr-review&viewer_source=replica_debug_overlay"
    return RedirectResponse(url=success_url, status_code=303)
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
return RedirectResponse(
@ -1702,10 +1747,12 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
replica_clean_output = _get_latest_replica_output(document, "clean")
replica_scan_backed_output = _get_latest_replica_output(document, "scan_backed")
replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay")
scan_path = document.current_path
replica_path = replica_clean_output.file_path if replica_clean_output and replica_clean_output.file_path else None
replica_scan_backed_path = replica_scan_backed_output.file_path if replica_scan_backed_output and replica_scan_backed_output.file_path else None
replica_debug_overlay_path = replica_debug_overlay_output.file_path if replica_debug_overlay_output and replica_debug_overlay_output.file_path else None
effective_viewer_source = viewer_source or "scan"
preview_path = scan_path
@ -1714,6 +1761,8 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
preview_path = replica_path
elif effective_viewer_source == "replica_scan_backed" and replica_scan_backed_path:
preview_path = replica_scan_backed_path
elif effective_viewer_source == "replica_debug_overlay" and replica_debug_overlay_path:
preview_path = replica_debug_overlay_path
else:
effective_viewer_source = "scan"
preview_path = scan_path

View File

@ -186,6 +186,9 @@ document.addEventListener("DOMContentLoaded", () => {
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-scan-backed" style="display:inline;">
<button type="submit">Save Replica PDF (Scan-backed)</button>
</form>
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-debug-overlay" style="display:inline;">
<button type="submit">Save Replica PDF (Debug Overlay)</button>
</form>
</div>
</div>
@ -208,6 +211,18 @@ document.addEventListener("DOMContentLoaded", () => {
</div>
{% if success == "saved_replica_pdf_debug_overlay" %}
<div style="background:#ecfdf5; border:1px solid #86efac; color:#166534; padding:0.75rem 1rem; border-radius:10px; margin-bottom:1rem;">
Debug overlay PDF saved.
</div>
{% endif %}
{% if error == "save_replica_pdf_debug_overlay_failed" %}
<div style="background:#ffe4e6; border:1px solid #fecdd3; color:#7f1d1d; padding:0.75rem 1rem; border-radius:10px; margin-bottom:1rem;">
Could not save debug overlay PDF.
</div>
{% endif %}
{% if success == "saved_replica_pdf_scan_backed_fallback" %}
<div style="background:#ecfdf5; border:1px solid #86efac; color:#166534; padding:0.75rem 1rem; border-radius:10px; margin-bottom:1rem;">
Clean replica could not be generated for this document, so a scan-backed replica was created instead.
@ -253,6 +268,9 @@ document.addEventListener("DOMContentLoaded", () => {
{% if replica_scan_backed_output %}
<a class="preview-source-link{% if viewer_source == 'replica_scan_backed' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica_scan_backed">Replica (Scan-backed)</a>
{% endif %}
{% if replica_debug_overlay_output %}
<a class="preview-source-link{% if viewer_source == 'replica_debug_overlay' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica_debug_overlay">Replica (Debug)</a>
{% endif %}
</div>
</div>