Remove scan-backed replica and standardize replica output naming

This commit is contained in:
Sean McElwain 2026-05-28 21:05:59 -05:00
parent 6e1d497aa7
commit 7649f48890
3 changed files with 5 additions and 27 deletions

View File

@ -1264,22 +1264,20 @@ def _render_replica_pdf_from_layout(
def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: str) -> None:
if mode not in {"clean", "scan_backed", "debug_overlay"}:
if mode not in {"clean", "debug_overlay"}:
raise ValueError(f"Unsupported replica mode: {mode}")
current_file, _, _, _, _ = _get_replica_source_context(document)
out_path = Path(output_path)
out_path = out_path.with_name(re.sub(r"_v\d+(?=\.[^.]+$)", "", out_path.name))
stem = re.sub(r"(_replica_clean|_replica_scan_backed)$", "", out_path.stem)
stem = re.sub(r"(_replica|_replica_clean|_replica_scan_backed|_replica_debug_overlay|_debug)$", "", out_path.stem)
suffix = out_path.suffix or ".pdf"
if mode == "clean":
out_path = out_path.with_name(f"{stem}_replica_clean{suffix}")
elif mode == "scan_backed":
out_path = out_path.with_name(f"{stem}_replica_scan_backed{suffix}")
out_path = out_path.with_name(f"{stem}_replica{suffix}")
else:
out_path = out_path.with_name(f"{stem}_replica_debug_overlay{suffix}")
out_path = out_path.with_name(f"{stem}_debug{suffix}")
out_path.parent.mkdir(parents=True, exist_ok=True)
@ -1294,16 +1292,6 @@ def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: s
if mode == "clean" and not page_lines:
raise ValueError("clean_replica_has_no_renderable_lines")
if mode == "clean":
has_text = False
for page in layout_json.get("pages", []):
if page.get("lines"):
has_text = True
break
if not has_text:
actual_mode = "scan_backed"
out_path = out_path.with_name(f"{stem}_replica_scan_backed{suffix}")
layout_json = build_replica_layout(document, mode="scan_backed")
layout_version = _save_replica_layout_version(db, document, layout_json, mode=actual_mode)

View File

@ -2281,7 +2281,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
line_numbers = list(range(1, max(actual_line_count, expected_line_count) + 1))
replica_clean_output = _get_latest_replica_output(document, "clean")
replica_scan_backed_output = _get_latest_replica_output(document, "scan_backed")
replica_scan_backed_output = None # scan-backed replica disabled; scan uses document.current_path
replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay")
overlay_page_data = []
@ -2371,8 +2371,6 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
preview_path = scan_path
elif effective_viewer_source == "replica" and replica_path:
preview_path = replica_path
elif effective_viewer_source == "replica_scan_backed" and replica_scan_backed_path:
preview_path = replica_scan_backed_path
elif effective_viewer_source == "replica_debug_overlay" and replica_debug_overlay_path:
preview_path = replica_debug_overlay_path
else:

View File

@ -244,11 +244,6 @@ document.addEventListener("DOMContentLoaded", () => {
<input type="hidden" name="return_viewer_source" value="replica">
<button type="submit">Save Replica PDF</button>
</form>
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-scan-backed" style="display:inline;">
<input type="hidden" name="return_tab" value="ocr-review">
<input type="hidden" name="return_viewer_source" value="replica_scan_backed">
<button type="submit">Save Replica PDF (Scan-backed)</button>
</form>
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-debug-overlay" style="display:inline;">
<input type="hidden" name="return_tab" value="ocr-review">
<input type="hidden" name="return_viewer_source" value="replica_debug_overlay">
@ -337,9 +332,6 @@ document.addEventListener("DOMContentLoaded", () => {
{% if replica_clean_output %}
<a class="preview-source-link{% if viewer_source == 'replica' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica">Replica</a>
{% endif %}
{% if replica_scan_backed_output %}
<a class="preview-source-link{% if viewer_source == 'replica_scan_backed' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica_scan_backed">Replica (Scan-backed)</a>
{% endif %}
{% if replica_debug_overlay_output %}
<a class="preview-source-link{% if viewer_source == 'replica_debug_overlay' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica_debug_overlay">Replica (Debug)</a>
<a class="preview-source-link{% if viewer_source == 'docx' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=docx">DOCX</a>