diff --git a/app/logic/document_outputs.py b/app/logic/document_outputs.py index f7653a8..7d2181c 100644 --- a/app/logic/document_outputs.py +++ b/app/logic/document_outputs.py @@ -72,6 +72,10 @@ from sqlalchemy.orm import Session from app.core.config import FIELD_ENRICHED_ROOT, OCR_CORRECTED_ROOT from app.models.document import Document from app.models.document_version import DocumentVersion + +from app.models.document_replica_layout_version import DocumentReplicaLayoutVersion +from app.models.document_replica_output import DocumentReplicaOutput +from app.models.document_replica_review_state import DocumentReplicaReviewState from app.models.text_version import TextVersion @@ -685,19 +689,10 @@ def save_ocr_corrected_pdf_current(db: Session, document: Document, output_path: except Exception: share_path_value = None - document.share_path = share_path_value - document.current_path = str(out_path) - document.canonical_filename = out_path.name - document.sha256_current = file_hash - db.add(document) - + # Replica outputs are non-destructive exports for now. + # Do not replace the primary/current document path. 
def _next_replica_layout_version_number(db: Session, document_id: int) -> int:
    """Return the next replica-layout version number for ``document_id``.

    Reads the highest ``version_number`` stored for the document and adds one;
    a document without any layout version yet gets 1.
    """
    highest = (
        db.query(func.max(DocumentReplicaLayoutVersion.version_number))
        .filter(DocumentReplicaLayoutVersion.document_id == document_id)
        .scalar()
    )
    return (highest or 0) + 1


def _get_current_replica_review_state(document: Document) -> DocumentReplicaReviewState | None:
    """Return the first entry of ``document.replica_review_states``, or ``None``."""
    rows = getattr(document, "replica_review_states", None) or []
    return rows[0] if rows else None


def _get_replica_source_context(document: Document):
    """Validate ``document`` as a replica source and collect what rendering needs.

    Returns:
        A 4-tuple ``(current_file, raw_ocr, reviewed, source_layout)`` where
        ``source_layout`` is the reviewed layout when it is non-empty and the
        raw OCR layout otherwise.

    Raises:
        ValueError: no ``current_path``, a missing raw OCR or reviewed text
            version, a non-PDF current file, no OCR line boxes, a reviewed
            layout whose line count differs from the raw one, or no layout.
        FileNotFoundError: ``current_path`` does not exist on disk.
    """
    if not document.current_path:
        raise ValueError("Document has no current_path")

    current_file = Path(document.current_path)
    if not current_file.exists():
        raise FileNotFoundError(f"Current file not found: {current_file}")

    raw_ocr = _latest_current_text_version(document, "raw_ocr")
    reviewed = _latest_current_text_version(document, "reviewed")

    if raw_ocr is None:
        raise ValueError("No current raw OCR version found")
    if reviewed is None:
        raise ValueError("No current reviewed text found")
    if current_file.suffix.lower() != ".pdf":
        raise ValueError("Replica PDF generation currently supports PDFs only")

    raw_lines = _flatten_layout_lines(raw_ocr.layout_json)
    reviewed_lines = _flatten_layout_lines(reviewed.layout_json)

    if not raw_lines:
        raise ValueError("No OCR line boxes found in raw OCR layout data")
    # An empty reviewed layout is accepted; a non-empty one must line up 1:1.
    if reviewed_lines and len(reviewed_lines) != len(raw_lines):
        raise ValueError("Reviewed line layout does not match raw OCR line layout")

    source_layout = reviewed.layout_json if reviewed.layout_json else raw_ocr.layout_json
    if not source_layout:
        raise ValueError("No source layout found")

    return current_file, raw_ocr, reviewed, source_layout


def build_replica_layout(document: Document, mode: str = "shared") -> dict:
    """Build the replica layout dictionary for ``document``.

    Every page of the current PDF gets an entry; each non-blank source line is
    scaled from source-image coordinates to PDF page coordinates and given a
    fitted font size.

    Args:
        document: Document whose current PDF and text versions are used.
        mode: Stored verbatim as ``mode_source`` in the result.

    Returns:
        A dict with ``schema_version``, ``mode_source``, ``current_path``,
        ``text_version_source`` and ``pages``.

    Raises:
        ValueError, FileNotFoundError: see ``_get_replica_source_context``.
        KeyError: a non-blank source line has no ``"bbox"`` entry.
    """
    current_file, raw_ocr, reviewed, source_layout = _get_replica_source_context(document)
    reader = PdfReader(str(current_file))

    page_layouts = {page["page"]: page for page in source_layout.get("pages", [])}
    pages = []

    for page_num, pdf_page in enumerate(reader.pages, start=1):
        page_w = float(pdf_page.mediabox.width)
        page_h = float(pdf_page.mediabox.height)
        page_layout = page_layouts.get(page_num, {"lines": []})
        # A missing or zero source size falls back to 1.0 so the divisions
        # below cannot fail.
        src_w = float(page_layout.get("image_width") or 1.0)
        src_h = float(page_layout.get("image_height") or 1.0)
        scale_x = page_w / src_w
        scale_y = page_h / src_h

        line_entries = []
        for line in page_layout.get("lines", []):
            text_line = (line.get("text") or "").strip()
            if not text_line:
                continue

            left, top, right, bottom = line["bbox"]
            # Flip the vertical axis: y is the page height minus the scaled
            # bottom edge of the source box.
            pdf_x = left * scale_x
            pdf_y = page_h - (bottom * scale_y)
            # Enforce a minimum box size before fitting the font.
            box_width = max(10.0, (right - left) * scale_x)
            box_height = max(6.0, (bottom - top) * scale_y)
            font_size = _fit_font_size(text_line, box_width, box_height)

            line_entries.append(
                {
                    "text": text_line,
                    "bbox_source": [left, top, right, bottom],
                    "pdf_x": pdf_x,
                    "pdf_y": pdf_y,
                    "box_width": box_width,
                    "box_height": box_height,
                    "font_family_guess": "Helvetica",
                    "font_size_guess": font_size,
                    "text_color_guess": "#000000",
                    "text_render_mode_clean": 0,
                    "text_render_mode_scan_backed": 3,
                }
            )

        pages.append(
            {
                "page": page_num,
                "page_width": page_w,
                "page_height": page_h,
                "image_width": src_w,
                "image_height": src_h,
                "lines": line_entries,
            }
        )

    return {
        "schema_version": 1,
        "mode_source": mode,
        "current_path": str(current_file),
        # Both versions are guaranteed non-None by _get_replica_source_context.
        "text_version_source": {
            "raw_ocr_version_id": raw_ocr.id,
            "reviewed_version_id": reviewed.id,
        },
        "pages": pages,
    }


def _save_replica_layout_version(
    db: Session,
    document: Document,
    layout_json: dict,
    mode: str,
    created_by: str = "save_replica_pdf",
) -> DocumentReplicaLayoutVersion:
    """Store ``layout_json`` as the document's new current layout version.

    Clears ``is_current`` on the document's existing current versions, adds
    the new version, and points the document's review state at it (creating
    the state when none exists) with all review flags reset to ``False``.
    Changes are flushed, not committed; the caller owns the transaction.
    """
    db.query(DocumentReplicaLayoutVersion).filter(
        DocumentReplicaLayoutVersion.document_id == document.id,
        DocumentReplicaLayoutVersion.is_current == True,  # noqa: E712
    ).update({"is_current": False}, synchronize_session=False)

    version = DocumentReplicaLayoutVersion(
        document_id=document.id,
        version_number=_next_replica_layout_version_number(db, document.id),
        version_type="heuristic",
        render_mode_source=mode,
        is_current=True,
        created_by=created_by,
        quality_flags=[],
        inference_metadata_json={"pipeline": "heuristic_replica_v1", "mode": mode},
        layout_json=layout_json,
    )
    db.add(version)
    # Flush so version.id is assigned before the review state references it.
    db.flush()

    state = _get_current_replica_review_state(document)
    if state is None:
        state = DocumentReplicaReviewState(document_id=document.id)
        db.add(state)

    state.current_replica_layout_version_id = version.id
    state.is_reviewed = False
    state.is_approved = False
    state.needs_manual_adjustment = False
    state.needs_model_retry = False
    db.flush()

    return version


def _render_replica_pdf_from_layout(
    current_file: Path,
    layout_json: dict,
    out_path: Path,
    mode: str,
) -> None:
    """Render ``layout_json`` to a PDF at ``out_path`` and compress it.

    One output page is produced per rendered page image of ``current_file``.
    In ``"scan_backed"`` mode the page image is drawn first and the text uses
    render mode 3; any other mode draws the text alone with render mode 0.

    Raises:
        ValueError: no page image was produced, so no PDF could be built.
    """
    reader = PdfReader(str(current_file))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    scan_backed = mode == "scan_backed"

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        images = _render_pdf_page_images(current_file, tmpdir)
        overlay_pdf_path = tmpdir / "replica.pdf"
        pages = {page["page"]: page for page in layout_json.get("pages", [])}
        c = None

        for page_num, img_path in enumerate(images, start=1):
            pdf_page = reader.pages[page_num - 1]
            page_w = float(pdf_page.mediabox.width)
            page_h = float(pdf_page.mediabox.height)

            # The canvas is created lazily so the first page fixes its size.
            if c is None:
                c = canvas.Canvas(str(overlay_pdf_path), pagesize=(page_w, page_h))
            else:
                c.setPageSize((page_w, page_h))

            if scan_backed:
                c.drawImage(ImageReader(str(img_path)), 0, 0, width=page_w, height=page_h)

            page_layout = pages.get(page_num, {"lines": []})
            for line in page_layout.get("lines", []):
                text_line = (line.get("text") or "").strip()
                if not text_line:
                    continue

                text_obj = c.beginText()
                text_obj.setTextRenderMode(3 if scan_backed else 0)
                text_obj.setFont(
                    line.get("font_family_guess") or "Helvetica",
                    float(line.get("font_size_guess") or 10),
                )
                text_obj.setTextOrigin(float(line["pdf_x"]), float(line["pdf_y"]) + 1)
                text_obj.textLine(text_line)
                c.drawText(text_obj)

            c.showPage()

        if c is None:
            raise ValueError("Failed to build replica PDF")

        c.save()
        # Copy out before the temporary directory is removed.
        shutil.copy2(overlay_pdf_path, out_path)

    compress_pdf_with_ghostscript(out_path)


def _replica_output_path(output_path: Path, mode: str) -> Path:
    """Return ``output_path`` renamed to the replica filename for ``mode``.

    A trailing ``_v<N>`` before the extension and any existing
    ``_replica_clean`` / ``_replica_scan_backed`` stem suffix are removed
    before ``_replica_<mode>`` is appended; a missing extension becomes
    ``.pdf``.
    """
    out_path = Path(output_path)
    out_path = out_path.with_name(re.sub(r"_v\d+(?=\.[^.]+$)", "", out_path.name))
    stem = re.sub(r"(_replica_clean|_replica_scan_backed)$", "", out_path.stem)
    suffix = out_path.suffix or ".pdf"
    return out_path.with_name(f"{stem}_replica_{mode}{suffix}")


def save_replica_pdf(db: Session, document: Document, output_path: Path, mode: str) -> None:
    """Render a replica PDF for ``document`` and record it in the database.

    The replica is written next to ``output_path`` under a
    ``_replica_<mode>`` filename, a new layout version is stored, and a
    ``DocumentReplicaOutput`` row is committed. The document's own
    ``current_path`` is left untouched and no sibling files are pruned.

    Args:
        db: Session used for all writes; committed on success.
        document: Source document.
        output_path: Requested output location (renamed per mode).
        mode: ``"clean"`` or ``"scan_backed"``.

    Raises:
        ValueError: unsupported ``mode`` or an unusable source document.
        FileNotFoundError: the document's current file is missing.
    """
    if mode not in {"clean", "scan_backed"}:
        raise ValueError(f"Unsupported replica mode: {mode}")

    # Building the layout validates the source once; the validated source
    # path is carried in the layout, so it is not resolved a second time.
    layout_json = build_replica_layout(document, mode=mode)
    current_file = Path(layout_json["current_path"])

    out_path = _replica_output_path(Path(output_path), mode)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    layout_version = _save_replica_layout_version(db, document, layout_json, mode=mode)
    _render_replica_pdf_from_layout(current_file, layout_json, out_path, mode=mode)

    file_hash = sha256_for_file(out_path)
    file_size = out_path.stat().st_size

    # Mirroring stays best-effort, but a failure is now logged instead of
    # being dropped without a trace.
    try:
        _mirror_to_secondary_owner(document, out_path)
    except Exception:
        import logging

        logging.getLogger(__name__).warning(
            "Mirroring replica output %s failed", out_path, exc_info=True
        )

    output = DocumentReplicaOutput(
        document_id=document.id,
        replica_layout_version_id=layout_version.id,
        output_type=mode,
        file_path=str(out_path),
        sha256=file_hash,
        file_size_bytes=file_size,
        created_by="save_replica_pdf",
        render_settings_json={"mode": mode},
    )
    db.add(output)

    # Replica outputs are non-destructive exports.
    # Do not replace the primary/current document path or prune sibling files.
    db.commit()
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, JSON, String, Text
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func

from app.db.base import Base


class DocumentReplicaLayoutVersion(Base):
    """One stored version of a document's replica layout.

    Each row belongs to a document via ``document_id`` and carries the layout
    itself in ``layout_json``. ``version_number`` and ``is_current`` identify
    a version among the document's rows; nothing declared here enforces that
    the number is unique per document or that only one row is current.
    """

    __tablename__ = "document_replica_layout_versions"

    id = Column(Integer, primary_key=True, index=True)
    document_id = Column(Integer, ForeignKey("documents.id"), nullable=False, index=True)

    # Versioning metadata.
    version_number = Column(Integer, nullable=False)
    version_type = Column(String, nullable=False, default="heuristic")
    render_mode_source = Column(String, nullable=False, default="shared")
    is_current = Column(Boolean, nullable=False, default=True)

    # Provenance: who created the row and which text/layout version it came from.
    created_by = Column(String, nullable=True)
    derived_from_text_version_id = Column(Integer, ForeignKey("text_versions.id"), nullable=True)
    derived_from_replica_layout_version_id = Column(Integer, ForeignKey("document_replica_layout_versions.id"), nullable=True)

    # Optional model identification; all nullable.
    model_name = Column(String, nullable=True)
    model_version = Column(String, nullable=True)
    prompt_version = Column(String, nullable=True)

    # Quality annotations. NOTE(review): quality_score is stored as a string,
    # not a number — confirm that is intended.
    quality_score = Column(String, nullable=True)
    quality_note = Column(Text, nullable=True)
    quality_flags = Column(JSON, nullable=True)
    inference_metadata_json = Column(JSON, nullable=True)
    layout_json = Column(JSON, nullable=False)

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)

    document = relationship("Document", back_populates="replica_layout_versions")
    # Outputs rendered from this version are deleted together with it.
    outputs = relationship("DocumentReplicaOutput", back_populates="replica_layout_version", cascade="all, delete-orphan")
    # Self-referential link; remote_side=[id] makes this the many-to-one side.
    parent_layout_version = relationship("DocumentReplicaLayoutVersion", remote_side=[id])
# --- app/models/document_replica_output.py ---
from sqlalchemy import Column, DateTime, ForeignKey, Integer, JSON, String
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func

from app.db.base import Base


class DocumentReplicaOutput(Base):
    """A rendered replica file recorded for a document and a layout version."""

    __tablename__ = "document_replica_outputs"

    id = Column(Integer, primary_key=True, index=True)
    document_id = Column(Integer, ForeignKey("documents.id"), nullable=False, index=True)
    replica_layout_version_id = Column(Integer, ForeignKey("document_replica_layout_versions.id"), nullable=False, index=True)

    # What was rendered and where it was written.
    output_type = Column(String, nullable=False)
    file_path = Column(String, nullable=False)
    # Optional integrity and bookkeeping data about the written file.
    sha256 = Column(String, nullable=True)
    file_size_bytes = Column(Integer, nullable=True)
    created_by = Column(String, nullable=True)
    render_settings_json = Column(JSON, nullable=True)

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)

    document = relationship("Document", back_populates="replica_outputs")
    replica_layout_version = relationship("DocumentReplicaLayoutVersion", back_populates="outputs")


# --- app/models/document_replica_review_state.py ---
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, Text
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func

from app.db.base import Base


class DocumentReplicaReviewState(Base):
    """Review flags for a document's replica, pointing at a layout version.

    NOTE(review): no uniqueness is declared on ``document_id``, so several
    rows per document are possible at the schema level — confirm whether one
    row per document is intended.
    """

    __tablename__ = "document_replica_review_states"

    id = Column(Integer, primary_key=True, index=True)
    document_id = Column(Integer, ForeignKey("documents.id"), nullable=False, index=True)
    # Nullable: a state row may exist before any layout version is linked.
    current_replica_layout_version_id = Column(Integer, ForeignKey("document_replica_layout_versions.id"), nullable=True)

    # Review flags; all default to False.
    is_reviewed = Column(Boolean, nullable=False, default=False)
    is_approved = Column(Boolean, nullable=False, default=False)
    needs_model_retry = Column(Boolean, nullable=False, default=False)
    needs_manual_adjustment = Column(Boolean, nullable=False, default=False)

    reviewed_by = Column(Text, nullable=True)
    review_note = Column(Text, nullable=True)

    reviewed_at = Column(DateTime(timezone=True), nullable=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
    # Refreshed on every UPDATE through onupdate.
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)

    document = relationship("Document", back_populates="replica_review_states")
def _resolve_document_output_path(document, output_path: str = "") -> Path:
    """Return the validated PDF output path for ``document``.

    A blank ``output_path`` falls back to the proposed storage path with any
    trailing ``_v<N>`` / ``_<N>`` marker removed. The result always has a
    ``.pdf`` suffix and its parent directory is created.

    Raises:
        ValueError: ``"invalid_output_path"`` when the resolved parent
            directory is neither the default save root nor below it.
    """
    save_root = get_default_save_root()
    naming_row = document.naming_fields[0] if getattr(document, "naming_fields", None) else None

    default_output_path = Path(
        build_proposed_storage_path(
            document=document,
            save_root=save_root,
            naming_row=naming_row,
        )
    )
    default_output_path = default_output_path.with_name(
        re.sub(r"(?:_v\d+|_\d+)(?=\.[^.]+$)", "", default_output_path.name)
    )
    if default_output_path.suffix.lower() != ".pdf":
        default_output_path = default_output_path.with_suffix(".pdf")

    output_path_raw = (output_path or "").strip()
    output_path_obj = Path(output_path_raw) if output_path_raw else default_output_path

    if output_path_obj.suffix.lower() != ".pdf":
        output_path_obj = output_path_obj.with_suffix(".pdf")

    # Containment is checked on resolved paths so ".." segments and symlinked
    # directories cannot escape the save root.
    allowed_root = Path(save_root).resolve()
    resolved_parent = output_path_obj.parent.resolve()
    if allowed_root != resolved_parent and allowed_root not in resolved_parent.parents:
        raise ValueError("invalid_output_path")

    output_path_obj.parent.mkdir(parents=True, exist_ok=True)
    return output_path_obj


def _save_replica_pdf_and_redirect(
    document_id: str,
    output_path: str,
    db: Session,
    *,
    mode: str,
    failure_code: str,
    success_code: str,
) -> RedirectResponse:
    """Shared body of the two replica-save endpoints.

    Loads the document, resolves the output path, saves the replica in
    ``mode`` and returns the matching 303 redirect. ``failure_code`` and
    ``success_code`` are the ``error`` / ``success`` query values used in the
    redirect URL.
    """
    if not _storage_available():
        return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)

    document = (
        db.query(Document)
        .options(
            selectinload(Document.text_versions),
            selectinload(Document.naming_fields),
            selectinload(Document.replica_review_states),
        )
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    # Captured up front: the rollback below expires the loaded instance.
    detail_url = f"/documents/{document.document_id}"
    failure_url = f"{detail_url}?error={failure_code}&tab=ocr-review"

    # Path validation is handled on its own so the error type no longer
    # depends on matching text inside the exception message.
    try:
        output_path_obj = _resolve_document_output_path(document, output_path)
    except ValueError:
        return RedirectResponse(url=f"{detail_url}?error=invalid_output_path", status_code=303)
    except Exception:
        traceback.print_exc()
        return RedirectResponse(url=failure_url, status_code=303)

    try:
        save_replica_pdf(db, document, output_path_obj, mode=mode)
    except Exception as exc:
        # ValueError marks an expected validation failure; anything else is
        # unexpected and gets a traceback, as before.
        if not isinstance(exc, ValueError):
            traceback.print_exc()
        # Discard rows flushed before the failure so the session stays clean.
        db.rollback()
        return RedirectResponse(url=failure_url, status_code=303)

    return RedirectResponse(url=f"{detail_url}?success={success_code}&tab=ocr-review", status_code=303)


@router.post("/{document_id}/save-replica-pdf", response_class=RedirectResponse)
def save_replica_pdf_clean(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
    """Save a text-only ("clean") replica PDF and redirect to the document page."""
    return _save_replica_pdf_and_redirect(
        document_id,
        output_path,
        db,
        mode="clean",
        failure_code="save_replica_pdf_failed",
        success_code="saved_replica_pdf",
    )


@router.post("/{document_id}/save-replica-pdf-scan-backed", response_class=RedirectResponse)
def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
    """Save a scan-backed replica PDF and redirect to the document page."""
    return _save_replica_pdf_and_redirect(
        document_id,
        output_path,
        db,
        mode="scan_backed",
        failure_code="save_replica_pdf_scan_backed_failed",
        success_code="saved_replica_pdf_scan_backed",
    )


def _allowed_preview_paths(document) -> set[Path]:
    """Return the resolved file paths that may be previewed for ``document``.

    These are the document's ``current_path`` plus the ``file_path`` of each
    of its replica outputs.
    """
    candidates = [getattr(document, "current_path", None)]
    candidates.extend(
        getattr(row, "file_path", None)
        for row in (getattr(document, "replica_outputs", None) or [])
    )
    return {Path(value).resolve() for value in candidates if value}


@router.get("/{document_id}/preview-file")
def document_preview_file(document_id: str, path: str | None = None, db: Session = Depends(get_db)):
    """Serve a document's preview file inline.

    Without ``path`` the document's ``current_path`` is served. With ``path``
    the file is served only when it is one of this document's own files; any
    other value gets the same 404 as a missing file, so the endpoint cannot
    be used to read arbitrary files from the server.
    """
    document = db.query(Document).filter(Document.document_id == document_id).first()
    resolved_path = path or (document.current_path if document else None)
    if document is None or not resolved_path:
        return HTMLResponse(content="Preview file not found", status_code=404)

    path_obj = Path(resolved_path)
    # SECURITY: ``path`` is caller-controlled; allow only paths recorded for
    # this document.
    if path and path_obj.resolve() not in _allowed_preview_paths(document):
        return HTMLResponse(content="Preview file not found", status_code=404)

    if not path_obj.exists() or not path_obj.is_file():
        return HTMLResponse(content="Preview file not found", status_code=404)

    # NOTE(review): the original statement that sets media_type lies between
    # two diff hunks and is not visible here; the stdlib guess below is an
    # assumption — confirm against the real file before merging.
    import mimetypes

    media_type = mimetypes.guess_type(path_obj.name)[0] or "application/octet-stream"
    return FileResponse(path=str(path_obj), media_type=media_type, filename=path_obj.name, headers={"Content-Disposition": "inline; filename=\"" + path_obj.name + "\""})
def _get_latest_replica_output(document, output_type: str):
    """Return the newest replica output of ``output_type``, or ``None``.

    Rows are ranked by ``created_at``, with ``id`` as tie-breaker. Rows
    without a timestamp rank below every timestamped row instead of being
    compared with one, which used to raise ``TypeError``.
    """
    outputs = getattr(document, "replica_outputs", None) or []
    matches = [row for row in outputs if getattr(row, "output_type", None) == output_type]
    if not matches:
        return None

    def recency(row):
        created_at = getattr(row, "created_at", None)
        row_id = getattr(row, "id", None) or 0
        # The leading flag keeps datetimes and placeholders from ever being
        # compared with each other.
        if created_at is None:
            return (0, 0, row_id)
        return (1, created_at, row_id)

    return max(matches, key=recency)


def _build_preview_url_for_path(request: Request, document_id: str, path_value: str | None):
    """Return the preview URL for ``path_value``, or ``None`` if it is not a file.

    The URL targets the ``document_preview_file`` route and carries the
    URL-quoted path plus the file's modification time as a cache-busting
    ``v`` parameter.
    """
    if not path_value:
        return None

    from stat import S_ISREG
    from urllib.parse import quote

    path_obj = Path(path_value)
    # A single stat() call supplies both the file-type check and the mtime,
    # so the file cannot disappear between the check and the read.
    try:
        file_stat = path_obj.stat()
    except (OSError, ValueError):
        return None
    if not S_ISREG(file_stat.st_mode):
        return None

    base = str(request.url_for("document_preview_file", document_id=document_id))
    return f"{base}?path={quote(str(path_obj))}&v={int(file_stat.st_mtime)}"
/* ===== preview source toggle ===== */

/* Header row of the preview card: title and toggle share one wrapping row. */
.preview-card-header {
  display: flex;
  flex-wrap: wrap;
  justify-content: space-between;
  align-items: center;
  gap: 0.75rem;
}

/* Container for the source links; wraps when space runs out. */
.preview-source-toggle {
  display: flex;
  flex-wrap: wrap;
  gap: 0.45rem;
}

/* Single pill-shaped source link. */
.preview-source-link {
  display: inline-flex;
  justify-content: center;
  align-items: center;
  min-height: 2rem;
  padding: 0.35rem 0.7rem;
  border: 1px solid #d7dce5;
  border-radius: 999px;
  font-size: 0.82rem;
  line-height: 1;
  text-decoration: none;
  color: #334155;
  background: #ffffff;
}

/* Currently selected source: inverted colours. */
.preview-source-link.active {
  border-color: #0f172a;
  color: #ffffff;
  background: #0f172a;
}

/* ===== end preview source toggle ===== */
Storage mount unavailable. Preview is temporarily unavailable.
{% elif file_url %} diff --git a/run.sh b/run.sh old mode 100755 new mode 100644