From 0d70e6b7bb968a6197c80c127accfa4549064bbd Mon Sep 17 00:00:00 2001 From: McElwain Date: Fri, 3 Apr 2026 08:38:13 -0500 Subject: [PATCH] feat: Phase 3.5 add inbox/upload/server ingest, OCR rerun, and text version tracking --- app/core/config.py | 9 +- app/db/session.py | 11 +- app/logic/ingest.py | 372 ++++++++++++++++++++++++++++ app/main.py | 5 + app/models/text_version.py | 23 +- app/routes/documents.py | 87 ++++++- app/routes/ingest.py | 180 ++++++++++++++ app/templates/documents/detail.html | 53 +++- app/templates/ingest/index.html | 53 ++++ app/templates/ingest/result.html | 39 +++ 10 files changed, 807 insertions(+), 25 deletions(-) create mode 100644 app/logic/ingest.py create mode 100644 app/routes/ingest.py create mode 100644 app/templates/ingest/index.html create mode 100644 app/templates/ingest/result.html diff --git a/app/core/config.py b/app/core/config.py index e66e39e..0ed83ae 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -3,7 +3,8 @@ from dotenv import load_dotenv load_dotenv() -class Settings: - DATABASE_URL: str = os.getenv("DATABASE_URL", "postgresql://user:pass@localhost:5432/document_processor") - -settings = Settings() +DATABASE_URL = os.getenv("DATABASE_URL", "") +DOCUMENT_STORAGE_ROOT = os.getenv("DOCUMENT_STORAGE_ROOT", "/mnt/storage/document-processor") +DOCUMENT_ARCHIVE_ROOT = os.getenv("DOCUMENT_ARCHIVE_ROOT", "/mnt/storage/document-processor/archive/current") +INBOX_ROOT = os.getenv("INBOX_ROOT", "/mnt/storage/document-processor/incoming/inbox") +UPLOAD_ROOT = os.getenv("UPLOAD_ROOT", "/mnt/storage/document-processor/incoming/uploads") diff --git a/app/db/session.py b/app/db/session.py index 1599e53..36ced16 100644 --- a/app/db/session.py +++ b/app/db/session.py @@ -1,14 +1,7 @@ from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker -from app.core.config import settings +from app.core.config import DATABASE_URL -engine = create_engine(settings.DATABASE_URL, echo=True) +engine = create_engine(DATABASE_URL, echo=True) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - -def get_db(): - db = SessionLocal() - try: - yield db - finally: - db.close() diff --git a/app/logic/ingest.py b/app/logic/ingest.py new file mode 100644 index 0000000..2791a2d --- /dev/null +++ b/app/logic/ingest.py @@ -0,0 +1,372 @@ +from __future__ import annotations + +import hashlib +import mimetypes +import shutil +import subprocess +import tempfile +from difflib import SequenceMatcher +from pathlib import Path +from uuid import uuid4 + +from sqlalchemy import func +from sqlalchemy.orm import Session + +from app.core.config import DOCUMENT_ARCHIVE_ROOT, INBOX_ROOT, UPLOAD_ROOT +from app.models.document import Document +from app.models.document_version import DocumentVersion +from app.models.text_version import TextVersion + + +ALLOWED_EXTENSIONS = {".pdf", ".jpg", ".jpeg", ".png"} + + +def is_supported_file(path: Path) -> bool: + return path.is_file() and path.suffix.lower() in ALLOWED_EXTENSIONS + + +def sha256_for_file(path: Path) -> str: + hasher = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def guess_mime_type(path: Path) -> str: + mime_type, _ = mimetypes.guess_type(str(path)) + return mime_type or "application/octet-stream" + + +def build_storage_path(document_id: str, source_path: Path) -> Path: + archive_root = Path(DOCUMENT_ARCHIVE_ROOT) + filename = f"{document_id}{source_path.suffix.lower()}" + return archive_root / filename + + +def get_next_text_version_number(db: Session, document_id: int) -> int: + max_version = ( + db.query(func.max(TextVersion.version_number)) + .filter(TextVersion.document_id == document_id) + .scalar() + ) + return (max_version or 0) + 1 + + +def get_tesseract_version() -> str | None: + try: + result = subprocess.run( + ["tesseract", "--version"], + capture_output=True, + text=True, + check=True, + ) + line = result.stdout.splitlines()[0].strip() + return line + except Exception: + return None + + +def get_pdftotext_version() -> str | None: + try: + result = subprocess.run( + ["pdftotext", "-v"], + capture_output=True, + text=True, + ) + text = (result.stderr or result.stdout).splitlines() + return text[0].strip() if text else None + except Exception: + return None + + +def extract_pdf_text(path: Path) -> str: + try: + result = subprocess.run( + ["pdftotext", str(path), "-"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except Exception: + return "" + + +def ocr_image(path: Path) -> str: + try: + result = subprocess.run( + ["tesseract", str(path), "stdout"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except Exception: + return "" + + +def ocr_pdf(path: Path) -> str: + with tempfile.TemporaryDirectory() as tmpdir: + output_prefix = Path(tmpdir) / "page" + try: + subprocess.run( + ["pdftoppm", "-png", str(path), str(output_prefix)], + capture_output=True, + text=True, + check=True, + ) + except Exception: + return "" + + texts: list[str] = [] + for img in sorted(Path(tmpdir).glob("page-*.png")): + text = ocr_image(img) + if text: + texts.append(text) + + return "\n\n".join(texts).strip() + + +def run_ocr_only(path: Path) -> tuple[str, str | None, str | None]: + suffix = path.suffix.lower() + tesseract_version = get_tesseract_version() + + if suffix == ".pdf": + return ocr_pdf(path).strip(), "tesseract", tesseract_version + if suffix in {".jpg", ".jpeg", ".png"}: + return ocr_image(path).strip(), "tesseract", tesseract_version + return "", None, None + + +def get_raw_text_for_document(path: Path) -> tuple[str, str | None, str | None, str | None]: + suffix = path.suffix.lower() + + if suffix == ".pdf": + extracted = extract_pdf_text(path) + if len(extracted.strip()) >= 40: + return extracted, "pdftotext", get_pdftotext_version(), "initial_ingest" + + ocr_text = ocr_pdf(path).strip() + return ocr_text, "tesseract", get_tesseract_version(), "initial_ingest_fallback" + + if suffix in {".jpg", ".jpeg", ".png"}: + return ocr_image(path).strip(), "tesseract", get_tesseract_version(), "initial_ingest" + + return "", None, None, None + + +def compute_quality_score(source_text: str, reviewed_text: str) -> float: + if not source_text and not reviewed_text: + return 100.0 + if not source_text: + return 0.0 + ratio = SequenceMatcher(None, source_text, reviewed_text).ratio() + return round(ratio * 100, 2) + + +def archive_document( + db: Session, + source: Path, + source_system: str, + document_type: str = "receipt", +) -> Document: + if not source.exists(): + raise FileNotFoundError(f"Source file not found: {source}") + + if not is_supported_file(source): + raise ValueError(f"Unsupported file type: {source.suffix}") + + document_id = f"doc_{uuid4().hex[:12]}" + current_path = build_storage_path(document_id, source) + + current_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, current_path) + + file_size = current_path.stat().st_size + mime_type = guess_mime_type(current_path) + sha256_current = sha256_for_file(current_path) + + raw_text, ocr_engine, ocr_engine_version, rerun_source = get_raw_text_for_document(current_path) + + document = Document( + document_id=document_id, + document_type=document_type, + source_path=str(source), + current_path=str(current_path), + original_filename=source.name, + canonical_filename=current_path.name, + mime_type=mime_type, + file_size=file_size, + page_count=1 if source.suffix.lower() == ".pdf" else None, + sha256_current=sha256_current, + storage_status="ingested", + review_status="ocr_complete" if raw_text else "ingested", + ) + db.add(document) + db.flush() + + version = DocumentVersion( + document_id=document.id, + version_number=1, + version_type="original", + file_path=str(current_path), + sha256=sha256_current, + created_by=source_system, + notes=f"Ingested from {source_system}", + ) + db.add(version) + + if raw_text: + text_version = TextVersion( + document_id=document.id, + version_number=1, + version_type="raw_ocr", + text_content=raw_text, + created_by="system", + is_current=True, + ocr_engine=ocr_engine, + ocr_engine_version=ocr_engine_version, + rerun_source=rerun_source, + quality_flags=[], + quality_note=None, + ) + db.add(text_version) + + db.commit() + db.refresh(document) + return document + + +def rerun_ocr_for_document(db: Session, document: Document) -> TextVersion: + if not document.current_path: + raise ValueError("Document has no current_path") + + current_file = Path(document.current_path) + if not current_file.exists(): + raise FileNotFoundError(f"Current file not found: {current_file}") + + raw_text, ocr_engine, ocr_engine_version = run_ocr_only(current_file) + if not raw_text: + raise ValueError("OCR produced no text") + + existing_raw = ( + db.query(TextVersion) + .filter( + TextVersion.document_id == document.id, + TextVersion.version_type == "raw_ocr", + TextVersion.is_current.is_(True), + ) + .all() + ) + + previous_raw_id = None + for tv in existing_raw: + tv.is_current = False + previous_raw_id = tv.id + + new_text = TextVersion( + document_id=document.id, + version_number=get_next_text_version_number(db, document.id), + version_type="raw_ocr", + text_content=raw_text, + created_by="rerun_ocr", + is_current=True, + ocr_engine=ocr_engine, + ocr_engine_version=ocr_engine_version, + rerun_source="manual_rerun", + quality_flags=[], + quality_note=None, + derived_from_version_id=previous_raw_id, + ) + db.add(new_text) + + document.review_status = "ocr_complete" + + db.commit() + db.refresh(new_text) + return new_text + + +def ingest_file( + db: Session, + file_path: str, + source_system: str, + document_type: str = "receipt", +) -> Document: + source = Path(file_path).expanduser().resolve() + return archive_document( + db=db, + source=source, + source_system=source_system, + document_type=document_type, + ) + + +def ingest_uploaded_file( + db: Session, + filename: str, + file_bytes: bytes, + source_system: str = "upload_ingest", + document_type: str = "receipt", +) -> Document: + suffix = Path(filename).suffix.lower() + if suffix not in ALLOWED_EXTENSIONS: + raise ValueError(f"Unsupported file type: {suffix}") + + upload_root = Path(UPLOAD_ROOT) + upload_root.mkdir(parents=True, exist_ok=True) + + staged_name = f"{uuid4().hex[:12]}_{Path(filename).name}" + staged_path = upload_root / staged_name + staged_path.write_bytes(file_bytes) + + return archive_document( + db=db, + source=staged_path, + source_system=source_system, + document_type=document_type, + ) + + +def ingest_directory( + db: Session, + directory_path: str, + recursive: bool = True, + source_system: str = "directory_ingest", + document_type: str = "receipt", +) -> list[Document]: + source_dir = Path(directory_path).expanduser().resolve() + + if not source_dir.exists() or not source_dir.is_dir(): + raise NotADirectoryError(f"Directory not found: {source_dir}") + + files = source_dir.rglob("*") if recursive else source_dir.glob("*") + + ingested: list[Document] = [] + for path in files: + if not is_supported_file(path): + continue + try: + ingested.append( + ingest_file( + db=db, + file_path=str(path), + source_system=source_system, + document_type=document_type, + ) + ) + except Exception: + continue + + return ingested + + +def ingest_inbox(db: Session) -> list[Document]: + return ingest_directory( + db=db, + directory_path=INBOX_ROOT, + recursive=True, + source_system="inbox_ingest", + document_type="receipt", + ) diff --git a/app/main.py b/app/main.py index 15ac75c..365fc3b 100644 --- a/app/main.py +++ b/app/main.py @@ -1,12 +1,17 @@ from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles from app.routes.documents import router as documents_router from app.routes.health import router as health_router +from app.routes.ingest import router as ingest_router app = FastAPI(title="document-processor") +app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), name="files") + app.include_router(health_router) app.include_router(documents_router) +app.include_router(ingest_router) @app.get("/") diff --git a/app/models/text_version.py b/app/models/text_version.py index 716c7f8..d55973f 100644 --- a/app/models/text_version.py +++ b/app/models/text_version.py @@ -1,5 +1,7 @@ from datetime import datetime -from sqlalchemy import String, DateTime, ForeignKey, Text, Boolean +from decimal import Decimal + +from sqlalchemy import String, DateTime, ForeignKey, Text, Boolean, Integer, JSON, Numeric from sqlalchemy.orm import Mapped, mapped_column, relationship from app.db.base import Base @@ -13,14 +15,27 @@ class TextVersion(Base): ForeignKey("documents.id"), nullable=False, index=True ) - version_type: Mapped[str] = mapped_column( - String(50), nullable=False - ) # raw_ocr, reviewed + version_number: Mapped[int] = mapped_column(Integer, nullable=False) + version_type: Mapped[str] = mapped_column(String(50), nullable=False) # raw_ocr, reviewed + text_content: Mapped[str] = mapped_column(Text, nullable=False) created_by: Mapped[str | None] = mapped_column(String(100), nullable=True) is_current: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False) + ocr_engine: Mapped[str | None] = mapped_column(String(100), nullable=True) + ocr_engine_version: Mapped[str | None] = mapped_column(String(100), nullable=True) + rerun_source: Mapped[str | None] = mapped_column(String(100), nullable=True) + + quality_score: Mapped[Decimal | None] = mapped_column(Numeric(5, 2), nullable=True) + quality_flags: Mapped[list | None] = mapped_column(JSON, nullable=True) + quality_note: Mapped[str | None] = mapped_column(Text, nullable=True) + + derived_from_version_id: Mapped[int | None] = mapped_column( + ForeignKey("text_versions.id"), + nullable=True, + ) + created_at: Mapped[datetime] = mapped_column( DateTime, default=datetime.utcnow, nullable=False ) diff --git a/app/routes/documents.py b/app/routes/documents.py index efa60ed..5d62aff 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -7,6 +7,7 @@ from fastapi.templating import Jinja2Templates from sqlalchemy.orm import Session, selectinload from app.db.deps import get_db +from app.logic.ingest import compute_quality_score, rerun_ocr_for_document from app.models.document import Document from app.models.document_version import DocumentVersion from app.models.text_version import TextVersion @@ -16,6 +17,27 @@ router = APIRouter(prefix="/documents", tags=["documents"]) BASE_DIR = Path(__file__).resolve().parent.parent templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) +QUALITY_FLAG_OPTIONS = [ + "bad_embedded_text", + "ocr_garbled", + "low_text_coverage", + "missing_lines", + "bad_line_breaks", + "low_contrast", + "blurry", + "skewed_scan", + "cropped", + "shadowed", + "small_text", + "thermal_faded", + "handwriting_present", + "receipt_damage", + "manual_rerun_helped", + "manual_rerun_no_change", + "major_manual_cleanup", + "minor_manual_cleanup", +] + @router.get("/", response_class=HTMLResponse) def list_documents(request: Request, db: Session = Depends(get_db)): @@ -35,14 +57,12 @@ def test_ingest(db: Session = Depends(get_db)): document_id=public_id, document_type="receipt", source_path=f"/mnt/storage/documents/incoming/{public_id}.pdf", - original_path=f"/mnt/storage/documents/archive/originals/{public_id}.pdf", current_path=f"/mnt/storage/documents/current/{public_id}.pdf", original_filename=f"{public_id}.pdf", canonical_filename=f"{public_id}.pdf", mime_type="application/pdf", file_size=245760, page_count=1, - sha256_original="dummy_original_hash", sha256_current="dummy_current_hash", storage_status="ingested", review_status="ocr_complete", @@ -54,8 +74,8 @@ def test_ingest(db: Session = Depends(get_db)): document_id=document.id, version_number=1, version_type="original", - file_path=document.original_path or document.source_path, - sha256=document.sha256_original, + file_path=document.current_path, + sha256=document.sha256_current, created_by="system", notes="Initial test ingest", ) @@ -63,6 +83,7 @@ def test_ingest(db: Session = Depends(get_db)): raw_text = TextVersion( document_id=document.id, + version_number=1, version_type="raw_ocr", text_content=( "CVS PHARMACY\n" @@ -72,6 +93,11 @@ def test_ingest(db: Session = Depends(get_db)): ), created_by="system", is_current=True, + ocr_engine="test_seed", + ocr_engine_version=None, + rerun_source="initial_ingest", + quality_flags=[], + quality_note=None, ) db.add(raw_text) @@ -80,10 +106,27 @@ def test_ingest(db: Session = Depends(get_db)): return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303) +@router.post("/{document_id}/rerun-ocr", response_class=RedirectResponse) +def rerun_ocr(document_id: str, db: Session = Depends(get_db)): + document = db.query(Document).filter(Document.document_id == document_id).first() + + if document is None: + return RedirectResponse(url="/documents/", status_code=303) + + try: + rerun_ocr_for_document(db, document) + except Exception: + return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303) + + return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303) + + @router.post("/{document_id}/review-text", response_class=RedirectResponse) def save_reviewed_text( document_id: str, reviewed_text: str = Form(...), + quality_flags: list[str] | None = Form(None), + quality_note: str = Form(""), db: Session = Depends(get_db), ): document = ( @@ -96,6 +139,17 @@ def save_reviewed_text( if document is None: return RedirectResponse(url="/documents/", status_code=303) + sorted_text_versions = sorted( + document.text_versions, + key=lambda x: (x.version_number, x.created_at), + reverse=True, + ) + + current_raw = next( + (tv for tv in sorted_text_versions if tv.version_type == "raw_ocr" and tv.is_current), + None, + ) + existing_reviewed = [ tv for tv in document.text_versions if tv.version_type == "reviewed" and tv.is_current ] @@ -104,13 +158,20 @@ def save_reviewed_text( reviewed_version = TextVersion( document_id=document.id, + version_number=max(tv.version_number for tv in document.text_versions) + 1 if document.text_versions else 1, version_type="reviewed", text_content=reviewed_text, created_by="mcelwain", is_current=True, + derived_from_version_id=current_raw.id if current_raw else None, ) db.add(reviewed_version) + if current_raw: + current_raw.quality_score = compute_quality_score(current_raw.text_content, reviewed_text) + current_raw.quality_flags = quality_flags or [] + current_raw.quality_note = quality_note or None + document.review_status = "reviewed" db.commit() @@ -137,12 +198,12 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge sorted_text_versions = sorted( document.text_versions, - key=lambda x: x.created_at, + key=lambda x: (x.version_number, x.created_at), reverse=True, ) raw_ocr = next( - (tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"), + (tv for tv in sorted_text_versions if tv.version_type == "raw_ocr" and tv.is_current), None, ) @@ -157,6 +218,16 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge else raw_ocr.text_content if raw_ocr is not None else "" ) + file_url = None + if document.current_path: + storage_root = Path("/mnt/storage/document-processor") + current_path = Path(document.current_path) + try: + rel = current_path.relative_to(storage_root) + file_url = f"/files/{rel.as_posix()}" + except Exception: + file_url = None + return templates.TemplateResponse( request=request, name="documents/detail.html", @@ -166,5 +237,9 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge "raw_ocr": raw_ocr, "reviewed_ocr": reviewed_ocr, "review_text_value": review_text_value, + "file_url": file_url, + "quality_flag_options": QUALITY_FLAG_OPTIONS, + "current_quality_flags": raw_ocr.quality_flags if raw_ocr and raw_ocr.quality_flags else [], + "current_quality_note": raw_ocr.quality_note if raw_ocr and raw_ocr.quality_note else "", }, ) diff --git a/app/routes/ingest.py b/app/routes/ingest.py new file mode 100644 index 0000000..bce03fd --- /dev/null +++ b/app/routes/ingest.py @@ -0,0 +1,180 @@ +from pathlib import Path + +from fastapi import APIRouter, Depends, File, Form, Request, UploadFile +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates +from sqlalchemy.orm import Session + +from app.core.config import INBOX_ROOT +from app.db.deps import get_db +from app.logic.ingest import ingest_directory, ingest_file, ingest_inbox, ingest_uploaded_file + +router = APIRouter(prefix="/ingest", tags=["ingest"]) + +BASE_DIR = Path(__file__).resolve().parent.parent +templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) + + +@router.get("/", response_class=HTMLResponse) +def ingest_home(request: Request): + return templates.TemplateResponse( + request=request, + name="ingest/index.html", + context={ + "request": request, + "inbox_root": INBOX_ROOT, + }, + ) + + +@router.post("/upload-files", response_class=HTMLResponse) +async def ingest_upload_files( + request: Request, + uploaded_files: list[UploadFile] = File(...), + db: Session = Depends(get_db), +): + documents = [] + errors = [] + + for uploaded_file in uploaded_files: + try: + file_bytes = await uploaded_file.read() + document = ingest_uploaded_file( + db=db, + filename=uploaded_file.filename or "upload.pdf", + file_bytes=file_bytes, + source_system="upload_ingest", + ) + documents.append(document) + except Exception as e: + errors.append(f"{uploaded_file.filename}: {e}") + + if errors and not documents: + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": "Upload failed.", + "documents": [], + "errors": errors, + }, + status_code=400, + ) + + message = f"Ingested {len(documents)} uploaded file(s)." + if errors: + message += f" {len(errors)} file(s) had errors." + + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": message, + "documents": documents, + "errors": errors, + }, + ) + + +@router.post("/server-file", response_class=HTMLResponse) +def ingest_server_file( + request: Request, + file_path: str = Form(...), + db: Session = Depends(get_db), +): + try: + document = ingest_file( + db=db, + file_path=file_path, + source_system="server_file_ingest", + ) + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Ingested server file successfully: {document.document_id}", + "documents": [document], + "errors": [], + }, + ) + except Exception as e: + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Error ingesting server file: {e}", + "documents": [], + "errors": [], + }, + status_code=400, + ) + + +@router.post("/server-directory", response_class=HTMLResponse) +def ingest_server_directory( + request: Request, + directory_path: str = Form(...), + recursive: str | None = Form(None), + db: Session = Depends(get_db), +): + try: + docs = ingest_directory( + db=db, + directory_path=directory_path, + recursive=recursive is not None, + source_system="server_directory_ingest", + ) + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Ingested {len(docs)} file(s) from server directory.", + "documents": docs, + "errors": [], + }, + ) + except Exception as e: + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Error ingesting server directory: {e}", + "documents": [], + "errors": [], + }, + status_code=400, + ) + + +@router.post("/inbox", response_class=HTMLResponse) +def ingest_inbox_route(request: Request, db: Session = Depends(get_db)): + try: + docs = ingest_inbox(db=db) + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Ingested {len(docs)} file(s) from inbox.", + "documents": docs, + "errors": [], + }, + ) + except Exception as e: + return templates.TemplateResponse( + request=request, + name="ingest/result.html", + context={ + "request": request, + "message": f"Error ingesting inbox: {e}", + "documents": [], + "errors": [], + }, + status_code=400, + ) diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html index 0f66f7d..74cda7b 100644 --- a/app/templates/documents/detail.html +++ b/app/templates/documents/detail.html @@ -13,7 +13,6 @@ +

Document preview

+ {% if file_url %} + {% if document.mime_type == "application/pdf" %} + + {% elif document.mime_type in ["image/jpeg", "image/png"] %} + Document image + {% else %} +

Open file

+ {% endif %} + {% else %} +

No preview available.

+ {% endif %} +

Document versions

{% if document.versions %}