From 431372438e04cb127864fd80fb4d27f79e294218 Mon Sep 17 00:00:00 2001 From: McElwain Date: Sun, 5 Apr 2026 12:05:19 -0500 Subject: [PATCH] feat: finalized Phase 3.5/4.1 workflow and added queue-trash plus UX-1 shell --- app/logic/document_outputs.py | 42 ++ app/main.py | 1 + app/routes/documents.py | 114 +++- app/static/app.css | 510 ++++++++++++++ app/templates/documents/detail.html | 622 +++++++++++------- .../documents/detail.html.bak.20260404_073442 | 383 +++++++++++ app/templates/documents/list.html | 101 ++- app/templates/ingest/index.html | 123 +++- app/templates/ingest/result.html | 111 +++- app/templates/queue/index.html | 213 +++--- app/templates/trash/index.html | 129 ++-- 11 files changed, 1874 insertions(+), 475 deletions(-) create mode 100644 app/static/app.css create mode 100644 app/templates/documents/detail.html.bak.20260404_073442 diff --git a/app/logic/document_outputs.py b/app/logic/document_outputs.py index 26156e6..a519693 100644 --- a/app/logic/document_outputs.py +++ b/app/logic/document_outputs.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import os import shutil import subprocess import tempfile @@ -28,6 +29,45 @@ def sha256_for_file(path: Path) -> str: return hasher.hexdigest() +def compress_pdf_with_ghostscript(path: Path) -> bool: + compressed_path = path.with_suffix(".compressed.pdf") + + try: + subprocess.run( + [ + "gs", + "-sDEVICE=pdfwrite", + "-dCompatibilityLevel=1.4", + "-dPDFSETTINGS=/ebook", + "-dNOPAUSE", + "-dQUIET", + "-dBATCH", + f"-sOutputFile={compressed_path}", + str(path), + ], + check=True, + capture_output=True, + text=True, + ) + + if not compressed_path.exists() or compressed_path.stat().st_size == 0: + return False + + original_size = path.stat().st_size if path.exists() else 0 + compressed_size = compressed_path.stat().st_size + + # Only replace if compression actually helped. + if original_size > 0 and compressed_size < original_size: + os.replace(compressed_path, path) + else: + compressed_path.unlink(missing_ok=True) + + return True + except Exception: + compressed_path.unlink(missing_ok=True) + return False + + def get_next_document_version_number(db: Session, document_id: int) -> int: max_version = ( db.query(func.max(DocumentVersion.version_number)) @@ -188,6 +228,8 @@ def create_ocr_corrected_pdf_version(db: Session, document: Document) -> Documen c.save() shutil.copy2(overlay_pdf_path, out_path) + compress_pdf_with_ghostscript(out_path) + file_hash = sha256_for_file(out_path) version = DocumentVersion( diff --git a/app/main.py b/app/main.py index e68be00..d96da4b 100644 --- a/app/main.py +++ b/app/main.py @@ -8,6 +8,7 @@ from app.routes.queue import router as queue_router from app.routes.trash import router as trash_router app = FastAPI(title="document-processor") +app.mount("/static", StaticFiles(directory="app/static"), name="static") app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), name="files") diff --git a/app/routes/documents.py b/app/routes/documents.py index 9f5b381..a91c269 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -25,6 +25,50 @@ from app.models.text_version import TextVersion router = APIRouter(prefix="/documents", tags=["documents"]) BASE_DIR = Path(__file__).resolve().parent.parent + + +def _build_queue_navigation(db: Session, document: Document, queue: str | None) -> dict: + if not queue: + return {"queue": None, "prev_doc": None, "next_doc": None} + + base = db.query(Document).filter(Document.is_trashed.is_(False)) + + if queue == "ocr": + docs = ( + base.filter(Document.review_status != "reviewed") + .order_by(Document.created_at.asc()) + .all() + ) + elif queue == "fields": + docs = ( + base.filter(Document.review_status == "reviewed") + .all() + ) + filtered = [] + for d in docs: + has_fields = bool(getattr(d, "extracted_fields", None)) + if not has_fields: + filtered.append(d) + docs = sorted(filtered, key=lambda d: d.updated_at or d.created_at) + elif queue == "recent": + docs = ( + base.order_by(Document.updated_at.desc()) + .all() + ) + else: + return {"queue": None, "prev_doc": None, "next_doc": None} + + ids = [d.document_id for d in docs] + if document.document_id not in ids: + return {"queue": queue, "prev_doc": None, "next_doc": None} + + idx = ids.index(document.document_id) + prev_doc = docs[idx - 1] if idx > 0 else None + next_doc = docs[idx + 1] if idx < len(docs) - 1 else None + + return {"queue": queue, "prev_doc": prev_doc, "next_doc": next_doc} + + templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) QUALITY_FLAG_OPTIONS = [ @@ -117,6 +161,68 @@ def _apply_reviewed_lines_to_layout(base_layout: dict | None, reviewed_text: str return new_layout + +def _get_queue_navigation(db: Session, document: Document) -> dict: + active_docs = ( + db.query(Document) + .filter(Document.is_trashed.is_(False)) + .order_by(Document.created_at.asc()) + .all() + ) + doc_ids = [d.document_id for d in active_docs] + prev_doc = None + next_doc = None + + if document.document_id in doc_ids: + idx = doc_ids.index(document.document_id) + if idx > 0: + prev_doc = active_docs[idx - 1] + if idx < len(active_docs) - 1: + next_doc = active_docs[idx + 1] + + needs_ocr = ( + db.query(Document) + .filter(Document.is_trashed.is_(False)) + .filter(Document.review_status != "reviewed") + .order_by(Document.created_at.asc()) + .all() + ) + + reviewed_no_fields = [] + for d in ( + db.query(Document) + .options(selectinload(Document.extracted_fields)) + .filter(Document.is_trashed.is_(False)) + .filter(Document.review_status == "reviewed") + .order_by(Document.updated_at.asc()) + .all() + ): + if not d.extracted_fields: + reviewed_no_fields.append(d) + + next_ocr = None + next_fields = None + + if needs_ocr: + for d in needs_ocr: + if d.document_id != document.document_id: + next_ocr = d + break + + if reviewed_no_fields: + for d in reviewed_no_fields: + if d.document_id != document.document_id: + next_fields = d + break + + return { + "prev_doc": prev_doc, + "next_doc": next_doc, + "next_ocr_doc": next_ocr, + "next_fields_doc": next_fields, + } + + def _extracted_field_form_values(document: Document, request: Request) -> dict: current = get_current_extracted_fields(document) auto = request.query_params.get("autofill_extracted") @@ -339,7 +445,7 @@ def save_extracted_fields_route( @router.get("/{document_id}", response_class=HTMLResponse) -def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)): +def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)): document = ( db.query(Document) .options( @@ -381,6 +487,8 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge extracted_form = _extracted_field_form_values(document, request) current_extracted = get_current_extracted_fields(document) + queue_nav = _get_queue_navigation(db, document) + return templates.TemplateResponse( request=request, @@ -388,6 +496,10 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge context={ "request": request, "document": document, + "prev_doc": queue_nav.get("prev_doc"), + "next_doc": queue_nav.get("next_doc"), + "next_ocr_doc": queue_nav.get("next_ocr") or queue_nav.get("next_ocr_doc"), + "next_fields_doc": queue_nav.get("next_fields") or queue_nav.get("next_fields_doc"), "raw_ocr": raw_ocr, "reviewed_ocr": reviewed_ocr, "review_text_value": review_text_value, diff --git a/app/static/app.css b/app/static/app.css new file mode 100644 index 0000000..3430237 --- /dev/null +++ b/app/static/app.css @@ -0,0 +1,510 @@ +:root { + --bg: #f3f5f9; + --panel: #ffffff; + --panel-muted: #f8fafc; + --border: #d8dee8; + --text: #1f2937; + --text-muted: #6b7280; + --accent: #2563eb; + --accent-soft: #dbeafe; + --success: #166534; + --success-soft: #dcfce7; + --warn: #92400e; + --warn-soft: #fef3c7; + --danger: #991b1b; + --danger-soft: #fee2e2; + --shadow: 0 1px 2px rgba(0,0,0,0.06), 0 8px 20px rgba(0,0,0,0.04); + --radius: 12px; + --sans: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + --mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; + --rail-closed: 52px; + --rail-open: 230px; +} + +* { box-sizing: border-box; } + +html, body { + margin: 0; + padding: 0; + min-height: 100%; + background: var(--bg); + color: var(--text); + font-family: var(--sans); +} + +body { + min-height: 100vh; +} + +a { + color: var(--accent); + text-decoration: none; +} +a:hover { + text-decoration: underline; +} + +.app-shell { + min-height: 100vh; +} + +.sidebar { + position: fixed; + inset: 0 auto 0 0; + width: var(--rail-closed); + background: #08142f; + color: #e5e7eb; + overflow-x: hidden; + overflow-y: auto; + padding: 0.75rem 0.45rem 1rem 0.45rem; + z-index: 100; + transition: width 0.18s ease; +} + +.app-shell.nav-open .sidebar { + width: var(--rail-open); +} + +.main { + margin-left: var(--rail-closed); + min-height: 100vh; + padding: 1.25rem; + transition: margin-left 0.18s ease; +} + +.app-shell.nav-open .main { + margin-left: var(--rail-open); +} + +.sidebar-top { + display: flex; + align-items: center; + gap: 0.7rem; + min-height: 36px; + margin-bottom: 1rem; +} + +.sidebar-toggle { + width: 24px; + height: 18px; + display: flex; + flex-direction: column; + justify-content: space-between; + cursor: pointer; + flex: 0 0 24px; + margin-left: 2px; +} + +.sidebar-toggle span { + display: block; + width: 100%; + height: 2px; + background: #94a3b8; + border-radius: 999px; +} + +.sidebar-toggle:hover span { + background: #e2e8f0; +} + +.brand { + font-weight: 700; + font-size: 1rem; + white-space: nowrap; + display: none; +} + +.sidebar-section-title { + color: #94a3b8; + font-size: 0.72rem; + text-transform: uppercase; + letter-spacing: 0.08em; + margin: 1rem 0 0.5rem 0; + white-space: nowrap; + display: none; +} + +.nav-list { + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.nav-link { + display: flex; + align-items: center; + gap: 0.65rem; + color: #e5e7eb; + border-radius: 10px; + padding: 0.72rem 0.6rem; + white-space: nowrap; + overflow: hidden; +} + +.nav-link:hover { + background: rgba(255,255,255,0.08); + text-decoration: none; +} + +.nav-link.active { + background: rgba(59,130,246,0.28); + color: #fff; +} + +.nav-link-short { + display: inline-flex; + min-width: 1rem; + justify-content: center; + font-weight: 600; +} + +.nav-link-text { + display: none; +} + +.app-shell.nav-open .brand, +.app-shell.nav-open .sidebar-section-title, +.app-shell.nav-open .nav-link-text { + display: initial; +} + +.app-shell:not(.nav-open) .nav-link { + justify-content: center; +} + +.topbar { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 1rem; + margin-bottom: 1rem; +} + +.page-title { + margin: 0; + font-size: 1.7rem; +} + +.page-subtitle { + margin: 0.25rem 0 0 0; + color: var(--text-muted); + font-size: 0.95rem; +} + +.card { + background: var(--panel); + border: 1px solid var(--border); + border-radius: var(--radius); + box-shadow: var(--shadow); + padding: 1rem; + margin-bottom: 1rem; +} + +.card-title { + margin: 0 0 0.75rem 0; + font-size: 1.05rem; +} + +.button-row { + display: flex; + flex-wrap: wrap; + gap: 0.65rem; + align-items: center; +} + +button, +.button-link { + appearance: none; + border: 1px solid var(--border); + background: var(--panel); + color: var(--text); + border-radius: 10px; + padding: 0.65rem 0.95rem; + cursor: pointer; + font: inherit; +} + +button:hover, +.button-link:hover { + background: #f3f4f6; + text-decoration: none; +} + +button.primary, +.button-link.primary { + background: var(--accent); + color: white; + border-color: var(--accent); +} + +button.danger { + background: var(--danger-soft); + color: var(--danger); + border-color: #fecaca; +} + +.badges { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.badge { + display: inline-flex; + align-items: center; + border-radius: 999px; + padding: 0.28rem 0.7rem; + font-size: 0.82rem; + border: 1px solid var(--border); + background: var(--panel-muted); + color: var(--text); +} + +.badge.reviewed { + background: var(--success-soft); + color: var(--success); + border-color: #bbf7d0; +} + +.badge.pending { + background: var(--warn-soft); + color: var(--warn); + border-color: #fde68a; +} + +.badge.trashed { + background: var(--danger-soft); + color: var(--danger); + border-color: #fecaca; +} + +.table-wrap { + overflow: auto; +} + +table { + width: 100%; + border-collapse: collapse; + font-size: 0.95rem; +} + +th, td { + text-align: left; + vertical-align: top; + border-bottom: 1px solid var(--border); + padding: 0.7rem 0.6rem; +} + +th { + color: var(--text-muted); + font-weight: 600; + font-size: 0.83rem; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.grid-2 { + display: grid; + grid-template-columns: minmax(0, 1fr) minmax(0, 1fr); + gap: 1rem; + align-items: start; +} + +.meta-grid { + display: grid; + grid-template-columns: repeat(2, minmax(220px, 1fr)); + gap: 0.65rem 1rem; +} + +.meta-item { + background: var(--panel-muted); + border: 1px solid var(--border); + border-radius: 10px; + padding: 0.65rem 0.8rem; +} + +.meta-label { + display: block; + color: var(--text-muted); + font-size: 0.78rem; + margin-bottom: 0.25rem; +} + +.preview-frame { + width: 100%; + height: 950px; + border: 1px solid var(--border); + border-radius: 10px; + background: white; +} + +.preview-image { + width: 100%; + max-height: 950px; + object-fit: contain; + border: 1px solid var(--border); + border-radius: 10px; + background: white; +} + +.sticky-actions { + position: sticky; + top: 0; + z-index: 20; + background: rgba(243,245,249,0.94); + padding: 0.25rem 0 0.75rem 0; +} + +.form-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 0.85rem 1rem; +} + +.form-field { + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.form-field.full { + grid-column: 1 / -1; +} + +label { + font-size: 0.86rem; + color: var(--text-muted); +} + +input[type="text"], +input[type="date"], +textarea { + width: 100%; + border: 1px solid var(--border); + background: white; + border-radius: 10px; + padding: 0.7rem 0.75rem; + font: inherit; + color: var(--text); +} + +textarea { + font-family: var(--mono); + line-height: 1.45; +} + +.editor-wrap { + display: grid; + grid-template-columns: 52px 1fr; + gap: 0.5rem; + align-items: start; +} + +.line-numbers { + font-family: var(--mono); + white-space: pre; + text-align: right; + color: var(--text-muted); + user-select: none; + padding-top: 0.75rem; + line-height: 1.45; +} + +pre.codeblock { + white-space: pre-wrap; + word-break: break-word; + font-family: var(--mono); + background: #f8fafc; + border: 1px solid var(--border); + border-radius: 10px; + padding: 0.8rem; + max-height: 18rem; + overflow: auto; +} + +.error-box { + background: var(--danger-soft); + color: var(--danger); + border: 1px solid #fecaca; + border-radius: 10px; + padding: 0.9rem 1rem; + margin-bottom: 1rem; +} + +.empty-state { + color: var(--text-muted); + padding: 0.4rem 0; +} + +.mobile-header, +.sidebar-overlay { + display: none !important; +} + +@media (max-width: 1100px) { + .grid-2, + .form-grid, + .meta-grid { + grid-template-columns: 1fr; + } + + .preview-frame { + height: 720px; + } +} + + +.doc-header-sticky { + position: sticky; + top: 0; + z-index: 30; + background: rgba(243,245,249,0.96); + backdrop-filter: blur(8px); + padding-bottom: 0.75rem; + margin-bottom: 1rem; +} + +.tabbar { + display: flex; + gap: 0.5rem; + flex-wrap: wrap; + margin-bottom: 1rem; +} + +.tab-button { + appearance: none; + border: 1px solid var(--border); + background: var(--panel); + color: var(--text); + border-radius: 999px; + padding: 0.5rem 0.85rem; + cursor: pointer; + font: inherit; +} + +.tab-button.active { + background: var(--accent); + color: #fff; + border-color: var(--accent); +} + +.tab-panel { + display: none; +} + +.tab-panel.active { + display: block; +} + +.queue-nav-row { + display: flex; + gap: 0.65rem; + flex-wrap: wrap; + align-items: center; + margin-top: 0.75rem; +} + +.queue-label { + color: var(--text-muted); + font-size: 0.9rem; +} diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html index 6a74f71..19d9fc5 100644 --- a/app/templates/documents/detail.html +++ b/app/templates/documents/detail.html @@ -3,207 +3,390 @@ {{ document.document_id }} + -

Back to documents

- -

{{ document.document_id }}

- - {% if error == "line_count_mismatch" %} -
- Could not save reviewed OCR because line count did not match OCR layout. - Expected {{ error_expected }}, got {{ error_actual }}. +
+ -
- -
- -

Document metadata

-
    -
  • Type: {{ document.document_type }}
  • -
  • Source path: {{ document.source_path }}
  • -
  • Current path: {{ document.current_path }}
  • -
  • Share path: {{ document.share_path or "" }}
  • -
  • App URL: {{ app_url }}
  • -
  • Original filename: {{ document.original_filename }}
  • -
  • Canonical filename: {{ document.canonical_filename }}
  • -
  • MIME type: {{ document.mime_type }}
  • -
  • File size: {{ document.file_size }}
  • -
  • Page count: {{ document.page_count }}
  • -
  • Storage status: {{ document.storage_status }}
  • -
  • Review status: {{ document.review_status }}
  • -
  • Created at: {{ document.created_at }}
  • -
  • Updated at: {{ document.updated_at }}
  • -
- -

Saved PDF scaffolds

-
- -
-
- -
- -

Document preview

- {% if file_url %} - {% if document.mime_type == "application/pdf" %} - - {% elif document.mime_type in ["image/jpeg", "image/png"] %} - Document image - {% else %} -

Open file

+
+ {% if error == "line_count_mismatch" %} +
+ Could not save reviewed OCR because line count did not match OCR layout. + Expected {{ error_expected }}, got {{ error_actual }}. +
+ {% elif error == "save_ocr_corrected_failed" %} +
+ Could not save OCR-corrected PDF. Check that reviewed OCR line count matches raw OCR line count. +
+ {% elif error == "rerun_ocr_failed" %} +
OCR rerun failed.
+ {% elif error == "save_field_enriched_failed" %} +
Could not save field-enriched PDF.
{% endif %} - {% else %} -

No preview available.

- {% endif %} -

Document versions

- {% if document.versions %} -
    - {% for version in document.versions %} -
  • - v{{ version.version_number }} — - {{ version.version_type }} — - {{ version.file_path }} — - {{ version.created_at }} - {% if version.notes %}
    {{ version.notes }}{% endif %} -
  • - {% endfor %} -
- {% else %} -

No versions found.

- {% endif %} +
+
+
+

{{ document.document_id }}

+

{{ document.original_filename or document.canonical_filename or document.document_type }}

+
+
+ {{ document.review_status }} + {{ document.document_type }} + {{ document.mime_type }} +
+
-

Raw OCR

-
- -
+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
- {% if raw_ocr %} -

- Text version: v{{ raw_ocr.version_number }}
- OCR engine: {{ raw_ocr.ocr_engine or "unknown" }}
- OCR engine version: {{ raw_ocr.ocr_engine_version or "unknown" }}
- Rerun source: {{ raw_ocr.rerun_source or "unknown" }}
- Quality score: {{ raw_ocr.quality_score if raw_ocr.quality_score is not none else "not scored yet" }}
- Quality flags: {{ raw_ocr.quality_flags if raw_ocr.quality_flags else [] }}
- Quality note: {{ raw_ocr.quality_note or "" }} -

-
{{ raw_ocr.text_content }}
- {% else %} -

No raw OCR text found.

- {% endif %} - -

Reviewed OCR

- {% if reviewed_ocr %} -

- Current reviewed version saved at {{ reviewed_ocr.created_at }} — - v{{ reviewed_ocr.version_number }} -

- {% else %} -

No reviewed OCR saved yet.

- {% endif %} - -

- Expected OCR lines: {{ expected_line_count }}
- Current editor lines: {{ actual_line_count }} -
- Line count mismatch may affect corrected PDF layout. - -

- -
-
- +
+ {% if prev_doc %} + ← Previous + {% endif %} + {% if next_doc %} + Next → + {% endif %} + {% if next_ocr_doc %} + Next OCR review + {% endif %} + {% if next_fields_doc %} + Next field extraction + {% endif %} +
+
-
-
{% for n in line_numbers %}{{ n }} +
+
+
+

Document preview

+ {% if file_url %} + {% if document.mime_type == "application/pdf" %} + + {% elif document.mime_type in ["image/jpeg", "image/png"] %} + Document image + {% else %} +

Open file

+ {% endif %} + {% else %} +

No preview available.

+ {% endif %} +
+
+ +
+
+
+ + + + +
+ +
+

Reviewed OCR

+ {% if reviewed_ocr %} +

Current reviewed version saved at {{ reviewed_ocr.created_at }} — v{{ reviewed_ocr.version_number }}

+ {% else %} +

No reviewed OCR saved yet.

+ {% endif %} + +

+ Expected OCR lines: {{ expected_line_count }}
+ Current editor lines: {{ actual_line_count }}
+ + Line count mismatch may affect corrected PDF layout. + +

+ + +
+ +
+
{% for n in line_numbers %}{{ n }} {% endfor %}
- + +
+
+ +
+ +
+ {% for flag in quality_flag_options %} + + {% endfor %} +
+
+ +
+ + +
+ +
+ +
+ +
+ +
+

Extracted fields

+ + {% if current_extracted %} +

Current extracted fields last updated at {{ current_extracted.updated_at }}

+ {% else %} +

No extracted fields saved yet.

+ {% endif %} + +
+ +
+ +
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+ +
+
+
+ +
+

Document versions

+ {% if document.versions %} +
+ + + + + + + + + + + + {% for version in document.versions %} + + + + + + + + {% endfor %} + +
VersionTypePathCreatedNotes
v{{ version.version_number }}{{ version.version_type }}{{ version.file_path }}{{ version.created_at }}{{ version.notes or "" }}
+
+ {% else %} +

No versions found.

+ {% endif %} +
+ +
+

Raw OCR

+ {% if raw_ocr %} +
+
Text versionv{{ raw_ocr.version_number }}
+
OCR engine{{ raw_ocr.ocr_engine or "unknown" }}
+
Engine version{{ raw_ocr.ocr_engine_version or "unknown" }}
+
Rerun source{{ raw_ocr.rerun_source or "unknown" }}
+
Quality score{{ raw_ocr.quality_score if raw_ocr.quality_score is not none else "not scored yet" }}
+
Quality note{{ raw_ocr.quality_note or "" }}
+
+

Quality flags: {{ raw_ocr.quality_flags if raw_ocr and raw_ocr.quality_flags else [] }}

+
{{ raw_ocr.text_content }}
+ {% else %} +

No raw OCR text found.

+ {% endif %} +
+
+
-

Quality flags

-
- {% for flag in quality_flag_options %} - - {% endfor %} +
+

Metadata

+
+
Type{{ document.document_type }}
+
Review status{{ document.review_status }}
+
Source path{{ document.source_path }}
+
Current path{{ document.current_path }}
+
Original filename{{ document.original_filename }}
+
Canonical filename{{ document.canonical_filename }}
+
MIME type{{ document.mime_type }}
+
File size{{ document.file_size }}
+
Page count{{ document.page_count }}
+
Share path{{ document.share_path or "" }}
+
Created at{{ document.created_at }}
+
Updated at{{ document.updated_at }}
+
+
+
-

Quality note

-
- -
+ - -

Extracted fields

- - {% if current_extracted %} -

Current extracted fields last updated at {{ current_extracted.updated_at }}

- {% else %} -

No extracted fields saved yet.

- {% endif %} - -
- - -
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
-
- -
-
- -
-
- + } +})(); + diff --git a/app/templates/documents/detail.html.bak.20260404_073442 b/app/templates/documents/detail.html.bak.20260404_073442 new file mode 100644 index 0000000..085f84d --- /dev/null +++ b/app/templates/documents/detail.html.bak.20260404_073442 @@ -0,0 +1,383 @@ + + + + + {{ document.document_id }} + + + +

+ Back to documents | + Open review queue | + Open trash +

+ +

{{ document.document_id }}

+ + {% if error == "line_count_mismatch" %} +
+ Could not save reviewed OCR because line count did not match OCR layout. + Expected {{ error_expected }}, got {{ error_actual }}. +
+ {% elif error == "save_ocr_corrected_failed" %} +
+ Could not save OCR-corrected PDF. Check that reviewed OCR line count matches raw OCR line count. +
+ {% elif error == "rerun_ocr_failed" %} +
+ OCR rerun failed. +
+ {% elif error == "save_field_enriched_failed" %} +
+ Could not save field-enriched PDF. +
+ {% endif %} + +
+ +
+ +
+

Document metadata

+
    +
  • Type: {{ document.document_type }}
  • +
  • Source path: {{ document.source_path }}
  • +
  • Current path: {{ document.current_path }}
  • +
  • Share path: {{ document.share_path or "" }}
  • +
  • App URL: {{ app_url }}
  • +
  • Original filename: {{ document.original_filename }}
  • +
  • Canonical filename: {{ document.canonical_filename }}
  • +
  • MIME type: {{ document.mime_type }}
  • +
  • File size: {{ document.file_size }}
  • +
  • Page count: {{ document.page_count }}
  • +
  • Storage status: {{ document.storage_status }}
  • +
  • Review status: {{ document.review_status }}
  • +
  • Created at: {{ document.created_at }}
  • +
  • Updated at: {{ document.updated_at }}
  • +
+
+ +
+

Saved PDF outputs

+
+ +
+
+ +
+
+ +
+
+
+

Document preview

+
+ {% if file_url %} + {% if document.mime_type == "application/pdf" %} + + {% elif document.mime_type in ["image/jpeg", "image/png"] %} + Document image + {% else %} +

Open file

+ {% endif %} + {% else %} +

No preview available.

+ {% endif %} +
+
+ +
+

Document versions

+ {% if document.versions %} + + + + + + + + + + + + {% for version in document.versions %} + + + + + + + + {% endfor %} + +
VersionTypePathCreatedNotes
v{{ version.version_number }}{{ version.version_type }}{{ version.file_path }}{{ version.created_at }}{{ version.notes or "" }}
+ {% else %} +

No versions found.

+ {% endif %} +
+
+ +
+
+

Raw OCR

+
+ +
+ + {% if raw_ocr %} +

+ Text version: v{{ raw_ocr.version_number }}
+ OCR engine: {{ raw_ocr.ocr_engine or "unknown" }}
+ OCR engine version: {{ raw_ocr.ocr_engine_version or "unknown" }}
+ Rerun source: {{ raw_ocr.rerun_source or "unknown" }}
+ Quality score: {{ raw_ocr.quality_score if raw_ocr.quality_score is not none else "not scored yet" }}
+ Quality flags: {{ raw_ocr.quality_flags if raw_ocr.quality_flags else [] }}
+ Quality note: {{ raw_ocr.quality_note or "" }} +

+ {% else %} +

No raw OCR text found.

+ {% endif %} +
+ +
+

Reviewed OCR

+ {% if reviewed_ocr %} +

+ Current reviewed version saved at {{ reviewed_ocr.created_at }} — + v{{ reviewed_ocr.version_number }} +

+ {% else %} +

No reviewed OCR saved yet.

+ {% endif %} + +

+ Expected OCR lines: {{ expected_line_count }}
+ Current editor lines: {{ actual_line_count }} +
+ Line count mismatch may affect corrected PDF layout. + +

+ +
+
+ +
+ +
+
{% for n in line_numbers %}{{ n }} +{% endfor %}
+ +
+ +

Quality flags

+
+ {% for flag in quality_flag_options %} + + {% endfor %} +
+ +

Quality note

+
+ +
+ +
+ +
+
+
+ +
+

Extracted fields

+ + {% if current_extracted %} +

Current extracted fields last updated at {{ current_extracted.updated_at }}

+ {% else %} +

No extracted fields saved yet.

+ {% endif %} + +
+ + +
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+
+ +
+
+ +
+
+
+
+
+ + + + diff --git a/app/templates/documents/list.html b/app/templates/documents/list.html index 4c736e5..53f13bd 100644 --- a/app/templates/documents/list.html +++ b/app/templates/documents/list.html @@ -3,27 +3,92 @@ Documents + -

Open trash

-

Open review queue

-

Documents

+
+ - {% if documents %} -
    - {% for doc in documents %} -
  • - {{ doc.document_id }} - — {{ doc.document_type or "unknown" }} - — {{ doc.review_status }} - — {{ doc.created_at }} -
  • - {% endfor %} -
- {% else %} -

No documents yet.

- {% endif %} +
+
+
+

Documents

+

Active documents available for review and processing.

+
+
+ + + +
+

All documents

+ {% if documents %} +
+ + + + + + + + + + + + {% for doc in documents %} + + + + + + + + {% endfor %} + +
DocumentTypeReview statusCurrent pathUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.current_path }}{{ doc.updated_at }}
+
+ {% else %} +

No documents found.

+ {% endif %} +
+
+
+ + diff --git a/app/templates/ingest/index.html b/app/templates/ingest/index.html index f43711b..5df07bd 100644 --- a/app/templates/ingest/index.html +++ b/app/templates/ingest/index.html @@ -3,51 +3,102 @@ Ingest + -

Ingest

+
+ -

Inbox ingest

-

Configured inbox: {{ inbox_root }}

-
- -
+
+
+
+

Ingest

+

Upload files or ingest from server-side paths.

+
+
-
+
+

Upload files

+
+
+ + +
+
+ +
+
+
-

Server-side ingest

+
+

Server-side ingest

-

Ingest one server file

-
-
- -

- -
+
+
+ + +
+
+ +
+
-

Ingest one server directory

-
-
- -

- -

- -
+
+
+ + +
+
+ +
+
+ +
+
-
+
+
+ + +
+
+ +
+
+
+
+
-

Upload ingest

-
-
- -

- -
+ diff --git a/app/templates/ingest/result.html b/app/templates/ingest/result.html index 100febb..3158dd4 100644 --- a/app/templates/ingest/result.html +++ b/app/templates/ingest/result.html @@ -3,37 +3,96 @@ Ingest Result + -

Ingest Result

+
+ -

- Back to ingest | - View documents -

+
+
+
+

Ingest Result

+

{{ message }}

+
+
- {% if errors %} -

Errors

-
    - {% for error in errors %} -
  • {{ error }}
  • - {% endfor %} -
- {% endif %} + - {% if documents %} -

Documents

-
    - {% for doc in documents %} -
  • - {{ doc.document_id }} - — {{ doc.original_filename }} - — {{ doc.current_path }} -
  • - {% endfor %} -
- {% endif %} + {% if documents %} +
+

Documents

+
+ + + + + + + + + + {% for doc in documents %} + + + + + + {% endfor %} + +
DocumentFilenameCurrent path
{{ doc.document_id }}{{ doc.original_filename or doc.canonical_filename }}{{ doc.current_path }}
+
+
+ {% endif %} + + {% if errors %} +
+

Errors

+
    + {% for err in errors %} +
  • {{ err }}
  • + {% endfor %} +
+
+ {% endif %} +
+
+ + diff --git a/app/templates/queue/index.html b/app/templates/queue/index.html index 4c5f868..0b17337 100644 --- a/app/templates/queue/index.html +++ b/app/templates/queue/index.html @@ -3,106 +3,131 @@ Review Queue - + -

Open trash

-

Back to documents

+
+ -
- {% if next_ocr %} - Next needing OCR review - {% endif %} - {% if next_ocr and next_fields %} | {% endif %} - {% if next_fields %} - Next needing field extraction - {% endif %} -
+
+
+
+

Review Queue

+

Work through OCR review and field extraction in order.

+
+
-

Needs OCR review ({{ needs_ocr_review|length }})

- {% if needs_ocr_review %} - - - - - - - - - - - {% for doc in needs_ocr_review %} - - - - - - - {% endfor %} - -
DocumentTypeReview statusUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.updated_at }}
- {% else %} -

No documents currently need OCR review.

- {% endif %} +
+
+ {% if next_ocr %} + Next needing OCR review + {% endif %} + {% if next_fields %} + Next needing field extraction + {% endif %} +
+
-

Needs field extraction ({{ needs_field_extraction|length }})

- {% if needs_field_extraction %} - - - - - - - - - - - {% for doc in needs_field_extraction %} - - - - - - - {% endfor %} - -
DocumentTypeReview statusUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.updated_at }}
- {% else %} -

No reviewed documents are waiting on field extraction.

- {% endif %} +
+

Needs OCR review ({{ needs_ocr_review|length }})

+ {% if needs_ocr_review %} +
+ + + + {% for doc in needs_ocr_review %} + + + + + + + {% endfor %} + +
DocumentTypeReview statusUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.updated_at }}
+
+ {% else %} +

No documents currently need OCR review.

+ {% endif %} +
-

Recently updated

- {% if recently_updated %} - - - - - - - - - - - - {% for doc in recently_updated %} - - - - - - - - {% endfor %} - -
DocumentTypeReview statusCurrent pathUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.current_path }}{{ doc.updated_at }}
- {% endif %} +
+

Needs field extraction ({{ needs_field_extraction|length }})

+ {% if needs_field_extraction %} +
+ + + + {% for doc in needs_field_extraction %} + + + + + + + {% endfor %} + +
DocumentTypeReview statusUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.updated_at }}
+
+ {% else %} +

No reviewed documents are waiting on field extraction.

+ {% endif %} +
+ +
+

Recently updated

+ {% if recently_updated %} +
+ + + + {% for doc in recently_updated %} + + + + + + + + {% endfor %} + +
DocumentTypeReview statusCurrent pathUpdated
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.current_path }}{{ doc.updated_at }}
+
+ {% endif %} +
+
+
+ + diff --git a/app/templates/trash/index.html b/app/templates/trash/index.html index 3265fd0..9bae0c2 100644 --- a/app/templates/trash/index.html +++ b/app/templates/trash/index.html @@ -3,53 +3,94 @@ Trash - + -

Back to documents | Open review queue

+
+ - {% if documents %} - - - - - - - - - - - - - {% for doc in documents %} - - - - - - - - - {% endfor %} - -
DocumentTypeReview statusTrashed atCurrent pathActions
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.trashed_at }}{{ doc.current_path }} -
- -
-
- -
-
- {% else %} -

Trash is empty.

- {% endif %} +
+
+
+

Trash

+

Soft-deleted documents can be restored or removed permanently.

+
+
+ +
+ {% if documents %} +
+ + + + + + + + + + + + + {% for doc in documents %} + + + + + + + + + {% endfor %} + +
DocumentTypeReview statusTrashed atCurrent pathActions
{{ doc.document_id }}{{ doc.document_type }}{{ doc.review_status }}{{ doc.trashed_at }}{{ doc.current_path }} +
+
+ +
+
+ +
+
+
+
+ {% else %} +

Trash is empty.

+ {% endif %} +
+
+
+ +