diff --git a/app/logic/document_outputs.py b/app/logic/document_outputs.py
index 26156e6..a519693 100644
--- a/app/logic/document_outputs.py
+++ b/app/logic/document_outputs.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import hashlib
+import os
import shutil
import subprocess
import tempfile
@@ -28,6 +29,45 @@ def sha256_for_file(path: Path) -> str:
return hasher.hexdigest()
def compress_pdf_with_ghostscript(path: Path) -> bool:
    """Recompress the PDF at *path* in place using Ghostscript.

    The ``gs`` executable rewrites the file with the ``pdfwrite`` device and
    the ``/ebook`` preset into a uniquely named temporary file in the same
    directory as *path*. The original is replaced only when the rewritten file
    is non-empty and strictly smaller; otherwise it is left untouched.

    This is a best-effort step: a missing ``gs`` binary, a non-zero exit
    status, or a filesystem error is reported through the return value rather
    than raised. The temporary file is always removed before returning.

    Args:
        path: Location of the PDF to compress.

    Returns:
        True when Ghostscript ran successfully and produced a non-empty file
        (whether or not that file replaced the original), False otherwise.
    """
    try:
        # A unique name in the target directory avoids clobbering an existing
        # "<stem>.compressed.pdf" and keeps concurrent runs from colliding,
        # while still allowing an atomic same-filesystem os.replace().
        fd, tmp_name = tempfile.mkstemp(
            prefix=f"{path.stem}.",
            suffix=".compressed.pdf",
            dir=path.parent,
        )
        os.close(fd)
    except OSError:
        return False

    compressed_path = Path(tmp_name)
    try:
        # Output is captured as bytes: it is never inspected, so decoding it
        # would only add a way to fail.
        subprocess.run(
            [
                "gs",
                "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.4",
                "-dPDFSETTINGS=/ebook",
                "-dNOPAUSE",
                "-dQUIET",
                "-dBATCH",
                f"-sOutputFile={compressed_path}",
                str(path),
            ],
            check=True,
            capture_output=True,
        )

        compressed_size = compressed_path.stat().st_size
        if compressed_size == 0:
            return False

        try:
            original_size = path.stat().st_size
        except FileNotFoundError:
            original_size = 0

        # Only replace if compression actually helped.
        if compressed_size < original_size:
            # mkstemp creates the file owner-only; carry over the original's
            # permission bits so the swap does not change who can read it.
            shutil.copymode(path, compressed_path)
            os.replace(compressed_path, path)

        return True
    except (OSError, subprocess.SubprocessError):
        return False
    finally:
        # Covers every exit path, including the empty-output early return.
        try:
            compressed_path.unlink(missing_ok=True)
        except OSError:
            pass
+
+
def get_next_document_version_number(db: Session, document_id: int) -> int:
max_version = (
db.query(func.max(DocumentVersion.version_number))
@@ -188,6 +228,8 @@ def create_ocr_corrected_pdf_version(db: Session, document: Document) -> Documen
c.save()
shutil.copy2(overlay_pdf_path, out_path)
+ compress_pdf_with_ghostscript(out_path)
+
file_hash = sha256_for_file(out_path)
version = DocumentVersion(
diff --git a/app/main.py b/app/main.py
index e68be00..d96da4b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -8,6 +8,7 @@ from app.routes.queue import router as queue_router
from app.routes.trash import router as trash_router
app = FastAPI(title="document-processor")
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), name="files")
diff --git a/app/routes/documents.py b/app/routes/documents.py
index 9f5b381..a91c269 100644
--- a/app/routes/documents.py
+++ b/app/routes/documents.py
@@ -25,6 +25,50 @@ from app.models.text_version import TextVersion
router = APIRouter(prefix="/documents", tags=["documents"])
BASE_DIR = Path(__file__).resolve().parent.parent
+
+
+def _build_queue_navigation(db: Session, document: Document, queue: str | None) -> dict:
+ if not queue:
+ return {"queue": None, "prev_doc": None, "next_doc": None}
+
+ base = db.query(Document).filter(Document.is_trashed.is_(False))
+
+ if queue == "ocr":
+ docs = (
+ base.filter(Document.review_status != "reviewed")
+ .order_by(Document.created_at.asc())
+ .all()
+ )
+ elif queue == "fields":
+ docs = (
+ base.filter(Document.review_status == "reviewed")
+ .all()
+ )
+ filtered = []
+ for d in docs:
+ has_fields = bool(getattr(d, "extracted_fields", None))
+ if not has_fields:
+ filtered.append(d)
+ docs = sorted(filtered, key=lambda d: d.updated_at or d.created_at)
+ elif queue == "recent":
+ docs = (
+ base.order_by(Document.updated_at.desc())
+ .all()
+ )
+ else:
+ return {"queue": None, "prev_doc": None, "next_doc": None}
+
+ ids = [d.document_id for d in docs]
+ if document.document_id not in ids:
+ return {"queue": queue, "prev_doc": None, "next_doc": None}
+
+ idx = ids.index(document.document_id)
+ prev_doc = docs[idx - 1] if idx > 0 else None
+ next_doc = docs[idx + 1] if idx < len(docs) - 1 else None
+
+ return {"queue": queue, "prev_doc": prev_doc, "next_doc": next_doc}
+
+
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
QUALITY_FLAG_OPTIONS = [
@@ -117,6 +161,68 @@ def _apply_reviewed_lines_to_layout(base_layout: dict | None, reviewed_text: str
return new_layout
+
def _get_queue_navigation(db: Session, document: Document) -> dict:
    """Collect the navigation targets shown on the document detail page.

    Args:
        db: Session used to run the lookups.
        document: The document currently being viewed.

    Returns:
        A dict with four keys, each a ``Document`` or ``None``:

        * ``"prev_doc"`` / ``"next_doc"`` - neighbours of *document* among all
          non-trashed documents ordered by ``created_at`` ascending.
        * ``"next_ocr_doc"`` - the oldest (by ``created_at``) non-trashed
          document other than *document* whose review status is not
          ``"reviewed"``.
        * ``"next_fields_doc"`` - the first non-trashed reviewed document
          other than *document*, by ``updated_at`` ascending, that has no
          extracted fields.
    """
    current_id = document.document_id

    active_docs = (
        db.query(Document)
        .filter(Document.is_trashed.is_(False))
        .order_by(Document.created_at.asc())
        .all()
    )

    prev_doc = None
    next_doc = None
    for idx, candidate in enumerate(active_docs):
        if candidate.document_id == current_id:
            if idx > 0:
                prev_doc = active_docs[idx - 1]
            if idx < len(active_docs) - 1:
                next_doc = active_docs[idx + 1]
            break

    # Only one row is needed, so let the database skip the current document
    # and stop at the first match instead of loading the whole pending queue.
    next_ocr = (
        db.query(Document)
        .filter(Document.is_trashed.is_(False))
        .filter(Document.review_status != "reviewed")
        .filter(Document.document_id != current_id)
        .order_by(Document.created_at.asc())
        .first()
    )

    reviewed_docs = (
        db.query(Document)
        .options(selectinload(Document.extracted_fields))
        .filter(Document.is_trashed.is_(False))
        .filter(Document.review_status == "reviewed")
        .filter(Document.document_id != current_id)
        .order_by(Document.updated_at.asc())
        .all()
    )
    next_fields = next((d for d in reviewed_docs if not d.extracted_fields), None)

    return {
        "prev_doc": prev_doc,
        "next_doc": next_doc,
        "next_ocr_doc": next_ocr,
        "next_fields_doc": next_fields,
    }
+
+
def _extracted_field_form_values(document: Document, request: Request) -> dict:
current = get_current_extracted_fields(document)
auto = request.query_params.get("autofill_extracted")
@@ -339,7 +445,7 @@ def save_extracted_fields_route(
@router.get("/{document_id}", response_class=HTMLResponse)
-def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)):
+def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)):
document = (
db.query(Document)
.options(
@@ -381,6 +487,8 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
extracted_form = _extracted_field_form_values(document, request)
current_extracted = get_current_extracted_fields(document)
+ queue_nav = _get_queue_navigation(db, document)
+
return templates.TemplateResponse(
request=request,
@@ -388,6 +496,10 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
context={
"request": request,
"document": document,
+ "prev_doc": queue_nav.get("prev_doc"),
+ "next_doc": queue_nav.get("next_doc"),
+ "next_ocr_doc": queue_nav.get("next_ocr") or queue_nav.get("next_ocr_doc"),
+ "next_fields_doc": queue_nav.get("next_fields") or queue_nav.get("next_fields_doc"),
"raw_ocr": raw_ocr,
"reviewed_ocr": reviewed_ocr,
"review_text_value": review_text_value,
diff --git a/app/static/app.css b/app/static/app.css
new file mode 100644
index 0000000..3430237
--- /dev/null
+++ b/app/static/app.css
@@ -0,0 +1,510 @@
+:root {
+ --bg: #f3f5f9;
+ --panel: #ffffff;
+ --panel-muted: #f8fafc;
+ --border: #d8dee8;
+ --text: #1f2937;
+ --text-muted: #6b7280;
+ --accent: #2563eb;
+ --accent-soft: #dbeafe;
+ --success: #166534;
+ --success-soft: #dcfce7;
+ --warn: #92400e;
+ --warn-soft: #fef3c7;
+ --danger: #991b1b;
+ --danger-soft: #fee2e2;
+ --shadow: 0 1px 2px rgba(0,0,0,0.06), 0 8px 20px rgba(0,0,0,0.04);
+ --radius: 12px;
+ --sans: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+ --mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
+ --rail-closed: 52px;
+ --rail-open: 230px;
+}
+
+* { box-sizing: border-box; }
+
+html, body {
+ margin: 0;
+ padding: 0;
+ min-height: 100%;
+ background: var(--bg);
+ color: var(--text);
+ font-family: var(--sans);
+}
+
+body {
+ min-height: 100vh;
+}
+
+a {
+ color: var(--accent);
+ text-decoration: none;
+}
+a:hover {
+ text-decoration: underline;
+}
+
+.app-shell {
+ min-height: 100vh;
+}
+
+.sidebar {
+ position: fixed;
+ inset: 0 auto 0 0;
+ width: var(--rail-closed);
+ background: #08142f;
+ color: #e5e7eb;
+ overflow-x: hidden;
+ overflow-y: auto;
+ padding: 0.75rem 0.45rem 1rem 0.45rem;
+ z-index: 100;
+ transition: width 0.18s ease;
+}
+
+.app-shell.nav-open .sidebar {
+ width: var(--rail-open);
+}
+
+.main {
+ margin-left: var(--rail-closed);
+ min-height: 100vh;
+ padding: 1.25rem;
+ transition: margin-left 0.18s ease;
+}
+
+.app-shell.nav-open .main {
+ margin-left: var(--rail-open);
+}
+
+.sidebar-top {
+ display: flex;
+ align-items: center;
+ gap: 0.7rem;
+ min-height: 36px;
+ margin-bottom: 1rem;
+}
+
+.sidebar-toggle {
+ width: 24px;
+ height: 18px;
+ display: flex;
+ flex-direction: column;
+ justify-content: space-between;
+ cursor: pointer;
+ flex: 0 0 24px;
+ margin-left: 2px;
+}
+
+.sidebar-toggle span {
+ display: block;
+ width: 100%;
+ height: 2px;
+ background: #94a3b8;
+ border-radius: 999px;
+}
+
+.sidebar-toggle:hover span {
+ background: #e2e8f0;
+}
+
+.brand {
+ font-weight: 700;
+ font-size: 1rem;
+ white-space: nowrap;
+ display: none;
+}
+
+.sidebar-section-title {
+ color: #94a3b8;
+ font-size: 0.72rem;
+ text-transform: uppercase;
+ letter-spacing: 0.08em;
+ margin: 1rem 0 0.5rem 0;
+ white-space: nowrap;
+ display: none;
+}
+
+.nav-list {
+ display: flex;
+ flex-direction: column;
+ gap: 0.35rem;
+}
+
+.nav-link {
+ display: flex;
+ align-items: center;
+ gap: 0.65rem;
+ color: #e5e7eb;
+ border-radius: 10px;
+ padding: 0.72rem 0.6rem;
+ white-space: nowrap;
+ overflow: hidden;
+}
+
+.nav-link:hover {
+ background: rgba(255,255,255,0.08);
+ text-decoration: none;
+}
+
+.nav-link.active {
+ background: rgba(59,130,246,0.28);
+ color: #fff;
+}
+
+.nav-link-short {
+ display: inline-flex;
+ min-width: 1rem;
+ justify-content: center;
+ font-weight: 600;
+}
+
+.nav-link-text {
+ display: none;
+}
+
+.app-shell.nav-open .brand,
+.app-shell.nav-open .sidebar-section-title,
+.app-shell.nav-open .nav-link-text {
+ display: initial;
+}
+
+.app-shell:not(.nav-open) .nav-link {
+ justify-content: center;
+}
+
+.topbar {
+ display: flex;
+ justify-content: space-between;
+ align-items: flex-start;
+ gap: 1rem;
+ margin-bottom: 1rem;
+}
+
+.page-title {
+ margin: 0;
+ font-size: 1.7rem;
+}
+
+.page-subtitle {
+ margin: 0.25rem 0 0 0;
+ color: var(--text-muted);
+ font-size: 0.95rem;
+}
+
+.card {
+ background: var(--panel);
+ border: 1px solid var(--border);
+ border-radius: var(--radius);
+ box-shadow: var(--shadow);
+ padding: 1rem;
+ margin-bottom: 1rem;
+}
+
+.card-title {
+ margin: 0 0 0.75rem 0;
+ font-size: 1.05rem;
+}
+
+.button-row {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 0.65rem;
+ align-items: center;
+}
+
+button,
+.button-link {
+ appearance: none;
+ border: 1px solid var(--border);
+ background: var(--panel);
+ color: var(--text);
+ border-radius: 10px;
+ padding: 0.65rem 0.95rem;
+ cursor: pointer;
+ font: inherit;
+}
+
+button:hover,
+.button-link:hover {
+ background: #f3f4f6;
+ text-decoration: none;
+}
+
+button.primary,
+.button-link.primary {
+ background: var(--accent);
+ color: white;
+ border-color: var(--accent);
+}
+
+button.danger {
+ background: var(--danger-soft);
+ color: var(--danger);
+ border-color: #fecaca;
+}
+
+.badges {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 0.5rem;
+}
+
+.badge {
+ display: inline-flex;
+ align-items: center;
+ border-radius: 999px;
+ padding: 0.28rem 0.7rem;
+ font-size: 0.82rem;
+ border: 1px solid var(--border);
+ background: var(--panel-muted);
+ color: var(--text);
+}
+
+.badge.reviewed {
+ background: var(--success-soft);
+ color: var(--success);
+ border-color: #bbf7d0;
+}
+
+.badge.pending {
+ background: var(--warn-soft);
+ color: var(--warn);
+ border-color: #fde68a;
+}
+
+.badge.trashed {
+ background: var(--danger-soft);
+ color: var(--danger);
+ border-color: #fecaca;
+}
+
+.table-wrap {
+ overflow: auto;
+}
+
+table {
+ width: 100%;
+ border-collapse: collapse;
+ font-size: 0.95rem;
+}
+
+th, td {
+ text-align: left;
+ vertical-align: top;
+ border-bottom: 1px solid var(--border);
+ padding: 0.7rem 0.6rem;
+}
+
+th {
+ color: var(--text-muted);
+ font-weight: 600;
+ font-size: 0.83rem;
+ text-transform: uppercase;
+ letter-spacing: 0.04em;
+}
+
+.grid-2 {
+ display: grid;
+ grid-template-columns: minmax(0, 1fr) minmax(0, 1fr);
+ gap: 1rem;
+ align-items: start;
+}
+
+.meta-grid {
+ display: grid;
+ grid-template-columns: repeat(2, minmax(220px, 1fr));
+ gap: 0.65rem 1rem;
+}
+
+.meta-item {
+ background: var(--panel-muted);
+ border: 1px solid var(--border);
+ border-radius: 10px;
+ padding: 0.65rem 0.8rem;
+}
+
+.meta-label {
+ display: block;
+ color: var(--text-muted);
+ font-size: 0.78rem;
+ margin-bottom: 0.25rem;
+}
+
+.preview-frame {
+ width: 100%;
+ height: 950px;
+ border: 1px solid var(--border);
+ border-radius: 10px;
+ background: white;
+}
+
+.preview-image {
+ width: 100%;
+ max-height: 950px;
+ object-fit: contain;
+ border: 1px solid var(--border);
+ border-radius: 10px;
+ background: white;
+}
+
+.sticky-actions {
+ position: sticky;
+ top: 0;
+ z-index: 20;
+ background: rgba(243,245,249,0.94);
+ padding: 0.25rem 0 0.75rem 0;
+}
+
+.form-grid {
+ display: grid;
+ grid-template-columns: 1fr 1fr;
+ gap: 0.85rem 1rem;
+}
+
+.form-field {
+ display: flex;
+ flex-direction: column;
+ gap: 0.35rem;
+}
+
+.form-field.full {
+ grid-column: 1 / -1;
+}
+
+label {
+ font-size: 0.86rem;
+ color: var(--text-muted);
+}
+
+input[type="text"],
+input[type="date"],
+textarea {
+ width: 100%;
+ border: 1px solid var(--border);
+ background: white;
+ border-radius: 10px;
+ padding: 0.7rem 0.75rem;
+ font: inherit;
+ color: var(--text);
+}
+
+textarea {
+ font-family: var(--mono);
+ line-height: 1.45;
+}
+
+.editor-wrap {
+ display: grid;
+ grid-template-columns: 52px 1fr;
+ gap: 0.5rem;
+ align-items: start;
+}
+
+.line-numbers {
+ font-family: var(--mono);
+ white-space: pre;
+ text-align: right;
+ color: var(--text-muted);
+ user-select: none;
+ padding-top: 0.75rem;
+ line-height: 1.45;
+}
+
+pre.codeblock {
+ white-space: pre-wrap;
+ word-break: break-word;
+ font-family: var(--mono);
+ background: #f8fafc;
+ border: 1px solid var(--border);
+ border-radius: 10px;
+ padding: 0.8rem;
+ max-height: 18rem;
+ overflow: auto;
+}
+
+.error-box {
+ background: var(--danger-soft);
+ color: var(--danger);
+ border: 1px solid #fecaca;
+ border-radius: 10px;
+ padding: 0.9rem 1rem;
+ margin-bottom: 1rem;
+}
+
+.empty-state {
+ color: var(--text-muted);
+ padding: 0.4rem 0;
+}
+
+.mobile-header,
+.sidebar-overlay {
+ display: none !important;
+}
+
+@media (max-width: 1100px) {
+ .grid-2,
+ .form-grid,
+ .meta-grid {
+ grid-template-columns: 1fr;
+ }
+
+ .preview-frame {
+ height: 720px;
+ }
+}
+
+
/* Document header that stays pinned to the top of the viewport while the
   page scrolls; content passing underneath is blurred behind the
   translucent background. */
.doc-header-sticky {
  position: sticky;
  top: 0;
  z-index: 30;
  background: rgba(243,245,249,0.96);
  /* Older Safari releases only honour the prefixed property; keep it before
     the standard one so the unprefixed form wins where both are supported. */
  -webkit-backdrop-filter: blur(8px);
  backdrop-filter: blur(8px);
  padding-bottom: 0.75rem;
  margin-bottom: 1rem;
}
+
+.tabbar {
+ display: flex;
+ gap: 0.5rem;
+ flex-wrap: wrap;
+ margin-bottom: 1rem;
+}
+
+.tab-button {
+ appearance: none;
+ border: 1px solid var(--border);
+ background: var(--panel);
+ color: var(--text);
+ border-radius: 999px;
+ padding: 0.5rem 0.85rem;
+ cursor: pointer;
+ font: inherit;
+}
+
+.tab-button.active {
+ background: var(--accent);
+ color: #fff;
+ border-color: var(--accent);
+}
+
+.tab-panel {
+ display: none;
+}
+
+.tab-panel.active {
+ display: block;
+}
+
+.queue-nav-row {
+ display: flex;
+ gap: 0.65rem;
+ flex-wrap: wrap;
+ align-items: center;
+ margin-top: 0.75rem;
+}
+
+.queue-label {
+ color: var(--text-muted);
+ font-size: 0.9rem;
+}
diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html
index 6a74f71..19d9fc5 100644
--- a/app/templates/documents/detail.html
+++ b/app/templates/documents/detail.html
@@ -3,207 +3,390 @@
{{ document.document_id }}
+
- Back to documents
-
- {{ document.document_id }}
-
- {% if error == "line_count_mismatch" %}
-
- Could not save reviewed OCR because line count did not match OCR layout.
- Expected {{ error_expected }}, got {{ error_actual }}.
+
+
-
-
-
Document metadata
-
- - Type: {{ document.document_type }}
- - Source path: {{ document.source_path }}
- - Current path: {{ document.current_path }}
- - Share path: {{ document.share_path or "" }}
- - App URL: {{ app_url }}
- - Original filename: {{ document.original_filename }}
- - Canonical filename: {{ document.canonical_filename }}
- - MIME type: {{ document.mime_type }}
- - File size: {{ document.file_size }}
- - Page count: {{ document.page_count }}
- - Storage status: {{ document.storage_status }}
- - Review status: {{ document.review_status }}
- - Created at: {{ document.created_at }}
- - Updated at: {{ document.updated_at }}
-
-
-
Saved PDF scaffolds
-
-
-
-
Document preview
- {% if file_url %}
- {% if document.mime_type == "application/pdf" %}
-
- {% elif document.mime_type in ["image/jpeg", "image/png"] %}
-

- {% else %}
-
Open file
+
+ {% if error == "line_count_mismatch" %}
+
+ Could not save reviewed OCR because line count did not match OCR layout.
+ Expected {{ error_expected }}, got {{ error_actual }}.
+
+ {% elif error == "save_ocr_corrected_failed" %}
+
+ Could not save OCR-corrected PDF. Check that reviewed OCR line count matches raw OCR line count.
+
+ {% elif error == "rerun_ocr_failed" %}
+ OCR rerun failed.
+ {% elif error == "save_field_enriched_failed" %}
+ Could not save field-enriched PDF.
{% endif %}
- {% else %}
- No preview available.
- {% endif %}
- Document versions
- {% if document.versions %}
-
- {% for version in document.versions %}
- -
- v{{ version.version_number }} —
- {{ version.version_type }} —
- {{ version.file_path }} —
- {{ version.created_at }}
- {% if version.notes %}
{{ version.notes }}{% endif %}
-
- {% endfor %}
-
- {% else %}
- No versions found.
- {% endif %}
+