feat: unified save flow, path override UI, mirror sync, and automatic pruning of old document versions
This commit is contained in:
parent
1cf42242f7
commit
c9fdf953e7
|
|
@ -0,0 +1,84 @@
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
DEFAULT_INGEST_ROOT = "/mnt/data/shared/scans/processed"
|
||||||
|
SETTINGS_FILE = Path("/mnt/storage/document-processor/settings/ingest.json")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_parent() -> None:
    """Create the directory that holds the settings file if it is missing."""
    settings_dir = SETTINGS_FILE.parent
    # parents/exist_ok make this safe to call before every write.
    settings_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_ingest_root() -> str:
    """Return the configured default ingest root.

    Reads the ``default_ingest_root`` key from the JSON settings file.
    Falls back to ``DEFAULT_INGEST_ROOT`` when the file is absent, cannot be
    read or parsed, or holds an empty/blank value.
    """
    try:
        if not SETTINGS_FILE.exists():
            return DEFAULT_INGEST_ROOT
        settings = json.loads(SETTINGS_FILE.read_text())
        configured = str(settings.get("default_ingest_root") or "").strip()
    except Exception:
        # Best effort: any problem with the settings file means "use the default".
        return DEFAULT_INGEST_ROOT
    return configured or DEFAULT_INGEST_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
def set_default_ingest_root(path_str: str) -> str:
    """Persist a new default ingest root and return the value that was stored.

    A blank or falsy ``path_str`` stores ``DEFAULT_INGEST_ROOT`` instead.
    """
    cleaned = str(path_str or "").strip() or DEFAULT_INGEST_ROOT

    _ensure_parent()
    payload = json.dumps({"default_ingest_root": cleaned}, indent=2)
    SETTINGS_FILE.write_text(payload)
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def reset_default_ingest_root() -> str:
    """Overwrite the settings file with ``DEFAULT_INGEST_ROOT`` and return it."""
    _ensure_parent()
    factory_settings = {"default_ingest_root": DEFAULT_INGEST_ROOT}
    SETTINGS_FILE.write_text(json.dumps(factory_settings, indent=2))
    return DEFAULT_INGEST_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
def browse_directory(path_str: str) -> dict:
    """Describe a server-side directory for the ingest path browser.

    Args:
        path_str: Path to inspect; ``~`` is expanded and the path is resolved.

    Returns:
        A dict with the keys ``path`` (resolved path as a string), ``exists``,
        ``is_dir``, ``parent`` (empty for the filesystem root or on error),
        ``entries`` (at most 300 dicts with ``name``, ``path`` and ``is_dir``,
        directories first, then case-insensitive by name) and ``error``
        (empty string on success).

    Filesystem problems are reported through ``error`` instead of being
    raised, so a bad or unreadable path never breaks the page that renders
    the result.
    """
    result = {
        "path": str(path_str),
        "exists": False,
        "is_dir": False,
        "parent": "",
        "entries": [],
        "error": "",
    }

    try:
        # expanduser() raises RuntimeError for an unknown "~user"; resolve()
        # and the stat calls can raise OSError/ValueError on odd input.
        target = Path(path_str).expanduser().resolve()
        result["path"] = str(target)
        result["exists"] = target.exists()
        result["is_dir"] = result["exists"] and target.is_dir()
    except (OSError, RuntimeError, ValueError) as exc:
        result["error"] = f"Could not resolve path: {exc}"
        return result

    if not result["exists"]:
        result["error"] = "Directory does not exist."
        return result

    if not result["is_dir"]:
        result["error"] = "Path is not a directory."
        return result

    parent = target.parent
    if parent != target:  # the filesystem root is its own parent
        result["parent"] = str(parent)

    try:
        children = list(target.iterdir())
    except OSError as exc:
        result["error"] = f"Could not read directory: {exc}"
        return result

    # Stat every child exactly once so the sort key and the reported flag
    # always agree; a child that cannot be stat'ed is listed as a plain entry
    # rather than hiding the whole directory.
    listing = []
    for child in children:
        try:
            child_is_dir = child.is_dir()
        except OSError:
            child_is_dir = False
        listing.append((child_is_dir, child))

    listing.sort(key=lambda item: (not item[0], item[1].name.lower()))

    result["entries"] = [
        {"name": child.name, "path": str(child), "is_dir": child_is_dir}
        for child_is_dir, child in listing[:300]
    ]
    return result
|
||||||
|
|
@ -2,7 +2,60 @@ from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
def _mirror_to_secondary_owner(document, canonical_path: Path) -> Path | None:
|
||||||
|
additional = document.additional_fields[0] if getattr(document, "additional_fields", None) else None
|
||||||
|
if not additional:
|
||||||
|
return None
|
||||||
|
|
||||||
|
owner_secondary = getattr(additional, "owner_secondary", None)
|
||||||
|
if not owner_secondary:
|
||||||
|
return None
|
||||||
|
|
||||||
|
from app.logic.storage_paths import (
|
||||||
|
_split_person_name,
|
||||||
|
to_owner_filepath_name,
|
||||||
|
build_proposed_storage_path,
|
||||||
|
)
|
||||||
|
from app.core.storage_settings import get_default_save_root
|
||||||
|
|
||||||
|
first, last = _split_person_name(owner_secondary)
|
||||||
|
owner_folder = to_owner_filepath_name(first, last)
|
||||||
|
if not owner_folder:
|
||||||
|
return None
|
||||||
|
|
||||||
|
save_root = get_default_save_root()
|
||||||
|
naming_row = document.naming_fields[0] if getattr(document, "naming_fields", None) else None
|
||||||
|
|
||||||
|
mirror_path = Path(
|
||||||
|
build_proposed_storage_path(
|
||||||
|
document=document,
|
||||||
|
save_root=save_root,
|
||||||
|
naming_row=naming_row,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# replace owner segment
|
||||||
|
parts = list(mirror_path.parts)
|
||||||
|
for i, p in enumerate(parts):
|
||||||
|
if p == "records" and i + 1 < len(parts):
|
||||||
|
parts[i + 1] = owner_folder
|
||||||
|
break
|
||||||
|
|
||||||
|
mirror_path = Path(*parts)
|
||||||
|
mirror_path = mirror_path.with_name(
|
||||||
|
re.sub(r"_v\d+(?=\.[^.]+$)", "", mirror_path.name)
|
||||||
|
)
|
||||||
|
mirror_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
if canonical_path.resolve() != mirror_path.resolve():
|
||||||
|
import shutil
|
||||||
|
shutil.copy2(canonical_path, mirror_path)
|
||||||
|
|
||||||
|
return mirror_path
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -21,6 +74,38 @@ from app.models.document_version import DocumentVersion
|
||||||
from app.models.text_version import TextVersion
|
from app.models.text_version import TextVersion
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _prune_old_saved_files(db: Session, document: Document, keep_paths: set[str]) -> None:
    """Delete files of the document's recorded versions that are no longer needed.

    Every ``DocumentVersion.file_path`` for this document is a deletion
    candidate unless it resolves to one of ``keep_paths`` or to the
    document's ``source_path`` / ``original_path``. Deletion is best effort:
    a file that cannot be removed is skipped silently. Database rows are not
    modified.
    """
    protected = {str(Path(kept).resolve()) for kept in keep_paths if kept}
    origin_paths = [
        getattr(document, "source_path", None),
        getattr(document, "original_path", None),
    ]
    protected.update(str(Path(origin).resolve()) for origin in origin_paths if origin)

    versions = (
        db.query(DocumentVersion)
        .filter(DocumentVersion.document_id == document.id)
        .all()
    )

    stale_candidates: set[str] = set()
    for version in versions:
        if not version.file_path:
            continue
        try:
            stale_candidates.add(str(Path(version.file_path).resolve()))
        except Exception:
            # Keep the raw value when the path cannot be resolved.
            stale_candidates.add(version.file_path)

    for candidate in sorted(stale_candidates):
        if candidate not in protected:
            try:
                stale_file = Path(candidate)
                if stale_file.exists() and stale_file.is_file():
                    stale_file.unlink()
            except Exception:
                # Best effort: leave the file behind if it cannot be removed.
                pass
|
||||||
|
|
||||||
def sha256_for_file(path: Path) -> str:
|
def sha256_for_file(path: Path) -> str:
|
||||||
hasher = hashlib.sha256()
|
hasher = hashlib.sha256()
|
||||||
with path.open("rb") as f:
|
with path.open("rb") as f:
|
||||||
|
|
@ -234,6 +319,25 @@ def create_ocr_corrected_pdf_version(db: Session, document: Document, output_pat
|
||||||
compress_pdf_with_ghostscript(out_path)
|
compress_pdf_with_ghostscript(out_path)
|
||||||
|
|
||||||
file_hash = sha256_for_file(out_path)
|
file_hash = sha256_for_file(out_path)
|
||||||
|
try:
|
||||||
|
mirror_path = _mirror_to_secondary_owner(document, out_path)
|
||||||
|
share_path_value = str(mirror_path) if mirror_path else None
|
||||||
|
except Exception as e:
|
||||||
|
share_path_value = None
|
||||||
|
|
||||||
|
document.share_path = share_path_value
|
||||||
|
db.query(Document).filter(Document.id == document.id).update(
|
||||||
|
{"share_path": share_path_value},
|
||||||
|
synchronize_session=False,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
mirror_path = _mirror_to_secondary_owner(document, out_path)
|
||||||
|
document.share_path = str(mirror_path) if mirror_path else None
|
||||||
|
except Exception:
|
||||||
|
document.share_path = None
|
||||||
|
|
||||||
|
db.add(document)
|
||||||
|
|
||||||
|
|
||||||
version = DocumentVersion(
|
version = DocumentVersion(
|
||||||
document_id=document.id,
|
document_id=document.id,
|
||||||
|
|
@ -251,6 +355,12 @@ def create_ocr_corrected_pdf_version(db: Session, document: Document, output_pat
|
||||||
document.sha256_current = file_hash
|
document.sha256_current = file_hash
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
keep_paths = {str(out_path)}
|
||||||
|
if document.share_path:
|
||||||
|
keep_paths.add(str(document.share_path))
|
||||||
|
_prune_old_saved_files(db, document, keep_paths)
|
||||||
|
|
||||||
db.refresh(version)
|
db.refresh(version)
|
||||||
return version
|
return version
|
||||||
|
|
||||||
|
|
@ -268,12 +378,25 @@ def create_field_enriched_pdf_version(db: Session, document: Document, output_pa
|
||||||
out_path = _build_output_path(FIELD_ENRICHED_ROOT, document, "field_enriched", next_version_number)
|
out_path = _build_output_path(FIELD_ENRICHED_ROOT, document, "field_enriched", next_version_number)
|
||||||
else:
|
else:
|
||||||
out_path = Path(output_path)
|
out_path = Path(output_path)
|
||||||
|
|
||||||
|
out_path = out_path.with_name(
|
||||||
|
re.sub(r"_v\d+(?=\.[^.]+$)", "", out_path.name)
|
||||||
|
)
|
||||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if current_file.resolve() != out_path.resolve():
|
if current_file.resolve() != out_path.resolve():
|
||||||
shutil.copy2(current_file, out_path)
|
shutil.copy2(current_file, out_path)
|
||||||
file_hash = sha256_for_file(out_path)
|
file_hash = sha256_for_file(out_path)
|
||||||
|
|
||||||
|
try:
|
||||||
|
mirror_path = _mirror_to_secondary_owner(document, out_path)
|
||||||
|
share_path_value = str(mirror_path) if mirror_path else None
|
||||||
|
except Exception:
|
||||||
|
share_path_value = None
|
||||||
|
|
||||||
|
document.share_path = share_path_value
|
||||||
|
db.add(document)
|
||||||
|
|
||||||
version = DocumentVersion(
|
version = DocumentVersion(
|
||||||
document_id=document.id,
|
document_id=document.id,
|
||||||
version_number=next_version_number,
|
version_number=next_version_number,
|
||||||
|
|
@ -290,5 +413,11 @@ def create_field_enriched_pdf_version(db: Session, document: Document, output_pa
|
||||||
document.sha256_current = file_hash
|
document.sha256_current = file_hash
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
keep_paths = {str(out_path)}
|
||||||
|
if document.share_path:
|
||||||
|
keep_paths.add(str(document.share_path))
|
||||||
|
_prune_old_saved_files(db, document, keep_paths)
|
||||||
|
|
||||||
db.refresh(version)
|
db.refresh(version)
|
||||||
return version
|
return version
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ from app.routes.trash import router as trash_router
|
||||||
|
|
||||||
app = FastAPI(title="document-processor")
|
app = FastAPI(title="document-processor")
|
||||||
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
||||||
app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), name="files")
|
app.mount("/files", StaticFiles(directory="/mnt/svr-01/storage"), name="files")
|
||||||
|
|
||||||
app.include_router(health_router)
|
app.include_router(health_router)
|
||||||
app.include_router(documents_router)
|
app.include_router(documents_router)
|
||||||
|
|
|
||||||
|
|
@ -17,3 +17,4 @@ __all__ = [
|
||||||
"DocumentAdditionalField",
|
"DocumentAdditionalField",
|
||||||
"DocumentPreset",
|
"DocumentPreset",
|
||||||
]
|
]
|
||||||
|
from app.models.document_naming_field import DocumentNamingField
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,10 @@ class Document(Base):
|
||||||
back_populates="document",
|
back_populates="document",
|
||||||
cascade="all, delete-orphan",
|
cascade="all, delete-orphan",
|
||||||
)
|
)
|
||||||
|
naming_fields: Mapped[list["DocumentNamingField"]] = relationship(
|
||||||
|
back_populates="document",
|
||||||
|
cascade="all, delete-orphan",
|
||||||
|
)
|
||||||
additional_fields: Mapped[list["DocumentAdditionalField"]] = relationship(
|
additional_fields: Mapped[list["DocumentAdditionalField"]] = relationship(
|
||||||
back_populates="document",
|
back_populates="document",
|
||||||
cascade="all, delete-orphan",
|
cascade="all, delete-orphan",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import Boolean, DateTime, ForeignKey, String, Text
|
||||||
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
|
|
||||||
|
from app.db.base import Base
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentNamingField(Base):
    """ORM model for the naming metadata attached to a document.

    Rows live in ``document_naming_fields`` and link to ``documents.id``.
    """

    __tablename__ = "document_naming_fields"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    # unique=True: the database allows at most one naming row per document.
    document_id: Mapped[int] = mapped_column(ForeignKey("documents.id"), nullable=False, unique=True, index=True)

    # Optional naming components; all nullable.
    # NOTE(review): presumably these feed the proposed storage file name — confirm against the path builder.
    naming_entity: Mapped[str | None] = mapped_column(Text, nullable=True)
    naming_account_last4: Mapped[str | None] = mapped_column(String(16), nullable=True)
    naming_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # Stored as a string of up to 16 characters, not as a DATE column.
    naming_date: Mapped[str | None] = mapped_column(String(16), nullable=True)
    naming_date_precision: Mapped[str | None] = mapped_column(String(16), nullable=True)

    naming_description: Mapped[str | None] = mapped_column(Text, nullable=True)
    naming_reference_number: Mapped[str | None] = mapped_column(Text, nullable=True)
    naming_variant: Mapped[str | None] = mapped_column(String(64), nullable=True)

    # Required bookkeeping columns with Python-side defaults.
    naming_schema_version: Mapped[str] = mapped_column(String(32), nullable=False, default="v1")
    naming_locked: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # Timestamps default to datetime.utcnow (naive UTC); updated_at is refreshed on UPDATE.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Counterpart of the "naming_fields" relationship on Document.
    document: Mapped["Document"] = relationship(back_populates="naming_fields")
|
||||||
|
|
@ -2,7 +2,6 @@ from copy import deepcopy
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from decimal import Decimal, InvalidOperation
|
from decimal import Decimal, InvalidOperation
|
||||||
import re
|
import re
|
||||||
import traceback
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Form, Query, Request
|
from fastapi import APIRouter, Depends, Form, Query, Request
|
||||||
|
|
@ -538,7 +537,7 @@ def save_ocr_corrected_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
create_ocr_corrected_pdf_version(db, document, output_path=output_path)
|
create_ocr_corrected_pdf_version(db, document, output_path=output_path_obj)
|
||||||
except Exception:
|
except Exception:
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_ocr_corrected_failed", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_ocr_corrected_failed", status_code=303)
|
||||||
|
|
||||||
|
|
@ -558,6 +557,74 @@ def move_to_trash(document_id: str, db: Session = Depends(get_db)):
|
||||||
return RedirectResponse(url="/documents/", status_code=303)
|
return RedirectResponse(url="/documents/", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/save-pdf", response_class=RedirectResponse)
def save_pdf(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
    """Save the document as a PDF at the proposed path or a user-supplied override.

    Args:
        document_id: Public identifier of the document to save.
        output_path: Optional override from the form; blank means "use the
            proposed storage path" (with any ``_v<N>`` / ``_<N>`` suffix
            stripped from the file name).
        db: Database session.

    Returns:
        A 303 redirect: to the document list when the document is unknown, to
        the document page with ``error=invalid_output_path`` when the target
        is malformed or outside the save root, with ``error=save_pdf_failed``
        when writing the version fails, and to the OCR review tab on success.
    """
    document = (
        db.query(Document)
        .options(
            selectinload(Document.text_versions),
            selectinload(Document.naming_fields),
            selectinload(Document.extracted_fields),
            selectinload(Document.additional_fields),
        )
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    detail_url = f"/documents/{document.document_id}"

    save_root = get_default_save_root()
    naming_row = document.naming_fields[0] if getattr(document, "naming_fields", None) else None

    default_output_path = Path(
        build_proposed_storage_path(
            document=document,
            save_root=save_root,
            naming_row=naming_row,
        )
    )
    default_output_path = default_output_path.with_name(
        re.sub(r"(?:_v\d+|_\d+)(?=\.[^.]+$)", "", default_output_path.name)
    )

    output_path_raw = (output_path or "").strip()
    output_path_obj = Path(output_path_raw) if output_path_raw else default_output_path

    try:
        # with_suffix() raises ValueError for a path without a file name
        # (e.g. "/"); resolve() can fail on malformed user input. Both are
        # reported as an invalid path instead of surfacing as a server error.
        if output_path_obj.suffix.lower() != ".pdf":
            output_path_obj = output_path_obj.with_suffix(".pdf")

        allowed_root = Path(save_root).resolve()
        resolved_parent = output_path_obj.parent.resolve()
    except (OSError, RuntimeError, ValueError):
        return RedirectResponse(url=f"{detail_url}?error=invalid_output_path", status_code=303)

    # The target directory must be the save root itself or somewhere below it.
    if allowed_root != resolved_parent and allowed_root not in resolved_parent.parents:
        return RedirectResponse(url=f"{detail_url}?error=invalid_output_path", status_code=303)

    output_path_obj.parent.mkdir(parents=True, exist_ok=True)

    has_extracted = bool(getattr(document, "extracted_fields", None))
    has_additional = bool(getattr(document, "additional_fields", None))

    try:
        # Documents with any extracted/additional fields get the field-enriched
        # PDF; everything else gets the OCR-corrected one.
        if has_extracted or has_additional:
            create_field_enriched_pdf_version(db, document, output_path=output_path_obj)
        else:
            create_ocr_corrected_pdf_version(db, document, output_path=output_path_obj)
    except Exception:
        return RedirectResponse(url=f"{detail_url}?error=save_pdf_failed", status_code=303)

    return RedirectResponse(url=f"{detail_url}?tab=ocr-review", status_code=303)
|
||||||
|
|
||||||
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
|
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
|
||||||
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||||
document = (
|
document = (
|
||||||
|
|
@ -588,10 +655,8 @@ def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
create_field_enriched_pdf_version(db, document, output_path=output_path)
|
create_field_enriched_pdf_version(db, document, output_path=output_path_obj)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("save_field_enriched_pdf failed:", repr(e))
|
|
||||||
traceback.print_exc()
|
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_field_enriched_failed", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_field_enriched_failed", status_code=303)
|
||||||
|
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?tab=extracted-fields", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?tab=extracted-fields", status_code=303)
|
||||||
|
|
@ -784,7 +849,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
||||||
|
|
||||||
file_url = None
|
file_url = None
|
||||||
if document.current_path:
|
if document.current_path:
|
||||||
storage_root = Path("/mnt/storage/document-processor")
|
storage_root = Path("/mnt/svr-01/storage")
|
||||||
current_path = Path(document.current_path)
|
current_path = Path(document.current_path)
|
||||||
try:
|
try:
|
||||||
rel = current_path.relative_to(storage_root)
|
rel = current_path.relative_to(storage_root)
|
||||||
|
|
@ -820,6 +885,11 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
||||||
save_root=default_save_root,
|
save_root=default_save_root,
|
||||||
naming_row=naming_row,
|
naming_row=naming_row,
|
||||||
)
|
)
|
||||||
|
proposed_storage_path = str(
|
||||||
|
Path(proposed_storage_path).with_name(
|
||||||
|
re.sub(r"(?:_v\d+|_\d+)(?=\.[^.]+$)", "", Path(proposed_storage_path).name)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
active_tab = request.query_params.get("tab", "ocr-review")
|
active_tab = request.query_params.get("tab", "ocr-review")
|
||||||
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr"}:
|
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr"}:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, File, Form, Request, UploadFile
|
from fastapi import APIRouter, Depends, File, Form, Request, UploadFile
|
||||||
from fastapi.responses import HTMLResponse
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.core.config import INBOX_ROOT
|
from app.core.ingest_settings import browse_directory, get_default_ingest_root, reset_default_ingest_root, set_default_ingest_root
|
||||||
from app.db.deps import get_db
|
from app.db.deps import get_db
|
||||||
from app.logic.ingest import ingest_directory, ingest_file, ingest_inbox, ingest_uploaded_file
|
from app.logic.ingest import ingest_directory, ingest_file, ingest_inbox, ingest_uploaded_file
|
||||||
|
|
||||||
|
|
@ -16,18 +16,41 @@ templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/", response_class=HTMLResponse)
|
@router.get("/", response_class=HTMLResponse)
|
||||||
def ingest_home(request: Request):
|
def ingest_home(request: Request, browse_path: str = ""):
|
||||||
|
default_ingest_root = get_default_ingest_root()
|
||||||
|
current_browse_path = browse_path.strip() or default_ingest_root
|
||||||
|
browser = browse_directory(current_browse_path)
|
||||||
|
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request=request,
|
request=request,
|
||||||
name="ingest/index.html",
|
name="ingest/index.html",
|
||||||
context={
|
context={
|
||||||
"request": request,
|
"request": request,
|
||||||
"inbox_root": INBOX_ROOT,
|
"default_ingest_root": default_ingest_root,
|
||||||
|
"current_browse_path": current_browse_path,
|
||||||
|
"browser": browser,
|
||||||
"active_page": "ingest",
|
"active_page": "ingest",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/set-default-root", response_class=RedirectResponse)
def set_default_root(directory_path: str = Form("")):
    """Store a new default ingest root and redirect back to the ingest page.

    The submitted path is saved only when it is non-blank and resolves to an
    existing directory. Otherwise the stored setting is left untouched and
    the redirect points at the current default. A blank value is rejected
    explicitly because ``Path("")`` would resolve to the process working
    directory and be saved by accident.
    """
    from urllib.parse import quote

    requested = (directory_path or "").strip()
    if requested:
        try:
            chosen = Path(requested).expanduser().resolve()
            is_usable = chosen.is_dir()
        except (OSError, RuntimeError, ValueError):
            # Unknown "~user", unreadable path, or otherwise malformed input.
            is_usable = False
        if is_usable:
            saved = set_default_ingest_root(str(chosen))
            # Percent-encode so spaces, "&", "#" etc. survive the query string.
            return RedirectResponse(url=f"/ingest/?browse_path={quote(saved)}", status_code=303)

    fallback = get_default_ingest_root()
    return RedirectResponse(url=f"/ingest/?browse_path={quote(fallback)}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/reset-default-root", response_class=RedirectResponse)
def reset_default_root():
    """Restore the built-in default ingest root and redirect to the ingest page."""
    from urllib.parse import quote

    saved = reset_default_ingest_root()
    # Percent-encode the path so it is a well-formed query-string value.
    return RedirectResponse(url=f"/ingest/?browse_path={quote(saved)}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/upload-files", response_class=HTMLResponse)
|
@router.post("/upload-files", response_class=HTMLResponse)
|
||||||
async def ingest_upload_files(
|
async def ingest_upload_files(
|
||||||
request: Request,
|
request: Request,
|
||||||
|
|
@ -155,14 +178,20 @@ def ingest_server_directory(
|
||||||
|
|
||||||
@router.post("/inbox", response_class=HTMLResponse)
|
@router.post("/inbox", response_class=HTMLResponse)
|
||||||
def ingest_inbox_route(request: Request, db: Session = Depends(get_db)):
|
def ingest_inbox_route(request: Request, db: Session = Depends(get_db)):
|
||||||
|
default_ingest_root = get_default_ingest_root()
|
||||||
try:
|
try:
|
||||||
docs = ingest_inbox(db=db)
|
docs = ingest_directory(
|
||||||
|
db=db,
|
||||||
|
directory_path=default_ingest_root,
|
||||||
|
recursive=False,
|
||||||
|
source_system="default_root_ingest",
|
||||||
|
)
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request=request,
|
request=request,
|
||||||
name="ingest/result.html",
|
name="ingest/result.html",
|
||||||
context={
|
context={
|
||||||
"request": request,
|
"request": request,
|
||||||
"message": f"Ingested {len(docs)} file(s) from inbox.",
|
"message": f"Ingested {len(docs)} file(s) from default ingest root: {default_ingest_root}",
|
||||||
"documents": docs,
|
"documents": docs,
|
||||||
"errors": [],
|
"errors": [],
|
||||||
},
|
},
|
||||||
|
|
@ -173,7 +202,7 @@ def ingest_inbox_route(request: Request, db: Session = Depends(get_db)):
|
||||||
name="ingest/result.html",
|
name="ingest/result.html",
|
||||||
context={
|
context={
|
||||||
"request": request,
|
"request": request,
|
||||||
"message": f"Error ingesting inbox: {e}",
|
"message": f"Error ingesting default ingest root ({default_ingest_root}): {e}",
|
||||||
"documents": [],
|
"documents": [],
|
||||||
"errors": [],
|
"errors": [],
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -36,43 +36,52 @@
|
||||||
<span class="badge">{{ document.document_type }}</span>
|
<span class="badge">{{ document.document_type }}</span>
|
||||||
<span class="badge">{{ document.mime_type }}</span>
|
<span class="badge">{{ document.mime_type }}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div> <div class="card" style="margin-bottom: 0;">
|
||||||
|
<div style="display:flex; flex-direction:column; gap:0.75rem;">
|
||||||
|
<div style="display:flex; align-items:flex-end; gap:0.6rem; flex-wrap:wrap;">
|
||||||
|
<form method="post" action="/documents/{{ document.document_id }}/save-document-type" style="display:flex; align-items:flex-end; gap:0.6rem; flex-wrap:wrap; margin:0;">
|
||||||
|
<div style="position:relative;">
|
||||||
|
<label for="document_type_input">Document type</label>
|
||||||
|
<input
|
||||||
|
id="document_type_input"
|
||||||
|
type="text"
|
||||||
|
name="document_type"
|
||||||
|
value="{{ document.document_type or '' }}"
|
||||||
|
placeholder="receipt"
|
||||||
|
autocomplete="off"
|
||||||
|
style="min-width:160px; max-width:260px;"
|
||||||
|
>
|
||||||
|
<div id="document-type-suggestions" style="display:none; position:absolute; top:100%; left:0; right:0; z-index:20; background:#fff; border:1px solid #d7dce5; border-radius:12px; margin-top:0.35rem; max-height:220px; overflow-y:auto; box-shadow:0 10px 24px rgba(15,23,42,0.10);"></div>
|
||||||
|
</div>
|
||||||
|
<button type="submit" style="height:38px;">Update</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
<div class="card" style="margin-bottom: 0;">
|
<form method="post" action="/documents/{{ document.document_id }}/move-to-trash" style="margin:0;">
|
||||||
<div class="button-row">
|
<button class="danger" type="submit" style="height:38px;">Move to trash</button>
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/rerun-ocr">
|
</form>
|
||||||
<button type="submit">Re-run OCR</button>
|
</div>
|
||||||
</form>
|
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-ocr-corrected-pdf">
|
<form method="post" action="/documents/{{ document.document_id }}/save-pdf" style="display:flex; align-items:flex-end; gap:0.6rem; flex-wrap:wrap; margin:0;">
|
||||||
<button class="primary" type="submit">Save OCR-corrected PDF</button>
|
<div style="flex:1; min-width:260px;">
|
||||||
</form>
|
<label for="proposed_storage_path_input">Proposed path</label>
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-field-enriched-pdf">
|
<input
|
||||||
<button type="submit">Save field-enriched PDF</button>
|
id="proposed_storage_path_input"
|
||||||
</form>
|
type="text"
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/move-to-trash">
|
name="output_path"
|
||||||
<button class="danger" type="submit">Move to trash</button>
|
value="{{ proposed_storage_path }}"
|
||||||
|
data-default-path="{{ proposed_storage_path }}"
|
||||||
|
readonly
|
||||||
|
style="width:100%;"
|
||||||
|
>
|
||||||
|
<div id="path-override-hint" style="margin-top:0.35rem; font-size:0.85rem; color:#6b7280;">
|
||||||
|
Uses the system path unless manually edited.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<button type="button" id="toggle-path-edit" style="height:38px;">Edit path</button>
|
||||||
|
<button type="submit" class="btn btn-primary" style="height:38px;">Save Document</button>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-document-type" style="margin-top: 1rem;">
|
|
||||||
<div style="display:flex; align-items:flex-end; gap:0.6rem; flex-wrap:wrap;">
|
|
||||||
<div style="position:relative;">
|
|
||||||
<label for="document_type_input">Document type</label>
|
|
||||||
<input
|
|
||||||
id="document_type_input"
|
|
||||||
type="text"
|
|
||||||
name="document_type"
|
|
||||||
value="{{ document.document_type or '' }}"
|
|
||||||
placeholder="receipt"
|
|
||||||
autocomplete="off"
|
|
||||||
style="min-width:160px; max-width:260px;"
|
|
||||||
>
|
|
||||||
<div id="document-type-suggestions" style="display:none; position:absolute; top:100%; left:0; right:0; z-index:20; background:#fff; border:1px solid #d7dce5; border-radius:12px; margin-top:0.35rem; max-height:220px; overflow-y:auto; box-shadow:0 10px 24px rgba(15,23,42,0.10);"></div>
|
|
||||||
</div>
|
|
||||||
<button type="submit" style="height:38px;">Save</button>
|
|
||||||
</div>
|
|
||||||
</form>
|
|
||||||
|
|
||||||
<div class="queue-nav-row">
|
<div class="queue-nav-row">
|
||||||
{% if prev_doc %}
|
{% if prev_doc %}
|
||||||
<a class="button-link" href="/documents/{{ prev_doc.document_id }}">← Previous</a>
|
<a class="button-link" href="/documents/{{ prev_doc.document_id }}">← Previous</a>
|
||||||
|
|
@ -505,5 +514,47 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.addEventListener("DOMContentLoaded", () => {
    // Wires the "Edit path" / "Use default" toggle for the proposed storage
    // path field and keeps the hint text below it in sync with the field state.
    const storagePathField = document.getElementById("proposed_storage_path_input");
    const editToggle = document.getElementById("toggle-path-edit");
    const overrideHint = document.getElementById("path-override-hint");

    // The field and its toggle are both required; the hint element is optional.
    if (!(storagePathField && editToggle)) {
        return;
    }

    // Prefer the data-default-path attribute; fall back to whatever value the
    // field holds when the page loads.
    const systemPath = storagePathField.dataset.defaultPath || storagePathField.value;

    const isLocked = () => storagePathField.hasAttribute("readonly");

    // The override message is shown only while the field is editable AND its
    // value differs from the system path; every other state shows the
    // neutral message.
    const updateHint = () => {
        if (!overrideHint) {
            return;
        }
        const overridden = !isLocked() && storagePathField.value !== systemPath;
        overrideHint.textContent = overridden
            ? "Manual override active."
            : "Uses the system path unless manually edited.";
        overrideHint.style.color = overridden ? "#b45309" : "#6b7280";
    };

    // Make the field editable and offer the way back to the default.
    const unlockField = () => {
        storagePathField.removeAttribute("readonly");
        editToggle.textContent = "Use default";
        storagePathField.focus();
    };

    // Discard any manual edit, restore the system path and lock the field.
    const restoreDefault = () => {
        storagePathField.value = systemPath;
        storagePathField.setAttribute("readonly", "readonly");
        editToggle.textContent = "Edit path";
    };

    editToggle.addEventListener("click", () => {
        if (isLocked()) {
            unlockField();
        } else {
            restoreDefault();
        }
        updateHint();
    });

    storagePathField.addEventListener("input", updateHint);

    // Set the initial hint for the state the page was rendered in.
    updateHint();
});
|
||||||
|
</script>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,112 @@
|
||||||
<div class="topbar">
|
<div class="topbar">
|
||||||
<div>
|
<div>
|
||||||
<h1 class="page-title">Ingest</h1>
|
<h1 class="page-title">Ingest</h1>
|
||||||
<p class="page-subtitle">Upload files or ingest from server-side paths.</p>
|
<p class="page-subtitle">Upload files, ingest from server-side paths, or use the current default ingest root.</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<h2 class="card-title">Default ingest root</h2>
|
||||||
|
|
||||||
|
<form method="post" action="/ingest/set-default-root">
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-field full">
|
||||||
|
<label for="default_ingest_root">Default ingest root</label>
|
||||||
|
<input id="default_ingest_root" type="text" name="directory_path" value="{{ default_ingest_root }}" placeholder="/mnt/data/shared/scans/processed">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="button-row" style="margin-top: 1rem;">
|
||||||
|
<button class="primary" type="submit">Save default</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div class="button-row" style="margin-top: 1rem;">
|
||||||
|
<form method="post" action="/ingest/reset-default-root">
|
||||||
|
<button type="submit">Revert to /mnt/data/shared/scans/processed</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<form method="get" action="/ingest/">
|
||||||
|
<input type="hidden" name="browse_path" value="{{ default_ingest_root }}">
|
||||||
|
<button type="submit">Browse current default</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<form method="post" action="/ingest/inbox">
|
||||||
|
<button type="submit">Ingest current default root</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<h2 class="card-title">Browse directories</h2>
|
||||||
|
|
||||||
|
<form method="get" action="/ingest/">
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-field full">
|
||||||
|
<label for="browse_path">Browse path</label>
|
||||||
|
<input id="browse_path" type="text" name="browse_path" value="{{ current_browse_path }}" placeholder="/mnt/data/shared/scans/processed">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="button-row" style="margin-top: 1rem;">
|
||||||
|
<button class="primary" type="submit">Browse</button>
|
||||||
|
{% if browser.parent %}
|
||||||
|
{# The parent path goes into a query string, so percent-encode it: a raw "&", "#", "?", "+", "%" or space in a directory name would otherwise truncate or corrupt browse_path. #}
<a class="button-link" href="/ingest/?browse_path={{ browser.parent | urlencode }}">Up one level</a>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
{% if browser.error %}
|
||||||
|
<p class="empty-state">{{ browser.error }}</p>
|
||||||
|
<p class="page-subtitle">This app can only browse directories that exist on svr-02 or are mounted there.</p>
|
||||||
|
{% else %}
|
||||||
|
<p class="page-subtitle">Current path: {{ browser.path }}</p>
|
||||||
|
|
||||||
|
{% if browser.entries %}
|
||||||
|
<div class="table-wrap">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Kind</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for entry in browser.entries %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ entry.name }}</td>
|
||||||
|
<td>{% if entry.is_dir %}Directory{% else %}File{% endif %}</td>
|
||||||
|
<td>
|
||||||
|
<div class="button-row">
|
||||||
|
{% if entry.is_dir %}
|
||||||
|
{# Directory names on disk are arbitrary, so percent-encode the path before placing it in the query string; otherwise "&", "#", "?", "+", "%" or spaces would break browse_path. #}
<a class="button-link" href="/ingest/?browse_path={{ entry.path | urlencode }}">Open</a>
|
||||||
|
<form method="post" action="/ingest/set-default-root">
|
||||||
|
<input type="hidden" name="directory_path" value="{{ entry.path }}">
|
||||||
|
<button type="submit">Set default</button>
|
||||||
|
</form>
|
||||||
|
<form method="post" action="/ingest/server-directory">
|
||||||
|
<input type="hidden" name="directory_path" value="{{ entry.path }}">
|
||||||
|
<input type="hidden" name="recursive" value="1">
|
||||||
|
<button type="submit">Ingest directory</button>
|
||||||
|
</form>
|
||||||
|
{% else %}
|
||||||
|
<form method="post" action="/ingest/server-file">
|
||||||
|
<input type="hidden" name="file_path" value="{{ entry.path }}">
|
||||||
|
<button type="submit">Ingest file</button>
|
||||||
|
</form>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<p class="empty-state">No entries found in this directory.</p>
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h2 class="card-title">Upload files</h2>
|
<h2 class="card-title">Upload files</h2>
|
||||||
<form method="post" action="/ingest/upload-files" enctype="multipart/form-data">
|
<form method="post" action="/ingest/upload-files" enctype="multipart/form-data">
|
||||||
|
|
@ -36,7 +138,7 @@
|
||||||
<form method="post" action="/ingest/server-file" style="margin-bottom: 1.25rem;">
|
<form method="post" action="/ingest/server-file" style="margin-bottom: 1.25rem;">
|
||||||
<div class="form-field full">
|
<div class="form-field full">
|
||||||
<label>Ingest one server file</label>
|
<label>Ingest one server file</label>
|
||||||
<input type="text" name="file_path" placeholder="/mnt/storage/.../file.pdf" required>
|
<input type="text" name="file_path" placeholder="/mnt/data/.../file.pdf" required>
|
||||||
</div>
|
</div>
|
||||||
<div class="button-row" style="margin-top: 1rem;">
|
<div class="button-row" style="margin-top: 1rem;">
|
||||||
<button type="submit">Ingest file</button>
|
<button type="submit">Ingest file</button>
|
||||||
|
|
@ -46,7 +148,7 @@
|
||||||
<form method="post" action="/ingest/server-directory" style="margin-bottom: 1.25rem;">
|
<form method="post" action="/ingest/server-directory" style="margin-bottom: 1.25rem;">
|
||||||
<div class="form-field full">
|
<div class="form-field full">
|
||||||
<label>Ingest one server directory</label>
|
<label>Ingest one server directory</label>
|
||||||
<input type="text" name="directory_path" placeholder="/mnt/storage/.../incoming" required>
|
<input type="text" name="directory_path" placeholder="/mnt/data/.../incoming" required>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-field">
|
<div class="form-field">
|
||||||
<label><input type="checkbox" name="recursive" value="1"> Recursive</label>
|
<label><input type="checkbox" name="recursive" value="1"> Recursive</label>
|
||||||
|
|
@ -55,16 +157,6 @@
|
||||||
<button type="submit">Ingest directory</button>
|
<button type="submit">Ingest directory</button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<form method="post" action="/ingest/inbox">
|
|
||||||
<div class="form-field full">
|
|
||||||
<label>Inbox root</label>
|
|
||||||
<input type="text" value="{{ inbox_root }}" readonly>
|
|
||||||
</div>
|
|
||||||
<div class="button-row" style="margin-top: 1rem;">
|
|
||||||
<button type="submit">Ingest inbox</button>
|
|
||||||
</div>
|
|
||||||
</form>
|
|
||||||
</div>
|
</div>
|
||||||
</main>
|
</main>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue