diff --git a/app/main.py b/app/main.py index 365fc3b..e68be00 100644 --- a/app/main.py +++ b/app/main.py @@ -4,6 +4,8 @@ from fastapi.staticfiles import StaticFiles from app.routes.documents import router as documents_router from app.routes.health import router as health_router from app.routes.ingest import router as ingest_router +from app.routes.queue import router as queue_router +from app.routes.trash import router as trash_router app = FastAPI(title="document-processor") @@ -12,6 +14,8 @@ app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), na app.include_router(health_router) app.include_router(documents_router) app.include_router(ingest_router) +app.include_router(queue_router) +app.include_router(trash_router) @app.get("/") diff --git a/app/models/document.py b/app/models/document.py index e0422ae..13a4c23 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -1,5 +1,5 @@ from datetime import datetime -from sqlalchemy import String, Integer, DateTime, Text +from sqlalchemy import String, Integer, DateTime, Text, Boolean from sqlalchemy.orm import Mapped, mapped_column, relationship from app.db.base import Base @@ -31,6 +31,9 @@ class Document(Base): storage_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False) review_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False) + is_trashed: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) + trashed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False) updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) diff --git a/app/routes/documents.py b/app/routes/documents.py index 906573e..9f5b381 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -161,7 +161,7 @@ def _extracted_field_form_values(document: 
Document, request: Request) -> dict:
 
 @router.get("/", response_class=HTMLResponse)
 def list_documents(request: Request, db: Session = Depends(get_db)):
-    documents = db.query(Document).order_by(Document.created_at.desc()).all()
+    documents = db.query(Document).filter(Document.is_trashed.is_(False)).order_by(Document.created_at.desc()).all()
     return templates.TemplateResponse(
         request=request,
         name="documents/list.html",
@@ -198,6 +198,24 @@ def save_ocr_corrected_pdf(document_id: str, db: Session = Depends(get_db)):
     return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
 
 
+
+@router.post("/{document_id}/move-to-trash", response_class=RedirectResponse)
+def move_to_trash(document_id: str, db: Session = Depends(get_db)):
+    """Flag a document as trashed, then redirect to ``/documents/``.
+
+    An unknown ``document_id`` changes nothing; the redirect is the same either way.
+    """
+    target = db.query(Document).filter(Document.document_id == document_id).first()
+    if target is not None:
+        from datetime import datetime
+
+        target.is_trashed = True
+        target.trashed_at = datetime.utcnow()
+        db.commit()
+
+    return RedirectResponse(url="/documents/", status_code=303)
+
+
 @router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
 def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
     document = db.query(Document).filter(Document.document_id == document_id).first()
diff --git a/app/routes/queue.py b/app/routes/queue.py
new file mode 100644
index 0000000..87855f2
--- /dev/null
+++ b/app/routes/queue.py
@@ -0,0 +1,58 @@
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from sqlalchemy import exists
+from sqlalchemy.orm import Session, selectinload
+
+from app.db.deps import get_db
+from app.models.document import Document
+from app.models.extracted_field import ExtractedField
+
+router = APIRouter(prefix="/queue", tags=["queue"])
+
+BASE_DIR = 
Path(__file__).resolve().parent.parent
+templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
+
+
+@router.get("/", response_class=HTMLResponse)
+def review_queue(request: Request, db: Session = Depends(get_db)):
+    """Render the review queue page.
+
+    Lists non-trashed documents that are not yet "reviewed", reviewed ones with
+    no ExtractedField rows, and the 25 most recently updated documents.
+    """
+    active = db.query(Document).filter(Document.is_trashed.is_(False))
+    no_fields = ~exists().where(ExtractedField.document_id == Document.id)
+
+    # Not yet reviewed, oldest first.
+    needs_ocr_review = (
+        active.filter(Document.review_status != "reviewed")
+        .order_by(Document.created_at.asc())
+        .all()
+    )
+
+    # Reviewed but without extracted fields, least recently updated first.
+    needs_field_extraction = (
+        active.options(selectinload(Document.extracted_fields))
+        .filter(Document.review_status == "reviewed", no_fields)
+        .order_by(Document.updated_at.asc())
+        .all()
+    )
+
+    recently_updated = active.order_by(Document.updated_at.desc()).limit(25).all()
+
+    page_context = {
+        "request": request,
+        "needs_ocr_review": needs_ocr_review,
+        "needs_field_extraction": needs_field_extraction,
+        "recently_updated": recently_updated,
+        "next_ocr": needs_ocr_review[0] if needs_ocr_review else None,
+        "next_fields": needs_field_extraction[0] if needs_field_extraction else None,
+    }
+    return templates.TemplateResponse(
+        request=request,
+        name="queue/index.html",
+        context=page_context,
+    )
diff --git a/app/routes/trash.py b/app/routes/trash.py
new file mode 100644
index 0000000..1457476
--- /dev/null
+++ b/app/routes/trash.py
@@ -0,0 +1,65 @@
+from datetime import datetime
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.templating import Jinja2Templates
+from sqlalchemy.orm import Session
+
+from app.db.deps import get_db
+from app.models.document import Document
+from app.models.document_version import DocumentVersion
+from app.models.extracted_field import ExtractedField
+from app.models.layer1_candidate import Layer1Candidate
+from 
app.models.text_version import TextVersion
+
+router = APIRouter(prefix="/trash", tags=["trash"])
+
+BASE_DIR = Path(__file__).resolve().parent.parent
+templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
+
+
+@router.get("/", response_class=HTMLResponse)
+def trash_index(request: Request, db: Session = Depends(get_db)):
+    """Render the trash page.
+
+    Lists trashed documents ordered by ``trashed_at``, then ``updated_at``, both descending.
+    """
+    newest_first = (Document.trashed_at.desc(), Document.updated_at.desc())
+    documents = db.query(Document).filter(Document.is_trashed.is_(True)).order_by(*newest_first).all()
+    return templates.TemplateResponse(
+        request=request,
+        name="trash/index.html",
+        context={"request": request, "documents": documents},
+    )
+
+
+@router.post("/{document_id}/restore", response_class=RedirectResponse)
+def restore_document(document_id: str, db: Session = Depends(get_db)):
+    """Clear a document's trash flag and redirect to it, or to ``/trash/`` if the id is unknown."""
+    document = db.query(Document).filter(Document.document_id == document_id).first()
+    if document is None:
+        return RedirectResponse(url="/trash/", status_code=303)
+
+    document.is_trashed = False
+    document.trashed_at = None
+    db.commit()
+    return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
+
+
+@router.post("/{document_id}/delete", response_class=RedirectResponse)
+def permanently_delete_document(document_id: str, db: Session = Depends(get_db)):
+    """Hard-delete a trashed document together with its dependent rows.
+
+    Unknown ids and documents that are not in the trash are left untouched,
+    so a POST to this URL cannot destroy a document that was never trashed.
+    """
+    document = db.query(Document).filter(Document.document_id == document_id).first()
+    if document is None or not document.is_trashed:
+        return RedirectResponse(url="/trash/", status_code=303)
+    doc_pk = document.id
+    for child_model in (Layer1Candidate, ExtractedField, TextVersion, DocumentVersion):
+        db.query(child_model).filter(child_model.document_id == doc_pk).delete()
+    db.delete(document)
+    db.commit()
+    return RedirectResponse(url="/trash/", status_code=303)
diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html
index 61b775a..6a74f71 
100644 --- a/app/templates/documents/detail.html +++ b/app/templates/documents/detail.html @@ -57,6 +57,16 @@ {% endif %} + +
+ Open review queue | + Open trash +
+ + +| Document | +Type | +Review status | +Updated | +
|---|---|---|---|
| {{ doc.document_id }} | +{{ doc.document_type }} | +{{ doc.review_status }} | +{{ doc.updated_at }} | +
No documents currently need OCR review.
+ {% endif %} + +| Document | +Type | +Review status | +Updated | +
|---|---|---|---|
| {{ doc.document_id }} | +{{ doc.document_type }} | +{{ doc.review_status }} | +{{ doc.updated_at }} | +
No reviewed documents are waiting on field extraction.
+ {% endif %} + +| Document | +Type | +Review status | +Current path | +Updated | +
|---|---|---|---|---|
| {{ doc.document_id }} | +{{ doc.document_type }} | +{{ doc.review_status }} | +{{ doc.current_path }} | +{{ doc.updated_at }} | +
Back to documents | Open review queue
+ +| Document | +Type | +Review status | +Trashed at | +Current path | +Actions | +
|---|---|---|---|---|---|
| {{ doc.document_id }} | +{{ doc.document_type }} | +{{ doc.review_status }} | +{{ doc.trashed_at }} | +{{ doc.current_path }} | ++ + + | +
Trash is empty.
+ {% endif %} + +