feat: add review queue and trash workflow

This commit is contained in:
Sean McElwain 2026-04-03 18:22:18 -05:00
parent 5b9e8f0d01
commit bb22e2585a
9 changed files with 322 additions and 2 deletions

View File

@ -4,6 +4,8 @@ from fastapi.staticfiles import StaticFiles
from app.routes.documents import router as documents_router from app.routes.documents import router as documents_router
from app.routes.health import router as health_router from app.routes.health import router as health_router
from app.routes.ingest import router as ingest_router from app.routes.ingest import router as ingest_router
from app.routes.queue import router as queue_router
from app.routes.trash import router as trash_router
app = FastAPI(title="document-processor") app = FastAPI(title="document-processor")
@ -12,6 +14,8 @@ app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), na
app.include_router(health_router) app.include_router(health_router)
app.include_router(documents_router) app.include_router(documents_router)
app.include_router(ingest_router) app.include_router(ingest_router)
app.include_router(queue_router)
app.include_router(trash_router)
@app.get("/") @app.get("/")

View File

@ -1,5 +1,5 @@
from datetime import datetime from datetime import datetime
from sqlalchemy import String, Integer, DateTime, Text from sqlalchemy import String, Integer, DateTime, Text, Boolean
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base from app.db.base import Base
@ -31,6 +31,9 @@ class Document(Base):
storage_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False) storage_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
review_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False) review_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
is_trashed: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
trashed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

View File

@ -161,7 +161,7 @@ def _extracted_field_form_values(document: Document, request: Request) -> dict:
@router.get("/", response_class=HTMLResponse) @router.get("/", response_class=HTMLResponse)
def list_documents(request: Request, db: Session = Depends(get_db)): def list_documents(request: Request, db: Session = Depends(get_db)):
documents = db.query(Document).order_by(Document.created_at.desc()).all() documents = db.query(Document).filter(Document.is_trashed.is_(False)).order_by(Document.created_at.desc()).all()
return templates.TemplateResponse( return templates.TemplateResponse(
request=request, request=request,
name="documents/list.html", name="documents/list.html",
@ -198,6 +198,21 @@ def save_ocr_corrected_pdf(document_id: str, db: Session = Depends(get_db)):
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303) return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
@router.post("/{document_id}/move-to-trash", response_class=RedirectResponse)
def move_to_trash(document_id: str, db: Session = Depends(get_db)):
    """Soft-delete a document by flagging it as trashed.

    Looks the document up by its ``document_id``. If it exists and is not
    already trashed, sets ``is_trashed`` and stamps ``trashed_at``, then
    commits. Always answers with a 303 redirect to the document list,
    including when the id is unknown.
    """
    # Function-scope import: this block cannot assume the module already
    # imports datetime at file level.
    from datetime import datetime

    document = db.query(Document).filter(Document.document_id == document_id).first()

    # Only stamp on the first transition into the trash. Re-submitting the
    # form for an already-trashed document must not overwrite the original
    # trashed_at, which the trash listing sorts on.
    if document is not None and not document.is_trashed:
        document.is_trashed = True
        # Naive UTC, matching the utcnow defaults on the Document model.
        document.trashed_at = datetime.utcnow()
        db.commit()

    return RedirectResponse(url="/documents/", status_code=303)
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse) @router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)): def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
document = db.query(Document).filter(Document.document_id == document_id).first() document = db.query(Document).filter(Document.document_id == document_id).first()

58
app/routes/queue.py Normal file
View File

@ -0,0 +1,58 @@
from pathlib import Path
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy import exists
from sqlalchemy.orm import Session, selectinload
from app.db.deps import get_db
from app.models.document import Document
from app.models.extracted_field import ExtractedField
# Router for the review queue pages; every route below is served under /queue.
router = APIRouter(prefix="/queue", tags=["queue"])
# This module lives at app/routes/queue.py, so two parents up is the app/ package directory.
BASE_DIR = Path(__file__).resolve().parent.parent
# Jinja2 environment rooted at <BASE_DIR>/templates.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
@router.get("/", response_class=HTMLResponse)
def review_queue(request: Request, db: Session = Depends(get_db)):
    """Render the review queue page.

    Builds three lists, all restricted to documents that are not trashed:

    * ``needs_ocr_review`` -- ``review_status`` is anything other than
      ``"reviewed"``; oldest ``created_at`` first.
    * ``needs_field_extraction`` -- ``review_status`` is ``"reviewed"`` and
      no ``ExtractedField`` row references the document; least recently
      updated first.
    * ``recently_updated`` -- the 25 most recently updated documents.

    The head of each of the first two lists is also passed to the template
    as ``next_ocr`` / ``next_fields`` (``None`` when the list is empty).
    """
    # Query objects are generative: each .filter()/.order_by() returns a new
    # query, so this base can be shared safely by the three lists below.
    active_documents = db.query(Document).filter(Document.is_trashed.is_(False))

    needs_ocr_review = (
        active_documents
        .filter(Document.review_status != "reviewed")
        .order_by(Document.created_at.asc())
        .all()
    )

    # No eager load of Document.extracted_fields here: the NOT EXISTS filter
    # keeps only documents without any ExtractedField row, so an eager load
    # would just issue an extra SELECT that returns nothing.
    has_extracted_fields = exists().where(ExtractedField.document_id == Document.id)
    needs_field_extraction = (
        active_documents
        .filter(Document.review_status == "reviewed")
        .filter(~has_extracted_fields)
        .order_by(Document.updated_at.asc())
        .all()
    )

    recently_updated = (
        active_documents
        .order_by(Document.updated_at.desc())
        .limit(25)
        .all()
    )

    # "Next" shortcuts point at the head of each work list, if any.
    next_ocr = needs_ocr_review[0] if needs_ocr_review else None
    next_fields = needs_field_extraction[0] if needs_field_extraction else None

    return templates.TemplateResponse(
        request=request,
        name="queue/index.html",
        context={
            "request": request,
            "needs_ocr_review": needs_ocr_review,
            "needs_field_extraction": needs_field_extraction,
            "recently_updated": recently_updated,
            "next_ocr": next_ocr,
            "next_fields": next_fields,
        },
    )

65
app/routes/trash.py Normal file
View File

@ -0,0 +1,65 @@
from datetime import datetime
from pathlib import Path
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session
from app.db.deps import get_db
from app.models.document import Document
from app.models.document_version import DocumentVersion
from app.models.extracted_field import ExtractedField
from app.models.layer1_candidate import Layer1Candidate
from app.models.text_version import TextVersion
# Router for the trash pages; every route below is served under /trash.
router = APIRouter(prefix="/trash", tags=["trash"])
# This module lives at app/routes/trash.py, so two parents up is the app/ package directory.
BASE_DIR = Path(__file__).resolve().parent.parent
# Jinja2 environment rooted at <BASE_DIR>/templates.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
@router.get("/", response_class=HTMLResponse)
def trash_index(request: Request, db: Session = Depends(get_db)):
    """Render the trash page listing every trashed document.

    Documents are ordered by ``trashed_at`` descending, with ``updated_at``
    descending as the tie-breaker.
    """
    trashed_query = db.query(Document).filter(Document.is_trashed.is_(True))
    ordered_query = trashed_query.order_by(
        Document.trashed_at.desc(),
        Document.updated_at.desc(),
    )
    documents = ordered_query.all()

    page_context = {"request": request, "documents": documents}
    return templates.TemplateResponse(
        request=request,
        name="trash/index.html",
        context=page_context,
    )
@router.post("/{document_id}/restore", response_class=RedirectResponse)
def restore_document(document_id: str, db: Session = Depends(get_db)):
    """Take a document out of the trash.

    Clears ``is_trashed`` and ``trashed_at`` on the document matching
    ``document_id`` and commits. Redirects (303) to the document's detail
    page on success, or back to the trash listing when the id is unknown.
    """
    lookup = db.query(Document).filter(Document.document_id == document_id)
    doc = lookup.first()

    if doc is not None:
        doc.is_trashed = False
        doc.trashed_at = None
        db.commit()
        target_url = f"/documents/{doc.document_id}"
    else:
        target_url = "/trash/"

    return RedirectResponse(url=target_url, status_code=303)
@router.post("/{document_id}/delete", response_class=RedirectResponse)
def permanently_delete_document(document_id: str, db: Session = Depends(get_db)):
    """Irreversibly delete a trashed document and its dependent rows.

    Removes the ``Layer1Candidate``, ``ExtractedField``, ``TextVersion`` and
    ``DocumentVersion`` rows that reference the document's primary key, then
    the document row itself, in a single commit. Always answers with a 303
    redirect to the trash listing.

    Nothing is deleted when the id is unknown or the document is not
    currently in the trash.
    """
    document = db.query(Document).filter(Document.document_id == document_id).first()

    # Permanent deletion is the second step of the trash workflow. Refuse to
    # destroy a document that was never moved to the trash (for example via a
    # hand-crafted or stale POST) so that "move to trash" remains the only
    # entry point to data loss.
    if document is None or not document.is_trashed:
        return RedirectResponse(url="/trash/", status_code=303)

    # Child tables reference the integer primary key, not the public document_id.
    doc_pk = document.id

    # Delete dependents before the parent row.
    for child_model in (Layer1Candidate, ExtractedField, TextVersion, DocumentVersion):
        db.query(child_model).filter(child_model.document_id == doc_pk).delete()

    # NOTE(review): only database rows are removed here; any stored files for
    # the document are not touched by this handler -- confirm that is intended.
    db.delete(document)
    db.commit()

    return RedirectResponse(url="/trash/", status_code=303)

View File

@ -57,6 +57,16 @@
</div> </div>
{% endif %} {% endif %}
<p>
<a href="/queue/">Open review queue</a> |
<a href="/trash/">Open trash</a>
</p>
<form method="post" action="/documents/{{ document.document_id }}/move-to-trash" style="margin-bottom: 1rem;">
<button type="submit">Move to trash</button>
</form>
<h2>Document metadata</h2> <h2>Document metadata</h2>
<ul> <ul>
<li>Type: {{ document.document_type }}</li> <li>Type: {{ document.document_type }}</li>

View File

@ -5,6 +5,8 @@
<title>Documents</title> <title>Documents</title>
</head> </head>
<body> <body>
<p><a href="/trash/">Open trash</a></p>
<p><a href="/queue/">Open review queue</a></p>
<h1>Documents</h1> <h1>Documents</h1>
<p><a href="/documents/test-ingest">Create test ingest</a></p> <p><a href="/documents/test-ingest">Create test ingest</a></p>

View File

@ -0,0 +1,108 @@
<!DOCTYPE html>
{#
  Review queue page.

  Context variables:
    needs_ocr_review       - documents still awaiting OCR review
    needs_field_extraction - reviewed documents awaiting field extraction
    recently_updated       - most recently updated documents
    next_ocr / next_fields - first entry of the two work lists, or None
#}
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Review Queue</title>
    <style>
        body { font-family: sans-serif; }
        table { border-collapse: collapse; width: 100%; margin-bottom: 2rem; }
        th, td { border: 1px solid #ccc; padding: 0.5rem; text-align: left; vertical-align: top; }
        th { background: #f3f3f3; }
        .actions { margin-bottom: 1.5rem; }
    </style>
</head>
<body>
    <p><a href="/trash/">Open trash</a></p>
    <p><a href="/documents/">Back to documents</a></p>
    <h1>Review Queue</h1>

    {# Shortcuts to the head of each work list; the separator shows only when both exist. #}
    <div class="actions">
        {% if next_ocr %}
        <a href="/documents/{{ next_ocr.document_id }}">Next needing OCR review</a>
        {% endif %}
        {% if next_ocr and next_fields %} | {% endif %}
        {% if next_fields %}
        <a href="/documents/{{ next_fields.document_id }}">Next needing field extraction</a>
        {% endif %}
    </div>

    <h2>Needs OCR review ({{ needs_ocr_review|length }})</h2>
    {% if needs_ocr_review %}
    <table>
        <thead>
            <tr>
                <th>Document</th>
                <th>Type</th>
                <th>Review status</th>
                <th>Updated</th>
            </tr>
        </thead>
        <tbody>
            {% for doc in needs_ocr_review %}
            <tr>
                <td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
                <td>{{ doc.document_type }}</td>
                <td>{{ doc.review_status }}</td>
                <td>{{ doc.updated_at }}</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
    {% else %}
    <p>No documents currently need OCR review.</p>
    {% endif %}

    <h2>Needs field extraction ({{ needs_field_extraction|length }})</h2>
    {% if needs_field_extraction %}
    <table>
        <thead>
            <tr>
                <th>Document</th>
                <th>Type</th>
                <th>Review status</th>
                <th>Updated</th>
            </tr>
        </thead>
        <tbody>
            {% for doc in needs_field_extraction %}
            <tr>
                <td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
                <td>{{ doc.document_type }}</td>
                <td>{{ doc.review_status }}</td>
                <td>{{ doc.updated_at }}</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
    {% else %}
    <p>No reviewed documents are waiting on field extraction.</p>
    {% endif %}

    <h2>Recently updated</h2>
    {% if recently_updated %}
    <table>
        <thead>
            <tr>
                <th>Document</th>
                <th>Type</th>
                <th>Review status</th>
                <th>Current path</th>
                <th>Updated</th>
            </tr>
        </thead>
        <tbody>
            {% for doc in recently_updated %}
            <tr>
                <td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
                <td>{{ doc.document_type }}</td>
                <td>{{ doc.review_status }}</td>
                <td>{{ doc.current_path }}</td>
                <td>{{ doc.updated_at }}</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
    {% else %}
    {# Empty state, consistent with the two sections above, so the heading is never left bare. #}
    <p>No documents have been updated yet.</p>
    {% endif %}
</body>
</html>

View File

@ -0,0 +1,55 @@
<!DOCTYPE html>
{#
  Trash page.

  Context variables:
    documents - trashed documents to list; each row offers a restore form
                and a permanent-delete form.
#}
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Trash</title>
    <style>
        body { font-family: sans-serif; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ccc; padding: 0.5rem; text-align: left; vertical-align: top; }
        th { background: #f3f3f3; }
        form { display: inline; }
    </style>
</head>
<body>
    <p><a href="/documents/">Back to documents</a> | <a href="/queue/">Open review queue</a></p>
    <h1>Trash</h1>

    {% if documents %}
    <table>
        <thead>
            <tr>
                <th>Document</th>
                <th>Type</th>
                <th>Review status</th>
                <th>Trashed at</th>
                <th>Current path</th>
                <th>Actions</th>
            </tr>
        </thead>
        <tbody>
            {% for doc in documents %}
            <tr>
                <td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
                <td>{{ doc.document_type }}</td>
                <td>{{ doc.review_status }}</td>
                <td>{{ doc.trashed_at }}</td>
                <td>{{ doc.current_path }}</td>
                <td>
                    <form method="post" action="/trash/{{ doc.document_id }}/restore">
                        <button type="submit">Restore</button>
                    </form>
                    {#
                      Deletion cannot be undone, so ask for confirmation first.
                      The message is static on purpose: no template value is
                      interpolated into the inline script. Without JavaScript
                      the form still submits as before.
                    #}
                    <form method="post" action="/trash/{{ doc.document_id }}/delete" style="margin-left: 0.5rem;" onsubmit="return confirm('Permanently delete this document? This cannot be undone.');">
                        <button type="submit">Delete permanently</button>
                    </form>
                </td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
    {% else %}
    <p>Trash is empty.</p>
    {% endif %}
</body>
</html>