feat: add review queue and trash workflow
This commit is contained in:
parent
5b9e8f0d01
commit
bb22e2585a
|
|
@ -4,6 +4,8 @@ from fastapi.staticfiles import StaticFiles
|
||||||
from app.routes.documents import router as documents_router
|
from app.routes.documents import router as documents_router
|
||||||
from app.routes.health import router as health_router
|
from app.routes.health import router as health_router
|
||||||
from app.routes.ingest import router as ingest_router
|
from app.routes.ingest import router as ingest_router
|
||||||
|
from app.routes.queue import router as queue_router
|
||||||
|
from app.routes.trash import router as trash_router
|
||||||
|
|
||||||
app = FastAPI(title="document-processor")
|
app = FastAPI(title="document-processor")
|
||||||
|
|
||||||
|
|
@ -12,6 +14,8 @@ app.mount("/files", StaticFiles(directory="/mnt/storage/document-processor"), na
|
||||||
app.include_router(health_router)
|
app.include_router(health_router)
|
||||||
app.include_router(documents_router)
|
app.include_router(documents_router)
|
||||||
app.include_router(ingest_router)
|
app.include_router(ingest_router)
|
||||||
|
app.include_router(queue_router)
|
||||||
|
app.include_router(trash_router)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from sqlalchemy import String, Integer, DateTime, Text
|
from sqlalchemy import String, Integer, DateTime, Text, Boolean
|
||||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
|
|
||||||
from app.db.base import Base
|
from app.db.base import Base
|
||||||
|
|
@ -31,6 +31,9 @@ class Document(Base):
|
||||||
storage_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
|
storage_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
|
||||||
review_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
|
review_status: Mapped[str] = mapped_column(String(50), default="ingested", nullable=False)
|
||||||
|
|
||||||
|
is_trashed: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
|
||||||
|
trashed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||||
|
|
||||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
|
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
|
||||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ def _extracted_field_form_values(document: Document, request: Request) -> dict:
|
||||||
|
|
||||||
@router.get("/", response_class=HTMLResponse)
|
@router.get("/", response_class=HTMLResponse)
|
||||||
def list_documents(request: Request, db: Session = Depends(get_db)):
|
def list_documents(request: Request, db: Session = Depends(get_db)):
|
||||||
documents = db.query(Document).order_by(Document.created_at.desc()).all()
|
documents = db.query(Document).filter(Document.is_trashed.is_(False)).order_by(Document.created_at.desc()).all()
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request=request,
|
request=request,
|
||||||
name="documents/list.html",
|
name="documents/list.html",
|
||||||
|
|
@ -198,6 +198,21 @@ def save_ocr_corrected_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/move-to-trash", response_class=RedirectResponse)
def move_to_trash(document_id: str, db: Session = Depends(get_db)):
    """Soft-delete a document by flagging it as trashed.

    The document is looked up by its public ``document_id``. When it exists
    and is not already trashed, ``is_trashed`` is set and ``trashed_at`` is
    stamped with the current UTC time, then the change is committed.

    Unknown ids and documents that are already trashed are left untouched,
    so repeating the request does not overwrite the original ``trashed_at``.

    Args:
        document_id: Public identifier taken from the URL path.
        db: Request-scoped database session.

    Returns:
        A 303 redirect to the document list in every case.
    """
    # Kept function-local (as in the original) so this block does not depend
    # on a module-level import being present; hoisted to the top of the body.
    from datetime import datetime, timezone

    document = db.query(Document).filter(Document.document_id == document_id).first()

    if document is not None and not document.is_trashed:
        document.is_trashed = True
        # Store naive UTC: the Document timestamp columns are plain DateTime
        # fields filled with naive UTC values. datetime.utcnow() is deprecated
        # since Python 3.12, so derive the same value from an aware "now".
        document.trashed_at = datetime.now(timezone.utc).replace(tzinfo=None)
        db.commit()

    return RedirectResponse(url="/documents/", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
|
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
|
||||||
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||||
document = db.query(Document).filter(Document.document_id == document_id).first()
|
document = db.query(Document).filter(Document.document_id == document_id).first()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, Request
|
||||||
|
from fastapi.responses import HTMLResponse
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from sqlalchemy import exists
|
||||||
|
from sqlalchemy.orm import Session, selectinload
|
||||||
|
|
||||||
|
from app.db.deps import get_db
|
||||||
|
from app.models.document import Document
|
||||||
|
from app.models.extracted_field import ExtractedField
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/queue", tags=["queue"])
|
||||||
|
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||||
|
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/", response_class=HTMLResponse)
def review_queue(request: Request, db: Session = Depends(get_db)):
    """Render the review queue page.

    Three lists of non-trashed documents are passed to the template:

    * ``needs_ocr_review`` - ``review_status`` is not ``"reviewed"``,
      oldest ``created_at`` first.
    * ``needs_field_extraction`` - ``review_status`` is ``"reviewed"`` and
      no ``ExtractedField`` row references the document, oldest
      ``updated_at`` first.
    * ``recently_updated`` - the 25 most recently updated documents.

    ``next_ocr`` / ``next_fields`` are the first entry of the first two
    lists, or ``None`` when the corresponding list is empty.
    """
    # Every section of the page ignores trashed documents.
    active_docs = db.query(Document).filter(Document.is_trashed.is_(False))

    ocr_pending = (
        active_docs
        .filter(Document.review_status != "reviewed")
        .order_by(Document.created_at.asc())
        .all()
    )

    has_no_fields = ~exists().where(ExtractedField.document_id == Document.id)
    # NOTE(review): this eager-loads extracted_fields for documents that the
    # NOT EXISTS filter selects for having none - presumably always empty;
    # confirm against the relationship definition before removing.
    fields_pending = (
        active_docs
        .options(selectinload(Document.extracted_fields))
        .filter(Document.review_status == "reviewed")
        .filter(has_no_fields)
        .order_by(Document.updated_at.asc())
        .all()
    )

    latest_touched = (
        active_docs
        .order_by(Document.updated_at.desc())
        .limit(25)
        .all()
    )

    page_context = {
        "request": request,
        "needs_ocr_review": ocr_pending,
        "needs_field_extraction": fields_pending,
        "recently_updated": latest_touched,
        # Head of each work list, or None when that list is empty.
        "next_ocr": next(iter(ocr_pending), None),
        "next_fields": next(iter(fields_pending), None),
    }

    return templates.TemplateResponse(
        request=request,
        name="queue/index.html",
        context=page_context,
    )
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, Request
|
||||||
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.db.deps import get_db
|
||||||
|
from app.models.document import Document
|
||||||
|
from app.models.document_version import DocumentVersion
|
||||||
|
from app.models.extracted_field import ExtractedField
|
||||||
|
from app.models.layer1_candidate import Layer1Candidate
|
||||||
|
from app.models.text_version import TextVersion
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/trash", tags=["trash"])
|
||||||
|
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||||
|
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/", response_class=HTMLResponse)
def trash_index(request: Request, db: Session = Depends(get_db)):
    """Render the trash page listing every trashed document.

    Documents are ordered by ``trashed_at`` descending, with ``updated_at``
    descending as the secondary sort key.
    """
    trashed_only = db.query(Document).filter(Document.is_trashed.is_(True))
    newest_first = trashed_only.order_by(
        Document.trashed_at.desc(),
        Document.updated_at.desc(),
    )

    page_context = {"request": request, "documents": newest_first.all()}

    return templates.TemplateResponse(
        request=request,
        name="trash/index.html",
        context=page_context,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/restore", response_class=RedirectResponse)
def restore_document(document_id: str, db: Session = Depends(get_db)):
    """Take a document out of the trash.

    Clears ``is_trashed`` and ``trashed_at`` on the document whose public
    ``document_id`` matches, commits, and redirects (303) to that document's
    detail page. When no such document exists, redirects (303) back to the
    trash listing without changing anything.
    """
    lookup = db.query(Document).filter(Document.document_id == document_id)
    target = lookup.first()

    if target is not None:
        target.is_trashed = False
        target.trashed_at = None
        db.commit()
        destination = f"/documents/{target.document_id}"
    else:
        destination = "/trash/"

    return RedirectResponse(url=destination, status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/delete", response_class=RedirectResponse)
def permanently_delete_document(document_id: str, db: Session = Depends(get_db)):
    """Irreversibly delete a trashed document and its dependent rows.

    Only documents currently flagged ``is_trashed`` are eligible. A request
    naming an unknown id, or a document that has not been moved to the trash
    first, changes nothing. This keeps hard deletion behind the trash step,
    so a stray or forged POST cannot destroy a live document.

    This handler removes database rows only (``Layer1Candidate``,
    ``ExtractedField``, ``TextVersion``, ``DocumentVersion`` and the
    ``Document`` itself); it performs no filesystem operations.

    Args:
        document_id: Public identifier taken from the URL path.
        db: Request-scoped database session.

    Returns:
        A 303 redirect to the trash listing in every case.
    """
    document = (
        db.query(Document)
        .filter(Document.document_id == document_id)
        # Hard deletion is reserved for documents already in the trash.
        .filter(Document.is_trashed.is_(True))
        .first()
    )
    if document is None:
        return RedirectResponse(url="/trash/", status_code=303)

    doc_pk = document.id

    # Dependent rows go first, in the same order as before - presumably to
    # satisfy foreign-key constraints on the document primary key; confirm
    # against the model definitions.
    for child_model in (Layer1Candidate, ExtractedField, TextVersion, DocumentVersion):
        db.query(child_model).filter(child_model.document_id == doc_pk).delete()

    db.delete(document)
    db.commit()

    return RedirectResponse(url="/trash/", status_code=303)
|
||||||
|
|
@ -57,6 +57,16 @@
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<a href="/queue/">Open review queue</a> |
|
||||||
|
<a href="/trash/">Open trash</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<form method="post" action="/documents/{{ document.document_id }}/move-to-trash" style="margin-bottom: 1rem;">
|
||||||
|
<button type="submit">Move to trash</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
<h2>Document metadata</h2>
|
<h2>Document metadata</h2>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Type: {{ document.document_type }}</li>
|
<li>Type: {{ document.document_type }}</li>
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
<title>Documents</title>
|
<title>Documents</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
<p><a href="/trash/">Open trash</a></p>
|
||||||
|
<p><a href="/queue/">Open review queue</a></p>
|
||||||
<h1>Documents</h1>
|
<h1>Documents</h1>
|
||||||
|
|
||||||
<p><a href="/documents/test-ingest">Create test ingest</a></p>
|
<p><a href="/documents/test-ingest">Create test ingest</a></p>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Review Queue</title>
|
||||||
|
<style>
|
||||||
|
body { font-family: sans-serif; }
|
||||||
|
table { border-collapse: collapse; width: 100%; margin-bottom: 2rem; }
|
||||||
|
th, td { border: 1px solid #ccc; padding: 0.5rem; text-align: left; vertical-align: top; }
|
||||||
|
th { background: #f3f3f3; }
|
||||||
|
.actions { margin-bottom: 1.5rem; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p><a href="/trash/">Open trash</a></p>
|
||||||
|
<p><a href="/documents/">Back to documents</a></p>
|
||||||
|
|
||||||
|
<h1>Review Queue</h1>
|
||||||
|
|
||||||
|
<div class="actions">
|
||||||
|
{% if next_ocr %}
|
||||||
|
<a href="/documents/{{ next_ocr.document_id }}">Next needing OCR review</a>
|
||||||
|
{% endif %}
|
||||||
|
{% if next_ocr and next_fields %} | {% endif %}
|
||||||
|
{% if next_fields %}
|
||||||
|
<a href="/documents/{{ next_fields.document_id }}">Next needing field extraction</a>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h2>Needs OCR review ({{ needs_ocr_review|length }})</h2>
|
||||||
|
{% if needs_ocr_review %}
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Document</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Review status</th>
|
||||||
|
<th>Updated</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for doc in needs_ocr_review %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
|
||||||
|
<td>{{ doc.document_type }}</td>
|
||||||
|
<td>{{ doc.review_status }}</td>
|
||||||
|
<td>{{ doc.updated_at }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<p>No documents currently need OCR review.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<h2>Needs field extraction ({{ needs_field_extraction|length }})</h2>
|
||||||
|
{% if needs_field_extraction %}
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Document</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Review status</th>
|
||||||
|
<th>Updated</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for doc in needs_field_extraction %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
|
||||||
|
<td>{{ doc.document_type }}</td>
|
||||||
|
<td>{{ doc.review_status }}</td>
|
||||||
|
<td>{{ doc.updated_at }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<p>No reviewed documents are waiting on field extraction.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<h2>Recently updated</h2>
|
||||||
|
{% if recently_updated %}
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Document</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Review status</th>
|
||||||
|
<th>Current path</th>
|
||||||
|
<th>Updated</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for doc in recently_updated %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
|
||||||
|
<td>{{ doc.document_type }}</td>
|
||||||
|
<td>{{ doc.review_status }}</td>
|
||||||
|
<td>{{ doc.current_path }}</td>
|
||||||
|
<td>{{ doc.updated_at }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Trash</title>
|
||||||
|
<style>
|
||||||
|
body { font-family: sans-serif; }
|
||||||
|
table { border-collapse: collapse; width: 100%; }
|
||||||
|
th, td { border: 1px solid #ccc; padding: 0.5rem; text-align: left; vertical-align: top; }
|
||||||
|
th { background: #f3f3f3; }
|
||||||
|
form { display: inline; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p><a href="/documents/">Back to documents</a> | <a href="/queue/">Open review queue</a></p>
|
||||||
|
|
||||||
|
<h1>Trash</h1>
|
||||||
|
|
||||||
|
{% if documents %}
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Document</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Review status</th>
|
||||||
|
<th>Trashed at</th>
|
||||||
|
<th>Current path</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for doc in documents %}
|
||||||
|
<tr>
|
||||||
|
<td><a href="/documents/{{ doc.document_id }}">{{ doc.document_id }}</a></td>
|
||||||
|
<td>{{ doc.document_type }}</td>
|
||||||
|
<td>{{ doc.review_status }}</td>
|
||||||
|
<td>{{ doc.trashed_at }}</td>
|
||||||
|
<td>{{ doc.current_path }}</td>
|
||||||
|
<td>
|
||||||
|
<form method="post" action="/trash/{{ doc.document_id }}/restore">
|
||||||
|
<button type="submit">Restore</button>
|
||||||
|
</form>
|
||||||
|
<form method="post" action="/trash/{{ doc.document_id }}/delete" style="margin-left: 0.5rem;">
|
||||||
|
<button type="submit">Delete permanently</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% else %}
|
||||||
|
<p>Trash is empty.</p>
|
||||||
|
{% endif %}
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Loading…
Reference in New Issue