document-processor/app/routes/queue.py

133 lines
4.2 KiB
Python

from pathlib import Path
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy import exists
from sqlalchemy.orm import Session, selectinload
from app.db.deps import get_db
from app.models.document import Document
from app.models.extracted_field import ExtractedField
from app.models.receipt_line_item import ReceiptLineItem
# Router for the review-queue pages; every route declared on it is served
# under the "/queue" prefix and tagged "queue".
router = APIRouter(prefix="/queue", tags=["queue"])
# Directory two levels above this file (the parent of this file's directory).
BASE_DIR = Path(__file__).resolve().parent.parent
# Jinja2 template loader rooted at "<BASE_DIR>/templates".
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
def _needs_quality_review(item: ReceiptLineItem) -> bool:
    """Tell whether a line item is a cocktail that still lacks a quality rating.

    Only items whose ``item_category`` equals "cocktail" (case-insensitive)
    qualify. A qualifying item is considered done when its ``extra_json``
    carries a ``quality_status`` of "na" (case-insensitive, surrounding
    whitespace ignored) or any non-blank ``quality_rating``.

    Args:
        item: Line item exposing ``item_category`` and ``extra_json``.

    Returns:
        True when the item still needs a quality review, False otherwise.
    """
    category = (item.item_category or "").lower()
    if category != "cocktail":
        return False

    # A missing extra_json is handled like an empty mapping.
    meta = item.extra_json or {}
    review_status = str(meta.get("quality_status") or "").strip().lower()
    rating_text = str(meta.get("quality_rating") or "").strip()

    # Pending means: not explicitly marked "na" and no rating recorded yet.
    return review_status != "na" and not rating_text
def _quality_row(item: ReceiptLineItem) -> dict | None:
    """Flatten a line item and its parent document into a template-ready dict.

    The first entry of the document's ``extracted_fields`` (when there is
    one) supplies the transaction date and merchant; otherwise both are
    empty strings. Missing values are always rendered as "".

    Args:
        item: Line item whose ``document`` relationship is read.

    Returns:
        A dict describing the row, or None when the item has no document.
    """
    doc = item.document
    if doc is None:
        return None

    first_fields = doc.extracted_fields[0] if doc.extracted_fields else None
    if first_fields is None:
        date_text = ""
        merchant_name = ""
    else:
        when = first_fields.transaction_date
        date_text = when.isoformat() if when else ""
        # Prefer the normalized merchant name and fall back to the raw one.
        merchant_name = (
            first_fields.merchant_normalized or first_fields.merchant_raw or ""
        )

    meta = item.extra_json or {}
    amount_text = "" if item.line_total is None else str(item.line_total)

    return {
        "line_item_id": item.id,
        "document_id": doc.document_id,
        "transaction_date": date_text,
        "merchant": merchant_name,
        "description": item.normalized_description or item.raw_description or "",
        "raw_description": item.raw_description or "",
        "line_total": amount_text,
        "category": item.item_category or "",
        "quality_rating": str(meta.get("quality_rating") or ""),
        "quality_note": str(meta.get("quality_note") or ""),
    }
@router.get("/", response_class=HTMLResponse)
def review_queue(request: Request, tab: str = "ocr", db: Session = Depends(get_db)):
    """Render the review queue page with every work list the template shows.

    Four lists are built on each request, regardless of the active tab:
    documents awaiting OCR review, reviewed documents with no extracted
    fields, cocktail line items awaiting a quality rating, and the 25 most
    recently updated documents. Trashed documents are excluded from all of
    them.

    Args:
        request: Incoming request, passed through to the template response.
        tab: Requested tab; any value other than "ocr", "fields", "quality"
            or "recent" falls back to "ocr".
        db: SQLAlchemy session supplied by the ``get_db`` dependency.

    Returns:
        The rendered ``queue/index.html`` template response.
    """
    if tab not in {"ocr", "fields", "quality", "recent"}:
        tab = "ocr"

    # Oldest first, so the head of the list is the next document to work on.
    needs_ocr_review = (
        db.query(Document)
        .filter(Document.is_trashed.is_(False))
        .filter(Document.review_status != "reviewed")
        .order_by(Document.created_at.asc())
        .all()
    )

    # Reviewed documents that have no ExtractedField row yet.
    needs_field_extraction = (
        db.query(Document)
        .options(selectinload(Document.extracted_fields))
        .filter(Document.is_trashed.is_(False))
        .filter(Document.review_status == "reviewed")
        .filter(~exists().where(ExtractedField.document_id == Document.id))
        .order_by(Document.updated_at.asc())
        .all()
    )

    recently_updated = (
        db.query(Document)
        .filter(Document.is_trashed.is_(False))
        .order_by(Document.updated_at.desc())
        .limit(25)
        .all()
    )

    # Join to the parent document so trashed documents are excluded here as
    # they are from the other lists. The inner join also skips line items
    # without a document, which _quality_row would have discarded anyway.
    # The cocktail / rating check stays in Python (_needs_quality_review)
    # because it reads values out of extra_json.
    quality_candidates = (
        db.query(ReceiptLineItem)
        .join(ReceiptLineItem.document)
        .options(
            selectinload(ReceiptLineItem.document).selectinload(Document.extracted_fields)
        )
        .filter(Document.is_trashed.is_(False))
        .order_by(ReceiptLineItem.id.asc())
        .all()
    )

    needs_quality_review = []
    for item in quality_candidates:
        if not _needs_quality_review(item):
            continue
        row = _quality_row(item)
        if row is not None:
            needs_quality_review.append(row)

    # "Next" shortcuts point at the head of each queue, or None when empty.
    next_ocr = needs_ocr_review[0] if needs_ocr_review else None
    next_fields = needs_field_extraction[0] if needs_field_extraction else None
    next_quality = needs_quality_review[0] if needs_quality_review else None

    return templates.TemplateResponse(
        request=request,
        name="queue/index.html",
        context={
            "request": request,
            "needs_ocr_review": needs_ocr_review,
            "needs_field_extraction": needs_field_extraction,
            "needs_quality_review": needs_quality_review,
            "recently_updated": recently_updated,
            "next_ocr": next_ocr,
            "next_fields": next_fields,
            "next_quality": next_quality,
            "active_page": "queue",
            "active_tab": tab,
        },
    )