# document-processor/app/routes/line_items.py
#
# 384 lines
# 12 KiB
# Python

from decimal import Decimal, InvalidOperation
from pathlib import Path
from urllib.parse import urlencode

from fastapi import APIRouter, Depends, Form, Query, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy import func
from sqlalchemy.orm import Session, selectinload

from app.db.deps import get_db
from app.logic.extraction import get_current_extracted_fields
from app.models.document import Document
from app.models.receipt_line_item import ReceiptLineItem
# Router for the line-item pages; every route declared on it is served under
# the "/line-items" prefix and tagged "line-items".
router = APIRouter(prefix="/line-items", tags=["line-items"])
# Directory two levels above this file's resolved path (the parent of the
# directory that contains this module).
BASE_DIR = Path(__file__).resolve().parent.parent
# Jinja2 environment that loads templates from "<BASE_DIR>/templates".
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
def _decimal_to_str(value: Decimal | None) -> str:
if value is None:
return ""
return str(value)
def _to_decimal(value: str | None) -> Decimal | None:
if value is None:
return None
cleaned = str(value).strip()
if not cleaned:
return None
try:
return Decimal(cleaned)
except (InvalidOperation, TypeError):
return None
def _line_item_extra(item: ReceiptLineItem) -> dict:
    """Return a new dict built from ``item.extra_json`` (``{}`` when falsy).

    The result is always a separate dict object, so callers can mutate it
    without touching the value stored on the item.
    """
    stored = item.extra_json
    if not stored:
        return {}
    return dict(stored)
def _line_item_quality_rating(item: ReceiptLineItem) -> str:
    """Return the item's ``quality_rating`` extra as text (``""`` if missing/None)."""
    rating = _line_item_extra(item).get("quality_rating")
    if rating is None:
        return ""
    return str(rating)
def _line_item_quality_note(item: ReceiptLineItem) -> str:
    """Return the item's ``quality_note`` extra as text (``""`` if missing/None)."""
    note = _line_item_extra(item).get("quality_note")
    if note is None:
        return ""
    return str(note)
def _line_item_quality_status(item: ReceiptLineItem) -> str:
    """Return the item's ``quality_status`` extra as text (``""`` if missing/None)."""
    status = _line_item_extra(item).get("quality_status")
    if status is None:
        return ""
    return str(status)
def _is_quality_queue_candidate(item: ReceiptLineItem) -> bool:
    """Tell whether a line item still needs a quality review.

    An item qualifies only when its category is "cocktail" (compared
    case-insensitively), its ``quality_status`` extra is not "na", and it has
    no non-blank ``quality_rating`` extra.
    """
    category = (item.item_category or "").lower()
    if category != "cocktail":
        return False

    extra = _line_item_extra(item)
    marked_na = str(extra.get("quality_status") or "").strip().lower() == "na"
    already_rated = bool(str(extra.get("quality_rating") or "").strip())
    return not (marked_na or already_rated)
def _build_row(item: ReceiptLineItem) -> dict | None:
    """Flatten a line item and its document into a template-friendly dict.

    Returns:
        ``None`` when the item has no document; otherwise a dict of display
        strings (plus the raw ``line_item_id`` / ``document_id`` values).
    """
    doc = item.document
    if doc is None:
        return None

    fields = get_current_extracted_fields(doc)
    merchant_name = ""
    txn_date = ""
    if fields is not None:
        merchant_name = fields.merchant_normalized or fields.merchant_raw or ""
        if fields.transaction_date:
            txn_date = fields.transaction_date.isoformat()

    # Fall back to the document's creation date when no transaction date
    # was obtained from the extracted fields.
    if not txn_date and doc.created_at:
        txn_date = doc.created_at.date().isoformat()

    raw_description = item.raw_description or ""
    row = {
        "line_item_id": item.id,
        "document_id": doc.document_id,
        "transaction_date": txn_date,
        "merchant": merchant_name,
        "description": item.normalized_description or raw_description,
        "raw_description": raw_description,
        "quantity": _decimal_to_str(item.quantity),
        "line_total": _decimal_to_str(item.line_total),
        "category": item.item_category or "",
        "confidence": _decimal_to_str(item.confidence),
        "quality_rating": _line_item_quality_rating(item),
        "quality_note": _line_item_quality_note(item),
        "quality_status": _line_item_quality_status(item),
    }
    return row
@router.post("/{line_item_id}/review", response_class=RedirectResponse)
def save_line_item_review(
    line_item_id: int,
    q: str = Form(""),
    merchant: str = Form(""),
    category: str = Form(""),
    date_from: str = Form(""),
    date_to: str = Form(""),
    rating_min: str = Form(""),
    rating_max: str = Form(""),
    return_to: str = Form("list"),
    quality_rating: str = Form(""),
    quality_note: str = Form(""),
    quality_status: str = Form(""),
    db: Session = Depends(get_db),
):
    """Store a quality review in a line item's ``extra_json`` and redirect.

    Review rules, applied to the stripped form values:
      * status "na": status is stored as "na" and any rating is removed.
      * otherwise, a non-blank rating is stored and status becomes "rated".
      * otherwise the rating is removed; status is kept only when the form
        explicitly sent "rated", else it is removed too.
      * the note is stored when non-blank and removed when blank.

    Args:
        line_item_id: Primary key of the line item being reviewed.
        q, merchant, category, date_from, date_to, rating_min, rating_max:
            List-page filters, echoed back in the redirect so the user
            returns to the same filtered view.
        return_to: "quality_queue" redirects to the queue page; any other
            value redirects to the line-item list.
        quality_rating, quality_note, quality_status: The review itself.
        db: Database session.

    Returns:
        A 303 redirect. An unknown ``line_item_id`` redirects to the
        unfiltered list without saving anything.
    """
    item = db.query(ReceiptLineItem).filter(ReceiptLineItem.id == line_item_id).first()
    if item is None:
        return RedirectResponse(url="/line-items/", status_code=303)

    extra = _line_item_extra(item)
    rating_clean = quality_rating.strip()
    note_clean = quality_note.strip()
    status_clean = quality_status.strip().lower()

    if status_clean == "na":
        extra["quality_status"] = "na"
        extra.pop("quality_rating", None)
    elif rating_clean:
        extra["quality_rating"] = rating_clean
        extra["quality_status"] = "rated"
    else:
        extra.pop("quality_rating", None)
        if status_clean == "rated":
            extra["quality_status"] = "rated"
        else:
            extra.pop("quality_status", None)

    # The note is handled the same way regardless of status/rating.
    if note_clean:
        extra["quality_note"] = note_clean
    else:
        extra.pop("quality_note", None)

    # ``extra`` is a separate dict from the stored value, so this is a
    # reassignment rather than an in-place mutation.
    item.extra_json = extra
    db.commit()

    if return_to == "quality_queue":
        # NOTE(review): this targets "/queue/", not this router's
        # "/line-items/queue" -- presumably a separate page; verify.
        return RedirectResponse(url="/queue/?tab=quality", status_code=303)

    # Percent-encode the echoed filters: raw interpolation let characters
    # such as "&", "#", "=" or "+" in a filter value corrupt the query string
    # or inject extra parameters.
    filters = urlencode(
        {
            "q": q,
            "merchant": merchant,
            "category": category,
            "date_from": date_from,
            "date_to": date_to,
            "rating_min": rating_min,
            "rating_max": rating_max,
        }
    )
    return RedirectResponse(url=f"/line-items/?{filters}", status_code=303)
@router.get("/queue", response_class=HTMLResponse)
def quality_queue(
    request: Request,
    db: Session = Depends(get_db),
):
    """Render the queue of line items that still need a quality review.

    Every line item is loaded (with its document and extracted fields),
    narrowed to queue candidates, converted to display rows and sorted
    ascending by transaction date, merchant and description. The first row
    is also exposed to the template as ``next_row``.
    """
    eager_document = selectinload(ReceiptLineItem.document).selectinload(
        Document.extracted_fields
    )
    all_items = (
        db.query(ReceiptLineItem)
        .options(eager_document)
        .order_by(ReceiptLineItem.id.asc())
        .all()
    )

    def _sort_key(row):
        return (
            row["transaction_date"] or "",
            row["merchant"] or "",
            row["description"] or "",
        )

    # Lazily build a row per candidate, then drop items without a document.
    built_rows = (
        _build_row(item) for item in all_items if _is_quality_queue_candidate(item)
    )
    rows = [row for row in built_rows if row is not None]
    rows.sort(key=_sort_key)

    next_row = None
    if rows:
        next_row = rows[0]

    context = {
        "request": request,
        "rows": rows,
        "next_row": next_row,
        "active_page": "line_items",
    }
    return templates.TemplateResponse(
        request=request,
        name="line_items/queue.html",
        context=context,
    )
@router.get("/", response_class=HTMLResponse)
def list_line_items(
    request: Request,
    q: str = Query("", description="Item description contains"),
    merchant: str = Query("", description="Merchant contains"),
    category: str = Query("", description="Category equals"),
    date_from: str = Query("", description="YYYY-MM-DD"),
    date_to: str = Query("", description="YYYY-MM-DD"),
    rating_min: str = Query("", description="Minimum rating"),
    rating_max: str = Query("", description="Maximum rating"),
    db: Session = Depends(get_db),
):
    """Render the filterable list of line items.

    All line items are loaded and filtered in Python against their display
    rows. Text filters are case-insensitive substring matches; date bounds
    are inclusive and compared as ISO-8601 strings; rating bounds are
    inclusive and exclude rows without a parseable rating. Rows are sorted
    descending by transaction date, merchant and description.

    Args:
        request: Incoming request, forwarded to the template.
        q: Substring to look for in the row description.
        merchant: Substring to look for in the merchant name.
        category: Substring to look for in the category.
        date_from: Earliest transaction date to include (YYYY-MM-DD).
        date_to: Latest transaction date to include (YYYY-MM-DD).
        rating_min: Lowest rating to include; ignored when not numeric.
        rating_max: Highest rating to include; ignored when not numeric.
        db: Database session.

    Returns:
        The rendered ``line_items/list.html`` template. The filter values
        are passed back to the template exactly as received.
    """
    items = (
        db.query(ReceiptLineItem)
        .options(
            selectinload(ReceiptLineItem.document).selectinload(Document.extracted_fields)
        )
        .order_by(ReceiptLineItem.id.desc())
        .all()
    )

    q_norm = q.strip().lower()
    merchant_norm = merchant.strip().lower()
    # NOTE(review): the parameter description says "Category equals" but the
    # check below is a substring match -- confirm which one is intended.
    category_norm = category.strip().lower()
    # Strip the date bounds like every other filter: they are compared
    # lexicographically, so a stray space would silently mis-filter and a
    # whitespace-only value would hide every undated row.
    date_from_norm = date_from.strip()
    date_to_norm = date_to.strip()
    rating_min_dec = _to_decimal(rating_min)
    rating_max_dec = _to_decimal(rating_max)
    rating_filter_active = rating_min_dec is not None or rating_max_dec is not None

    rows: list[dict] = []
    for item in items:
        row = _build_row(item)
        if row is None:
            continue

        if q_norm and q_norm not in row["description"].lower():
            continue
        if merchant_norm and merchant_norm not in row["merchant"].lower():
            continue
        if category_norm and category_norm not in row["category"].lower():
            continue

        row_date = row["transaction_date"]
        if date_from_norm and (not row_date or row_date < date_from_norm):
            continue
        if date_to_norm and (not row_date or row_date > date_to_norm):
            continue

        if rating_filter_active:
            # Only parse the stored rating when a bound actually needs it.
            quality_rating_dec = _to_decimal(row["quality_rating"])
            if quality_rating_dec is None:
                continue
            if rating_min_dec is not None and quality_rating_dec < rating_min_dec:
                continue
            if rating_max_dec is not None and quality_rating_dec > rating_max_dec:
                continue

        rows.append(row)

    rows.sort(
        key=lambda row: (
            row["transaction_date"] or "",
            row["merchant"] or "",
            row["description"] or "",
        ),
        reverse=True,
    )

    return templates.TemplateResponse(
        request=request,
        name="line_items/list.html",
        context={
            "request": request,
            "rows": rows,
            "q": q,
            "merchant": merchant,
            "category": category,
            "date_from": date_from,
            "date_to": date_to,
            "rating_min": rating_min,
            "rating_max": rating_max,
            "active_page": "line_items",
        },
    )
@router.get("/summary", response_class=HTMLResponse)
def summarize_line_items(
    request: Request,
    q: str = Query("", description="Item contains"),
    db: Session = Depends(get_db),
):
    """Render per-item aggregates: count, price stats and average rating.

    Line items are grouped by ``normalized_description``. Price statistics
    come from an aggregate query; ratings live in ``extra_json`` and are
    therefore averaged in Python from a second query using the same filter.
    Rows are sorted by count, then item name, both descending.

    Args:
        request: Incoming request, forwarded to the template.
        q: Optional text that the normalized description must contain
            (case-insensitive, matched literally).
        db: Database session.

    Returns:
        The rendered ``line_items/summary.html`` template.
    """
    description = ReceiptLineItem.normalized_description

    # Build the filter once so both queries are guaranteed to agree.
    description_filter = None
    q_norm = q.strip()
    if q_norm:
        # Escape LIKE metacharacters so "%" and "_" typed by the user match
        # literally instead of acting as wildcards.
        escaped = (
            q_norm.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        description_filter = description.ilike(f"%{escaped}%", escape="\\")

    # The count is labelled "item_count" rather than "count" so attribute
    # access cannot be confused with the ``count`` method that result rows
    # expose as sequences.
    query = db.query(
        description.label("item"),
        func.count().label("item_count"),
        func.avg(ReceiptLineItem.line_total).label("avg_price"),
        func.min(ReceiptLineItem.line_total).label("min_price"),
        func.max(ReceiptLineItem.line_total).label("max_price"),
    )
    if description_filter is not None:
        query = query.filter(description_filter)
    results = query.group_by(description).all()

    rating_query = db.query(description, ReceiptLineItem.extra_json)
    if description_filter is not None:
        rating_query = rating_query.filter(description_filter)

    # item name -> running count and sum of its parseable ratings.
    rating_map: dict[str, dict[str, Decimal | int]] = {}
    for item_name, extra_json in rating_query.all():
        rating_info = rating_map.setdefault(
            item_name or "",
            {"rated_count": 0, "rating_sum": Decimal("0")},
        )
        rating_dec = _to_decimal((extra_json or {}).get("quality_rating"))
        # Skip NaN/Infinity as well: they would turn the average into NaN
        # or make ``quantize`` raise below.
        if rating_dec is not None and rating_dec.is_finite():
            rating_info["rated_count"] += 1
            rating_info["rating_sum"] += rating_dec

    rows = []
    for r in results:
        item_name = r.item or ""
        rating_info = rating_map.get(
            item_name, {"rated_count": 0, "rating_sum": Decimal("0")}
        )
        rated_count = int(rating_info["rated_count"])
        avg_rating = ""
        if rated_count > 0:
            average = rating_info["rating_sum"] / rated_count
            avg_rating = str(average.quantize(Decimal("0.01")))
        rows.append(
            {
                "item": item_name,
                "count": r.item_count,
                "avg_price": str(round(r.avg_price, 2)) if r.avg_price is not None else "",
                "min_price": str(r.min_price) if r.min_price is not None else "",
                "max_price": str(r.max_price) if r.max_price is not None else "",
                "rated_count": rated_count,
                "avg_rating": avg_rating,
            }
        )

    rows.sort(key=lambda x: (x["count"], x["item"]), reverse=True)

    return templates.TemplateResponse(
        request=request,
        name="line_items/summary.html",
        context={
            "request": request,
            "rows": rows,
            "q": q,
            "active_page": "line_item_summary",
        },
    )