document-processor/app/routes/line_items.py

299 lines
9.5 KiB
Python

from decimal import Decimal, InvalidOperation
from pathlib import Path
from urllib.parse import urlencode

from fastapi import APIRouter, Depends, Form, Query, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy import func
from sqlalchemy.orm import Session, selectinload

from app.db.deps import get_db
from app.logic.extraction import get_current_extracted_fields
from app.models.document import Document
from app.models.receipt_line_item import ReceiptLineItem

# Every route declared in this module is served under the /line-items prefix.
router = APIRouter(tags=["line-items"], prefix="/line-items")

# Directory one level above the folder that contains this module.
BASE_DIR = Path(__file__).resolve().parent.parent

# Template loader rooted at BASE_DIR / "templates".
templates = Jinja2Templates(directory=str(BASE_DIR.joinpath("templates")))
def _decimal_to_str(value: Decimal | None) -> str:
if value is None:
return ""
return str(value)
def _to_decimal(value: str | None) -> Decimal | None:
if value is None:
return None
cleaned = str(value).strip()
if not cleaned:
return None
try:
return Decimal(cleaned)
except (InvalidOperation, TypeError):
return None
def _line_item_quality_rating(item: ReceiptLineItem) -> str:
    """Return the ``quality_rating`` entry of ``item.extra_json`` as text.

    A falsy ``extra_json``, a missing key, or a stored ``None`` all yield the
    empty string; any other stored value is passed through ``str()``.
    """
    rating = (item.extra_json or {}).get("quality_rating")
    if rating is None:
        return ""
    return str(rating)
def _line_item_quality_note(item: ReceiptLineItem) -> str:
    """Return the ``quality_note`` entry of ``item.extra_json`` as text.

    A falsy ``extra_json``, a missing key, or a stored ``None`` all yield the
    empty string; any other stored value is passed through ``str()``.
    """
    note = (item.extra_json or {}).get("quality_note")
    if note is None:
        return ""
    return str(note)
@router.post("/{line_item_id}/review", response_class=RedirectResponse)
def save_line_item_review(
    line_item_id: int,
    q: str = Form(""),
    merchant: str = Form(""),
    category: str = Form(""),
    date_from: str = Form(""),
    date_to: str = Form(""),
    rating_min: str = Form(""),
    rating_max: str = Form(""),
    quality_rating: str = Form(""),
    quality_note: str = Form(""),
    db: Session = Depends(get_db),
):
    """Save a quality rating and note on a line item, then go back to the list.

    Args:
        line_item_id: Primary key of the line item being reviewed.
        q, merchant, category, date_from, date_to, rating_min, rating_max:
            The list-page filters that were active when the form was
            submitted; they are only echoed back into the redirect URL.
        quality_rating: Submitted rating text; blank removes the stored rating.
        quality_note: Submitted note text; blank removes the stored note.
        db: Database session.

    Returns:
        A 303 redirect to the list page. When the line item does not exist
        the redirect goes to the unfiltered list and nothing is written.
    """
    item = db.query(ReceiptLineItem).filter(ReceiptLineItem.id == line_item_id).first()
    if item is None:
        return RedirectResponse(url="/line-items/", status_code=303)

    # Build a fresh dict and assign it back instead of mutating in place.
    # NOTE(review): presumably needed so the ORM registers the JSON column as
    # changed -- confirm against the column definition.
    extra = dict(item.extra_json or {})
    for key, submitted in (
        ("quality_rating", quality_rating),
        ("quality_note", quality_note),
    ):
        cleaned = submitted.strip()
        if cleaned:
            extra[key] = cleaned
        else:
            # A blank field clears any previously stored value.
            extra.pop(key, None)
    item.extra_json = extra
    db.commit()

    # urlencode percent-encodes each value, so characters such as "&", "#",
    # "+", spaces, or non-ASCII text in a filter cannot break the query
    # string or inject extra parameters into the redirect target.
    query_string = urlencode(
        {
            "q": q,
            "merchant": merchant,
            "category": category,
            "date_from": date_from,
            "date_to": date_to,
            "rating_min": rating_min,
            "rating_max": rating_max,
        }
    )
    return RedirectResponse(url=f"/line-items/?{query_string}", status_code=303)
@router.get("/", response_class=HTMLResponse)
def list_line_items(
    request: Request,
    q: str = Query("", description="Item description contains"),
    merchant: str = Query("", description="Merchant contains"),
    category: str = Query("", description="Category equals"),
    date_from: str = Query("", description="YYYY-MM-DD"),
    date_to: str = Query("", description="YYYY-MM-DD"),
    rating_min: str = Query("", description="Minimum rating"),
    rating_max: str = Query("", description="Maximum rating"),
    db: Session = Depends(get_db),
):
    """Render the filterable list of receipt line items.

    All line items are loaded (with their document and the document's
    extracted fields) and filtered in Python, because several filter inputs
    (merchant, transaction date, rating) are derived per row rather than read
    from a single column.

    Args:
        request: Incoming request, forwarded to the template.
        q: Case-insensitive substring to look for in the item description.
        merchant: Case-insensitive substring to look for in the merchant name.
        category: Case-insensitive substring to look for in the item category.
        date_from: Inclusive lower bound on the transaction date (ISO text).
        date_to: Inclusive upper bound on the transaction date (ISO text).
        rating_min: Inclusive lower bound on the stored quality rating.
        rating_max: Inclusive upper bound on the stored quality rating.
        db: Database session.

    Returns:
        The rendered ``line_items/list.html`` template.
    """
    items = (
        db.query(ReceiptLineItem)
        .options(
            selectinload(ReceiptLineItem.document).selectinload(Document.extracted_fields)
        )
        .order_by(ReceiptLineItem.id.desc())
        .all()
    )

    q_norm = q.strip().lower()
    merchant_norm = merchant.strip().lower()
    # NOTE(review): the parameter description says "Category equals" but the
    # check below is a substring match -- confirm which one is intended.
    category_norm = category.strip().lower()
    # Strip the date bounds like every other text filter; otherwise a
    # whitespace-only value is truthy and silently filters out every row.
    date_from_norm = date_from.strip()
    date_to_norm = date_to.strip()
    rating_min_dec = _to_decimal(rating_min)
    rating_max_dec = _to_decimal(rating_max)

    rows: list[dict] = []
    for item in items:
        document = item.document
        if document is None:
            continue

        # NOTE(review): presumably selects the active extraction result for
        # the document -- see app.logic.extraction for the exact rule.
        extracted = get_current_extracted_fields(document)
        merchant_value = ""
        transaction_date = ""
        if extracted is not None:
            merchant_value = (
                extracted.merchant_normalized
                or extracted.merchant_raw
                or ""
            )
            if extracted.transaction_date:
                transaction_date = extracted.transaction_date.isoformat()
        # Fall back to the document's creation date when no transaction date
        # was extracted.
        if not transaction_date and document.created_at:
            transaction_date = document.created_at.date().isoformat()

        description_value = (
            item.normalized_description
            or item.raw_description
            or ""
        )
        category_value = item.item_category or ""
        quality_rating_value = _line_item_quality_rating(item)
        quality_note_value = _line_item_quality_note(item)
        quality_rating_dec = _to_decimal(quality_rating_value)

        if q_norm and q_norm not in description_value.lower():
            continue
        if merchant_norm and merchant_norm not in merchant_value.lower():
            continue
        if category_norm and category_norm not in category_value.lower():
            continue
        # Dates are compared as text; this orders correctly only because both
        # sides are expected to be ISO "YYYY-MM-DD" strings.
        if date_from_norm and (not transaction_date or transaction_date < date_from_norm):
            continue
        if date_to_norm and (not transaction_date or transaction_date > date_to_norm):
            continue
        # Rows without a parseable rating never match an active rating bound.
        if rating_min_dec is not None:
            if quality_rating_dec is None or quality_rating_dec < rating_min_dec:
                continue
        if rating_max_dec is not None:
            if quality_rating_dec is None or quality_rating_dec > rating_max_dec:
                continue

        rows.append(
            {
                "line_item_id": item.id,
                "document_id": document.document_id,
                "transaction_date": transaction_date,
                "merchant": merchant_value,
                "description": description_value,
                "raw_description": item.raw_description or "",
                "quantity": _decimal_to_str(item.quantity),
                "line_total": _decimal_to_str(item.line_total),
                "category": category_value,
                "confidence": _decimal_to_str(item.confidence),
                "quality_rating": quality_rating_value,
                "quality_note": quality_note_value,
            }
        )

    # Newest transaction first; merchant and description break ties
    # (also in descending order).
    rows.sort(
        key=lambda row: (
            row["transaction_date"] or "",
            row["merchant"] or "",
            row["description"] or "",
        ),
        reverse=True,
    )

    # The raw filter values are echoed back so the form shows what was typed.
    return templates.TemplateResponse(
        request=request,
        name="line_items/list.html",
        context={
            "request": request,
            "rows": rows,
            "q": q,
            "merchant": merchant,
            "category": category,
            "date_from": date_from,
            "date_to": date_to,
            "rating_min": rating_min,
            "rating_max": rating_max,
            "active_page": "line_items",
        },
    )
@router.get("/summary", response_class=HTMLResponse)
def summarize_line_items(
    request: Request,
    q: str = Query("", description="Item contains"),
    db: Session = Depends(get_db),
):
    """Render per-item price and rating statistics.

    Line items are grouped by ``normalized_description``. Count and
    average/min/max of ``line_total`` are computed in SQL; rating statistics
    are accumulated in Python from each row's ``extra_json``.

    Args:
        request: Incoming request, forwarded to the template.
        q: Optional text; when non-blank only items whose normalized
            description contains it (case-insensitively) are summarized.
        db: Database session.

    Returns:
        The rendered ``line_items/summary.html`` template.
    """
    # Strip like the list view does, so a whitespace-only query means
    # "no filter" instead of "descriptions containing a space".
    q_norm = q.strip()
    item_filter = None
    if q_norm:
        # Escape LIKE metacharacters so "%" and "_" typed by the user are
        # matched literally rather than acting as wildcards.
        escaped = (
            q_norm.replace("/", "//").replace("%", "/%").replace("_", "/_")
        )
        item_filter = ReceiptLineItem.normalized_description.ilike(
            f"%{escaped}%", escape="/"
        )

    query = (
        db.query(
            ReceiptLineItem.normalized_description.label("item"),
            func.count().label("count"),
            func.avg(ReceiptLineItem.line_total).label("avg_price"),
            func.min(ReceiptLineItem.line_total).label("min_price"),
            func.max(ReceiptLineItem.line_total).label("max_price"),
        )
    )
    if item_filter is not None:
        query = query.filter(item_filter)
    query = query.group_by(ReceiptLineItem.normalized_description)
    results = query.all()

    # Ratings are fetched row by row (same filter as above) and aggregated
    # here rather than in SQL.
    rating_query = db.query(
        ReceiptLineItem.normalized_description,
        ReceiptLineItem.extra_json,
    )
    if item_filter is not None:
        rating_query = rating_query.filter(item_filter)
    rating_rows = rating_query.all()

    rating_map: dict[str, dict[str, Decimal | int]] = {}
    for item_name, extra_json in rating_rows:
        key = item_name or ""
        rating_info = rating_map.setdefault(
            key,
            {"rated_count": 0, "rating_sum": Decimal("0")}
        )
        extra = extra_json or {}
        rating_dec = _to_decimal(extra.get("quality_rating"))
        # Rows with no parseable rating still create the map entry but do not
        # contribute to the average.
        if rating_dec is not None:
            rating_info["rated_count"] += 1
            rating_info["rating_sum"] += rating_dec

    rows = []
    for r in results:
        item_name = r.item or ""
        rating_info = rating_map.get(item_name, {"rated_count": 0, "rating_sum": Decimal("0")})
        rated_count = int(rating_info["rated_count"])
        rating_sum = rating_info["rating_sum"]
        avg_rating = ""
        if rated_count > 0:
            avg_rating = str((rating_sum / rated_count).quantize(Decimal("0.01")))
        rows.append(
            {
                "item": item_name,
                "count": r.count,
                "avg_price": str(round(r.avg_price, 2)) if r.avg_price is not None else "",
                "min_price": str(r.min_price) if r.min_price is not None else "",
                "max_price": str(r.max_price) if r.max_price is not None else "",
                "rated_count": rated_count,
                "avg_rating": avg_rating,
            }
        )

    # Most frequently purchased items first; name breaks ties (descending).
    rows.sort(key=lambda x: (x["count"], x["item"]), reverse=True)

    # The raw query text is echoed back so the form shows what was typed.
    return templates.TemplateResponse(
        request=request,
        name="line_items/summary.html",
        context={
            "request": request,
            "rows": rows,
            "q": q,
            "active_page": "line_item_summary",
        },
    )