feat: Phase 4.4 line item review workflow

- added queue tab to line items page
- added reviewed/approved/excluded/na review toggles
- made reviewed items leave the queue
- added line item source restore controls
- added regenerate line items from OCR
This commit is contained in:
Sean McElwain 2026-04-18 12:21:36 -05:00
parent fcce99a091
commit 2521ebd503
5 changed files with 402 additions and 34 deletions

View File

@ -42,7 +42,7 @@ ADDRESS_HINT_RE = re.compile(
)
PHONE_RE = re.compile(r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}")
QTY_PREFIX_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s+(.+?)\s*$")
ITEM_LINE_RE = re.compile(r"^(.*?)([0-9]+\.[0-9]{2})\s*$")
ITEM_LINE_RE = re.compile(r"^(.*?)([0-9]+\.[0-9]{2})(?:\s+\S+)?\s*$")
@dataclass
@ -748,6 +748,77 @@ def _extract_receipt_line_items(lines: list[DocumentLine]) -> list[dict]:
used_line_indexes.add(line.line_index)
used_line_indexes.add(prev_line.line_index)
fallback_description_lines: list[DocumentLine] = []
fallback_price_lines: list[DocumentLine] = []
for line in lines:
if line.line_index in used_line_indexes:
continue
if line.line_index in protected_amount_indexes:
continue
text = line.text.strip()
normalized = line.normalized
if _candidate_item_description_line(line):
fallback_description_lines.append(line)
continue
if _is_price_only_line(line) and not _is_non_item_line(normalized):
amount = _extract_line_amount(line)
if amount is not None:
fallback_price_lines.append(line)
pair_count = min(len(fallback_description_lines), len(fallback_price_lines))
for i in range(pair_count):
desc_line = fallback_description_lines[i]
price_line = fallback_price_lines[i]
if desc_line.line_index in used_line_indexes or price_line.line_index in used_line_indexes:
continue
description = desc_line.text.strip()
quantity = None
qty_match = QTY_PREFIX_RE.match(description)
if qty_match:
quantity = _to_decimal(qty_match.group(1))
description = qty_match.group(2).strip()
description = _clean_item_description(description)
line_total = _extract_line_amount(price_line)
if not description or line_total is None:
continue
confidence = Decimal("70.00")
if quantity is not None:
confidence = Decimal("74.00")
items.append(
{
"line_index": desc_line.line_index,
"raw_description": description,
"normalized_description": _normalize_item_description(description),
"quantity": str(quantity) if quantity is not None else "",
"unit_price": "",
"line_total": str(line_total),
"item_category": _infer_item_category(description) or "",
"confidence": str(confidence),
"extra_json": {
"page": desc_line.page,
"bbox": desc_line.bbox,
"price_line_index": price_line.line_index,
"price_bbox": price_line.bbox,
"price_text": price_line.text,
"source_text": desc_line.text,
"source_confidence": desc_line.confidence,
"match_type": "fallback_ordered_block",
},
}
)
used_line_indexes.add(desc_line.line_index)
used_line_indexes.add(price_line.line_index)
items.sort(key=lambda x: x.get("line_index", 0))
return items

View File

@ -28,6 +28,10 @@ from app.logic.extraction import (
auto_extract_from_document,
get_current_extracted_fields,
save_extracted_fields,
_extract_receipt_line_items,
_get_current_reviewed_text,
_get_document_lines,
_replace_document_line_items,
)
from app.logic.ingest import compute_quality_score, rerun_ocr_for_document
from app.models.document import Document
@ -1117,7 +1121,7 @@ def save_reviewed_text(
document.review_status = "reviewed"
db.commit()
return RedirectResponse(url=f"/documents/{document.document_id}?editor_source=reviewed&tab=ocr-review", status_code=303)
return RedirectResponse(url=f"/documents/{document.document_id}?tab=line-items&success=saved_reviewed_ocr", status_code=303)
@router.post("/{document_id}/save-extracted-fields", response_class=RedirectResponse)
@ -1254,6 +1258,82 @@ def save_additional_fields_route(
@router.post("/{document_id}/regenerate-line-items", response_class=RedirectResponse)
def regenerate_line_items(document_id: str, db: Session = Depends(get_db)):
    """Rebuild a document's line items from its current text and snapshot them.

    Loads the document, re-extracts line items from the text version returned
    by ``_get_current_reviewed_text``, replaces the stored items, and records
    the resulting items as the next ``DocumentLineItemSetVersion``.

    Args:
        document_id: Public identifier matched against ``Document.document_id``.
        db: Database session injected by FastAPI.

    Returns:
        A 303 redirect: to ``/documents/`` when the document does not exist,
        otherwise to the document's line-items tab carrying either
        ``success=regenerated_line_items`` or
        ``error=regenerate_line_items_failed``.
    """
    document = (
        db.query(Document)
        .options(
            selectinload(Document.text_versions),
            selectinload(Document.line_item_set).selectinload(DocumentLineItemSet.items),
            selectinload(Document.line_item_set_versions),
        )
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    # Build both redirect targets up front so the error path does not need to
    # read ORM attributes again after a rollback.
    line_items_url = f"/documents/{document.document_id}?tab=line-items"
    failure_url = f"{line_items_url}&error=regenerate_line_items_failed"
    success_url = f"{line_items_url}&success=regenerated_line_items"

    text_version = _get_current_reviewed_text(document)
    if text_version is None:
        return RedirectResponse(url=failure_url, status_code=303)

    try:
        lines = _get_document_lines(text_version)
        items = _extract_receipt_line_items(lines)
        _replace_document_line_items(db, document, items)
        db.flush()

        line_item_set = document.line_item_set
        next_version = max(
            (v.version_number for v in document.line_item_set_versions),
            default=0,
        ) + 1
        version = DocumentLineItemSetVersion(
            document_id=document.id,
            version_number=next_version,
            schema_type=(
                line_item_set.schema_type
                if line_item_set is not None
                else (document.document_type or "generic")
            ),
            created_by="regenerate_line_items",
            notes="Regenerated line items from current OCR text.",
        )
        db.add(version)
        db.flush()  # version.id is needed for the snapshot rows below

        # The schema_type fallback above already allows for a missing line
        # item set; in that case there is nothing to snapshot, so record an
        # empty version instead of failing on ``None.id``.
        current_items = []
        if line_item_set is not None:
            current_items = (
                db.query(DocumentLineItem)
                .filter(DocumentLineItem.line_item_set_id == line_item_set.id)
                .order_by(DocumentLineItem.line_number.asc())
                .all()
            )

        for item in current_items:
            db.add(DocumentLineItemVersionItem(
                set_version_id=version.id,
                line_number=item.line_number,
                entry_date=item.entry_date,
                description=item.description,
                quantity=item.quantity,
                unit_price=item.unit_price,
                line_total=item.line_total,
                tax_amount=item.tax_amount,
                category=item.category,
                notes=item.notes,
                raw_json=item.raw_json,
            ))

        db.commit()
    except Exception:
        # Route-level boundary: report the failure to the user via redirect
        # and leave the traceback on stderr for diagnosis.
        traceback.print_exc()
        db.rollback()
        return RedirectResponse(url=failure_url, status_code=303)

    return RedirectResponse(url=success_url, status_code=303)
@router.post("/{document_id}/save-line-items", response_class=RedirectResponse)
async def save_line_items(
document_id: str,
@ -1675,6 +1755,59 @@ def _get_current_additional_version_number(document: Document) -> int | None:
return v.version_number
return None
def _clear_line_items(db: Session, document: Document) -> bool:
    """Empty the document's current line item set.

    Returns ``True`` only when the set existed and held at least one item
    before clearing; returns ``False`` when there is no set or it was already
    empty. The session is flushed but not committed.
    """
    line_item_set = document.line_item_set
    if not line_item_set:
        return False

    existing_items = line_item_set.items
    had_items = bool(existing_items)
    existing_items.clear()
    db.flush()
    return had_items
def _restore_line_items_from_version_number(db: Session, document: Document, target_version_number: int) -> bool:
    """Replace the document's current line items with those of a saved version.

    Returns ``False`` without changing anything when the document has no
    line-item version numbered ``target_version_number``. Otherwise the
    current set (created if missing) takes the version's schema type, its
    items are cleared, one ``DocumentLineItem`` per saved item is added in
    ``line_number`` order, and ``True`` is returned. Nothing is committed here.
    """
    snapshot = (
        db.query(DocumentLineItemSetVersion)
        .options(selectinload(DocumentLineItemSetVersion.items))
        .filter(
            DocumentLineItemSetVersion.document_id == document.id,
            DocumentLineItemSetVersion.version_number == target_version_number,
        )
        .first()
    )
    if snapshot is None:
        return False

    restored_schema = snapshot.schema_type or document.document_type or "generic"

    if document.line_item_set is None:
        document.line_item_set = DocumentLineItemSet(
            document_id=document.id,
            schema_type=restored_schema,
        )
        db.add(document.line_item_set)
        db.flush()

    current_set = document.line_item_set
    current_set.schema_type = restored_schema
    current_set.items.clear()
    db.flush()

    # Fields copied verbatim from each saved item onto the restored row.
    copied_fields = (
        "line_number",
        "entry_date",
        "description",
        "quantity",
        "unit_price",
        "line_total",
        "tax_amount",
        "category",
        "notes",
        "raw_json",
    )
    for saved_item in sorted(snapshot.items, key=lambda saved: saved.line_number):
        db.add(
            DocumentLineItem(
                line_item_set_id=current_set.id,
                **{name: getattr(saved_item, name) for name in copied_fields},
            )
        )

    return True
def _parse_restore_choice(value: str) -> tuple[str, int | None]:
if not value or value == "none":
return ("none", None)
@ -1694,6 +1827,7 @@ def apply_source_options(
ocr_restore_choice: str = Form("none"),
extracted_restore_choice: str = Form("none"),
additional_restore_choice: str = Form("none"),
line_item_restore_choice: str = Form("none"),
db: Session = Depends(get_db),
):
document = (
@ -1770,6 +1904,18 @@ def apply_source_options(
if _restore_additional_from_version_number(db, document, additional_version):
changed = True
if line_item_restore_choice == "clear":
if _clear_line_items(db, document):
changed = True
elif line_item_restore_choice.startswith("version:"):
try:
target_line_item_version = int(line_item_restore_choice.split(":", 1)[1])
except ValueError:
target_line_item_version = None
if target_line_item_version is not None:
if _restore_line_items_from_version_number(db, document, target_line_item_version):
changed = True
if changed:
db.commit()
else:

View File

@ -1,4 +1,5 @@
from pathlib import Path
from datetime import datetime
from decimal import Decimal, InvalidOperation
from fastapi import APIRouter, Depends, Form, Query, Request
@ -57,21 +58,15 @@ def _line_item_quality_status(item: DocumentLineItem) -> str:
# Report whether a line item should be listed in the quality review queue.
# Items whose category is not "cocktail" (compared case-insensitively) are
# never candidates; the remaining checks read the item's extra data.
# NOTE(review): this span appears to show removed and added diff lines
# interleaved with indentation stripped — the status/rating checks look like
# the superseded form and the is_na/reviewed_at checks like their
# replacements. Confirm against the committed file.
def _is_quality_queue_candidate(item: DocumentLineItem) -> bool:
if (item.category or "").lower() != "cocktail":
return False
extra = _line_item_extra(item)
status = str(extra.get("quality_status") or "").strip().lower()
rating = str(extra.get("quality_rating") or "").strip()
if status == "na":
# Items flagged N/A are kept out of the queue.
if bool(extra.get("is_na")):
return False
if rating:
# An item with a reviewed_at value is kept out of the queue.
if extra.get("reviewed_at"):
return False
return True
def _build_row(item: DocumentLineItem) -> dict | None:
line_item_set = item.line_item_set
document = line_item_set.document if line_item_set is not None else None
@ -110,6 +105,11 @@ def _build_row(item: DocumentLineItem) -> dict | None:
"quality_rating": _line_item_quality_rating(item),
"quality_note": _line_item_quality_note(item),
"quality_status": _line_item_quality_status(item),
"is_reviewed": bool(_line_item_extra(item).get("reviewed_at")),
"is_approved": bool(_line_item_extra(item).get("is_approved")),
"is_excluded": bool(_line_item_extra(item).get("is_excluded")),
"is_na": bool(_line_item_extra(item).get("is_na")),
"reviewed_at": _line_item_extra(item).get("reviewed_at") or "",
}
@ -260,7 +260,9 @@ def save_line_item_review(
return_to: str = Form("list"),
quality_rating: str = Form(""),
quality_note: str = Form(""),
quality_status: str = Form(""),
is_approved: str = Form(""),
is_excluded: str = Form(""),
is_na: str = Form(""),
db: Session = Depends(get_db),
):
item = db.query(DocumentLineItem).filter(DocumentLineItem.id == line_item_id).first()
@ -271,36 +273,36 @@ def save_line_item_review(
rating_clean = quality_rating.strip()
note_clean = quality_note.strip()
status_clean = quality_status.strip().lower()
approved_checked = bool(is_approved)
excluded_checked = bool(is_excluded)
na_checked = bool(is_na)
if status_clean == "na":
extra["quality_status"] = "na"
extra["is_approved"] = approved_checked
extra["is_excluded"] = excluded_checked
extra["is_na"] = na_checked
extra["reviewed_at"] = datetime.utcnow().isoformat()
if na_checked:
extra.pop("quality_rating", None)
if note_clean:
extra["quality_note"] = note_clean
else:
extra.pop("quality_note", None)
else:
if rating_clean:
extra["quality_rating"] = rating_clean
extra["quality_status"] = "rated"
else:
extra.pop("quality_rating", None)
if status_clean == "rated":
extra["quality_status"] = "rated"
else:
extra.pop("quality_status", None)
if note_clean:
extra["quality_note"] = note_clean
else:
extra.pop("quality_note", None)
item.extra_json = extra
extra.pop("quality_status", None)
item.raw_json = extra
db.commit()
if return_to == "quality_queue":
return RedirectResponse(url="/queue/?tab=quality", status_code=303)
if return_to in {"quality_queue", "queue"}:
return RedirectResponse(url="/line-items/?tab=queue", status_code=303)
redirect_url = (
f"/line-items/?tab=advanced-search"
@ -351,7 +353,23 @@ def list_line_items(
summary_rows = _build_summary_rows(items=items, q=q)
if tab not in {"summary", "advanced-search"}:
queue_rows = []
for item in items:
if not _is_quality_queue_candidate(item):
continue
row = _build_row(item)
if row is not None:
queue_rows.append(row)
queue_rows.sort(
key=lambda row: (
row["transaction_date"] or "",
row["merchant"] or "",
row["description"] or "",
)
)
if tab not in {"summary", "advanced-search", "queue"}:
tab = "summary"
if tab == "summary" and any([merchant.strip(), category.strip(), date_from.strip(), date_to.strip(), rating_min.strip(), rating_max.strip()]):
@ -364,6 +382,7 @@ def list_line_items(
"request": request,
"rows": detail_rows,
"summary_rows": summary_rows,
"queue_rows": queue_rows,
"q": q,
"merchant": merchant,
"category": category,

View File

@ -21,6 +21,12 @@
</div>
{% elif success == "rerun_ocr" %}
<div class="success-message">OCR rerun successfully.</div>
{% elif success == "regenerated_line_items" %}
<div class="success-message">Line items regenerated successfully.</div>
{% elif success == "saved_reviewed_ocr" %}
<div class="success-message">Reviewed OCR saved.</div>
{% elif success == "saved_reviewed_ocr" %}
<div class="success-message">Reviewed OCR saved.</div>
{% elif error == "rerun_ocr_failed" %}
<div class="error-box">OCR rerun failed.</div>
{% elif error == "save_field_enriched_failed" %}
@ -362,6 +368,12 @@
<p class="empty-state">No line items saved yet.</p>
{% endif %}
<div class="button-row" style="margin-bottom: 0.75rem;">
<form method="post" action="/documents/{{ document.document_id }}/regenerate-line-items" style="display:inline;">
<button type="submit">Regenerate Line Items</button>
</form>
</div>
<form method="post" action="/documents/{{ document.document_id }}/save-line-items">
{% set base_count = line_items|length %}
{% set row_count = base_count + 3 if base_count > 0 else 12 %}
@ -505,7 +517,6 @@ function addRow() {
<div class="card" style="padding:1rem;">
<h3 style="margin-top:0;">Data Reset</h3>
<div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center; margin-bottom:0.75rem;">
<strong>OCR</strong>
<select name="ocr_restore_choice">
@ -528,7 +539,7 @@ function addRow() {
</select>
</div>
<div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center;">
<div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center; margin-bottom:0.75rem;">
<strong>Additional fields</strong>
<select name="additional_restore_choice">
<option value="none" selected>No change</option>
@ -538,6 +549,17 @@ function addRow() {
{% endfor %}
</select>
</div>
<div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center;">
<strong>Line items</strong>
<select name="line_item_restore_choice">
<option value="none" selected>No change</option>
<option value="clear">Clear</option>
{% for version in document.line_item_set_versions %}
<option value="version:{{ version.version_number }}">v{{ version.version_number }} — {{ version.created_at }}</option>
{% endfor %}
</select>
</div>
</div>
<div>

View File

@ -21,6 +21,7 @@
<div class="right-pane-tabs">
<button class="tab-button{% if active_tab == 'summary' %} active{% endif %}" type="button" data-tab="summary">Summary</button>
<button class="tab-button{% if active_tab == 'advanced-search' %} active{% endif %}" type="button" data-tab="advanced-search">Advanced Search</button>
<button class="tab-button{% if active_tab == 'queue' %} active{% endif %}" type="button" data-tab="queue">Queue</button>
</div>
<div class="tab-panel{% if active_tab == 'summary' %} active{% endif %}" data-panel="summary">
@ -152,6 +153,18 @@
{% if row.quality_rating %}
<span class="badge reviewed">Rating {{ row.quality_rating }}</span>
{% endif %}
{% if row.is_reviewed %}
<span class="badge">Reviewed</span>
{% endif %}
{% if row.is_approved %}
<span class="badge reviewed">Approved</span>
{% endif %}
{% if row.is_excluded %}
<span class="badge">Excluded</span>
{% endif %}
{% if row.is_na %}
<span class="badge">N/A</span>
{% endif %}
</div>
</div>
@ -170,7 +183,16 @@
<label for="quality_rating_{{ row.line_item_id }}">Quality rating</label>
<input id="quality_rating_{{ row.line_item_id }}" type="text" name="quality_rating" value="{{ row.quality_rating }}" placeholder="e.g. 8.5 or 4/5">
</div>
<div class="form-field">
<label>Review flags</label>
<div style="display:flex; gap:0.75rem; flex-wrap:wrap; margin-top:0.35rem;">
<label><input type="checkbox" name="is_approved" value="1" {% if row.is_approved %}checked{% endif %}> Approved</label>
<label><input type="checkbox" name="is_excluded" value="1" {% if row.is_excluded %}checked{% endif %}> Excluded</label>
<label><input type="checkbox" name="is_na" value="1" {% if row.is_na %}checked{% endif %} onchange="toggleReviewNA(this)"> N/A</label>
</div>
</div>
<div class="form-field full">
<label for="quality_note_{{ row.line_item_id }}">Quality note</label>
<textarea id="quality_note_{{ row.line_item_id }}" name="quality_note" rows="3" placeholder="Taste, portion, texture, service notes...">{{ row.quality_note }}</textarea>
</div>
@ -188,6 +210,79 @@
{% endif %}
</div>
</div>
<div class="tab-panel{% if active_tab == 'queue' %} active{% endif %}" data-panel="queue">
<h2 class="card-title">Quality Review Queue</h2>
{% if queue_rows %}
{% for row in queue_rows %}
<div class="card" style="margin-bottom: 1rem;">
<div class="topbar" style="margin-bottom: 0.75rem;">
<div>
<div class="page-subtitle">{{ row.transaction_date }} · {{ row.merchant }}</div>
<h3 class="card-title" style="margin: 0.2rem 0 0 0;">{{ row.description }}</h3>
<div class="page-subtitle">{{ row.raw_description }}</div>
</div>
<div class="badges">
{% if row.category %}
<span class="badge">{{ row.category }}</span>
{% endif %}
{% if row.quantity %}
<span class="badge">Qty {{ row.quantity }}</span>
{% endif %}
{% if row.line_total %}
<span class="badge">${{ row.line_total }}</span>
{% endif %}
{% if row.confidence %}
<span class="badge">Conf {{ row.confidence }}</span>
{% endif %}
{% if row.quality_rating %}
<span class="badge reviewed">Rating {{ row.quality_rating }}</span>
{% endif %}
</div>
</div>
<form method="post" action="/line-items/{{ row.line_item_id }}/review">
<input type="hidden" name="q" value="">
<input type="hidden" name="merchant" value="">
<input type="hidden" name="category" value="">
<input type="hidden" name="date_from" value="">
<input type="hidden" name="date_to" value="">
<input type="hidden" name="rating_min" value="">
<input type="hidden" name="rating_max" value="">
<input type="hidden" name="return_to" value="queue">
<div class="form-grid">
<div class="form-field">
<label for="queue_quality_rating_{{ row.line_item_id }}">Quality rating</label>
<input id="queue_quality_rating_{{ row.line_item_id }}" type="text" name="quality_rating" value="{{ row.quality_rating }}" placeholder="e.g. 8.5 or 4/5">
</div>
<div class="form-field">
<label>Review flags</label>
<div style="display:flex; gap:0.75rem; flex-wrap:wrap; margin-top:0.35rem;">
<label><input type="checkbox" name="is_approved" value="1" {% if row.is_approved %}checked{% endif %}> Approved</label>
<label><input type="checkbox" name="is_excluded" value="1" {% if row.is_excluded %}checked{% endif %}> Excluded</label>
<label><input type="checkbox" name="is_na" value="1" {% if row.is_na %}checked{% endif %} onchange="toggleReviewNA(this)"> N/A</label>
</div>
</div>
<div class="form-field full">
<label for="queue_quality_note_{{ row.line_item_id }}">Quality note</label>
<textarea id="queue_quality_note_{{ row.line_item_id }}" name="quality_note" rows="3" placeholder="Taste, portion, texture, service notes...">{{ row.quality_note }}</textarea>
</div>
</div>
<div class="button-row" style="margin-top: 1rem;">
<button class="primary" type="submit">Save rating/note</button>
<a class="button-link" href="/documents/{{ row.document_id }}?tab=line-items">Open document</a>
</div>
</form>
</div>
{% endfor %}
{% else %}
<p class="empty-state">No line items are waiting for quality review.</p>
{% endif %}
</div>
</div>
</main>
</div>
@ -211,6 +306,21 @@
const tabButtons = document.querySelectorAll("[data-tab]");
const tabPanels = document.querySelectorAll("[data-panel]");
// Disable (or re-enable) the quality rating and quality note inputs of the
// form that contains the given N/A checkbox, mirroring its checked state.
function toggleReviewNA(el) {
    const ownerForm = el.closest("form");
    if (!ownerForm) {
        return;
    }

    const shouldDisable = el.checked;
    const lockedNames = ["quality_rating", "quality_note"];
    const candidates = ownerForm.querySelectorAll('input[type="text"], textarea');

    for (const field of candidates) {
        if (lockedNames.includes(field.name)) {
            field.disabled = shouldDisable;
        }
    }
}
document.querySelectorAll('input[name="is_na"]').forEach(function (el) {
if (el.checked) toggleReviewNA(el);
});
function activateTab(target) {
tabButtons.forEach(function (b) {
b.classList.toggle("active", b.getAttribute("data-tab") === target);