feat: add receipt line item model and document wiring
This commit is contained in:
parent
c7dab22f16
commit
d14ee39cc8
|
|
@ -3,6 +3,7 @@ from app.models.document_version import DocumentVersion
|
|||
from app.models.text_version import TextVersion
|
||||
from app.models.extracted_field import ExtractedField
|
||||
from app.models.layer1_candidate import Layer1Candidate
|
||||
from app.models.receipt_line_item import ReceiptLineItem
|
||||
|
||||
__all__ = [
|
||||
"Document",
|
||||
|
|
@ -10,4 +11,5 @@ __all__ = [
|
|||
"TextVersion",
|
||||
"ExtractedField",
|
||||
"Layer1Candidate",
|
||||
"ReceiptLineItem",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from datetime import datetime
|
||||
from sqlalchemy import String, Integer, DateTime, Text, Boolean
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Integer, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
|
|
@ -35,7 +36,12 @@ class Document(Base):
|
|||
trashed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime,
|
||||
default=datetime.utcnow,
|
||||
onupdate=datetime.utcnow,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
versions: Mapped[list["DocumentVersion"]] = relationship(
|
||||
back_populates="document",
|
||||
|
|
@ -53,3 +59,7 @@ class Document(Base):
|
|||
back_populates="document",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
receipt_line_items: Mapped[list["ReceiptLineItem"]] = relationship(
|
||||
back_populates="document",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, JSON, Numeric, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
class ReceiptLineItem(Base):
    """ORM row for a single line item belonging to a document.

    Each row references its parent through ``document_id`` (foreign key to
    ``documents.id``) and is reachable from the parent via
    ``Document.receipt_line_items``. Only ``raw_description`` and the
    bookkeeping timestamps are mandatory; every other descriptive or
    monetary column may be NULL.
    """

    __tablename__ = "receipt_line_items"

    # Surrogate key and link to the owning document.
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    document_id: Mapped[int] = mapped_column(
        ForeignKey("documents.id"),
        nullable=False,
        index=True,
    )

    # Optional index of the line plus its text; presumably the position on
    # the receipt -- confirm with the code that populates it.
    line_index: Mapped[int | None] = mapped_column(Integer, nullable=True)
    raw_description: Mapped[str] = mapped_column(Text, nullable=False)
    normalized_description: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Amounts are fixed-point decimals: quantity keeps three fractional
    # digits, the two price columns keep two.
    quantity: Mapped[Decimal | None] = mapped_column(
        Numeric(precision=12, scale=3),
        nullable=True,
    )
    unit_price: Mapped[Decimal | None] = mapped_column(
        Numeric(precision=12, scale=2),
        nullable=True,
    )
    line_total: Mapped[Decimal | None] = mapped_column(
        Numeric(precision=12, scale=2),
        nullable=True,
    )

    # Classification metadata and a free-form JSON payload.
    item_category: Mapped[str | None] = mapped_column(String(length=64), nullable=True)
    confidence: Mapped[Decimal | None] = mapped_column(
        Numeric(precision=5, scale=2),
        nullable=True,
    )
    extra_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Timestamps use datetime.utcnow as the Python-side default;
    # updated_at is also refreshed on UPDATE via onupdate.
    created_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Inverse side of Document.receipt_line_items.
    document: Mapped["Document"] = relationship(back_populates="receipt_line_items")
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, Request
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||
|
|
@ -19,58 +19,14 @@ from app.logic.extraction import (
|
|||
)
|
||||
from app.logic.ingest import compute_quality_score, rerun_ocr_for_document
|
||||
from app.models.document import Document
|
||||
from app.models.document_version import DocumentVersion
|
||||
from app.models.text_version import TextVersion
|
||||
|
||||
router = APIRouter(prefix="/documents", tags=["documents"])
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _build_queue_navigation(db: Session, document: Document, queue: str | None) -> dict:
    """Find the neighbours of ``document`` inside a named review queue.

    Supported queue names:

    * ``"ocr"``    -- non-trashed documents whose ``review_status`` is not
      ``"reviewed"``, ordered by ``created_at`` ascending.
    * ``"fields"`` -- non-trashed ``"reviewed"`` documents with no (or
      empty) ``extracted_fields``, ordered by ``updated_at`` falling back
      to ``created_at``.
    * ``"recent"`` -- every non-trashed document, ``updated_at`` descending.

    Returns a dict with the keys ``queue``, ``prev_doc`` and ``next_doc``.
    ``queue`` is ``None`` when the name is missing or unknown. Both
    neighbours are ``None`` when ``document`` is not in the queue, and the
    matching neighbour is ``None`` at either end of the queue.
    """
    if not queue:
        return {"queue": None, "prev_doc": None, "next_doc": None}

    # Every queue starts from the same "not trashed" query.
    not_trashed = db.query(Document).filter(Document.is_trashed.is_(False))

    if queue == "ocr":
        pending = not_trashed.filter(Document.review_status != "reviewed")
        docs = pending.order_by(Document.created_at.asc()).all()
    elif queue == "fields":
        reviewed = not_trashed.filter(Document.review_status == "reviewed").all()
        # Keep only documents that still lack extracted fields.
        docs = [
            candidate
            for candidate in reviewed
            if not getattr(candidate, "extracted_fields", None)
        ]
        docs.sort(key=lambda candidate: candidate.updated_at or candidate.created_at)
    elif queue == "recent":
        docs = not_trashed.order_by(Document.updated_at.desc()).all()
    else:
        return {"queue": None, "prev_doc": None, "next_doc": None}

    ids = [entry.document_id for entry in docs]
    try:
        position = ids.index(document.document_id)
    except ValueError:
        # The current document is not a member of this queue.
        return {"queue": queue, "prev_doc": None, "next_doc": None}

    prev_doc = None
    if position > 0:
        prev_doc = docs[position - 1]

    next_doc = None
    if position + 1 < len(docs):
        next_doc = docs[position + 1]

    return {"queue": queue, "prev_doc": prev_doc, "next_doc": next_doc}
|
||||
|
||||
|
||||
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
|
||||
|
||||
|
||||
QUALITY_FLAG_OPTIONS = [
|
||||
"bad_embedded_text",
|
||||
"ocr_garbled",
|
||||
|
|
@ -93,6 +49,13 @@ QUALITY_FLAG_OPTIONS = [
|
|||
]
|
||||
|
||||
|
||||
def _document_url(document_id: str, **params) -> str:
|
||||
clean_params = {k: v for k, v in params.items() if v not in (None, "", False)}
|
||||
if not clean_params:
|
||||
return f"/documents/{document_id}"
|
||||
return f"/documents/{document_id}?{urlencode(clean_params)}"
|
||||
|
||||
|
||||
def _get_current_text_versions(document: Document) -> tuple[TextVersion | None, TextVersion | None]:
|
||||
sorted_text_versions = sorted(
|
||||
document.text_versions,
|
||||
|
|
@ -161,7 +124,6 @@ def _apply_reviewed_lines_to_layout(base_layout: dict | None, reviewed_text: str
|
|||
return new_layout
|
||||
|
||||
|
||||
|
||||
def _get_queue_navigation(db: Session, document: Document) -> dict:
|
||||
active_docs = (
|
||||
db.query(Document)
|
||||
|
|
@ -285,24 +247,37 @@ def rerun_ocr(document_id: str, db: Session = Depends(get_db)):
|
|||
try:
|
||||
rerun_ocr_for_document(db, document)
|
||||
except Exception:
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=rerun_ocr_failed", status_code=303)
|
||||
return RedirectResponse(
|
||||
url=_document_url(document.document_id, error="rerun_ocr_failed", tab="ocr-review"),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?editor_source=raw", status_code=303)
|
||||
return RedirectResponse(
|
||||
url=_document_url(document.document_id, editor_source="raw", tab="ocr-review"),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{document_id}/save-ocr-corrected-pdf", response_class=RedirectResponse)
def save_ocr_corrected_pdf(document_id: str, db: Session = Depends(get_db)):
    """Create an OCR-corrected PDF version, then redirect to the document page.

    The document is loaded once, with its ``text_versions`` eagerly selected.
    An unknown ``document_id`` redirects to the document list. Any exception
    raised by ``create_ocr_corrected_pdf_version`` is turned into a redirect
    carrying ``error=save_ocr_corrected_failed``. Both outcomes redirect to
    the document page with ``tab=ocr-review`` and use status 303.
    """
    # One query only; the duplicated lookup was redundant work.
    document = (
        db.query(Document)
        .options(selectinload(Document.text_versions))
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    try:
        create_ocr_corrected_pdf_version(db, document)
    except Exception:
        # Best-effort route: report the failure through the query string
        # rather than surfacing a 500 to the user.
        return RedirectResponse(
            url=_document_url(document.document_id, error="save_ocr_corrected_failed", tab="ocr-review"),
            status_code=303,
        )

    return RedirectResponse(url=_document_url(document.document_id, tab="ocr-review"), status_code=303)
|
||||
|
||||
|
||||
@router.post("/{document_id}/move-to-trash", response_class=RedirectResponse)
|
||||
|
|
@ -328,9 +303,12 @@ def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
|||
try:
|
||||
create_field_enriched_pdf_version(db, document)
|
||||
except Exception:
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_field_enriched_failed", status_code=303)
|
||||
return RedirectResponse(
|
||||
url=_document_url(document.document_id, error="save_field_enriched_failed", tab="extracted-fields"),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
|
||||
return RedirectResponse(url=_document_url(document.document_id, tab="extracted-fields"), status_code=303)
|
||||
|
||||
|
||||
@router.post("/{document_id}/review-text", response_class=RedirectResponse)
|
||||
|
|
@ -357,7 +335,13 @@ def save_reviewed_text(
|
|||
|
||||
if expected_line_count and actual_line_count != expected_line_count:
|
||||
return RedirectResponse(
|
||||
url=f"/documents/{document.document_id}?error=line_count_mismatch&expected={expected_line_count}&actual={actual_line_count}",
|
||||
url=_document_url(
|
||||
document.document_id,
|
||||
error="line_count_mismatch",
|
||||
expected=expected_line_count,
|
||||
actual=actual_line_count,
|
||||
tab="ocr-review",
|
||||
),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
|
|
@ -393,7 +377,10 @@ def save_reviewed_text(
|
|||
|
||||
db.commit()
|
||||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?editor_source=reviewed", status_code=303)
|
||||
return RedirectResponse(
|
||||
url=_document_url(document.document_id, editor_source="reviewed", tab="ocr-review"),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{document_id}/save-extracted-fields", response_class=RedirectResponse)
|
||||
|
|
@ -441,7 +428,10 @@ def save_extracted_fields_route(
|
|||
extra_json=extra_json,
|
||||
)
|
||||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?autofill_extracted=0", status_code=303)
|
||||
return RedirectResponse(
|
||||
url=_document_url(document.document_id, autofill_extracted=0, tab="extracted-fields"),
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{document_id}", response_class=HTMLResponse)
|
||||
|
|
@ -464,6 +454,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
|
||||
|
||||
editor_source = request.query_params.get("editor_source", "reviewed")
|
||||
active_tab = request.query_params.get("tab", "ocr-review")
|
||||
review_text_value = _build_review_text_value(raw_ocr, reviewed_ocr, editor_source)
|
||||
|
||||
expected_line_count = _line_count_from_layout(raw_ocr.layout_json if raw_ocr else None)
|
||||
|
|
@ -489,7 +480,6 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
current_extracted = get_current_extracted_fields(document)
|
||||
queue_nav = _get_queue_navigation(db, document)
|
||||
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request=request,
|
||||
name="documents/detail.html",
|
||||
|
|
@ -498,13 +488,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
"document": document,
|
||||
"prev_doc": queue_nav.get("prev_doc"),
|
||||
"next_doc": queue_nav.get("next_doc"),
|
||||
"next_ocr_doc": queue_nav.get("next_ocr") or queue_nav.get("next_ocr_doc"),
|
||||
"next_fields_doc": queue_nav.get("next_fields") or queue_nav.get("next_fields_doc"),
|
||||
"next_ocr_doc": queue_nav.get("next_ocr_doc"),
|
||||
"next_fields_doc": queue_nav.get("next_fields_doc"),
|
||||
"raw_ocr": raw_ocr,
|
||||
"reviewed_ocr": reviewed_ocr,
|
||||
"review_text_value": review_text_value,
|
||||
"file_url": file_url,
|
||||
"app_url": app_url,
|
||||
"active_tab": active_tab,
|
||||
"quality_flag_options": QUALITY_FLAG_OPTIONS,
|
||||
"current_quality_flags": raw_ocr.quality_flags if raw_ocr and raw_ocr.quality_flags else [],
|
||||
"current_quality_note": raw_ocr.quality_note if raw_ocr and raw_ocr.quality_note else "",
|
||||
|
|
|
|||
Loading…
Reference in New Issue