feat: WIP field versioning (extracted + additional snapshots wired, preparing restore/reset)

This commit is contained in:
Sean McElwain 2026-04-11 22:59:56 -05:00
parent 871ae5401f
commit e15612184d
6 changed files with 211 additions and 22 deletions

View File

@ -4,6 +4,8 @@ from sqlalchemy import Boolean, DateTime, Integer, String, Text
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base from app.db.base import Base
from app.models.document_additional_field_version import DocumentAdditionalFieldVersion
from app.models.extracted_field_version import ExtractedFieldVersion
from app.models.document_line_item_set import DocumentLineItemSet from app.models.document_line_item_set import DocumentLineItemSet
from app.models.document_line_item_set_version import DocumentLineItemSetVersion from app.models.document_line_item_set_version import DocumentLineItemSetVersion
@ -73,6 +75,16 @@ class Document(Base):
back_populates="document", back_populates="document",
cascade="all, delete-orphan", cascade="all, delete-orphan",
) )
extracted_field_versions: Mapped[list["ExtractedFieldVersion"]] = relationship(
back_populates="document",
cascade="all, delete-orphan",
order_by="ExtractedFieldVersion.version_number",
)
additional_field_versions: Mapped[list["DocumentAdditionalFieldVersion"]] = relationship(
back_populates="document",
cascade="all, delete-orphan",
order_by="DocumentAdditionalFieldVersion.version_number",
)
line_item_set: Mapped["DocumentLineItemSet | None"] = relationship( line_item_set: Mapped["DocumentLineItemSet | None"] = relationship(
back_populates="document", back_populates="document",
cascade="all, delete-orphan", cascade="all, delete-orphan",

View File

@ -0,0 +1,38 @@
from datetime import datetime, date
from decimal import Decimal
from sqlalchemy import Boolean, Date, DateTime, ForeignKey, Integer, Numeric, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base
class DocumentAdditionalFieldVersion(Base):
    """Numbered snapshot row of a document's additional (ownership/reimbursement) fields.

    Each row belongs to one document (``document_id``) and carries a
    ``version_number``; the pair is unique per the table constraint below.
    """

    __tablename__ = "document_additional_field_versions"
    __table_args__ = (
        # A document may hold at most one row per version number.
        UniqueConstraint("document_id", "version_number", name="uq_document_additional_field_versions_doc_ver"),
    )

    # Identity and versioning.
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    document_id: Mapped[int] = mapped_column(ForeignKey("documents.id"), nullable=False, index=True)
    version_number: Mapped[int] = mapped_column(Integer, nullable=False)

    # Ownership / payer details captured at snapshot time.
    owner_primary: Mapped[str | None] = mapped_column(String(255), nullable=True)
    owner_secondary: Mapped[str | None] = mapped_column(String(255), nullable=True)
    paid_by_person: Mapped[str | None] = mapped_column(String(255), nullable=True)
    occasion_note: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Python-side default only (no server_default is declared here).
    is_shared_expense: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

    # JSONB list columns; element type is not constrained by this model.
    covered_people: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    attendees: Mapped[list | None] = mapped_column(JSONB, nullable=True)
    reimbursement_expected_from: Mapped[list | None] = mapped_column(JSONB, nullable=True)

    # Reimbursement details.
    reimbursement_paid_by: Mapped[str | None] = mapped_column(String(255), nullable=True)
    reimbursement_paid_to: Mapped[str | None] = mapped_column(String(255), nullable=True)
    reimbursement_paid_amount: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
    reimbursement_paid_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    reimbursement_note: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Audit metadata for the snapshot itself.
    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Naive timestamp produced by datetime.utcnow at insert time.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 - consider
    # a timezone-aware default when the column type allows it.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)

    # Paired with the "additional_field_versions" relationship on Document.
    document: Mapped["Document"] = relationship(back_populates="additional_field_versions")

View File

@ -0,0 +1,38 @@
from datetime import datetime, date
from decimal import Decimal
from sqlalchemy import Date, DateTime, ForeignKey, Integer, Numeric, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db.base import Base
class ExtractedFieldVersion(Base):
    """Numbered snapshot row of a document's extracted (receipt/transaction) fields.

    Each row belongs to one document (``document_id``) and carries a
    ``version_number``; the pair is unique per the table constraint below.
    """

    __tablename__ = "extracted_field_versions"
    __table_args__ = (
        # A document may hold at most one row per version number.
        UniqueConstraint("document_id", "version_number", name="uq_extracted_field_versions_doc_ver"),
    )

    # Identity and versioning.
    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    document_id: Mapped[int] = mapped_column(ForeignKey("documents.id"), nullable=False, index=True)
    version_number: Mapped[int] = mapped_column(Integer, nullable=False)

    # Merchant and transaction details captured at snapshot time.
    merchant_raw: Mapped[str | None] = mapped_column(Text, nullable=True)
    merchant_normalized: Mapped[str | None] = mapped_column(Text, nullable=True)
    transaction_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    # Stored as a free-form string (up to 50 chars), not a time type.
    transaction_time: Mapped[str | None] = mapped_column(String(50), nullable=True)

    # Monetary amounts, all Numeric(18, 4).
    subtotal: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
    tax: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
    total: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
    currency: Mapped[str | None] = mapped_column(String(20), nullable=True)

    payment_method: Mapped[str | None] = mapped_column(String(100), nullable=True)
    receipt_number: Mapped[str | None] = mapped_column(String(100), nullable=True)
    location: Mapped[str | None] = mapped_column(Text, nullable=True)
    counterparty: Mapped[str | None] = mapped_column(Text, nullable=True)
    # JSONB dict column; its key schema is not constrained by this model.
    extra_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)

    # Audit metadata for the snapshot itself.
    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Naive timestamp produced by datetime.utcnow at insert time.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 - consider
    # a timezone-aware default when the column type allows it.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)

    # Paired with the "extracted_field_versions" relationship on Document.
    document: Mapped["Document"] = relationship(back_populates="extracted_field_versions")

View File

@ -32,6 +32,8 @@ from app.logic.extraction import (
from app.logic.ingest import compute_quality_score, rerun_ocr_for_document from app.logic.ingest import compute_quality_score, rerun_ocr_for_document
from app.models.document import Document from app.models.document import Document
from app.models.document_additional_field import DocumentAdditionalField from app.models.document_additional_field import DocumentAdditionalField
from app.models.document_additional_field_version import DocumentAdditionalFieldVersion
from app.models.extracted_field_version import ExtractedFieldVersion
from app.models.document_preset import DocumentPreset from app.models.document_preset import DocumentPreset
from app.models.document_version import DocumentVersion from app.models.document_version import DocumentVersion
from app.models.text_version import TextVersion from app.models.text_version import TextVersion
@ -209,6 +211,64 @@ BASE_DIR = Path(__file__).resolve().parent.parent
templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
templates.env.globals["human_size"] = human_size templates.env.globals["human_size"] = human_size
def _next_extracted_field_version_number(db: Session, document_id: int) -> int:
    """Return the version number to assign to the next extracted-field snapshot.

    Reads the highest ``ExtractedFieldVersion.version_number`` stored for
    ``document_id`` and returns it plus one; returns 1 when no rows exist.
    """
    max_query = db.query(func.max(ExtractedFieldVersion.version_number)).filter(
        ExtractedFieldVersion.document_id == document_id
    )
    highest = max_query.scalar()
    # MAX() over zero rows comes back as None; any falsy result counts as 0.
    return (highest or 0) + 1
def _next_additional_field_version_number(db: Session, document_id: int) -> int:
    """Return the version number to assign to the next additional-field snapshot.

    Reads the highest ``DocumentAdditionalFieldVersion.version_number`` stored
    for ``document_id`` and returns it plus one; returns 1 when no rows exist.
    """
    max_query = db.query(func.max(DocumentAdditionalFieldVersion.version_number)).filter(
        DocumentAdditionalFieldVersion.document_id == document_id
    )
    highest = max_query.scalar()
    # MAX() over zero rows comes back as None; any falsy result counts as 0.
    return (highest or 0) + 1
def _snapshot_extracted_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None:
    """Stage a new ``ExtractedFieldVersion`` copied from ``row``.

    Args:
        db: Session the new version row is added to.
        document: Document whose ``id`` the version row is attached to.
        row: Object exposing the extracted-field attributes listed below.
        created_by: Label recorded on the version row.
        notes: Optional free-text note recorded on the version row.

    The row is only added to the session; this function does not flush or
    commit.
    """
    doc_pk = document.id
    # Resolve the number before touching ``row`` (same order as a plain
    # keyword-by-keyword constructor call would evaluate them).
    next_number = _next_extracted_field_version_number(db, doc_pk)

    copied_names = (
        "merchant_raw",
        "merchant_normalized",
        "transaction_date",
        "transaction_time",
        "subtotal",
        "tax",
        "total",
        "currency",
        "payment_method",
        "receipt_number",
        "location",
        "counterparty",
        "extra_json",
    )
    copied_values = {name: getattr(row, name) for name in copied_names}

    snapshot = ExtractedFieldVersion(
        document_id=doc_pk,
        version_number=next_number,
        created_by=created_by,
        notes=notes,
        **copied_values,
    )
    db.add(snapshot)
def _snapshot_additional_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None:
    """Stage a new ``DocumentAdditionalFieldVersion`` copied from ``row``.

    Args:
        db: Session the new version row is added to.
        document: Document whose ``id`` the version row is attached to.
        row: Object exposing the additional-field attributes listed below.
        created_by: Label recorded on the version row.
        notes: Optional free-text note recorded on the version row.

    The row is only added to the session; this function does not flush or
    commit.
    """
    doc_pk = document.id
    # Resolve the number before touching ``row`` (same order as a plain
    # keyword-by-keyword constructor call would evaluate them).
    next_number = _next_additional_field_version_number(db, doc_pk)

    copied_names = (
        "owner_primary",
        "owner_secondary",
        "paid_by_person",
        "occasion_note",
        "is_shared_expense",
        "covered_people",
        "attendees",
        "reimbursement_expected_from",
        "reimbursement_paid_by",
        "reimbursement_paid_to",
        "reimbursement_paid_amount",
        "reimbursement_paid_date",
        "reimbursement_note",
    )
    copied_values = {name: getattr(row, name) for name in copied_names}

    snapshot = DocumentAdditionalFieldVersion(
        document_id=doc_pk,
        version_number=next_number,
        created_by=created_by,
        notes=notes,
        **copied_values,
    )
    db.add(snapshot)
QUALITY_FLAG_OPTIONS = [ QUALITY_FLAG_OPTIONS = [
"bad_embedded_text", "bad_embedded_text",
"ocr_garbled", "ocr_garbled",
@ -1050,7 +1110,10 @@ def save_extracted_fields_route(
): ):
document = ( document = (
db.query(Document) db.query(Document)
.options(selectinload(Document.extracted_fields), selectinload(Document.text_versions)) .options(
selectinload(Document.extracted_fields),
selectinload(Document.receipt_line_items),
)
.filter(Document.document_id == document_id) .filter(Document.document_id == document_id)
.first() .first()
) )
@ -1075,8 +1138,22 @@ def save_extracted_fields_route(
extra_json=extra_json, extra_json=extra_json,
) )
return RedirectResponse(url=f"/documents/{document.document_id}?autofill_extracted=0&tab=extracted-fields", status_code=303) db.refresh(document)
current_extracted = get_current_extracted_fields(document)
if current_extracted is not None:
_snapshot_extracted_field(
db,
document,
current_extracted,
created_by="save_extracted_fields",
notes="Saved extracted fields from document detail form.",
)
db.commit()
return RedirectResponse(
url=f"/documents/{document.document_id}?autofill_extracted=0&tab=extracted-fields",
status_code=303,
)
@router.post("/{document_id}/save-additional-fields", response_class=RedirectResponse) @router.post("/{document_id}/save-additional-fields", response_class=RedirectResponse)
def save_additional_fields_route( def save_additional_fields_route(
@ -1105,28 +1182,45 @@ def save_additional_fields_route(
if document is None: if document is None:
return RedirectResponse(url="/documents/", status_code=303) return RedirectResponse(url="/documents/", status_code=303)
current = _get_current_additional_fields(document) additional = document.additional_fields[0] if getattr(document, "additional_fields", None) else None
if current is None: if additional is None:
current = DocumentAdditionalField(document_id=document.id) additional = DocumentAdditionalField(document_id=document.id)
db.add(current) db.add(additional)
db.flush()
current.owner_primary = owner_primary.strip() or None additional.owner_primary = owner_primary or None
current.owner_secondary = owner_secondary.strip() or None additional.owner_secondary = owner_secondary or None
current.paid_by_person = paid_by_person.strip() or None additional.paid_by_person = paid_by_person or None
current.covered_people = _parse_people_list(covered_people) additional.covered_people = [v.strip() for v in covered_people.split(",") if v.strip()] or None
current.attendees = _parse_people_list(attendees) additional.attendees = [v.strip() for v in attendees.split(",") if v.strip()] or None
current.occasion_note = occasion_note.strip() or None additional.occasion_note = occasion_note or None
current.is_shared_expense = bool(is_shared_expense) additional.is_shared_expense = bool(is_shared_expense)
current.reimbursement_expected_from = _parse_people_list(reimbursement_expected_from) additional.reimbursement_expected_from = [v.strip() for v in reimbursement_expected_from.split(",") if v.strip()] or None
current.reimbursement_paid_by = reimbursement_paid_by.strip() or None additional.reimbursement_paid_by = reimbursement_paid_by or None
current.reimbursement_paid_to = reimbursement_paid_to.strip() or None additional.reimbursement_paid_to = reimbursement_paid_to or None
current.reimbursement_paid_amount = _to_decimal(reimbursement_paid_amount) additional.reimbursement_paid_amount = Decimal(reimbursement_paid_amount) if reimbursement_paid_amount.strip() else None
current.reimbursement_paid_date = datetime.strptime(reimbursement_paid_date, "%Y-%m-%d").date() if reimbursement_paid_date else None additional.reimbursement_paid_date = datetime.strptime(reimbursement_paid_date, "%Y-%m-%d").date() if reimbursement_paid_date.strip() else None
current.reimbursement_note = reimbursement_note.strip() or None additional.reimbursement_note = reimbursement_note or None
db.add(additional)
db.commit() db.commit()
return RedirectResponse(url=f"/documents/{document.document_id}?tab=additional-fields", status_code=303)
db.refresh(document)
current_additional = document.additional_fields[0] if getattr(document, "additional_fields", None) else None
if current_additional is not None:
_snapshot_additional_field(
db,
document,
current_additional,
created_by="save_additional_fields",
notes="Saved additional fields from document detail form.",
)
db.commit()
return RedirectResponse(
url=f"/documents/{document.document_id}?tab=additional-fields",
status_code=303,
)
@router.get("/{document_id}", response_class=HTMLResponse) @router.get("/{document_id}", response_class=HTMLResponse)
def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)): def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)):

View File

@ -366,7 +366,7 @@
<h3 style="margin-top:0;">File Source</h3> <h3 style="margin-top:0;">File Source</h3>
<div style="display:flex; flex-direction:column; gap:0.75rem;"> <div style="display:flex; flex-direction:column; gap:0.75rem;">
<label style="display:flex; align-items:center; gap:0.5rem;"> <label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="revert_original" checked> <input type="radio" name="file_action" value="revert_original">
<span>Revert to original file</span> <span>Revert to original file</span>
</label> </label>
@ -376,7 +376,7 @@
</label> </label>
<label style="display:flex; align-items:center; gap:0.5rem;"> <label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="none"> <input type="radio" name="file_action" value="none" checked>
<span>No file change</span> <span>No file change</span>
</label> </label>
</div> </div>

7
ions Normal file
View File

@ -0,0 +1,7 @@
document_id | version_number | created_by | created_at
-------------+----------------+-----------------------+----------------------------
22 | 3 | save_extracted_fields | 2026-04-12 03:53:09.819059
22 | 2 | save_extracted_fields | 2026-04-12 03:52:55.113218
22 | 1 | save_extracted_fields | 2026-04-12 03:52:49.494512
(3 rows)