diff --git a/app/models/document.py b/app/models/document.py
index 708930a..180d548 100644
--- a/app/models/document.py
+++ b/app/models/document.py
@@ -4,6 +4,8 @@ from sqlalchemy import Boolean, DateTime, Integer, String, Text
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from app.db.base import Base
+from app.models.document_additional_field_version import DocumentAdditionalFieldVersion
+from app.models.extracted_field_version import ExtractedFieldVersion
 from app.models.document_line_item_set import DocumentLineItemSet
 from app.models.document_line_item_set_version import DocumentLineItemSetVersion
 
@@ -73,6 +75,16 @@ class Document(Base):
         back_populates="document",
         cascade="all, delete-orphan",
     )
+    extracted_field_versions: Mapped[list["ExtractedFieldVersion"]] = relationship(
+        back_populates="document",
+        cascade="all, delete-orphan",
+        order_by="ExtractedFieldVersion.version_number",
+    )
+    additional_field_versions: Mapped[list["DocumentAdditionalFieldVersion"]] = relationship(
+        back_populates="document",
+        cascade="all, delete-orphan",
+        order_by="DocumentAdditionalFieldVersion.version_number",
+    )
     line_item_set: Mapped["DocumentLineItemSet | None"] = relationship(
         back_populates="document",
         cascade="all, delete-orphan",
diff --git a/app/models/document_additional_field_version.py b/app/models/document_additional_field_version.py
new file mode 100644
index 0000000..f4f4e10
--- /dev/null
+++ b/app/models/document_additional_field_version.py
@@ -0,0 +1,40 @@
+from datetime import datetime, date
+from decimal import Decimal
+from sqlalchemy import Boolean, Date, DateTime, ForeignKey, Integer, Numeric, String, Text, UniqueConstraint
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db.base import Base
+
+
+class DocumentAdditionalFieldVersion(Base):
+    """Numbered snapshot of a document's additional fields, unique per (document_id, version_number)."""
+
+    __tablename__ = "document_additional_field_versions"
+    __table_args__ = (
+        UniqueConstraint("document_id", "version_number", name="uq_document_additional_field_versions_doc_ver"),
+    )
+
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    document_id: Mapped[int] = mapped_column(ForeignKey("documents.id"), nullable=False, index=True)
+    version_number: Mapped[int] = mapped_column(Integer, nullable=False)
+
+    owner_primary: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    owner_secondary: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    paid_by_person: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    occasion_note: Mapped[str | None] = mapped_column(Text, nullable=True)
+    is_shared_expense: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
+    covered_people: Mapped[list | None] = mapped_column(JSONB, nullable=True)
+    attendees: Mapped[list | None] = mapped_column(JSONB, nullable=True)
+    reimbursement_expected_from: Mapped[list | None] = mapped_column(JSONB, nullable=True)
+    reimbursement_paid_by: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    reimbursement_paid_to: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    reimbursement_paid_amount: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
+    reimbursement_paid_date: Mapped[date | None] = mapped_column(Date, nullable=True)
+    reimbursement_note: Mapped[str | None] = mapped_column(Text, nullable=True)
+
+    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
+
+    document: Mapped["Document"] = relationship(back_populates="additional_field_versions")
diff --git a/app/models/extracted_field_version.py b/app/models/extracted_field_version.py
new file mode 100644
index 0000000..19698ed
--- /dev/null
+++ b/app/models/extracted_field_version.py
@@ -0,0 +1,40 @@
+from datetime import datetime, date
+from decimal import Decimal
+from sqlalchemy import Date, DateTime, ForeignKey, Integer, Numeric, String, Text, UniqueConstraint
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db.base import Base
+
+
+class ExtractedFieldVersion(Base):
+    """Numbered snapshot of a document's extracted fields, unique per (document_id, version_number)."""
+
+    __tablename__ = "extracted_field_versions"
+    __table_args__ = (
+        UniqueConstraint("document_id", "version_number", name="uq_extracted_field_versions_doc_ver"),
+    )
+
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    document_id: Mapped[int] = mapped_column(ForeignKey("documents.id"), nullable=False, index=True)
+    version_number: Mapped[int] = mapped_column(Integer, nullable=False)
+
+    merchant_raw: Mapped[str | None] = mapped_column(Text, nullable=True)
+    merchant_normalized: Mapped[str | None] = mapped_column(Text, nullable=True)
+    transaction_date: Mapped[date | None] = mapped_column(Date, nullable=True)
+    transaction_time: Mapped[str | None] = mapped_column(String(50), nullable=True)
+    subtotal: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
+    tax: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
+    total: Mapped[Decimal | None] = mapped_column(Numeric(18, 4), nullable=True)
+    currency: Mapped[str | None] = mapped_column(String(20), nullable=True)
+    payment_method: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    receipt_number: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    location: Mapped[str | None] = mapped_column(Text, nullable=True)
+    counterparty: Mapped[str | None] = mapped_column(Text, nullable=True)
+    extra_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True)
+
+    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
+
+    document: Mapped["Document"] = relationship(back_populates="extracted_field_versions")
diff --git a/app/routes/documents.py b/app/routes/documents.py
index 322458a..ec840a5 100644
--- a/app/routes/documents.py
+++ b/app/routes/documents.py
@@ -32,6 +32,8 @@ from app.logic.extraction import (
 from app.logic.ingest import compute_quality_score, rerun_ocr_for_document
 from app.models.document import Document
 from app.models.document_additional_field import DocumentAdditionalField
+from app.models.document_additional_field_version import DocumentAdditionalFieldVersion
+from app.models.extracted_field_version import ExtractedFieldVersion
 from app.models.document_preset import DocumentPreset
 from app.models.document_version import DocumentVersion
 from app.models.text_version import TextVersion
@@ -209,6 +211,71 @@ BASE_DIR = Path(__file__).resolve().parent.parent
 templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
 templates.env.globals["human_size"] = human_size
 
+
+# Version numbers are allocated as max(version_number) + 1 without locking, so
+# two concurrent saves for one document can pick the same number; the
+# (document_id, version_number) unique constraint then rejects the second one.
+def _next_extracted_field_version_number(db: Session, document_id: int) -> int:
+    """Return 1 + the highest extracted-field version_number stored for the document (1 if none)."""
+    return (db.query(func.max(ExtractedFieldVersion.version_number))
+            .filter(ExtractedFieldVersion.document_id == document_id)
+            .scalar() or 0) + 1
+
+
+def _next_additional_field_version_number(db: Session, document_id: int) -> int:
+    """Return 1 + the highest additional-field version_number stored for the document (1 if none)."""
+    return (db.query(func.max(DocumentAdditionalFieldVersion.version_number))
+            .filter(DocumentAdditionalFieldVersion.document_id == document_id)
+            .scalar() or 0) + 1
+
+
+def _snapshot_extracted_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None:
+    """Add an ExtractedFieldVersion copied from ``row`` to the session; the caller commits."""
+    version = ExtractedFieldVersion(
+        document_id=document.id,
+        version_number=_next_extracted_field_version_number(db, document.id),
+        merchant_raw=row.merchant_raw,
+        merchant_normalized=row.merchant_normalized,
+        transaction_date=row.transaction_date,
+        transaction_time=row.transaction_time,
+        subtotal=row.subtotal,
+        tax=row.tax,
+        total=row.total,
+        currency=row.currency,
+        payment_method=row.payment_method,
+        receipt_number=row.receipt_number,
+        location=row.location,
+        counterparty=row.counterparty,
+        extra_json=row.extra_json,
+        created_by=created_by,
+        notes=notes,
+    )
+    db.add(version)
+
+
+def _snapshot_additional_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None:
+    """Add a DocumentAdditionalFieldVersion copied from ``row`` to the session; the caller commits."""
+    version = DocumentAdditionalFieldVersion(
+        document_id=document.id,
+        version_number=_next_additional_field_version_number(db, document.id),
+        owner_primary=row.owner_primary,
+        owner_secondary=row.owner_secondary,
+        paid_by_person=row.paid_by_person,
+        occasion_note=row.occasion_note,
+        is_shared_expense=row.is_shared_expense,
+        covered_people=row.covered_people,
+        attendees=row.attendees,
+        reimbursement_expected_from=row.reimbursement_expected_from,
+        reimbursement_paid_by=row.reimbursement_paid_by,
+        reimbursement_paid_to=row.reimbursement_paid_to,
+        reimbursement_paid_amount=row.reimbursement_paid_amount,
+        reimbursement_paid_date=row.reimbursement_paid_date,
+        reimbursement_note=row.reimbursement_note,
+        created_by=created_by,
+        notes=notes,
+    )
+    db.add(version)
+
 QUALITY_FLAG_OPTIONS = [
     "bad_embedded_text",
     "ocr_garbled",
@@ -1050,7 +1117,10 @@ def save_extracted_fields_route(
 ):
     document = (
         db.query(Document)
-        .options(selectinload(Document.extracted_fields), selectinload(Document.text_versions))
+        .options(
+            selectinload(Document.extracted_fields),
+            selectinload(Document.receipt_line_items),
+        )
         .filter(Document.document_id == document_id)
         .first()
     )
@@ -1075,8 +1145,22 @@ def save_extracted_fields_route(
         extra_json=extra_json,
     )
 
-    return RedirectResponse(url=f"/documents/{document.document_id}?autofill_extracted=0&tab=extracted-fields", status_code=303)
+    db.refresh(document)
+    current_extracted = get_current_extracted_fields(document)
+    if current_extracted is not None:
+        _snapshot_extracted_field(
+            db,
+            document,
+            current_extracted,
+            created_by="save_extracted_fields",
+            notes="Saved extracted fields from document detail form.",
+        )
+        db.commit()
 
+    return RedirectResponse(
+        url=f"/documents/{document.document_id}?autofill_extracted=0&tab=extracted-fields",
+        status_code=303,
+    )
 
 @router.post("/{document_id}/save-additional-fields", response_class=RedirectResponse)
 def save_additional_fields_route(
@@ -1105,28 +1189,40 @@ def save_additional_fields_route(
     if document is None:
         return RedirectResponse(url="/documents/", status_code=303)
 
     current = _get_current_additional_fields(document)
     if current is None:
         current = DocumentAdditionalField(document_id=document.id)
         db.add(current)
 
     current.owner_primary = owner_primary.strip() or None
     current.owner_secondary = owner_secondary.strip() or None
     current.paid_by_person = paid_by_person.strip() or None
     current.covered_people = _parse_people_list(covered_people)
     current.attendees = _parse_people_list(attendees)
     current.occasion_note = occasion_note.strip() or None
     current.is_shared_expense = bool(is_shared_expense)
     current.reimbursement_expected_from = _parse_people_list(reimbursement_expected_from)
     current.reimbursement_paid_by = reimbursement_paid_by.strip() or None
     current.reimbursement_paid_to = reimbursement_paid_to.strip() or None
     current.reimbursement_paid_amount = _to_decimal(reimbursement_paid_amount)
     current.reimbursement_paid_date = datetime.strptime(reimbursement_paid_date, "%Y-%m-%d").date() if reimbursement_paid_date else None
     current.reimbursement_note = reimbursement_note.strip() or None
 
+    # Stage the version row before the single commit below so the edited row
+    # and its snapshot are written together instead of in two transactions.
+    _snapshot_additional_field(
+        db,
+        document,
+        current,
+        created_by="save_additional_fields",
+        notes="Saved additional fields from document detail form.",
+    )
     db.commit()
 
-    return RedirectResponse(url=f"/documents/{document.document_id}?tab=additional-fields", status_code=303)
+    return RedirectResponse(
+        url=f"/documents/{document.document_id}?tab=additional-fields",
+        status_code=303,
+    )
 
 @router.get("/{document_id}", response_class=HTMLResponse)
 def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)):
diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html
index 438dcf0..dc7def1 100644
--- a/app/templates/documents/detail.html
+++ b/app/templates/documents/detail.html
@@ -366,7 +366,7 @@

File Source

@@ -376,7 +376,7 @@
diff --git a/ions b/ions new file mode 100644 index 0000000..61de503 --- /dev/null +++ b/ions @@ -0,0 +1,7 @@ + document_id | version_number | created_by | created_at +-------------+----------------+-----------------------+---------------------------- + 22 | 3 | save_extracted_fields | 2026-04-12 03:53:09.819059 + 22 | 2 | save_extracted_fields | 2026-04-12 03:52:55.113218 + 22 | 1 | save_extracted_fields | 2026-04-12 03:52:49.494512 +(3 rows) +