feat: WIP line item system (schema + models + initial integration, mapper fixes pending)
This commit is contained in:
parent
1e37a80894
commit
871ae5401f
|
|
@ -4,6 +4,8 @@ from sqlalchemy import Boolean, DateTime, Integer, String, Text
|
|||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.document_line_item_set import DocumentLineItemSet
|
||||
from app.models.document_line_item_set_version import DocumentLineItemSetVersion
|
||||
|
||||
|
||||
class Document(Base):
|
||||
|
|
@ -71,3 +73,13 @@ class Document(Base):
|
|||
back_populates="document",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
line_item_set: Mapped["DocumentLineItemSet | None"] = relationship(
|
||||
back_populates="document",
|
||||
cascade="all, delete-orphan",
|
||||
uselist=False,
|
||||
)
|
||||
line_item_set_versions: Mapped[list["DocumentLineItemSetVersion"]] = relationship(
|
||||
back_populates="document",
|
||||
cascade="all, delete-orphan",
|
||||
order_by="DocumentLineItemSetVersion.version_number",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Integer, Numeric, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
class DocumentLineItem(Base):
    """A single row in a document's current line-item set.

    Only the owning set and the line number are mandatory; every descriptive
    column below is nullable.
    """

    __tablename__ = "document_line_items"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    # Foreign key to the owning DocumentLineItemSet row.
    line_item_set_id: Mapped[int] = mapped_column(
        ForeignKey("document_line_item_sets.id"),
        nullable=False,
        index=True,
    )

    # Position of the row within its set.
    line_number: Mapped[int] = mapped_column(Integer, nullable=False)
    entry_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Numeric values are stored as fixed-point NUMERIC(18, 4).
    quantity: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    unit_price: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    line_total: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    tax_amount: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    category: Mapped[str | None] = mapped_column(String(100), nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Free-form JSON payload; presumably the unparsed source row — TODO confirm.
    raw_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Timestamps come from datetime.utcnow, i.e. naive UTC values.
    created_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    line_item_set: Mapped["DocumentLineItemSet"] = relationship(
        back_populates="items",
    )
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
from datetime import datetime
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.document_line_item import DocumentLineItem
|
||||
|
||||
|
||||
class DocumentLineItemSet(Base):
    """Container holding the current line items of one document.

    The unique constraint on ``document_id`` permits at most one set per
    document.
    """

    __tablename__ = "document_line_item_sets"
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            name="uq_document_line_item_sets_document_id",
        ),
    )

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    document_id: Mapped[int] = mapped_column(
        ForeignKey("documents.id"),
        nullable=False,
        index=True,
    )
    schema_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    # Timestamps come from datetime.utcnow, i.e. naive UTC values.
    created_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    document: Mapped["Document"] = relationship(
        back_populates="line_item_set",
    )
    # Child rows are returned ordered by line number and are deleted with the
    # set (or when detached from it) through the delete-orphan cascade.
    items: Mapped[list["DocumentLineItem"]] = relationship(
        back_populates="line_item_set",
        cascade="all, delete-orphan",
        order_by="DocumentLineItem.line_number",
    )
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
from datetime import datetime
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, String, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
from app.models.document_line_item_version_item import DocumentLineItemVersionItem
|
||||
|
||||
|
||||
class DocumentLineItemSetVersion(Base):
    """A numbered version of a document's line items.

    ``(document_id, version_number)`` is unique, so each version number is
    used at most once per document.
    """

    __tablename__ = "document_line_item_set_versions"
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            "version_number",
            name="uq_document_line_item_set_versions_doc_ver",
        ),
    )

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    document_id: Mapped[int] = mapped_column(
        ForeignKey("documents.id"),
        nullable=False,
        index=True,
    )
    version_number: Mapped[int] = mapped_column(Integer, nullable=False)
    schema_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Only a creation timestamp (naive UTC from datetime.utcnow) is defined;
    # this model has no updated_at column.
    created_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )

    document: Mapped["Document"] = relationship(
        back_populates="line_item_set_versions",
    )
    # Version rows are returned ordered by line number and follow the version
    # through the delete-orphan cascade.
    items: Mapped[list["DocumentLineItemVersionItem"]] = relationship(
        back_populates="set_version",
        cascade="all, delete-orphan",
        order_by="DocumentLineItemVersionItem.line_number",
    )
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Integer, Numeric, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db.base import Base
|
||||
|
||||
|
||||
class DocumentLineItemVersionItem(Base):
    """A single line-item row belonging to one DocumentLineItemSetVersion.

    The data columns mirror those of ``DocumentLineItem``; this model carries
    only a creation timestamp.
    """

    __tablename__ = "document_line_item_version_items"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)
    # Foreign key to the owning DocumentLineItemSetVersion row.
    set_version_id: Mapped[int] = mapped_column(
        ForeignKey("document_line_item_set_versions.id"),
        nullable=False,
        index=True,
    )

    # Position of the row within its version.
    line_number: Mapped[int] = mapped_column(Integer, nullable=False)
    entry_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Numeric values are stored as fixed-point NUMERIC(18, 4).
    quantity: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    unit_price: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    line_total: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    tax_amount: Mapped[Decimal | None] = mapped_column(
        Numeric(18, 4),
        nullable=True,
    )
    category: Mapped[str | None] = mapped_column(String(100), nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Free-form JSON payload; presumably the unparsed source row — TODO confirm.
    raw_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Naive UTC creation timestamp from datetime.utcnow.
    created_at: Mapped[datetime] = mapped_column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )

    set_version: Mapped["DocumentLineItemSetVersion"] = relationship(
        back_populates="items",
    )
|
||||
|
|
@ -13,6 +13,7 @@ from fastapi import APIRouter, Depends, Form, Query, Request
|
|||
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from sqlalchemy import distinct
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
from pypdf import PdfReader
|
||||
|
||||
|
|
@ -32,6 +33,10 @@ from app.logic.ingest import compute_quality_score, rerun_ocr_for_document
|
|||
from app.models.document import Document
|
||||
from app.models.document_additional_field import DocumentAdditionalField
|
||||
from app.models.document_preset import DocumentPreset
|
||||
from app.models.document_version import DocumentVersion
|
||||
from app.models.text_version import TextVersion
|
||||
from app.models.extracted_field import ExtractedField
|
||||
from app.models.document_additional_field import DocumentAdditionalField
|
||||
from app.models.text_version import TextVersion
|
||||
from app.utils.filesize import human_size
|
||||
|
||||
|
|
@ -174,6 +179,32 @@ def _document_export_payload(document) -> dict:
|
|||
"versions": versions,
|
||||
}
|
||||
|
||||
|
||||
|
||||
def _latest_raw_ocr(document):
|
||||
rows = [tv for tv in getattr(document, "text_versions", []) if getattr(tv, "version_type", None) == "raw_ocr"]
|
||||
rows.sort(key=lambda x: x.version_number)
|
||||
return rows[-1] if rows else None
|
||||
|
||||
|
||||
def _clear_current_extracted(db: Session, document: Document) -> None:
    """Bulk-delete every ExtractedField row whose document_id matches *document*.

    The DELETE is issued without synchronizing objects already held by the
    session, and nothing is committed here; the caller owns the transaction.
    """
    owned_rows = db.query(ExtractedField).filter(
        ExtractedField.document_id == document.id
    )
    owned_rows.delete(synchronize_session=False)
|
||||
|
||||
|
||||
def _clear_current_additional(db: Session, document: Document) -> None:
    """Bulk-delete every DocumentAdditionalField row whose document_id matches *document*.

    The DELETE is issued without synchronizing objects already held by the
    session, and nothing is committed here; the caller owns the transaction.
    """
    owned_rows = db.query(DocumentAdditionalField).filter(
        DocumentAdditionalField.document_id == document.id
    )
    owned_rows.delete(synchronize_session=False)
|
||||
|
||||
|
||||
def _reset_ocr_to_raw(db: Session, document: Document) -> None:
    """Delete the document's TextVersion rows and mark it as pending review.

    Nothing is committed here; the caller owns the transaction.

    NOTE(review): the filter matches every TextVersion of the document, so
    rows of any version_type are removed — including raw OCR ones, despite
    the "to_raw" in the name. Confirm that this is the intended reset.
    """
    owned_versions = db.query(TextVersion).filter(
        TextVersion.document_id == document.id
    )
    owned_versions.delete(synchronize_session=False)
    document.review_status = "pending"
|
||||
|
||||
# Directory one level above the folder that contains this module.
BASE_DIR = Path(__file__).resolve().parent.parent
# Jinja2 template loader rooted at <BASE_DIR>/templates.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
# Make the human_size helper callable from every template.
templates.env.globals["human_size"] = human_size
|
||||
|
|
@ -777,6 +808,131 @@ def save_pdf(document_id: str, output_path: str = Form(""), db: Session = Depend
|
|||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?tab=ocr-review", status_code=303)
|
||||
|
||||
|
||||
def _revert_document_file(
    db: Session,
    document: Document,
    target_file: Path,
    version_type: str,
    notes: str,
) -> None:
    """Point *document* at *target_file* and record the revert as a new DocumentVersion."""
    document.current_path = str(target_file)
    document.canonical_filename = target_file.name
    document.sha256_current = _sha256_for_file(target_file)
    db.add(document)

    # Next version number = highest existing number for this document + 1
    # (or 1 when the document has no versions yet).
    highest_number = (
        db.query(func.max(DocumentVersion.version_number))
        .filter(DocumentVersion.document_id == document.id)
        .scalar()
    )
    db.add(
        DocumentVersion(
            document_id=document.id,
            version_number=(highest_number or 0) + 1,
            version_type=version_type,
            file_path=str(target_file),
            sha256=document.sha256_current,
            file_size_bytes=target_file.stat().st_size,
            created_by="source_options",
            notes=notes,
        )
    )


@router.post("/{document_id}/source-options", response_class=RedirectResponse)
def apply_source_options(
    document_id: str,
    file_action: str = Form("none"),
    reset_ocr: str | None = Form(None),
    clear_extracted: str | None = Form(None),
    clear_additional: str | None = Form(None),
    db: Session = Depends(get_db),
):
    """Apply the "Source Options" form: optional file revert plus data resets.

    Args:
        document_id: Public identifier of the document (path parameter).
        file_action: ``"revert_original"``, ``"revert_current_version"`` or
            anything else for "leave the file alone".
        reset_ocr: Truthy to delete the document's text versions and set its
            review status back to ``"pending"``.
        clear_extracted: Truthy to delete the document's extracted fields.
        clear_additional: Truthy to delete the document's additional fields.
        db: Request-scoped database session.

    Returns:
        A 303 redirect to the document's Source Options tab (with
        ``error=source_options_failed`` when an exception occurred), or to the
        document list when the document does not exist.

    A requested file revert is silently skipped when the target path is
    unknown or the file no longer exists on disk.
    """
    # The handler reads only scalar columns of the document, so no
    # relationship collections are eager-loaded.
    document = (
        db.query(Document)
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    # Build redirect targets from the path parameter rather than the ORM
    # object: after commit()/rollback() the instance is expired, and reading
    # an attribute would trigger a reload that can itself fail.
    detail_url = f"/documents/{document_id}"

    try:
        changed = False

        if file_action == "revert_original":
            original_path = document.original_path or document.source_path
            original_file = Path(original_path) if original_path else None
            if original_file is not None and original_file.exists():
                _revert_document_file(
                    db,
                    document,
                    original_file,
                    version_type="reverted_original",
                    notes="Reverted current file to original source file.",
                )
                changed = True

        elif file_action == "revert_current_version":
            latest_version = (
                db.query(DocumentVersion)
                .filter(
                    DocumentVersion.document_id == document.id,
                    DocumentVersion.version_type.in_(["original", "ocr_corrected", "field_enriched"]),
                )
                .order_by(DocumentVersion.version_number.desc())
                .first()
            )
            if latest_version and latest_version.file_path:
                version_file = Path(latest_version.file_path)
                if version_file.exists():
                    _revert_document_file(
                        db,
                        document,
                        version_file,
                        version_type="reverted_current_version",
                        notes=f"Reverted current file to latest saved version v{latest_version.version_number}.",
                    )
                    changed = True

        if reset_ocr:
            _reset_ocr_to_raw(db, document)
            changed = True

        if clear_extracted:
            _clear_current_extracted(db, document)
            changed = True

        if clear_additional:
            _clear_current_additional(db, document)
            changed = True

        if changed:
            db.commit()
        else:
            db.rollback()

    except Exception as e:
        # Top-level boundary for this request: report, undo, and send the
        # user back to the tab with an error flag.
        print("source_options failed:", repr(e), flush=True)
        traceback.print_exc()
        db.rollback()
        return RedirectResponse(
            url=f"{detail_url}?error=source_options_failed&tab=source-options",
            status_code=303,
        )

    return RedirectResponse(
        url=f"{detail_url}?tab=source-options",
        status_code=303,
    )
|
||||
|
||||
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
|
||||
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
||||
document = (
|
||||
|
|
@ -1050,7 +1206,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
version_rows.append((version, file_exists))
|
||||
|
||||
active_tab = request.query_params.get("tab", "ocr-review")
|
||||
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr"}:
|
||||
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr", "source-options"}:
|
||||
active_tab = "ocr-review"
|
||||
|
||||
return templates.TemplateResponse(
|
||||
|
|
@ -1126,3 +1282,56 @@ def export_reviewed_jsonl(db: Session = Depends(get_db)):
|
|||
filename=out_path.name,
|
||||
)
|
||||
|
||||
|
||||
# NOTE: POST "/{document_id}/source-options" is handled by the single
# apply_source_options definition earlier in this module. Routes are matched
# in registration order, so a second handler registered here for the same
# path would never be reached; it is intentionally not defined again.
|
||||
|
|
|
|||
|
|
@ -134,6 +134,7 @@
|
|||
<button class="tab-button{% if active_tab == 'additional-fields' %} active{% endif %}" type="button" data-tab="additional-fields">Additional Fields</button>
|
||||
<button class="tab-button{% if active_tab == 'versions' %} active{% endif %}" type="button" data-tab="versions">Versions</button>
|
||||
<button class="tab-button{% if active_tab == 'raw-ocr' %} active{% endif %}" type="button" data-tab="raw-ocr">Raw OCR</button>
|
||||
<button class="tab-button{% if active_tab == 'source-options' %} active{% endif %}" type="button" data-tab="source-options">Source Options</button>
|
||||
</div>
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'ocr-review' %} active{% endif %}" data-panel="ocr-review">
|
||||
|
|
@ -356,6 +357,55 @@
|
|||
{% endif %}
|
||||
</div>
|
||||
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'source-options' %} active{% endif %}" data-panel="source-options">
  <h2 class="card-title">Source Options</h2>

  <form method="post" action="/documents/{{ document.document_id }}/source-options" style="display:flex; flex-direction:column; gap:1rem;" enctype="multipart/form-data">
    <div class="card" style="padding:1rem;">
      <h3 style="margin-top:0;">File Source</h3>
      {# "No file change" is pre-selected so that submitting the form only to
         reset data never reverts the file by accident. #}
      <div style="display:flex; flex-direction:column; gap:0.75rem;">
        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="radio" name="file_action" value="revert_original">
          <span>Revert to original file</span>
        </label>

        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="radio" name="file_action" value="revert_current_version">
          <span>Revert to current saved version</span>
        </label>

        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="radio" name="file_action" value="none" checked>
          <span>No file change</span>
        </label>
      </div>
    </div>

    <div class="card" style="padding:1rem;">
      <h3 style="margin-top:0;">Data Reset</h3>
      <div style="display:flex; flex-direction:column; gap:0.75rem;">
        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="checkbox" name="reset_ocr" value="1">
          <span>Reset OCR</span>
        </label>
        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="checkbox" name="clear_extracted" value="1">
          <span>Clear extracted fields</span>
        </label>
        <label style="display:flex; align-items:center; gap:0.5rem;">
          <input type="checkbox" name="clear_additional" value="1">
          <span>Clear additional fields</span>
        </label>
      </div>
    </div>

    <div>
      <button class="btn btn-primary" type="submit">Apply Source Options</button>
    </div>
  </form>
</div>
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'raw-ocr' %} active{% endif %}" data-panel="raw-ocr">
|
||||
<h2 class="card-title">Raw OCR</h2>
|
||||
{% if raw_ocr %}
|
||||
|
|
|
|||
Loading…
Reference in New Issue