feat: Phase 4.2 version display + source options (OCR/extracted/additional)

This commit is contained in:
Sean McElwain 2026-04-17 10:03:31 -05:00
parent e15612184d
commit bb8cde4c47
2 changed files with 466 additions and 186 deletions

View File

@ -46,10 +46,16 @@ router = APIRouter(prefix="/documents", tags=["documents"])
def _storage_available() -> bool: def _storage_available() -> bool:
storage_root = Path("/mnt/svr-01/storage") candidate_roots = [
Path("/mnt/storage"),
Path("/mnt/svr-01/storage"),
]
try: try:
return storage_root.exists() and storage_root.is_mount() and storage_root.is_dir() and os.access(storage_root, os.R_OK | os.X_OK) for root in candidate_roots:
if root.exists() and root.is_dir() and os.access(root, os.R_OK | os.X_OK):
return True
except Exception: except Exception:
pass
return False return False
@ -247,6 +253,149 @@ def _snapshot_extracted_field(db: Session, document: Document, row, created_by:
db.add(version) db.add(version)
# =========================
# RESTORE HELPERS (NO SNAPSHOT)
# =========================
def _restore_extracted_to_original(db: Session, document: Document) -> bool:
    """Restore the live extracted fields from version number 1.

    Thin wrapper around ``_restore_extracted_from_version_number``; the
    return value is whatever that helper reports for version 1.
    """
    original_version_number = 1
    return _restore_extracted_from_version_number(db, document, original_version_number)
def _restore_extracted_from_version_number(db: Session, document: Document, target_version_number: int) -> bool:
    """Overwrite the live extracted-field row with the values of a stored version.

    No new version snapshot is created, and nothing is committed here; the
    updated row is only added to the session.

    Args:
        db: Session used to look up the version and the live row.
        document: Document whose extracted fields are being restored.
        target_version_number: ``version_number`` of the stored version to
            copy from.

    Returns:
        True when the live row was overwritten. False when either the
        requested version or the live row does not exist.
    """
    # Columns copied verbatim from the stored version onto the live row.
    restorable_columns = (
        "merchant_raw",
        "merchant_normalized",
        "transaction_date",
        "transaction_time",
        "subtotal",
        "tax",
        "total",
        "currency",
        "payment_method",
        "receipt_number",
        "location",
        "counterparty",
        "extra_json",
    )

    version = (
        db.query(ExtractedFieldVersion)
        .filter(
            ExtractedFieldVersion.document_id == document.id,
            ExtractedFieldVersion.version_number == target_version_number,
        )
        .first()
    )
    if version is None:
        return False

    row = (
        db.query(ExtractedField)
        .filter(ExtractedField.document_id == document.id)
        .first()
    )
    if row is None:
        return False

    # Overwrite the live row in place (NO NEW VERSION).
    for column in restorable_columns:
        setattr(row, column, getattr(version, column))
    db.add(row)
    return True
def _restore_additional_to_original(db: Session, document: Document) -> bool:
    """Restore the live additional fields from version number 1.

    Thin wrapper around ``_restore_additional_from_version_number``; the
    return value is whatever that helper reports for version 1.
    """
    original_version_number = 1
    return _restore_additional_from_version_number(db, document, original_version_number)
def _restore_additional_from_version_number(db: Session, document: Document, target_version_number: int) -> bool:
    """Overwrite the live additional-field row with the values of a stored version.

    No new version snapshot is created, and nothing is committed here; the
    updated row is only added to the session.

    Args:
        db: Session used to look up the version and the live row.
        document: Document whose additional fields are being restored.
        target_version_number: ``version_number`` of the stored version to
            copy from.

    Returns:
        True when the live row was overwritten. False when either the
        requested version or the live row does not exist.
    """
    # Columns copied verbatim from the stored version onto the live row.
    restorable_columns = (
        "owner_primary",
        "owner_secondary",
        "paid_by_person",
        "occasion_note",
        "is_shared_expense",
        "covered_people",
        "attendees",
        "reimbursement_expected_from",
        "reimbursement_paid_by",
        "reimbursement_paid_to",
        "reimbursement_paid_amount",
        "reimbursement_paid_date",
        "reimbursement_note",
    )

    version = (
        db.query(DocumentAdditionalFieldVersion)
        .filter(
            DocumentAdditionalFieldVersion.document_id == document.id,
            DocumentAdditionalFieldVersion.version_number == target_version_number,
        )
        .first()
    )
    if version is None:
        return False

    row = (
        db.query(DocumentAdditionalField)
        .filter(DocumentAdditionalField.document_id == document.id)
        .first()
    )
    if row is None:
        return False

    # Overwrite the live row in place (NO NEW VERSION).
    for column in restorable_columns:
        setattr(row, column, getattr(version, column))
    db.add(row)
    return True
def _snapshot_additional_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None: def _snapshot_additional_field(db: Session, document: Document, row, created_by: str, notes: str | None = None) -> None:
version = DocumentAdditionalFieldVersion( version = DocumentAdditionalFieldVersion(
document_id=document.id, document_id=document.id,
@ -869,129 +1018,6 @@ def save_pdf(document_id: str, output_path: str = Form(""), db: Session = Depend
return RedirectResponse(url=f"/documents/{document.document_id}?tab=ocr-review", status_code=303) return RedirectResponse(url=f"/documents/{document.document_id}?tab=ocr-review", status_code=303)
@router.post("/{document_id}/source-options", response_class=RedirectResponse)
def apply_source_options(
document_id: str,
file_action: str = Form("none"),
reset_ocr: str | None = Form(None),
clear_extracted: str | None = Form(None),
clear_additional: str | None = Form(None),
db: Session = Depends(get_db),
):
document = (
db.query(Document)
.options(
selectinload(Document.text_versions),
selectinload(Document.naming_fields),
selectinload(Document.extracted_fields),
selectinload(Document.additional_fields),
selectinload(Document.versions),
)
.filter(Document.document_id == document_id)
.first()
)
if document is None:
return RedirectResponse(url="/documents/", status_code=303)
try:
changed = False
if file_action == "revert_original":
original_path = document.original_path or document.source_path
if original_path:
original_file = Path(original_path)
if original_file.exists():
document.current_path = str(original_file)
document.canonical_filename = original_file.name
document.sha256_current = _sha256_for_file(original_file)
db.add(document)
next_version_number = (
db.query(func.max(DocumentVersion.version_number))
.filter(DocumentVersion.document_id == document.id)
.scalar() or 0
) + 1
version = DocumentVersion(
document_id=document.id,
version_number=next_version_number,
version_type="reverted_original",
file_path=str(original_file),
sha256=document.sha256_current,
file_size_bytes=original_file.stat().st_size,
created_by="source_options",
notes="Reverted current file to original source file.",
)
db.add(version)
changed = True
elif file_action == "revert_current_version":
latest_version = (
db.query(DocumentVersion)
.filter(
DocumentVersion.document_id == document.id,
DocumentVersion.version_type.in_(["original", "ocr_corrected", "field_enriched"])
)
.order_by(DocumentVersion.version_number.desc())
.first()
)
if latest_version and latest_version.file_path:
version_file = Path(latest_version.file_path)
if version_file.exists():
document.current_path = str(version_file)
document.canonical_filename = version_file.name
document.sha256_current = _sha256_for_file(version_file)
db.add(document)
next_version_number = (
db.query(func.max(DocumentVersion.version_number))
.filter(DocumentVersion.document_id == document.id)
.scalar() or 0
) + 1
version = DocumentVersion(
document_id=document.id,
version_number=next_version_number,
version_type="reverted_current_version",
file_path=str(version_file),
sha256=document.sha256_current,
file_size_bytes=version_file.stat().st_size,
created_by="source_options",
notes=f"Reverted current file to latest saved version v{latest_version.version_number}.",
)
db.add(version)
changed = True
if reset_ocr:
_reset_ocr_to_raw(db, document)
changed = True
if clear_extracted:
_clear_current_extracted(db, document)
changed = True
if clear_additional:
_clear_current_additional(db, document)
changed = True
if changed:
db.commit()
else:
db.rollback()
return RedirectResponse(
url=f"/documents/{document.document_id}?tab=source-options",
status_code=303,
)
except Exception as e:
print("source_options failed:", repr(e), flush=True)
traceback.print_exc()
db.rollback()
return RedirectResponse(
url=f"/documents/{document.document_id}?error=source_options_failed&tab=source-options",
status_code=303,
)
@router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse) @router.post("/{document_id}/save-field-enriched-pdf", response_class=RedirectResponse)
def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)): def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
@ -1222,6 +1248,21 @@ def save_additional_fields_route(
status_code=303, status_code=303,
) )
@router.get("/{document_id}/preview-file")
def document_preview_file(document_id: str, db: Session = Depends(get_db)):
    """Serve the document's current file for inline display.

    Args:
        document_id: Public identifier matched against ``Document.document_id``.
        db: Request-scoped database session.

    Returns:
        A ``FileResponse`` streaming the file at ``document.current_path``
        with an inline content disposition, or a 404 ``HTMLResponse`` when
        the document is unknown, has no current path, or the path is not an
        existing regular file.
    """
    document = db.query(Document).filter(Document.document_id == document_id).first()
    if document is None or not document.current_path:
        return HTMLResponse(content="Preview file not found", status_code=404)

    path_obj = Path(document.current_path)
    # is_file() is already False for a missing path, so one check covers both.
    if not path_obj.is_file():
        return HTMLResponse(content="Preview file not found", status_code=404)

    # Let FileResponse build the Content-Disposition header: it quotes and
    # percent-encodes the filename as needed, whereas a hand-concatenated
    # header breaks on names containing quotes or non-latin-1 characters.
    return FileResponse(
        path=str(path_obj),
        media_type=document.mime_type or "application/octet-stream",
        filename=path_obj.name,
        content_disposition_type="inline",
    )
@router.get("/{document_id}", response_class=HTMLResponse) @router.get("/{document_id}", response_class=HTMLResponse)
def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)): def document_detail(document_id: str, request: Request, queue: str | None = None, db: Session = Depends(get_db)):
document = ( document = (
@ -1241,6 +1282,17 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
return HTMLResponse(content="Document not found", status_code=404) return HTMLResponse(content="Document not found", status_code=404)
raw_ocr, reviewed_ocr = _get_current_text_versions(document) raw_ocr, reviewed_ocr = _get_current_text_versions(document)
current_text_version = next(
(
tv for tv in sorted(
getattr(document, "text_versions", []),
key=lambda x: (x.version_number, x.created_at),
reverse=True,
)
if tv.is_current
),
None,
)
editor_source = request.query_params.get("editor_source", "reviewed") editor_source = request.query_params.get("editor_source", "reviewed")
review_text_value = _build_review_text_value(raw_ocr, reviewed_ocr, editor_source) review_text_value = _build_review_text_value(raw_ocr, reviewed_ocr, editor_source)
@ -1251,17 +1303,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
file_url = None file_url = None
storage_available = _storage_available() storage_available = _storage_available()
if storage_available and document.current_path: if document.current_path:
storage_root = Path("/mnt/svr-01/storage")
current_path = Path(document.current_path) current_path = Path(document.current_path)
try: if current_path.exists() and current_path.is_file():
rel = current_path.relative_to(storage_root) file_url = str(request.url_for("document_preview_file", document_id=document.document_id))
file_url = f"/files/{rel.as_posix()}"
except Exception:
file_url = None
app_url = str(request.url_for("document_detail", document_id=document.document_id)) app_url = str(request.url_for("document_detail", document_id=document.document_id))
error = request.query_params.get("error") error = request.query_params.get("error")
success = request.query_params.get("success")
error_expected = request.query_params.get("expected") error_expected = request.query_params.get("expected")
error_actual = request.query_params.get("actual") error_actual = request.query_params.get("actual")
@ -1279,6 +1328,8 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
additional_form = _additional_field_form_values(document, selected_preset) additional_form = _additional_field_form_values(document, selected_preset)
current_extracted = get_current_extracted_fields(document) current_extracted = get_current_extracted_fields(document)
current_additional = _get_current_additional_fields(document) current_additional = _get_current_additional_fields(document)
current_extracted_version_number = _get_current_extracted_version_number(document)
current_additional_version_number = _get_current_additional_version_number(document)
queue_nav = _get_queue_navigation(db, document) queue_nav = _get_queue_navigation(db, document)
naming_row = document.naming_fields[0] if getattr(document, "naming_fields", None) else None naming_row = document.naming_fields[0] if getattr(document, "naming_fields", None) else None
@ -1299,6 +1350,19 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
file_exists = _version_file_available(version, document.document_id) file_exists = _version_file_available(version, document.document_id)
version_rows.append((version, file_exists)) version_rows.append((version, file_exists))
ocr_version_options = [
(v.version_number, v.version_type, v.created_at)
for v in sorted(getattr(document, "text_versions", []), key=lambda v: v.version_number, reverse=True)
]
extracted_version_options = [
(v.version_number, v.created_at)
for v in sorted(getattr(document, "extracted_field_versions", []), key=lambda v: v.version_number, reverse=True)
]
additional_version_options = [
(v.version_number, v.created_at)
for v in sorted(getattr(document, "additional_field_versions", []), key=lambda v: v.version_number, reverse=True)
]
active_tab = request.query_params.get("tab", "ocr-review") active_tab = request.query_params.get("tab", "ocr-review")
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr", "source-options"}: if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "versions", "raw-ocr", "source-options"}:
active_tab = "ocr-review" active_tab = "ocr-review"
@ -1317,10 +1381,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
"next_fields_doc": queue_nav.get("next_fields_doc"), "next_fields_doc": queue_nav.get("next_fields_doc"),
"raw_ocr": raw_ocr, "raw_ocr": raw_ocr,
"reviewed_ocr": reviewed_ocr, "reviewed_ocr": reviewed_ocr,
"current_text_version": current_text_version,
"review_text_value": review_text_value, "review_text_value": review_text_value,
"file_url": file_url, "file_url": file_url,
"storage_available": storage_available, "storage_available": storage_available,
"version_rows": version_rows, "version_rows": version_rows,
"ocr_version_options": ocr_version_options,
"extracted_version_options": extracted_version_options,
"additional_version_options": additional_version_options,
"app_url": app_url, "app_url": app_url,
"quality_flag_options": QUALITY_FLAG_OPTIONS, "quality_flag_options": QUALITY_FLAG_OPTIONS,
"current_quality_flags": raw_ocr.quality_flags if raw_ocr and raw_ocr.quality_flags else [], "current_quality_flags": raw_ocr.quality_flags if raw_ocr and raw_ocr.quality_flags else [],
@ -1329,12 +1397,15 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
"expected_line_count": expected_line_count, "expected_line_count": expected_line_count,
"actual_line_count": actual_line_count, "actual_line_count": actual_line_count,
"error": error, "error": error,
"success": success,
"error_expected": error_expected, "error_expected": error_expected,
"error_actual": error_actual, "error_actual": error_actual,
"extracted_form": extracted_form, "extracted_form": extracted_form,
"current_extracted": current_extracted, "current_extracted": current_extracted,
"current_extracted_version_number": current_extracted_version_number,
"additional_form": additional_form, "additional_form": additional_form,
"current_additional": current_additional, "current_additional": current_additional,
"current_additional_version_number": current_additional_version_number,
"presets": all_presets, "presets": all_presets,
"selected_preset_id": preset_id, "selected_preset_id": preset_id,
"existing_document_types": existing_document_types, "existing_document_types": existing_document_types,
@ -1377,48 +1448,213 @@ def export_reviewed_jsonl(db: Session = Depends(get_db)):
) )
def _restore_ocr_to_original(db: Session, document: Document) -> bool:
    """Make text version number 1 the current OCR version of the document.

    Delegates to ``_restore_ocr_from_version_number`` so the "original" and
    "specific version" restore paths share one implementation, in the same
    way the extracted-field and additional-field restore helpers do.

    Returns:
        True when version 1 exists and was made current, otherwise False.
    """
    return _restore_ocr_from_version_number(db, document, 1)
def _restore_ocr_from_version_number(db: Session, document: Document, target_version_number: int) -> bool:
    """Mark one stored text version as the document's current OCR version.

    Every text version of the document gets ``is_current`` rewritten so that
    only the chosen one is flagged, and the document's ``review_status`` is
    set from the chosen version's type. Nothing is committed here.

    Returns:
        True when the requested version exists, otherwise False (in which
        case nothing is modified).
    """
    chosen = (
        db.query(TextVersion)
        .filter(
            TextVersion.document_id == document.id,
            TextVersion.version_number == target_version_number,
        )
        .first()
    )
    if chosen is None:
        return False

    siblings = db.query(TextVersion).filter(TextVersion.document_id == document.id).all()
    for text_version in siblings:
        text_version.is_current = text_version.id == chosen.id

    # Only a "reviewed" version leaves the document in the reviewed state.
    if chosen.version_type == "reviewed":
        document.review_status = "reviewed"
    else:
        document.review_status = "pending"
    db.add(document)
    return True
def _get_current_extracted_version_number(document: Document) -> int | None:
    """Find which stored extracted-field version matches the live values.

    The live row is compared column by column against each stored version,
    highest version number first.

    Returns:
        The version number of the first (newest) version whose compared
        columns all equal the live row, or None when there is no live row or
        no version matches.
    """
    live = get_current_extracted_fields(document)
    history = getattr(document, "extracted_field_versions", None) or []
    if live is None:
        return None

    compared_columns = (
        "merchant_raw",
        "merchant_normalized",
        "transaction_date",
        "transaction_time",
        "subtotal",
        "tax",
        "total",
        "currency",
        "payment_method",
        "receipt_number",
        "location",
        "counterparty",
        "extra_json",
    )
    newest_first = sorted(history, key=lambda item: item.version_number, reverse=True)
    for candidate in newest_first:
        if all(getattr(live, name) == getattr(candidate, name) for name in compared_columns):
            return candidate.version_number
    return None
def _get_current_additional_version_number(document: Document) -> int | None:
    """Find which stored additional-field version matches the live values.

    The live row is compared column by column against each stored version,
    highest version number first.

    Returns:
        The version number of the first (newest) version whose compared
        columns all equal the live row, or None when there is no live row or
        no version matches.
    """
    live = _get_current_additional_fields(document)
    history = getattr(document, "additional_field_versions", None) or []
    if live is None:
        return None

    compared_columns = (
        "owner_primary",
        "owner_secondary",
        "paid_by_person",
        "occasion_note",
        "is_shared_expense",
        "covered_people",
        "attendees",
        "reimbursement_expected_from",
        "reimbursement_paid_by",
        "reimbursement_paid_to",
        "reimbursement_paid_amount",
        "reimbursement_paid_date",
        "reimbursement_note",
    )
    newest_first = sorted(history, key=lambda item: item.version_number, reverse=True)
    for candidate in newest_first:
        if all(getattr(live, name) == getattr(candidate, name) for name in compared_columns):
            return candidate.version_number
    return None
def _parse_restore_choice(value: str) -> tuple[str, int | None]:
if not value or value == "none":
return ("none", None)
if value == "original":
return ("original", None)
if value.startswith("version:"):
try:
return ("version", int(value.split(":", 1)[1]))
except ValueError:
return ("none", None)
return ("none", None)
@router.post("/{document_id}/source-options", response_class=RedirectResponse) @router.post("/{document_id}/source-options", response_class=RedirectResponse)
def apply_source_options( def apply_source_options(
document_id: str, document_id: str,
file_action: str = Form("none"), file_action: str = Form("none"),
reset_ocr: str | None = Form(None), ocr_restore_choice: str = Form("none"),
clear_extracted: str | None = Form(None), extracted_restore_choice: str = Form("none"),
clear_additional: str | None = Form(None), additional_restore_choice: str = Form("none"),
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
document = db.query(Document).filter(Document.document_id == document_id).first() document = (
if not document: db.query(Document)
.options(
selectinload(Document.text_versions),
selectinload(Document.naming_fields),
selectinload(Document.extracted_fields),
selectinload(Document.additional_fields),
selectinload(Document.versions),
selectinload(Document.extracted_field_versions),
selectinload(Document.additional_field_versions),
)
.filter(Document.document_id == document_id)
.first()
)
if document is None:
return RedirectResponse(url="/documents/", status_code=303) return RedirectResponse(url="/documents/", status_code=303)
try: try:
# ---- File revert ---- changed = False
if file_action == "revert_original": if file_action == "revert_original":
if document.original_path: original_path = document.original_path or document.source_path
document.current_path = document.original_path if original_path:
original_file = Path(original_path)
if original_file.exists():
document.current_path = str(original_file)
document.canonical_filename = original_file.name
document.sha256_current = _sha256_for_file(original_file)
db.add(document)
changed = True
# ---- Reset OCR ---- elif file_action == "revert_current_version":
if reset_ocr: latest_version = (
db.query(TextVersion).filter( db.query(DocumentVersion)
TextVersion.document_id == document.id .filter(DocumentVersion.document_id == document.id)
).delete() .order_by(DocumentVersion.version_number.desc())
document.review_status = "pending" .first()
)
if latest_version and latest_version.file_path:
version_file = Path(latest_version.file_path)
if version_file.exists():
document.current_path = str(version_file)
document.canonical_filename = version_file.name
document.sha256_current = _sha256_for_file(version_file)
db.add(document)
changed = True
# ---- Clear extracted ---- ocr_mode, ocr_version = _parse_restore_choice(ocr_restore_choice)
if clear_extracted: print("PARSED_OCR", ocr_restore_choice, ocr_mode, ocr_version, flush=True)
db.query(ExtractedField).filter( if ocr_mode == "original":
ExtractedField.document_id == document.id if _restore_ocr_to_original(db, document):
).delete() changed = True
elif ocr_mode == "version" and ocr_version is not None:
if _restore_ocr_from_version_number(db, document, ocr_version):
changed = True
# ---- Clear additional ---- extracted_mode, extracted_version = _parse_restore_choice(extracted_restore_choice)
if clear_additional: print("PARSED_EXTRACTED", extracted_restore_choice, extracted_mode, extracted_version, flush=True)
db.query(DocumentAdditionalField).filter( if extracted_mode == "original":
DocumentAdditionalField.document_id == document.id if _restore_extracted_to_original(db, document):
).delete() changed = True
elif extracted_mode == "version" and extracted_version is not None:
if _restore_extracted_from_version_number(db, document, extracted_version):
changed = True
additional_mode, additional_version = _parse_restore_choice(additional_restore_choice)
print("PARSED_ADDITIONAL", additional_restore_choice, additional_mode, additional_version, flush=True)
if additional_mode == "original":
if _restore_additional_to_original(db, document):
changed = True
elif additional_mode == "version" and additional_version is not None:
if _restore_additional_from_version_number(db, document, additional_version):
changed = True
if changed:
db.commit() db.commit()
else:
db.rollback()
except Exception as e: except Exception as e:
print("source-options failed:", repr(e), flush=True) print("source-options failed:", repr(e), flush=True)
traceback.print_exc()
db.rollback() db.rollback()
return RedirectResponse( return RedirectResponse(
url=f"/documents/{document.document_id}?error=source_options_failed&tab=source-options", url=f"/documents/{document.document_id}?error=source_options_failed&tab=source-options",

View File

@ -105,6 +105,11 @@
Storage mount unavailable. Please retry in a moment. Storage mount unavailable. Please retry in a moment.
</div> </div>
{% endif %} {% endif %}
{% if success %}
<div style="background:#ecfdf5; border:1px solid #a7f3d0; color:#065f46; padding:0.75rem 1rem; border-radius:10px; margin-bottom:1rem;">
{{ success }}
</div>
{% endif %}
<div class="workspace-grid"> <div class="workspace-grid">
<section> <section>
@ -114,7 +119,7 @@
<p class="empty-state">Storage mount unavailable. Preview is temporarily unavailable.</p> <p class="empty-state">Storage mount unavailable. Preview is temporarily unavailable.</p>
{% elif file_url %} {% elif file_url %}
{% if document.mime_type == "application/pdf" %} {% if document.mime_type == "application/pdf" %}
<iframe class="preview-frame" src="{{ file_url }}"></iframe> <embed class="preview-frame" src="{{ file_url }}" type="application/pdf">
{% elif document.mime_type in ["image/jpeg", "image/png"] %} {% elif document.mime_type in ["image/jpeg", "image/png"] %}
<img class="preview-image" src="{{ file_url }}" alt="Document image"> <img class="preview-image" src="{{ file_url }}" alt="Document image">
{% else %} {% else %}
@ -139,10 +144,10 @@
<div class="tab-panel{% if active_tab == 'ocr-review' %} active{% endif %}" data-panel="ocr-review"> <div class="tab-panel{% if active_tab == 'ocr-review' %} active{% endif %}" data-panel="ocr-review">
<h2 class="card-title">Reviewed OCR</h2> <h2 class="card-title">Reviewed OCR</h2>
{% if reviewed_ocr %} {% if current_text_version %}
<p>Current reviewed version saved at {{ reviewed_ocr.created_at }} — v{{ reviewed_ocr.version_number }}</p> <p>Current OCR version: v{{ current_text_version.version_number }} — {{ current_text_version.version_type }} — {{ current_text_version.created_at }}</p>
{% else %} {% else %}
<p class="empty-state">No reviewed OCR saved yet.</p> <p class="empty-state">No OCR version available yet.</p>
{% endif %} {% endif %}
<p> <p>
@ -188,9 +193,21 @@
<div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields"> <div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields">
<h2 class="card-title">Extracted fields</h2> <h2 class="card-title">Extracted fields</h2>
{% if current_extracted_version_number %}
{% set current_extracted_meta = (
extracted_version_options
| selectattr(0, "equalto", current_extracted_version_number)
| list
| first
) %}
<p>
Current extracted version: v{{ current_extracted_version_number }}
{% if current_extracted_meta %}— {{ current_extracted_meta[1] }}{% endif %}
</p>
{% endif %}
{% if current_extracted %} {% if current_extracted %}
<p>Current extracted fields last updated at {{ current_extracted.updated_at }}</p>
{% else %} {% else %}
<p class="empty-state">No extracted fields saved yet.</p> <p class="empty-state">No extracted fields saved yet.</p>
{% endif %} {% endif %}
@ -228,9 +245,19 @@
<div class="tab-panel{% if active_tab == 'additional-fields' %} active{% endif %}" data-panel="additional-fields"> <div class="tab-panel{% if active_tab == 'additional-fields' %} active{% endif %}" data-panel="additional-fields">
<h2 class="card-title">Additional fields</h2> <h2 class="card-title">Additional fields</h2>
{% if current_additional_version_number %}{% set current_additional_meta = (
additional_version_options
| selectattr(0, "equalto", current_additional_version_number)
| list
| first
) %}
<p>
Current additional version: v{{ current_additional_version_number }}
{% if current_additional_meta %}— {{ current_additional_meta[1] }}{% endif %}
</p>{% endif %}
{% if current_additional %} {% if current_additional %}
<p>Current additional fields last updated at {{ current_additional.updated_at }}</p>
{% else %} {% else %}
<p class="empty-state">No additional fields saved yet.</p> <p class="empty-state">No additional fields saved yet.</p>
{% endif %} {% endif %}
@ -361,42 +388,59 @@
<div class="tab-panel{% if active_tab == 'source-options' %} active{% endif %}" data-panel="source-options"> <div class="tab-panel{% if active_tab == 'source-options' %} active{% endif %}" data-panel="source-options">
<h2 class="card-title">Source Options</h2> <h2 class="card-title">Source Options</h2>
<form method="post" action="/documents/{{ document.document_id }}/source-options" style="display:flex; flex-direction:column; gap:1rem;" enctype="multipart/form-data"> <form method="post" action="/documents/{{ document.document_id }}/source-options" style="display:flex; flex-direction:column; gap:1rem;">
<div class="card" style="padding:1rem;"> <div class="card" style="padding:1rem;">
<h3 style="margin-top:0;">File Source</h3> <h3 style="margin-top:0;">File Source</h3>
<div style="display:flex; flex-direction:column; gap:0.75rem;"> <div style="display:flex; flex-direction:column; gap:0.75rem;">
<label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="none" checked>
<span>No file change</span>
</label>
<label style="display:flex; align-items:center; gap:0.5rem;"> <label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="revert_original"> <input type="radio" name="file_action" value="revert_original">
<span>Revert to original file</span> <span>Revert to original file</span>
</label> </label>
<label style="display:flex; align-items:center; gap:0.5rem;"> <label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="revert_current_version"> <input type="radio" name="file_action" value="revert_current_version">
<span>Revert to current saved version</span> <span>Revert to current saved version</span>
</label> </label>
<label style="display:flex; align-items:center; gap:0.5rem;">
<input type="radio" name="file_action" value="none" checked>
<span>No file change</span>
</label>
</div> </div>
</div> </div>
<div class="card" style="padding:1rem;"> <div class="card" style="padding:1rem;">
<h3 style="margin-top:0;">Data Reset</h3> <h3 style="margin-top:0;">Data Reset</h3>
<div style="display:flex; flex-direction:column; gap:0.75rem;">
<label style="display:flex; align-items:center; gap:0.5rem;"> <div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center; margin-bottom:0.75rem;">
<input type="checkbox" name="reset_ocr" value="1"> <strong>OCR</strong>
<span>Reset OCR</span> <select name="ocr_restore_choice">
</label> <option value="none" selected>No change</option>
<label style="display:flex; align-items:center; gap:0.5rem;"> <option value="original">Original</option>
<input type="checkbox" name="clear_extracted" value="1"> {% for version_number, version_type, created_at in ocr_version_options %}
<span>Clear extracted fields</span> <option value="version:{{ version_number }}">v{{ version_number }} — {{ version_type }} — {{ created_at }}</option>
</label> {% endfor %}
<label style="display:flex; align-items:center; gap:0.5rem;"> </select>
<input type="checkbox" name="clear_additional" value="1"> </div>
<span>Clear additional fields</span>
</label> <div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center; margin-bottom:0.75rem;">
<strong>Extracted fields</strong>
<select name="extracted_restore_choice">
<option value="none" selected>No change</option>
<option value="original">Original</option>
{% for version_number, created_at in extracted_version_options %}
<option value="version:{{ version_number }}">v{{ version_number }} — {{ created_at }}</option>
{% endfor %}
</select>
</div>
<div style="display:grid; grid-template-columns: 180px 1fr; gap:0.75rem; align-items:center;">
<strong>Additional fields</strong>
<select name="additional_restore_choice">
<option value="none" selected>No change</option>
<option value="original">Original</option>
{% for version_number, created_at in additional_version_options %}
<option value="version:{{ version_number }}">v{{ version_number }} — {{ created_at }}</option>
{% endfor %}
</select>
</div> </div>
</div> </div>
@ -437,7 +481,7 @@
<div class="meta-item"><span class="meta-label">Original filename</span>{{ document.original_filename }}</div> <div class="meta-item"><span class="meta-label">Original filename</span>{{ document.original_filename }}</div>
<div class="meta-item"><span class="meta-label">Canonical filename</span>{{ document.canonical_filename }}</div> <div class="meta-item"><span class="meta-label">Canonical filename</span>{{ document.canonical_filename }}</div>
<div class="meta-item"><span class="meta-label">MIME type</span>{{ document.mime_type }}</div> <div class="meta-item"><span class="meta-label">MIME type</span>{{ document.mime_type }}</div>
<div class="meta-item"><span class="meta-label">File size</span>{{ document.file_size }}</div> <div class="meta-item"><span class="meta-label">File size</span>{{ human_size(document.file_size) }}</div>
<div class="meta-item"><span class="meta-label">Page count</span>{{ document.page_count }}</div> <div class="meta-item"><span class="meta-label">Page count</span>{{ document.page_count }}</div>
<div class="meta-item"><span class="meta-label">Share path</span>{{ document.share_path or "" }}</div> <div class="meta-item"><span class="meta-label">Share path</span>{{ document.share_path or "" }}</div>
<div class="meta-item"><span class="meta-label">Created at</span>{{ document.created_at }}</div> <div class="meta-item"><span class="meta-label">Created at</span>{{ document.created_at }}</div>