Fix layout review viewer initialization and replica tab state
This commit is contained in:
parent
474ab010fe
commit
4dcb6ebd0e
|
|
@ -621,16 +621,13 @@ def _get_current_text_versions(document: Document) -> tuple[TextVersion | None,
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# raw_ocr is source capture only. It should not control editor state.
|
||||||
raw_ocr = next(
|
raw_ocr = next(
|
||||||
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr" and tv.is_current),
|
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
if raw_ocr is None:
|
|
||||||
raw_ocr = next(
|
|
||||||
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# reviewed_ocr is the canonical editable state used by OCR Review + Layout Review.
|
||||||
reviewed_ocr = next(
|
reviewed_ocr = next(
|
||||||
(
|
(
|
||||||
tv for tv in sorted_text_versions
|
tv for tv in sorted_text_versions
|
||||||
|
|
@ -650,7 +647,6 @@ def _get_current_text_versions(document: Document) -> tuple[TextVersion | None,
|
||||||
return raw_ocr, reviewed_ocr
|
return raw_ocr, reviewed_ocr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _default_word_style() -> dict:
|
def _default_word_style() -> dict:
|
||||||
return {
|
return {
|
||||||
"font_family": "Helvetica",
|
"font_family": "Helvetica",
|
||||||
|
|
@ -1174,9 +1170,17 @@ def rerun_ocr(document_id: str, db: Session = Depends(get_db)):
|
||||||
analysis_json = build_layout_ocr_analysis_for_document(document)
|
analysis_json = build_layout_ocr_analysis_for_document(document)
|
||||||
text_content = analysis_json.get("text_content") or ""
|
text_content = analysis_json.get("text_content") or ""
|
||||||
|
|
||||||
for row in getattr(document, "text_versions", []) or []:
|
existing_reviewed = next(
|
||||||
if getattr(row, "is_current", False):
|
(
|
||||||
row.is_current = False
|
tv for tv in sorted(
|
||||||
|
getattr(document, "text_versions", []) or [],
|
||||||
|
key=lambda x: (x.version_number, x.created_at),
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
if tv.version_type in ("reviewed", "reviewed_ocr")
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
next_version = (
|
next_version = (
|
||||||
max((getattr(v, "version_number", 0) or 0) for v in getattr(document, "text_versions", []) or []) + 1
|
max((getattr(v, "version_number", 0) or 0) for v in getattr(document, "text_versions", []) or []) + 1
|
||||||
|
|
@ -1189,7 +1193,7 @@ def rerun_ocr(document_id: str, db: Session = Depends(get_db)):
|
||||||
version_type="raw_ocr",
|
version_type="raw_ocr",
|
||||||
text_content=text_content,
|
text_content=text_content,
|
||||||
created_by="rerun_ocr_layout",
|
created_by="rerun_ocr_layout",
|
||||||
is_current=True,
|
is_current=False if existing_reviewed else True,
|
||||||
ocr_engine=layout_result.engine_name,
|
ocr_engine=layout_result.engine_name,
|
||||||
ocr_engine_version=layout_result.engine_version,
|
ocr_engine_version=layout_result.engine_version,
|
||||||
rerun_source="layout_ocr",
|
rerun_source="layout_ocr",
|
||||||
|
|
@ -1368,7 +1372,7 @@ def save_pdf(document_id: str, output_path: str = Form(""), db: Session = Depend
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{document_id}/save-replica-pdf", response_class=RedirectResponse)
|
@router.post("/{document_id}/save-replica-pdf", response_class=RedirectResponse)
|
||||||
def save_replica_pdf_clean(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
|
def save_replica_pdf_clean(document_id: str, output_path: str = Form(""), return_tab: str = Form("ocr-review"), return_viewer_source: str = Form("replica"), db: Session = Depends(get_db)):
|
||||||
if not _storage_available():
|
if not _storage_available():
|
||||||
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
||||||
|
|
||||||
|
|
@ -1392,7 +1396,7 @@ def save_replica_pdf_clean(document_id: str, output_path: str = Form(""), db: Se
|
||||||
output_path_obj = _resolve_document_output_path(document, output_path)
|
output_path_obj = _resolve_document_output_path(document, output_path)
|
||||||
save_replica_pdf(db, document, output_path_obj, mode="clean")
|
save_replica_pdf(db, document, output_path_obj, mode="clean")
|
||||||
return RedirectResponse(
|
return RedirectResponse(
|
||||||
url=f"/documents/{document.document_id}?success=saved_replica_pdf&tab=ocr-review&viewer_source=replica",
|
url=f"/documents/{document.document_id}?success=saved_replica_pdf&tab={return_tab}&viewer_source={return_viewer_source}",
|
||||||
status_code=303,
|
status_code=303,
|
||||||
)
|
)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
|
|
@ -1421,7 +1425,7 @@ def save_replica_pdf_clean(document_id: str, output_path: str = Form(""), db: Se
|
||||||
)
|
)
|
||||||
|
|
||||||
@router.post("/{document_id}/save-replica-pdf-scan-backed", response_class=RedirectResponse)
|
@router.post("/{document_id}/save-replica-pdf-scan-backed", response_class=RedirectResponse)
|
||||||
def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
|
def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""), return_tab: str = Form("ocr-review"), return_viewer_source: str = Form("replica_scan_backed"), db: Session = Depends(get_db)):
|
||||||
if not _storage_available():
|
if not _storage_available():
|
||||||
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
||||||
|
|
||||||
|
|
@ -1444,16 +1448,16 @@ def save_replica_pdf_scan_backed(document_id: str, output_path: str = Form(""),
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
if "invalid_output_path" in str(e):
|
if "invalid_output_path" in str(e):
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_replica_pdf_scan_backed_failed&tab=ocr-review", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_replica_pdf_scan_backed_failed&tab={return_tab}&viewer_source=scan", status_code=303)
|
||||||
except Exception:
|
except Exception:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_replica_pdf_scan_backed_failed&tab=ocr-review", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=save_replica_pdf_scan_backed_failed&tab={return_tab}&viewer_source=scan", status_code=303)
|
||||||
|
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?success=saved_replica_pdf_scan_backed&tab=ocr-review", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?success=saved_replica_pdf_scan_backed&tab={return_tab}&viewer_source={return_viewer_source}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{document_id}/save-replica-pdf-debug-overlay", response_class=RedirectResponse)
|
@router.post("/{document_id}/save-replica-pdf-debug-overlay", response_class=RedirectResponse)
|
||||||
def save_replica_pdf_debug_overlay(document_id: str, output_path: str = Form(""), db: Session = Depends(get_db)):
|
def save_replica_pdf_debug_overlay(document_id: str, output_path: str = Form(""), return_tab: str = Form("ocr-review"), return_viewer_source: str = Form("replica_debug_overlay"), db: Session = Depends(get_db)):
|
||||||
if not _storage_available():
|
if not _storage_available():
|
||||||
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
return RedirectResponse(url=f"/documents/{document_id}?error=storage_unavailable", status_code=303)
|
||||||
|
|
||||||
|
|
@ -1480,18 +1484,18 @@ def save_replica_pdf_debug_overlay(document_id: str, output_path: str = Form("")
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}?error=invalid_output_path", status_code=303)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return RedirectResponse(
|
return RedirectResponse(
|
||||||
url=f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab=ocr-review",
|
url=f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab={return_tab}&viewer_source=scan",
|
||||||
status_code=303,
|
status_code=303,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return RedirectResponse(
|
return RedirectResponse(
|
||||||
url=f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab=ocr-review",
|
url=f"/documents/{document.document_id}?error=save_replica_pdf_debug_overlay_failed&tab={return_tab}&viewer_source=scan",
|
||||||
status_code=303,
|
status_code=303,
|
||||||
)
|
)
|
||||||
|
|
||||||
return RedirectResponse(
|
return RedirectResponse(
|
||||||
url=f"/documents/{document.document_id}?success=saved_replica_pdf_debug_overlay&tab=ocr-review&viewer_source=replica_debug_overlay",
|
url=f"/documents/{document.document_id}?success=saved_replica_pdf_debug_overlay&tab={return_tab}&viewer_source={return_viewer_source}",
|
||||||
status_code=303,
|
status_code=303,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1981,6 +1985,58 @@ def _layout_review_group_words_into_lines(words, y_tol: float = 12.0):
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/reset-layout-review", response_class=RedirectResponse)
|
||||||
|
def reset_layout_review(document_id: str, db: Session = Depends(get_db)):
|
||||||
|
document = (
|
||||||
|
db.query(Document)
|
||||||
|
.options(selectinload(Document.text_versions))
|
||||||
|
.filter(Document.document_id == document_id)
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
if document is None:
|
||||||
|
return RedirectResponse(url=f"/documents/{document_id}?tab=layout-review&error=document_not_found", status_code=303)
|
||||||
|
|
||||||
|
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
|
||||||
|
if raw_ocr is None or not isinstance(getattr(raw_ocr, "layout_json", None), dict):
|
||||||
|
return RedirectResponse(url=f"/documents/{document_id}?tab=layout-review&error=no_raw_layout_to_reset", status_code=303)
|
||||||
|
|
||||||
|
reset_layout = deepcopy(raw_ocr.layout_json)
|
||||||
|
reset_layout["layout_sync_status"] = "reset_from_raw_ocr"
|
||||||
|
reset_layout["layout_sync_source"] = "raw_ocr_reset"
|
||||||
|
reset_layout["layout_needs_review"] = False
|
||||||
|
reset_layout = _normalize_layout_review_payload(reset_layout)
|
||||||
|
|
||||||
|
_append_layout_edit_event(
|
||||||
|
reset_layout,
|
||||||
|
{
|
||||||
|
"event_type": "layout_review_reset_from_raw_ocr",
|
||||||
|
"actor": "user",
|
||||||
|
"source": "layout_review_reset",
|
||||||
|
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
reset_text = _canonical_layout_text(reset_layout)
|
||||||
|
|
||||||
|
_save_canonical_review_state(
|
||||||
|
db=db,
|
||||||
|
document=document,
|
||||||
|
source_version=raw_ocr,
|
||||||
|
text_content=reset_text,
|
||||||
|
layout_json=reset_layout,
|
||||||
|
created_by="layout_review_reset",
|
||||||
|
rerun_source="layout_review_reset",
|
||||||
|
event_type="layout_review_reset_from_raw_ocr",
|
||||||
|
)
|
||||||
|
|
||||||
|
return RedirectResponse(
|
||||||
|
url=f"/documents/{document_id}?tab=layout-review&viewer_source=scan&success=layout_review_reset",
|
||||||
|
status_code=303,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{document_id}/save-layout-review")
|
@router.post("/{document_id}/save-layout-review")
|
||||||
async def save_layout_review(document_id: str, request: Request, db: Session = Depends(get_db)):
|
async def save_layout_review(document_id: str, request: Request, db: Session = Depends(get_db)):
|
||||||
form = await request.form()
|
form = await request.form()
|
||||||
|
|
@ -2168,6 +2224,12 @@ async def save_layout_review(document_id: str, request: Request, db: Session = D
|
||||||
|
|
||||||
@router.get("/{document_id}", response_class=HTMLResponse)
|
@router.get("/{document_id}", response_class=HTMLResponse)
|
||||||
def document_detail(document_id: str, request: Request, queue: str | None = None, viewer_source: str = "scan", db: Session = Depends(get_db)):
|
def document_detail(document_id: str, request: Request, queue: str | None = None, viewer_source: str = "scan", db: Session = Depends(get_db)):
|
||||||
|
requested_tab = request.query_params.get("tab", "ocr-review")
|
||||||
|
if requested_tab == "layout-review" and viewer_source != "scan":
|
||||||
|
return RedirectResponse(
|
||||||
|
url=f"/documents/{document_id}?tab=layout-review&viewer_source=scan",
|
||||||
|
status_code=303,
|
||||||
|
)
|
||||||
current_user = getattr(request.state, "current_user", None)
|
current_user = getattr(request.state, "current_user", None)
|
||||||
document = (
|
document = (
|
||||||
db.query(Document)
|
db.query(Document)
|
||||||
|
|
|
||||||
|
|
@ -236,12 +236,18 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||||
</form>
|
</form>
|
||||||
<div class="button-row" style="margin-top:0.6rem;">
|
<div class="button-row" style="margin-top:0.6rem;">
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf" style="display:inline;">
|
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf" style="display:inline;">
|
||||||
|
<input type="hidden" name="return_tab" value="{{ active_tab }}">
|
||||||
|
<input type="hidden" name="return_viewer_source" value="replica">
|
||||||
<button type="submit">Save Replica PDF</button>
|
<button type="submit">Save Replica PDF</button>
|
||||||
</form>
|
</form>
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-scan-backed" style="display:inline;">
|
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-scan-backed" style="display:inline;">
|
||||||
|
<input type="hidden" name="return_tab" value="{{ active_tab }}">
|
||||||
|
<input type="hidden" name="return_viewer_source" value="replica_scan_backed">
|
||||||
<button type="submit">Save Replica PDF (Scan-backed)</button>
|
<button type="submit">Save Replica PDF (Scan-backed)</button>
|
||||||
</form>
|
</form>
|
||||||
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-debug-overlay" style="display:inline;">
|
<form method="post" action="/documents/{{ document.document_id }}/save-replica-pdf-debug-overlay" style="display:inline;">
|
||||||
|
<input type="hidden" name="return_tab" value="{{ active_tab }}">
|
||||||
|
<input type="hidden" name="return_viewer_source" value="replica_debug_overlay">
|
||||||
<button type="submit">Save Replica PDF (Debug Overlay)</button>
|
<button type="submit">Save Replica PDF (Debug Overlay)</button>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
|
@ -450,7 +456,7 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="right-pane-tabs">
|
<div class="right-pane-tabs">
|
||||||
<button class="tab-button{% if active_tab in ['ocr-review', 'raw-ocr', 'source-options'] %} active{% endif %}" type="button" data-tab="ocr-review">OCR Review</button>
|
<button class="tab-button{% if active_tab in ['ocr-review', 'raw-ocr', 'source-options'] %} active{% endif %}" type="button" data-tab="ocr-review">OCR Review</button>
|
||||||
<button class="tab-button{% if active_tab == 'layout-review' %} active{% endif %}" type="button" data-tab="layout-review">Layout Review</button>
|
<button class="tab-button{% if active_tab == 'layout-review' %} active{% endif %}" type="button" data-tab="layout-review">Layout Review</button>
|
||||||
<button class="tab-button{% if active_tab == 'extracted-fields' %} active{% endif %}" type="button" data-tab="extracted-fields">Extracted Fields</button>
|
<button class="tab-button{% if active_tab == 'extracted-fields' %} active{% endif %}" type="button" data-tab="extracted-fields">Extracted Fields</button>
|
||||||
<button class="tab-button{% if active_tab == 'additional-fields' %} active{% endif %}" type="button" data-tab="additional-fields">Additional Fields</button>
|
<button class="tab-button{% if active_tab == 'additional-fields' %} active{% endif %}" type="button" data-tab="additional-fields">Additional Fields</button>
|
||||||
<button class="tab-button{% if active_tab == 'line-items' %} active{% endif %}" type="button" data-tab="line-items">Line Items</button>
|
<button class="tab-button{% if active_tab == 'line-items' %} active{% endif %}" type="button" data-tab="line-items">Line Items</button>
|
||||||
|
|
@ -1931,6 +1937,9 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||||
<div class="word-ribbon-row">
|
<div class="word-ribbon-row">
|
||||||
<button type="button" class="layout-tool-btn" id="layout-undo">Undo</button>
|
<button type="button" class="layout-tool-btn" id="layout-undo">Undo</button>
|
||||||
<button type="button" class="layout-tool-btn" id="layout-redo">Redo</button>
|
<button type="button" class="layout-tool-btn" id="layout-redo">Redo</button>
|
||||||
|
<form method="post" action="/documents/{{ document.document_id }}/reset-layout-review" style="display:inline;" onsubmit="return confirm('Reset Layout Review from raw OCR? This will discard current layout-review edits.');">
|
||||||
|
<button type="submit" class="layout-tool-btn danger">Reset Layout</button>
|
||||||
|
</form>
|
||||||
</div>
|
</div>
|
||||||
<div class="word-ribbon-label">Edit</div>
|
<div class="word-ribbon-label">Edit</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue