Implement bidirectional OCR and layout review sync

This commit is contained in:
Sean McElwain 2026-05-10 15:06:19 -05:00
parent 3f81d1a198
commit 658240c031
4 changed files with 880 additions and 54 deletions

View File

@ -402,6 +402,15 @@ def _build_word_entries_for_page(page_layout: dict, page_h: float) -> list[dict]
return entries
def _page_layout_line_entries(page_layout: dict) -> list[dict]:
region_lines = []
for region in page_layout.get("regions", []) or []:
region_lines.extend(region.get("lines", []) or [])
if region_lines:
return region_lines
return page_layout.get("lines", []) or []
def _flatten_layout_lines(layout_json: dict | None) -> list[dict]:
if not layout_json:
return []
@ -1009,7 +1018,7 @@ def _render_replica_pdf_from_layout(
if mode in {"scan_backed", "debug_overlay"} and (page_layout.get("words") or []):
render_entries = _build_word_entries_for_page(page_layout, page_h)
else:
render_entries = page_layout.get("lines", []) or []
render_entries = _page_layout_line_entries(page_layout)
for line in render_entries:
text_line = (line.get("text") or "").strip()

View File

@ -28,6 +28,142 @@ class LayoutOCRResult:
}
def _safe_float(value, default=0.0):
try:
return float(value)
except Exception:
return float(default)
def _bbox_union(items: list[dict[str, Any]]) -> list[float]:
    """Return the smallest ``[x1, y1, x2, y2]`` box enclosing every item's bbox.

    Items whose ``bbox`` is missing, empty, or not a 4-item list/tuple are
    ignored. Previously a missing bbox was replaced by ``[0, 0, 0, 0]`` and
    took part in the union, which pulled the result towards the origin.

    Returns ``[0.0, 0.0, 0.0, 0.0]`` when there are no items or no item has a
    usable bbox.
    """
    boxes = []
    for item in items or []:
        bbox = item.get("bbox")
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            continue
        boxes.append([_safe_float(coord) for coord in bbox])

    if not boxes:
        return [0.0, 0.0, 0.0, 0.0]

    return [
        min(box[0] for box in boxes),
        min(box[1] for box in boxes),
        max(box[2] for box in boxes),
        max(box[3] for box in boxes),
    ]
def _word_center_x(word: dict[str, Any]) -> float:
    """Return the horizontal midpoint of a word's bounding box.

    A bbox that is missing or is not a 4-item list/tuple yields ``0.0`` (the
    value a missing bbox already produced) instead of raising on the index
    access.
    """
    bbox = word.get("bbox")
    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        return 0.0
    return (_safe_float(bbox[0]) + _safe_float(bbox[2])) / 2.0
def _word_center_y(word: dict[str, Any]) -> float:
    """Return the vertical midpoint of a word's bounding box.

    A bbox that is missing or is not a 4-item list/tuple yields ``0.0`` (the
    value a missing bbox already produced) instead of raising on the index
    access.
    """
    bbox = word.get("bbox")
    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        return 0.0
    return (_safe_float(bbox[1]) + _safe_float(bbox[3])) / 2.0
def _group_words_into_lines_local(words: list[dict[str, Any]], y_tol: float = 12.0) -> list[dict[str, Any]]:
    """Cluster words into text lines by vertical proximity.

    Words are visited top-to-bottom (then left-to-right) and each one joins
    the first existing row whose mean centre-y is within ``y_tol`` of its own
    centre-y; otherwise it starts a new row. Each row is then ordered left to
    right and turned into a line dict. Rows whose words carry no text are
    dropped, but still consume their ``line_id``.
    """
    if not words:
        return []

    def left_edge(entry):
        return _safe_float((entry.get("bbox") or [0, 0, 0, 0])[0])

    def box_height(entry):
        box = entry.get("bbox") or [0, 0, 0, 0]
        return _safe_float(box[3]) - _safe_float(box[1])

    rows: list[list[dict[str, Any]]] = []
    for word in sorted(words, key=lambda entry: (_word_center_y(entry), left_edge(entry))):
        center = _word_center_y(word)
        for row in rows:
            row_center = sum(_word_center_y(member) for member in row) / len(row)
            if abs(center - row_center) <= y_tol:
                row.append(word)
                break
        else:
            # No existing row was close enough vertically.
            rows.append([word])

    lines: list[dict[str, Any]] = []
    for line_id, row in enumerate(rows, start=1):
        members = sorted(row, key=left_edge)
        tokens = [
            token
            for token in ((member.get("text") or "").strip() for member in members)
            if token
        ]
        if not tokens:
            continue

        union_box = _bbox_union(members)
        mean_height = max(1.0, sum(box_height(member) for member in members) / len(members))
        lines.append(
            {
                "line_id": line_id,
                "text": " ".join(tokens),
                "bbox": union_box,
                "confidence": None,
                "font_family_guess": "Helvetica",
                "font_size_guess": max(6.0, mean_height * 0.75),
                "text_color_guess": "#000000",
                "word_ids": [
                    member.get("word_id")
                    for member in members
                    if member.get("word_id") is not None
                ],
                "words": members,
            }
        )
    return lines
def _build_regions_from_words(words: list[dict[str, Any]], page_w: float) -> list[dict[str, Any]]:
    """Split a page's words into one or two column regions.

    Only words with non-blank text and a 4-item bbox are considered. The
    widest horizontal gap between neighbouring word centres is located; if it
    is at least ``max(80.0, page_w * 0.10)`` the words are divided at the
    middle of that gap into a left and a right region, otherwise all words
    form a single region. Each region carries its bbox, its words ordered by
    (centre-y, centre-x), and the lines grouped from those words.
    """
    usable = [
        word
        for word in words
        if (word.get("text") or "").strip()
        and isinstance(word.get("bbox"), (list, tuple))
        and len(word.get("bbox")) == 4
    ]
    if not usable:
        return []

    centers = sorted(_word_center_x(word) for word in usable)

    # Find the widest gap between horizontally adjacent word centres.
    widest_gap = 0.0
    gap_index = None
    for position, (current, following) in enumerate(zip(centers, centers[1:])):
        distance = following - current
        if distance > widest_gap:
            widest_gap = distance
            gap_index = position

    threshold = max(80.0, page_w * 0.10)
    if gap_index is None or widest_gap < threshold:
        columns = [usable]
    else:
        cut_x = (centers[gap_index] + centers[gap_index + 1]) / 2.0
        left_side = [word for word in usable if _word_center_x(word) <= cut_x]
        right_side = [word for word in usable if _word_center_x(word) > cut_x]
        columns = [column for column in (left_side, right_side) if column]
        columns.sort(key=lambda column: _bbox_union(column)[0])

    regions = []
    for region_id, column in enumerate(columns, start=1):
        members = sorted(column, key=lambda word: (_word_center_y(word), _word_center_x(word)))
        regions.append(
            {
                "region_id": region_id,
                "bbox": _bbox_union(members),
                "words": members,
                "lines": _group_words_into_lines_local(members),
            }
        )
    return regions
def _group_words_into_lines(words: list[dict[str, Any]], y_tol: float = 12.0) -> list[dict[str, Any]]:
if not words:
return []
@ -126,6 +262,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
words.append(
{
"word_id": len(words) + 1,
"text": text,
"bbox": [left, top, right, bottom],
"confidence": conf,
@ -133,6 +270,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
)
lines = _group_words_into_lines(words)
regions = _build_regions_from_words(words, page_w)
pages.append(
{
@ -143,6 +281,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
"image_height": page_h,
"lines": lines,
"words": words,
"regions": regions,
}
)

View File

@ -8,14 +8,16 @@ import hashlib
import json
from decimal import Decimal
from pathlib import Path
from io import BytesIO
from fastapi import APIRouter, Depends, Form, Query, Request
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse, Response
from fastapi.templating import Jinja2Templates
from sqlalchemy import distinct
from sqlalchemy import func
from sqlalchemy.orm import Session, selectinload
from pypdf import PdfReader
from pdf2image import convert_from_path
from app.core.storage_settings import get_default_save_root
from app.db.deps import get_db
@ -617,11 +619,27 @@ def _get_current_text_versions(document: Document) -> tuple[TextVersion | None,
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr" and tv.is_current),
None,
)
if raw_ocr is None:
raw_ocr = next(
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
None,
)
reviewed_ocr = next(
(tv for tv in sorted_text_versions if tv.version_type == "reviewed" and tv.is_current),
(
tv for tv in sorted_text_versions
if tv.version_type in ("reviewed", "reviewed_ocr") and tv.is_current
),
None,
)
if reviewed_ocr is None:
reviewed_ocr = next(
(
tv for tv in sorted_text_versions
if tv.version_type in ("reviewed", "reviewed_ocr")
),
None,
)
return raw_ocr, reviewed_ocr
@ -647,13 +665,11 @@ def _build_review_text_value(
else:
source = reviewed_ocr or raw_ocr
if source and source.layout_json:
return "\n".join(_extract_line_texts_from_layout(source.layout_json))
if source and source.text_content:
return source.text_content
if source and source.layout_json:
return "\n".join(_extract_line_texts_from_layout(source.layout_json))
return ""
def _line_count_from_layout(layout_json: dict | None) -> int:
    """Return how many line texts ``_extract_line_texts_from_layout`` yields for the layout."""
    line_texts = _extract_line_texts_from_layout(layout_json)
    return len(line_texts)
@ -662,19 +678,61 @@ def _apply_reviewed_lines_to_layout(base_layout: dict | None, reviewed_text: str
if not base_layout:
return None
reviewed_lines = reviewed_text.splitlines()
new_layout = deepcopy(base_layout)
reviewed_lines = reviewed_text.splitlines()
line_idx = 0
idx = 0
for page in new_layout.get("pages", []):
for line in page.get("lines", []):
line["text"] = reviewed_lines[idx] if idx < len(reviewed_lines) else ""
idx += 1
page_words = page.get("words", []) or []
words_by_id = {}
words_by_bbox = {}
for w in page_words:
word_id = w.get("id")
if word_id is not None:
words_by_id[str(word_id)] = w
bbox = w.get("bbox")
if isinstance(bbox, (list, tuple)) and len(bbox) == 4:
words_by_bbox[tuple(float(x) for x in bbox)] = w
for line in page.get("lines", []) or []:
new_line_text = reviewed_lines[line_idx] if line_idx < len(reviewed_lines) else ""
line["text"] = new_line_text
line_idx += 1
line_words = line.get("words", []) or []
if not line_words:
continue
tokens = new_line_text.split()
assigned = []
if not tokens:
assigned = [""] * len(line_words)
elif len(tokens) == len(line_words):
assigned = tokens
elif len(tokens) < len(line_words):
assigned = tokens + ([""] * (len(line_words) - len(tokens)))
else:
assigned = tokens[:len(line_words) - 1] + [" ".join(tokens[len(line_words) - 1:])]
for lw, token in zip(line_words, assigned):
lw["text"] = token
target = None
word_id = lw.get("id")
if word_id is not None:
target = words_by_id.get(str(word_id))
if target is None:
bbox = lw.get("bbox")
if isinstance(bbox, (list, tuple)) and len(bbox) == 4:
target = words_by_bbox.get(tuple(float(x) for x in bbox))
if target is not None:
target["text"] = token
return new_layout
def _get_existing_document_types(db: Session) -> list[str]:
rows = (
db.query(distinct(Document.document_type))
@ -1303,10 +1361,10 @@ def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
@router.post("/{document_id}/review-text", response_class=RedirectResponse)
def save_reviewed_text(
async def review_text(
document_id: str,
reviewed_text: str = Form(...),
quality_flags: list[str] | None = Form(None),
reviewed_text: str = Form(""),
quality_flags: list[str] = Form(default=[]),
quality_note: str = Form(""),
db: Session = Depends(get_db),
):
@ -1320,33 +1378,44 @@ def save_reviewed_text(
if document is None:
return RedirectResponse(url="/documents/", status_code=303)
raw_ocr, _ = _get_current_text_versions(document)
expected_line_count = _line_count_from_layout(raw_ocr.layout_json if raw_ocr else None)
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
base_layout = None
if reviewed_ocr and isinstance(getattr(reviewed_ocr, "layout_json", None), dict):
base_layout = json.loads(json.dumps(reviewed_ocr.layout_json))
elif raw_ocr and isinstance(getattr(raw_ocr, "layout_json", None), dict):
base_layout = json.loads(json.dumps(raw_ocr.layout_json))
expected_line_count = _line_count_from_layout(base_layout)
actual_line_count = len(reviewed_text.splitlines())
if expected_line_count and actual_line_count != expected_line_count:
return RedirectResponse(
url=f"/documents/{document.document_id}?error=line_count_mismatch&expected={expected_line_count}&actual={actual_line_count}&tab=ocr-review",
status_code=303,
)
existing_reviewed = [tv for tv in document.text_versions if tv.version_type == "reviewed" and tv.is_current]
existing_reviewed = [
tv for tv in document.text_versions
if tv.version_type in ("reviewed", "reviewed_ocr") and tv.is_current
]
for tv in existing_reviewed:
tv.is_current = False
reviewed_layout = _apply_reviewed_lines_to_layout(
raw_ocr.layout_json if raw_ocr else None,
reviewed_text,
)
if expected_line_count and actual_line_count == expected_line_count:
reviewed_layout = _apply_reviewed_lines_to_layout(base_layout, reviewed_text)
if isinstance(reviewed_layout, dict):
reviewed_layout["layout_sync_source"] = "ocr_review"
reviewed_layout["layout_sync_status"] = "synced"
reviewed_layout["layout_needs_review"] = False
else:
reviewed_layout = dict(base_layout or {})
reviewed_layout["layout_sync_source"] = "ocr_review"
reviewed_layout["layout_sync_status"] = "text_changed_needs_layout_review"
reviewed_layout["layout_needs_review"] = True
reviewed_version = TextVersion(
document_id=document.id,
version_number=max(tv.version_number for tv in document.text_versions) + 1 if document.text_versions else 1,
version_type="reviewed",
version_type="reviewed_ocr",
text_content=reviewed_text,
created_by="mcelwain",
is_current=True,
derived_from_version_id=raw_ocr.id if raw_ocr else None,
derived_from_version_id=(reviewed_ocr.id if reviewed_ocr else (raw_ocr.id if raw_ocr else None)),
layout_json=reviewed_layout,
)
db.add(reviewed_version)
@ -1359,8 +1428,10 @@ def save_reviewed_text(
document.review_status = "reviewed"
db.commit()
return RedirectResponse(url=f"/documents/{document.document_id}?tab=line-items&success=saved_reviewed_ocr", status_code=303)
return RedirectResponse(
url=f"/documents/{document.document_id}?tab=ocr-review&success=saved_reviewed_ocr",
status_code=303,
)
@router.post("/{document_id}/save-extracted-fields", response_class=RedirectResponse)
def save_extracted_fields_route(
@ -1673,6 +1744,29 @@ async def save_line_items(
status_code=303,
)
@router.get("/{document_id}/preview-image")
def document_preview_image(document_id: str, page: int = 1, db: Session = Depends(get_db)):
    """Render one page of the document's current file as a PNG image.

    Args:
        document_id: Public identifier of the document.
        page: Page number to render; values below 1 are treated as not found.
        db: Database session.

    Returns:
        A ``image/png`` response on success; a 404 response when the document,
        its file, or the requested page is unavailable; a 500 response with a
        generic message when rendering fails.
    """
    import logging

    document = db.query(Document).filter(Document.document_id == document_id).first()
    if document is None or not document.current_path:
        return HTMLResponse(content="Preview image not found", status_code=404)

    if page < 1:
        # No such page; skip the conversion entirely.
        return HTMLResponse(content="Preview image not found", status_code=404)

    path_obj = Path(document.current_path)
    if not path_obj.is_file():
        return HTMLResponse(content="Preview image not found", status_code=404)

    try:
        pil_images = convert_from_path(str(path_obj), dpi=150, first_page=page, last_page=page)
        if not pil_images:
            return HTMLResponse(content="Preview image not found", status_code=404)

        buf = BytesIO()
        pil_images[0].save(buf, format="PNG")
        return Response(content=buf.getvalue(), media_type="image/png")
    except Exception:
        # Keep the details in the server log: the exception text can contain
        # filesystem paths and tool output that should not be sent to clients.
        logging.getLogger(__name__).exception(
            "Preview image generation failed for document %s page %s", document_id, page
        )
        return HTMLResponse(content="Preview image generation failed", status_code=500)
@router.get("/{document_id}/preview-file")
def document_preview_file(document_id: str, path: str | None = None, db: Session = Depends(get_db)):
document = db.query(Document).filter(Document.document_id == document_id).first()
@ -1706,6 +1800,209 @@ def _build_preview_url_for_path(request: Request, document_id: str, path_value:
base = str(request.url_for("document_preview_file", document_id=document_id))
return f"{base}?path={quote(str(path_obj))}&v={int(path_obj.stat().st_mtime)}"
# --- layout review save helpers start ---
def _layout_review_group_words_into_lines(words, y_tol: float = 12.0):
normalized = []
for word in words or []:
bbox = word.get("bbox") or [0, 0, 0, 0]
if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
continue
try:
x1 = float(bbox[0])
y1 = float(bbox[1])
x2 = float(bbox[2])
y2 = float(bbox[3])
except Exception:
continue
normalized.append({
"id": word.get("id"),
"text": (word.get("text") or "").strip(),
"bbox": [x1, y1, x2, y2],
})
normalized.sort(key=lambda w: (w["bbox"][1], w["bbox"][0]))
groups = []
for word in normalized:
word_center_y = (word["bbox"][1] + word["bbox"][3]) / 2.0
placed = False
for group in groups:
group_center_y = sum((item["bbox"][1] + item["bbox"][3]) / 2.0 for item in group) / len(group)
if abs(word_center_y - group_center_y) <= y_tol:
group.append(word)
placed = True
break
if not placed:
groups.append([word])
lines = []
for group in groups:
group.sort(key=lambda w: w["bbox"][0])
line_text = " ".join((item.get("text") or "").strip() for item in group).strip()
left = min(item["bbox"][0] for item in group)
top = min(item["bbox"][1] for item in group)
right = max(item["bbox"][2] for item in group)
bottom = max(item["bbox"][3] for item in group)
lines.append({
"text": line_text,
"bbox": [left, top, right, bottom],
"confidence": None,
"font_family_guess": "Helvetica",
"font_size_guess": max(6.0, (bottom - top) * 0.75),
"text_color_guess": "#000000",
"words": group,
})
return lines
def _layout_review_redirect(document_id: str, query: str) -> RedirectResponse:
    """Build the 303 redirect back to the Layout Review tab with an outcome flag."""
    return RedirectResponse(
        url=f"/documents/{document_id}?tab=layout-review&{query}",
        status_code=303,
    )


def _layout_review_number(value, default, cast=float):
    """Coerce a client-supplied value with ``cast``; return ``default`` if it cannot be parsed."""
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        return default


def _layout_review_clean_word(word, fallback_id: int) -> dict | None:
    """Normalize one posted word, or return None when it has no usable bbox."""
    if not isinstance(word, dict):
        return None
    bbox = word.get("bbox") or [0, 0, 0, 0]
    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        return None
    try:
        coords = [float(value) for value in bbox]
    except (TypeError, ValueError, OverflowError):
        return None
    text = word.get("text") or ""
    if not isinstance(text, str):
        text = str(text)
    return {
        "id": _layout_review_number(word.get("id") or fallback_id, fallback_id, int),
        "text": text.strip(),
        "bbox": coords,
        "confidence": None,
    }


@router.post("/{document_id}/save-layout-review")
async def save_layout_review(document_id: str, request: Request, db: Session = Depends(get_db)):
    """Persist the layout editor's words as a new current ``reviewed_ocr`` text version.

    The form field ``layout_review_json`` must hold a JSON object with a
    non-empty ``pages`` list. Each page's words are normalized, regrouped into
    lines, and stored as the new layout; the version's text content is the
    rebuilt lines joined with newlines.

    The payload comes from the browser, so malformed pages/words are skipped
    and unparsable numbers fall back to defaults instead of raising.

    Returns:
        A 303 redirect to the Layout Review tab carrying either
        ``success=saved_layout_review`` or an ``error=...`` code, or a 404
        response when the document does not exist.
    """
    form = await request.form()
    payload_raw = form.get("layout_review_json")
    if not payload_raw:
        return _layout_review_redirect(document_id, "error=layout_review_missing_payload")

    try:
        payload = json.loads(payload_raw)
    except (TypeError, ValueError, RecursionError):
        return _layout_review_redirect(document_id, "error=layout_review_invalid_json")

    document = (
        db.query(Document)
        .options(selectinload(Document.text_versions))
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return HTMLResponse(content="Document not found", status_code=404)

    raw_ocr, reviewed_ocr = _get_current_text_versions(document)
    current_text_version = next(
        (
            tv for tv in sorted(
                getattr(document, "text_versions", []),
                key=lambda x: (x.version_number, x.created_at),
                reverse=True,
            )
            if tv.is_current
        ),
        None,
    )

    source_version = reviewed_ocr or raw_ocr or current_text_version
    if source_version is None:
        return _layout_review_redirect(document_id, "error=layout_review_no_source")

    posted_pages = payload.get("pages") if isinstance(payload, dict) else None
    if not isinstance(posted_pages, list) or not posted_pages:
        return _layout_review_redirect(document_id, "error=layout_review_no_pages")

    rebuilt_pages = []
    rebuilt_text_lines = []

    for idx, page in enumerate(posted_pages, start=1):
        if not isinstance(page, dict):
            continue
        page_number = _layout_review_number(page.get("page") or idx, idx, int)
        page_width = _layout_review_number(page.get("page_width") or 1.0, 1.0)
        page_height = _layout_review_number(page.get("page_height") or 1.0, 1.0)

        posted_words = page.get("words")
        if not isinstance(posted_words, list):
            posted_words = []

        words = []
        for word_idx, word in enumerate(posted_words, start=1):
            cleaned = _layout_review_clean_word(word, word_idx)
            if cleaned is not None:
                words.append(cleaned)

        lines = _layout_review_group_words_into_lines(words)
        rebuilt_text_lines.extend((line.get("text") or "") for line in lines)

        rebuilt_pages.append({
            "page": page_number,
            "page_width": page_width,
            "page_height": page_height,
            "image_width": page_width,
            "image_height": page_height,
            "words": words,
            "lines": lines,
        })

    if not rebuilt_pages:
        # Every posted page was unusable; do not store an empty layout.
        return _layout_review_redirect(document_id, "error=layout_review_no_pages")

    # Carry over the descriptive keys of the source layout, defaulting the
    # ones the stored layout is expected to have.
    source_layout_json = getattr(source_version, "layout_json", None)
    new_layout_json = {}
    if isinstance(source_layout_json, dict):
        for key in ("schema_version", "analysis_type", "engine"):
            if key in source_layout_json:
                new_layout_json[key] = source_layout_json[key]
    new_layout_json.setdefault("schema_version", 1)
    new_layout_json.setdefault("analysis_type", "canonical")

    new_layout_json["pages"] = rebuilt_pages
    new_layout_json["layout_sync_status"] = "synced"
    new_layout_json["layout_sync_source"] = "layout_review"
    new_layout_json["layout_needs_review"] = False

    new_text_content = "\n".join(rebuilt_text_lines).strip()

    next_version_number = max(
        [getattr(tv, "version_number", 0) for tv in getattr(document, "text_versions", [])] or [0]
    ) + 1

    # NOTE(review): this clears is_current on every text version, including
    # raw OCR ones — confirm that is intended.
    for tv in getattr(document, "text_versions", []):
        tv.is_current = False

    new_version = TextVersion(
        document_id=document.id,
        version_number=next_version_number,
        version_type="reviewed_ocr",
        text_content=new_text_content,
        created_by="layout_review_editor",
        is_current=True,
        ocr_engine=getattr(source_version, "ocr_engine", None),
        ocr_engine_version=getattr(source_version, "ocr_engine_version", None),
        rerun_source="layout_review",
        quality_score=getattr(source_version, "quality_score", None),
        quality_flags=getattr(source_version, "quality_flags", None),
        quality_note=getattr(source_version, "quality_note", None),
        derived_from_version_id=getattr(source_version, "id", None),
        layout_json=new_layout_json,
    )
    db.add(new_version)
    db.commit()

    return _layout_review_redirect(document_id, "success=saved_layout_review")
# --- layout review save helpers end ---
@router.get("/{document_id}", response_class=HTMLResponse)
def document_detail(document_id: str, request: Request, queue: str | None = None, viewer_source: str = "scan", db: Session = Depends(get_db)):
current_user = getattr(request.state, "current_user", None)
@ -1726,6 +2023,12 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
return HTMLResponse(content="Document not found", status_code=404)
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
layout_source_version = reviewed_ocr or raw_ocr
layout_source_json = (
layout_source_version.layout_json
if layout_source_version and isinstance(getattr(layout_source_version, "layout_json", None), dict)
else None
)
current_text_version = next(
(
tv for tv in sorted(
@ -1741,7 +2044,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
editor_source = request.query_params.get("editor_source", "reviewed")
review_text_value = _build_review_text_value(raw_ocr, reviewed_ocr, editor_source)
expected_line_count = _line_count_from_layout(raw_ocr.layout_json if raw_ocr else None)
layout_source_version = reviewed_ocr or raw_ocr or current_text_version
layout_source_json = (
layout_source_version.layout_json
if layout_source_version and isinstance(getattr(layout_source_version, "layout_json", None), dict)
else None
)
expected_line_count = _line_count_from_layout(layout_source_json)
actual_line_count = len(review_text_value.splitlines()) if review_text_value else 0
line_numbers = list(range(1, max(actual_line_count, expected_line_count) + 1))
@ -1750,32 +2060,26 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay")
overlay_page_data = []
layout_review_pages = []
try:
current_text_version_for_overlay = next(
(
tv for tv in sorted(
getattr(document, "text_versions", []),
key=lambda x: (x.version_number, x.created_at),
reverse=True,
)
if tv.is_current
),
None,
)
overlay_pages = ((current_text_version_for_overlay.layout_json or {}).get("pages", []) if current_text_version_for_overlay and current_text_version_for_overlay.layout_json else []) or []
layout_json = layout_source_json or {}
overlay_pages = layout_json.get("pages", []) if isinstance(layout_json, dict) else []
for page in overlay_pages:
page_width = float(page.get("page_width") or page.get("image_width") or 1.0)
page_height = float(page.get("page_height") or page.get("image_height") or 1.0)
words = []
for word in page.get("words", []) or []:
for idx, word in enumerate(page.get("words", []) or [], start=1):
bbox = word.get("bbox") or [0, 0, 0, 0]
if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
continue
words.append({
word_row = {
"id": idx,
"text": (word.get("text") or "").strip(),
"bbox": [float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])],
})
}
words.append(word_row)
lines = []
source_lines = []
@ -1794,14 +2098,24 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
})
overlay_page_data.append({
"page": page.get("page"),
"page_width": page_width,
"page_height": page_height,
"words": [{"text": w["text"], "bbox": w["bbox"]} for w in words],
"lines": lines,
})
layout_review_pages.append({
"page": page.get("page"),
"page_width": page_width,
"page_height": page_height,
"words": words,
"lines": lines,
})
except Exception:
except Exception as e:
print("layout review build failed:", repr(e), flush=True)
overlay_page_data = []
layout_review_pages = []
scan_path = document.current_path
replica_path = replica_clean_output.file_path if replica_clean_output and replica_clean_output.file_path else None
@ -1823,6 +2137,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
storage_available = _storage_available()
file_url = _build_preview_url_for_path(request, document.document_id, preview_path)
layout_review_image_url = str(request.url_for("document_preview_image", document_id=document.document_id)) + "?page=1"
app_url = str(request.url_for("document_detail", document_id=document.document_id))
error = request.query_params.get("error")
@ -1899,7 +2214,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
]
active_tab = request.query_params.get("tab", "ocr-review")
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "line-items", "versions", "raw-ocr", "source-options"}:
if active_tab not in {"ocr-review", "layout-review", "extracted-fields", "additional-fields", "line-items", "versions", "raw-ocr", "source-options"}:
active_tab = "ocr-review"
return templates.TemplateResponse(
@ -1920,10 +2235,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
"current_text_version": current_text_version,
"review_text_value": review_text_value,
"file_url": file_url,
"layout_review_image_url": layout_review_image_url,
"storage_available": storage_available,
"viewer_source": effective_viewer_source,
"overlay_page_data": overlay_page_data,
"layout_review_pages": layout_review_pages,
"replica_clean_output": replica_clean_output,
"replica_scan_backed_output": replica_scan_backed_output,
"replica_debug_overlay_output": replica_debug_overlay_output,
"version_rows": version_rows,
"current_line_item_version": current_line_item_version,
"ocr_version_options": ocr_version_options,

View File

@ -321,6 +321,7 @@ document.addEventListener("DOMContentLoaded", () => {
<div class="card">
<div class="right-pane-tabs">
<button class="tab-button{% if active_tab in ['ocr-review', 'raw-ocr', 'source-options'] %} active{% endif %}" type="button" data-tab="ocr-review">OCR Review</button>
<button class="tab-button{% if active_tab == 'layout-review' %} active{% endif %}" type="button" data-tab="layout-review">Layout Review</button>
<button class="tab-button{% if active_tab == 'extracted-fields' %} active{% endif %}" type="button" data-tab="extracted-fields">Extracted Fields</button>
<button class="tab-button{% if active_tab == 'additional-fields' %} active{% endif %}" type="button" data-tab="additional-fields">Additional Fields</button>
<button class="tab-button{% if active_tab == 'line-items' %} active{% endif %}" type="button" data-tab="line-items">Line Items</button>
@ -383,7 +384,360 @@ document.addEventListener("DOMContentLoaded", () => {
</form>
</div>
<div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields">
<div class="tab-panel{% if active_tab == 'layout-review' %} active{% endif %}" data-panel="layout-review">
<div class="ocr-review-header-row">
<h2 class="card-title">Layout Review</h2>
<div style="font-size:0.95rem; color:#666;">Browser-only scaffold editor</div>
</div>
{% if layout_review_pages %}
<style>
@media (max-width: 900px) {
#layout-review-root { grid-template-columns: 1fr !important; }
#layout-review-canvas-wrap { min-height: 520px !important; }
}
#layout-review-stage, #layout-review-stage * {
-webkit-user-select: none !important;
user-select: none !important;
-webkit-touch-callout: none !important;
}
#layout-review-image {
display: block;
width: 100%;
height: auto;
pointer-events: none;
-webkit-user-drag: none;
user-drag: none;
}
#layout-review-canvas {
position: absolute;
inset: 0;
width: 100%;
height: 100%;
pointer-events: auto;
touch-action: none;
display: block;
z-index: 5;
background: transparent;
}
#layout-review-debug {
position: absolute;
left: 8px;
bottom: 8px;
z-index: 6;
background: rgba(0,0,0,0.75);
color: #fff;
padding: 4px 8px;
border-radius: 8px;
font-size: 12px;
}
</style>
<div id="layout-review-root" style="display:grid; grid-template-columns:minmax(0,1fr) 320px; gap:1rem; align-items:start;">
<div>
<div id="layout-review-canvas-wrap" style="position:relative; width:100%; min-height:900px; border:1px solid #ddd; border-radius:12px; overflow:hidden; background:#fafafa; -webkit-user-select:none; user-select:none; -webkit-touch-callout:none;" oncontextmenu="return false;">
{% if layout_review_image_url %}
<div id="layout-review-stage" style="position:relative; width:100%; display:block;">
<img
id="layout-review-image"
src="{{ layout_review_image_url }}"
alt="Layout review page"
draggable="false"
>
<canvas id="layout-review-canvas"></canvas>
<div id="layout-review-debug">boot</div>
</div>
{% endif %}
</div>
</div>
<div style="border:1px solid #ddd; border-radius:12px; padding:0.9rem; background:#fff;">
<h3 style="margin-top:0;">Selected word</h3>
<div style="display:grid; gap:0.65rem;">
<div>
<label for="layout-word-id">Word ID</label>
<input id="layout-word-id" type="text" readonly style="width:100%;">
</div>
<div>
<label for="layout-word-text">Text</label>
<input id="layout-word-text" type="text" style="width:100%;">
</div>
<div style="display:grid; grid-template-columns:1fr 1fr; gap:0.5rem;">
<div>
<label for="layout-x1">x1</label>
<input id="layout-x1" type="number" step="0.1" style="width:100%;">
</div>
<div>
<label for="layout-y1">y1</label>
<input id="layout-y1" type="number" step="0.1" style="width:100%;">
</div>
<div>
<label for="layout-x2">x2</label>
<input id="layout-x2" type="number" step="0.1" style="width:100%;">
</div>
<div>
<label for="layout-y2">y2</label>
<input id="layout-y2" type="number" step="0.1" style="width:100%;">
</div>
</div>
<div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
<button type="button" id="layout-apply-word">Apply</button>
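<button type="button" id="layout-nudge-left" aria-label="Nudge left">&larr;</button>
<button type="button" id="layout-nudge-right" aria-label="Nudge right">&rarr;</button>
<button type="button" id="layout-nudge-up" aria-label="Nudge up">&uarr;</button>
<button type="button" id="layout-nudge-down" aria-label="Nudge down">&darr;</button>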
</div>
<div style="font-size:0.9rem; color:#666;">
Apply changes updates the layout editor only. Save layout review persists layout. Save reviewed OCR persists text and marks layout for review.
</div>
</div>
</div>
</div>
<form method="post" action="/documents/{{ document.document_id }}/save-layout-review" id="layout-review-save-form" style="margin-top:0.75rem;">
<input type="hidden" name="layout_review_json" id="layout-review-json">
<button type="submit" class="primary">Save layout review</button>
</form>
<script id="layout-review-data" type="application/json">{{ layout_review_pages|tojson }}</script>
<script>
(function () {
const dataTag = document.getElementById("layout-review-data");
const canvas = document.getElementById("layout-review-canvas");
const image = document.getElementById("layout-review-image");
const debugEl = document.getElementById("layout-review-debug");
const idInput = document.getElementById("layout-word-id");
const textInput = document.getElementById("layout-word-text");
const x1Input = document.getElementById("layout-x1");
const y1Input = document.getElementById("layout-y1");
const x2Input = document.getElementById("layout-x2");
const y2Input = document.getElementById("layout-y2");
// Show a short status string in the on-page debug badge and mirror it to the console.
function debug(msg) {
  if (debugEl) {
    debugEl.textContent = msg;
  }
  try {
    console.log("[layout-review]", msg);
  } catch (ignored) {
    // Console logging is best-effort only.
  }
}
if (!dataTag || !canvas || !image) {
debug("missing-elements");
return;
}
let pages = [];
try {
pages = JSON.parse(dataTag.textContent || "[]");
} catch (e) {
debug("json-error");
return;
}
if (!Array.isArray(pages) || !pages.length) {
debug("no-pages");
return;
}
const page = pages[0];
let words = JSON.parse(JSON.stringify(page.words || []));
let selectedId = null;
const ctx = canvas.getContext("2d");
if (!ctx) {
debug("no-ctx");
return;
}
// Return the word whose id matches the current selection (compared as strings), or null.
function getSelectedWord() {
  const wantedId = String(selectedId);
  const match = words.find(function (candidate) {
    return String(candidate.id) === wantedId;
  });
  return match || null;
}
// Copy the selected word's id, text and bbox into the side-panel inputs,
// or blank every input when nothing is selected.
function syncEditor() {
  const current = getSelectedWord();
  const coordInputs = [x1Input, y1Input, x2Input, y2Input];

  if (current) {
    if (idInput) idInput.value = current.id;
    if (textInput) textInput.value = current.text || "";
    coordInputs.forEach(function (input, index) {
      if (input) input.value = current.bbox[index];
    });
    return;
  }

  [idInput, textInput].concat(coordInputs).forEach(function (input) {
    if (input) input.value = "";
  });
}
// Match the canvas to the displayed image: the backing store is scaled by the
// device pixel ratio while drawing coordinates stay in CSS pixels.
// Returns the displayed size, or null while the image has no layout box.
function sizeCanvasToImage() {
  const bounds = image.getBoundingClientRect();
  const width = bounds.width;
  const height = bounds.height;
  if (!width || !height) return null;

  const pixelRatio = window.devicePixelRatio || 1;
  canvas.width = Math.round(width * pixelRatio);
  canvas.height = Math.round(height * pixelRatio);
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;

  // Equivalent to resetting to identity and then scaling by pixelRatio.
  ctx.setTransform(pixelRatio, 0, 0, pixelRatio, 0, 0);
  return { width: width, height: height };
}
/**
 * Redraw every word box on the overlay canvas.
 *
 * Boxes are scaled from page coordinates (page.page_width / page.page_height)
 * to the displayed image size. Each box is drawn at least 1px wide and tall.
 * The selected word is drawn in blue with a heavier stroke; all others in red.
 */
function renderCanvas() {
  const display = sizeCanvasToImage();
  if (!display) {
    debug("size-failed");
    return;
  }
  const displayWidth = display.width;
  const displayHeight = display.height;
  ctx.clearRect(0, 0, displayWidth, displayHeight);

  const scaleX = displayWidth / Number(page.page_width || 1);
  const scaleY = displayHeight / Number(page.page_height || 1);
  const palette = {
    selected: { stroke: "rgba(37,99,235,0.95)", fill: "rgba(37,99,235,0.12)", lineWidth: 2 },
    normal: { stroke: "rgba(220,38,38,0.85)", fill: "rgba(220,38,38,0.03)", lineWidth: 1 },
  };

  for (const word of words) {
    const box = word.bbox || [0, 0, 0, 0];
    const leftPx = Number(box[0] || 0) * scaleX;
    const topPx = Number(box[1] || 0) * scaleY;
    const rightPx = Number(box[2] || 0) * scaleX;
    const bottomPx = Number(box[3] || 0) * scaleY;
    // Keep degenerate or inverted boxes visible as a 1px mark.
    const boxWidth = Math.max(1, rightPx - leftPx);
    const boxHeight = Math.max(1, bottomPx - topPx);
    const isSelected = String(word.id) === String(selectedId);
    const look = isSelected ? palette.selected : palette.normal;

    ctx.save();
    ctx.strokeStyle = look.stroke;
    ctx.lineWidth = look.lineWidth;
    ctx.fillStyle = look.fill;
    ctx.fillRect(leftPx, topPx, boxWidth, boxHeight);
    ctx.strokeRect(leftPx, topPx, boxWidth, boxHeight);
    ctx.restore();
  }
  debug("render words=" + words.length);
}
/**
 * Hit-test a viewport point against the word boxes.
 *
 * clientX/clientY are converted to page coordinates using the canvas's
 * on-screen rectangle. Words are scanned from the end of the list backwards,
 * so the last matching entry wins. Returns the word, or null when the canvas
 * has no size or no box contains the point.
 */
function pickWord(clientX, clientY) {
  const bounds = canvas.getBoundingClientRect();
  if (!(bounds.width && bounds.height)) {
    return null;
  }
  const pageX = (clientX - bounds.left) * (Number(page.page_width || 1) / bounds.width);
  const pageY = (clientY - bounds.top) * (Number(page.page_height || 1) / bounds.height);

  // Inclusive containment test against one [x1, y1, x2, y2] box.
  const containsPoint = (box) => {
    const left = Number(box[0] || 0);
    const top = Number(box[1] || 0);
    const right = Number(box[2] || 0);
    const bottom = Number(box[3] || 0);
    return pageX >= left && pageX <= right && pageY >= top && pageY <= bottom;
  };

  let index = words.length;
  while (index-- > 0) {
    const candidate = words[index];
    if (containsPoint(candidate.bbox || [0, 0, 0, 0])) {
      return candidate;
    }
  }
  return null;
}
/**
 * Write the editor inputs back into the selected word and redraw.
 *
 * Text is taken from the text input when it exists. Each bbox coordinate is
 * taken from its input only when that input exists and holds a finite
 * number; otherwise the word's current coordinate is kept. This prevents a
 * missing, blank or mistyped field from collapsing the box to 0 or NaN,
 * which matters because the save handler calls this before serializing.
 * Does nothing when no word is selected.
 */
function applyEditorValues() {
  const w = getSelectedWord();
  if (!w) return;
  if (textInput) {
    w.text = textInput.value;
  }
  const current = w.bbox || [0, 0, 0, 0];
  // Existing coordinate as a finite number, defaulting to 0.
  const currentCoord = (index) => {
    const value = Number(current[index]);
    return Number.isFinite(value) ? value : 0;
  };
  // Parsed input value, or the existing coordinate when the input is unusable.
  const readCoord = (input, index) => {
    if (!input || String(input.value).trim() === "") {
      return currentCoord(index);
    }
    const parsed = Number(input.value);
    return Number.isFinite(parsed) ? parsed : currentCoord(index);
  };
  w.bbox = [
    readCoord(x1Input, 0),
    readCoord(y1Input, 1),
    readCoord(x2Input, 2),
    readCoord(y2Input, 3),
  ];
  renderCanvas();
}
/**
 * Shift the selected word's bbox by (dx, dy), then refresh the editor
 * inputs and the canvas. Does nothing when no word is selected.
 *
 * A missing bbox or a missing/non-numeric coordinate is treated as 0, the
 * same default the drawing and hit-testing code uses, so a malformed word
 * neither throws nor ends up with NaN coordinates.
 */
function nudge(dx, dy) {
  const w = getSelectedWord();
  if (!w) return;
  const box = w.bbox || [0, 0, 0, 0];
  // Number(...) || 0 maps undefined, null and NaN to 0.
  const coord = (index) => Number(box[index]) || 0;
  w.bbox = [
    coord(0) + dx,
    coord(1) + dy,
    coord(2) + dx,
    coord(3) + dy,
  ];
  syncEditor();
  renderCanvas();
}
/**
 * Serialize the edited words into the JSON string submitted with the save
 * form: { pages: [{ page, page_width, page_height, words: [...] }] }.
 *
 * Each word is reduced to a numeric id (its 1-based position when the id is
 * falsy), its text (empty string when falsy) and four numeric bbox values
 * (0 for any missing value).
 */
function buildLayoutReviewPayload() {
  const serializeWord = (w, idx) => {
    const box = w.bbox || [0, 0, 0, 0];
    return {
      id: Number(w.id || (idx + 1)),
      text: w.text || "",
      bbox: [0, 1, 2, 3].map((slot) => Number(box[slot] || 0)),
    };
  };
  const pagePayload = {
    page: page.page || 1,
    page_width: page.page_width || 1,
    page_height: page.page_height || 1,
    words: words.map(serializeWord),
  };
  return JSON.stringify({ pages: [pagePayload] });
}
/**
 * Pointer/touch handler: select the word under the event position.
 *
 * Uses the first touch point when the event carries touches, otherwise the
 * event's own client coordinates. On a hit the word becomes the selection
 * and the editor and canvas are refreshed; on a miss only a debug message
 * is emitted and the selection is left unchanged.
 */
function handlePick(ev) {
  ev.preventDefault();
  const hasTouchPoint = ev.touches && ev.touches.length;
  const point = hasTouchPoint ? ev.touches[0] : ev;
  const hit = pickWord(point.clientX, point.clientY);
  if (hit) {
    selectedId = hit.id;
    syncEditor();
    renderCanvas();
    debug("picked " + hit.id);
    return;
  }
  debug("pick-miss");
}
// Cancel the browser's default handling of these events on both the canvas
// and the underlying image.
["contextmenu", "selectstart", "dragstart", "touchstart", "touchend", "mousedown"].forEach((evt) => {
  canvas.addEventListener(evt, (e) => { e.preventDefault(); }, { passive: false });
  image.addEventListener(evt, (e) => { e.preventDefault(); }, { passive: false });
});
// Pointer events already cover touch input. Registering handlePick for both
// "pointerdown" and "touchstart" made every tap pick and redraw twice, so the
// touch listener is only a fallback for browsers without PointerEvent.
if (window.PointerEvent) {
  canvas.addEventListener("pointerdown", handlePick, { passive: false });
} else {
  canvas.addEventListener("touchstart", handlePick, { passive: false });
}
// Editor buttons; each is optional in the template.
document.getElementById("layout-apply-word")?.addEventListener("click", applyEditorValues);
document.getElementById("layout-nudge-left")?.addEventListener("click", () => nudge(-1, 0));
document.getElementById("layout-nudge-right")?.addEventListener("click", () => nudge(1, 0));
document.getElementById("layout-nudge-up")?.addEventListener("click", () => nudge(0, -1));
document.getElementById("layout-nudge-down")?.addEventListener("click", () => nudge(0, 1));
// On save, fold any pending editor values into the word list and place the
// serialized payload in the hidden form field.
document.getElementById("layout-review-save-form")?.addEventListener("submit", function () {
  applyEditorValues();
  const hidden = document.getElementById("layout-review-json");
  if (hidden) {
    hidden.value = buildLayoutReviewPayload();
  }
});
// Redraw whenever the displayed image size may have changed.
window.addEventListener("resize", renderCanvas);
image.addEventListener("load", renderCanvas);
syncEditor();
renderCanvas();
// NOTE(review): the delayed redraws presumably cover the image or layout
// settling after the first paint; confirm whether both are still needed.
setTimeout(renderCanvas, 300);
setTimeout(renderCanvas, 1000);
})();
</script>
{% else %}
<p class="empty-state">No layout review data available yet.</p>
{% endif %}
</div>
<div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields">
<div class="extracted-fields-header-row">
<h2 class="card-title">Extracted fields</h2>
<form method="get" action="/documents/{{ document.document_id }}" class="extracted-autofill-inline-form">
@ -1019,3 +1373,8 @@ document.addEventListener("DOMContentLoaded", () => {
})();
</script>