Implement bidirectional OCR and layout review sync
This commit is contained in:
parent
3f81d1a198
commit
658240c031
|
|
@ -402,6 +402,15 @@ def _build_word_entries_for_page(page_layout: dict, page_h: float) -> list[dict]
|
|||
return entries
|
||||
|
||||
|
||||
def _page_layout_line_entries(page_layout: dict) -> list[dict]:
|
||||
region_lines = []
|
||||
for region in page_layout.get("regions", []) or []:
|
||||
region_lines.extend(region.get("lines", []) or [])
|
||||
if region_lines:
|
||||
return region_lines
|
||||
return page_layout.get("lines", []) or []
|
||||
|
||||
|
||||
def _flatten_layout_lines(layout_json: dict | None) -> list[dict]:
|
||||
if not layout_json:
|
||||
return []
|
||||
|
|
@ -1009,7 +1018,7 @@ def _render_replica_pdf_from_layout(
|
|||
if mode in {"scan_backed", "debug_overlay"} and (page_layout.get("words") or []):
|
||||
render_entries = _build_word_entries_for_page(page_layout, page_h)
|
||||
else:
|
||||
render_entries = page_layout.get("lines", []) or []
|
||||
render_entries = _page_layout_line_entries(page_layout)
|
||||
|
||||
for line in render_entries:
|
||||
text_line = (line.get("text") or "").strip()
|
||||
|
|
|
|||
|
|
@ -28,6 +28,142 @@ class LayoutOCRResult:
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def _safe_float(value, default=0.0):
|
||||
try:
|
||||
return float(value)
|
||||
except Exception:
|
||||
return float(default)
|
||||
|
||||
|
||||
def _bbox_union(items: list[dict[str, Any]]) -> list[float]:
    """Return the smallest ``[x1, y1, x2, y2]`` box enclosing every item's bbox.

    Only items whose ``"bbox"`` is a 4-element list or tuple contribute; each
    coordinate is coerced with ``_safe_float``. Items with a missing, empty or
    malformed bbox are ignored, so an item without coordinates can no longer
    stretch the union to the origin. When nothing contributes the result is
    ``[0.0, 0.0, 0.0, 0.0]``.
    """
    boxes = []
    for item in items or []:
        bbox = item.get("bbox")
        if isinstance(bbox, (list, tuple)) and len(bbox) == 4:
            boxes.append([_safe_float(coord) for coord in bbox])

    if not boxes:
        return [0.0, 0.0, 0.0, 0.0]

    lefts, tops, rights, bottoms = zip(*boxes)
    return [min(lefts), min(tops), max(rights), max(bottoms)]
|
||||
|
||||
|
||||
def _word_center_x(word: dict[str, Any]) -> float:
    """Return the horizontal midpoint of *word*'s ``"bbox"``.

    A missing or empty bbox is treated as ``[0, 0, 0, 0]``; the two x
    coordinates (indexes 0 and 2) are coerced with ``_safe_float``.
    """
    box = word.get("bbox") or [0, 0, 0, 0]
    left = _safe_float(box[0])
    right = _safe_float(box[2])
    return (left + right) / 2.0
|
||||
|
||||
|
||||
def _word_center_y(word: dict[str, Any]) -> float:
    """Return the vertical midpoint of *word*'s ``"bbox"``.

    A missing or empty bbox is treated as ``[0, 0, 0, 0]``; the two y
    coordinates (indexes 1 and 3) are coerced with ``_safe_float``.
    """
    box = word.get("bbox") or [0, 0, 0, 0]
    top = _safe_float(box[1])
    bottom = _safe_float(box[3])
    return (top + bottom) / 2.0
|
||||
|
||||
|
||||
def _group_words_into_lines_local(words: list[dict[str, Any]], y_tol: float = 12.0) -> list[dict[str, Any]]:
    """Cluster *words* into text-line dicts by vertical proximity.

    Words are visited in order of vertical centre, then left edge. Each word
    joins the first existing cluster whose mean vertical centre is within
    *y_tol* of its own, otherwise it starts a new cluster. Every cluster is
    then sorted left to right and turned into a line dict; clusters with no
    non-blank text are dropped. ``line_id`` is the 1-based cluster index, so
    dropped clusters leave gaps in the numbering.
    """
    if not words:
        return []

    def left_edge(word):
        return _safe_float((word.get("bbox") or [0, 0, 0, 0])[0])

    def box_height(word):
        return _safe_float((word.get("bbox") or [0, 0, 0, 0])[3]) - _safe_float((word.get("bbox") or [0, 0, 0, 0])[1])

    clusters: list[list[dict[str, Any]]] = []
    for word in sorted(words, key=lambda w: (_word_center_y(w), left_edge(w))):
        center_y = _word_center_y(word)
        for cluster in clusters:
            cluster_y = sum(_word_center_y(member) for member in cluster) / len(cluster)
            if abs(center_y - cluster_y) <= y_tol:
                cluster.append(word)
                break
        else:
            # No existing cluster was close enough vertically.
            clusters.append([word])

    result: list[dict[str, Any]] = []
    for line_number, cluster in enumerate(clusters, start=1):
        members = sorted(cluster, key=left_edge)
        text_value = " ".join(
            (member.get("text") or "").strip()
            for member in members
            if (member.get("text") or "").strip()
        ).strip()
        if not text_value:
            continue

        union_box = _bbox_union(members)
        mean_height = max(1.0, sum(box_height(member) for member in members) / len(members))
        result.append(
            {
                "line_id": line_number,
                "text": text_value,
                "bbox": union_box,
                "confidence": None,
                "font_family_guess": "Helvetica",
                # Font size is estimated from the average word-box height, floored at 6.0.
                "font_size_guess": max(6.0, mean_height * 0.75),
                "text_color_guess": "#000000",
                "word_ids": [member.get("word_id") for member in members if member.get("word_id") is not None],
                "words": members,
            }
        )
    return result
|
||||
|
||||
|
||||
def _build_regions_from_words(words: list[dict[str, Any]], page_w: float) -> list[dict[str, Any]]:
    """Split a page's words into one or two column-like regions.

    Only words with non-blank text and a 4-element bbox are considered. The
    largest gap between consecutive horizontal word centres is located; when
    it is at least ``max(80.0, page_w * 0.10)`` the words are divided at the
    midpoint of that gap into a left and a right bucket, otherwise all words
    stay in a single bucket. Each bucket becomes a region dict with a 1-based
    ``region_id``, the union bbox, its words sorted by (centre-y, centre-x)
    and the lines built by ``_group_words_into_lines_local``.
    """

    def is_visible(word):
        box = word.get("bbox")
        return (word.get("text") or "").strip() and isinstance(box, (list, tuple)) and len(box) == 4

    def reading_order(word):
        return (_word_center_y(word), _word_center_x(word))

    visible_words = [word for word in words if is_visible(word)]
    if not visible_words:
        return []

    centers = sorted(_word_center_x(word) for word in visible_words)

    # Locate the widest horizontal gap between neighbouring word centres.
    split_idx = None
    max_gap = 0.0
    for position, (here, there) in enumerate(zip(centers, centers[1:])):
        gap = there - here
        if gap > max_gap:
            max_gap, split_idx = gap, position

    min_gap = max(80.0, page_w * 0.10)

    if split_idx is None or max_gap < min_gap:
        # Gap too small to count as a column break: keep one region.
        buckets = [visible_words]
    else:
        split_x = (centers[split_idx] + centers[split_idx + 1]) / 2.0
        left_words = [word for word in visible_words if _word_center_x(word) <= split_x]
        right_words = [word for word in visible_words if _word_center_x(word) > split_x]
        buckets = [bucket for bucket in (left_words, right_words) if bucket]
        buckets.sort(key=lambda bucket: _bbox_union(bucket)[0])

    regions = []
    for region_id, bucket in enumerate(buckets, start=1):
        ordered = sorted(bucket, key=reading_order)
        regions.append(
            {
                "region_id": region_id,
                "bbox": _bbox_union(ordered),
                "words": ordered,
                "lines": _group_words_into_lines_local(ordered),
            }
        )
    return regions
|
||||
|
||||
def _group_words_into_lines(words: list[dict[str, Any]], y_tol: float = 12.0) -> list[dict[str, Any]]:
|
||||
if not words:
|
||||
return []
|
||||
|
|
@ -126,6 +262,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
|
|||
|
||||
words.append(
|
||||
{
|
||||
"word_id": len(words) + 1,
|
||||
"text": text,
|
||||
"bbox": [left, top, right, bottom],
|
||||
"confidence": conf,
|
||||
|
|
@ -133,6 +270,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
|
|||
)
|
||||
|
||||
lines = _group_words_into_lines(words)
|
||||
regions = _build_regions_from_words(words, page_w)
|
||||
|
||||
pages.append(
|
||||
{
|
||||
|
|
@ -143,6 +281,7 @@ def run_layout_ocr(pdf_path: str | Path, dpi: int = 300) -> LayoutOCRResult:
|
|||
"image_height": page_h,
|
||||
"lines": lines,
|
||||
"words": words,
|
||||
"regions": regions,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -8,14 +8,16 @@ import hashlib
|
|||
import json
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from io import BytesIO
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, Query, Request
|
||||
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
|
||||
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse, Response
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from sqlalchemy import distinct
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
from pypdf import PdfReader
|
||||
from pdf2image import convert_from_path
|
||||
|
||||
from app.core.storage_settings import get_default_save_root
|
||||
from app.db.deps import get_db
|
||||
|
|
@ -617,11 +619,27 @@ def _get_current_text_versions(document: Document) -> tuple[TextVersion | None,
|
|||
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr" and tv.is_current),
|
||||
None,
|
||||
)
|
||||
if raw_ocr is None:
|
||||
raw_ocr = next(
|
||||
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
|
||||
None,
|
||||
)
|
||||
|
||||
reviewed_ocr = next(
|
||||
(tv for tv in sorted_text_versions if tv.version_type == "reviewed" and tv.is_current),
|
||||
(
|
||||
tv for tv in sorted_text_versions
|
||||
if tv.version_type in ("reviewed", "reviewed_ocr") and tv.is_current
|
||||
),
|
||||
None,
|
||||
)
|
||||
if reviewed_ocr is None:
|
||||
reviewed_ocr = next(
|
||||
(
|
||||
tv for tv in sorted_text_versions
|
||||
if tv.version_type in ("reviewed", "reviewed_ocr")
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
return raw_ocr, reviewed_ocr
|
||||
|
||||
|
|
@ -647,13 +665,11 @@ def _build_review_text_value(
|
|||
else:
|
||||
source = reviewed_ocr or raw_ocr
|
||||
|
||||
if source and source.layout_json:
|
||||
return "\n".join(_extract_line_texts_from_layout(source.layout_json))
|
||||
if source and source.text_content:
|
||||
return source.text_content
|
||||
if source and source.layout_json:
|
||||
return "\n".join(_extract_line_texts_from_layout(source.layout_json))
|
||||
return ""
|
||||
|
||||
|
||||
def _line_count_from_layout(layout_json: dict | None) -> int:
    """Return the number of line texts ``_extract_line_texts_from_layout`` yields for *layout_json*."""
    line_texts = _extract_line_texts_from_layout(layout_json)
    return len(line_texts)
|
||||
|
||||
|
|
@ -662,19 +678,61 @@ def _apply_reviewed_lines_to_layout(base_layout: dict | None, reviewed_text: str
|
|||
if not base_layout:
|
||||
return None
|
||||
|
||||
reviewed_lines = reviewed_text.splitlines()
|
||||
new_layout = deepcopy(base_layout)
|
||||
reviewed_lines = reviewed_text.splitlines()
|
||||
line_idx = 0
|
||||
|
||||
idx = 0
|
||||
for page in new_layout.get("pages", []):
|
||||
for line in page.get("lines", []):
|
||||
line["text"] = reviewed_lines[idx] if idx < len(reviewed_lines) else ""
|
||||
idx += 1
|
||||
page_words = page.get("words", []) or []
|
||||
|
||||
words_by_id = {}
|
||||
words_by_bbox = {}
|
||||
for w in page_words:
|
||||
word_id = w.get("id")
|
||||
if word_id is not None:
|
||||
words_by_id[str(word_id)] = w
|
||||
bbox = w.get("bbox")
|
||||
if isinstance(bbox, (list, tuple)) and len(bbox) == 4:
|
||||
words_by_bbox[tuple(float(x) for x in bbox)] = w
|
||||
|
||||
for line in page.get("lines", []) or []:
|
||||
new_line_text = reviewed_lines[line_idx] if line_idx < len(reviewed_lines) else ""
|
||||
line["text"] = new_line_text
|
||||
line_idx += 1
|
||||
|
||||
line_words = line.get("words", []) or []
|
||||
if not line_words:
|
||||
continue
|
||||
|
||||
tokens = new_line_text.split()
|
||||
|
||||
assigned = []
|
||||
if not tokens:
|
||||
assigned = [""] * len(line_words)
|
||||
elif len(tokens) == len(line_words):
|
||||
assigned = tokens
|
||||
elif len(tokens) < len(line_words):
|
||||
assigned = tokens + ([""] * (len(line_words) - len(tokens)))
|
||||
else:
|
||||
assigned = tokens[:len(line_words) - 1] + [" ".join(tokens[len(line_words) - 1:])]
|
||||
|
||||
for lw, token in zip(line_words, assigned):
|
||||
lw["text"] = token
|
||||
|
||||
target = None
|
||||
word_id = lw.get("id")
|
||||
if word_id is not None:
|
||||
target = words_by_id.get(str(word_id))
|
||||
|
||||
if target is None:
|
||||
bbox = lw.get("bbox")
|
||||
if isinstance(bbox, (list, tuple)) and len(bbox) == 4:
|
||||
target = words_by_bbox.get(tuple(float(x) for x in bbox))
|
||||
|
||||
if target is not None:
|
||||
target["text"] = token
|
||||
|
||||
return new_layout
|
||||
|
||||
|
||||
|
||||
def _get_existing_document_types(db: Session) -> list[str]:
|
||||
rows = (
|
||||
db.query(distinct(Document.document_type))
|
||||
|
|
@ -1303,10 +1361,10 @@ def save_field_enriched_pdf(document_id: str, db: Session = Depends(get_db)):
|
|||
|
||||
|
||||
@router.post("/{document_id}/review-text", response_class=RedirectResponse)
|
||||
def save_reviewed_text(
|
||||
async def review_text(
|
||||
document_id: str,
|
||||
reviewed_text: str = Form(...),
|
||||
quality_flags: list[str] | None = Form(None),
|
||||
reviewed_text: str = Form(""),
|
||||
quality_flags: list[str] = Form(default=[]),
|
||||
quality_note: str = Form(""),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
|
|
@ -1320,33 +1378,44 @@ def save_reviewed_text(
|
|||
if document is None:
|
||||
return RedirectResponse(url="/documents/", status_code=303)
|
||||
|
||||
raw_ocr, _ = _get_current_text_versions(document)
|
||||
expected_line_count = _line_count_from_layout(raw_ocr.layout_json if raw_ocr else None)
|
||||
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
|
||||
|
||||
base_layout = None
|
||||
if reviewed_ocr and isinstance(getattr(reviewed_ocr, "layout_json", None), dict):
|
||||
base_layout = json.loads(json.dumps(reviewed_ocr.layout_json))
|
||||
elif raw_ocr and isinstance(getattr(raw_ocr, "layout_json", None), dict):
|
||||
base_layout = json.loads(json.dumps(raw_ocr.layout_json))
|
||||
|
||||
expected_line_count = _line_count_from_layout(base_layout)
|
||||
actual_line_count = len(reviewed_text.splitlines())
|
||||
|
||||
if expected_line_count and actual_line_count != expected_line_count:
|
||||
return RedirectResponse(
|
||||
url=f"/documents/{document.document_id}?error=line_count_mismatch&expected={expected_line_count}&actual={actual_line_count}&tab=ocr-review",
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
existing_reviewed = [tv for tv in document.text_versions if tv.version_type == "reviewed" and tv.is_current]
|
||||
existing_reviewed = [
|
||||
tv for tv in document.text_versions
|
||||
if tv.version_type in ("reviewed", "reviewed_ocr") and tv.is_current
|
||||
]
|
||||
for tv in existing_reviewed:
|
||||
tv.is_current = False
|
||||
|
||||
reviewed_layout = _apply_reviewed_lines_to_layout(
|
||||
raw_ocr.layout_json if raw_ocr else None,
|
||||
reviewed_text,
|
||||
)
|
||||
if expected_line_count and actual_line_count == expected_line_count:
|
||||
reviewed_layout = _apply_reviewed_lines_to_layout(base_layout, reviewed_text)
|
||||
if isinstance(reviewed_layout, dict):
|
||||
reviewed_layout["layout_sync_source"] = "ocr_review"
|
||||
reviewed_layout["layout_sync_status"] = "synced"
|
||||
reviewed_layout["layout_needs_review"] = False
|
||||
else:
|
||||
reviewed_layout = dict(base_layout or {})
|
||||
reviewed_layout["layout_sync_source"] = "ocr_review"
|
||||
reviewed_layout["layout_sync_status"] = "text_changed_needs_layout_review"
|
||||
reviewed_layout["layout_needs_review"] = True
|
||||
|
||||
reviewed_version = TextVersion(
|
||||
document_id=document.id,
|
||||
version_number=max(tv.version_number for tv in document.text_versions) + 1 if document.text_versions else 1,
|
||||
version_type="reviewed",
|
||||
version_type="reviewed_ocr",
|
||||
text_content=reviewed_text,
|
||||
created_by="mcelwain",
|
||||
is_current=True,
|
||||
derived_from_version_id=raw_ocr.id if raw_ocr else None,
|
||||
derived_from_version_id=(reviewed_ocr.id if reviewed_ocr else (raw_ocr.id if raw_ocr else None)),
|
||||
layout_json=reviewed_layout,
|
||||
)
|
||||
db.add(reviewed_version)
|
||||
|
|
@ -1359,8 +1428,10 @@ def save_reviewed_text(
|
|||
document.review_status = "reviewed"
|
||||
db.commit()
|
||||
|
||||
return RedirectResponse(url=f"/documents/{document.document_id}?tab=line-items&success=saved_reviewed_ocr", status_code=303)
|
||||
|
||||
return RedirectResponse(
|
||||
url=f"/documents/{document.document_id}?tab=ocr-review&success=saved_reviewed_ocr",
|
||||
status_code=303,
|
||||
)
|
||||
|
||||
@router.post("/{document_id}/save-extracted-fields", response_class=RedirectResponse)
|
||||
def save_extracted_fields_route(
|
||||
|
|
@ -1673,6 +1744,29 @@ async def save_line_items(
|
|||
status_code=303,
|
||||
)
|
||||
|
||||
@router.get("/{document_id}/preview-image")
def document_preview_image(document_id: str, page: int = 1, db: Session = Depends(get_db)):
    """Serve one page of the document's current file as a PNG image.

    Responds 404 when the document is unknown, has no ``current_path``, the
    path is not an existing regular file, or the conversion yields no image
    for the requested page. Any exception raised during conversion or PNG
    encoding is reported as a 500 response containing the exception repr.
    """
    document = db.query(Document).filter(Document.document_id == document_id).first()
    source_path = Path(document.current_path) if document is not None and document.current_path else None
    if source_path is None or not (source_path.exists() and source_path.is_file()):
        return HTMLResponse(content="Preview image not found", status_code=404)

    try:
        # Render only the requested page at 150 dpi.
        rendered_pages = convert_from_path(str(source_path), dpi=150, first_page=page, last_page=page)
        if not rendered_pages:
            return HTMLResponse(content="Preview image not found", status_code=404)

        png_buffer = BytesIO()
        rendered_pages[0].save(png_buffer, format="PNG")
        return Response(content=png_buffer.getvalue(), media_type="image/png")
    except Exception as exc:
        return HTMLResponse(content=f"Preview image generation failed: {exc!r}", status_code=500)
|
||||
|
||||
|
||||
@router.get("/{document_id}/preview-file")
|
||||
def document_preview_file(document_id: str, path: str | None = None, db: Session = Depends(get_db)):
|
||||
document = db.query(Document).filter(Document.document_id == document_id).first()
|
||||
|
|
@ -1706,6 +1800,209 @@ def _build_preview_url_for_path(request: Request, document_id: str, path_value:
|
|||
base = str(request.url_for("document_preview_file", document_id=document_id))
|
||||
return f"{base}?path={quote(str(path_obj))}&v={int(path_obj.stat().st_mtime)}"
|
||||
|
||||
|
||||
# --- layout review save helpers start ---
|
||||
def _layout_review_group_words_into_lines(words, y_tol: float = 12.0):
|
||||
normalized = []
|
||||
for word in words or []:
|
||||
bbox = word.get("bbox") or [0, 0, 0, 0]
|
||||
if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
|
||||
continue
|
||||
try:
|
||||
x1 = float(bbox[0])
|
||||
y1 = float(bbox[1])
|
||||
x2 = float(bbox[2])
|
||||
y2 = float(bbox[3])
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
normalized.append({
|
||||
"id": word.get("id"),
|
||||
"text": (word.get("text") or "").strip(),
|
||||
"bbox": [x1, y1, x2, y2],
|
||||
})
|
||||
|
||||
normalized.sort(key=lambda w: (w["bbox"][1], w["bbox"][0]))
|
||||
|
||||
groups = []
|
||||
for word in normalized:
|
||||
word_center_y = (word["bbox"][1] + word["bbox"][3]) / 2.0
|
||||
placed = False
|
||||
for group in groups:
|
||||
group_center_y = sum((item["bbox"][1] + item["bbox"][3]) / 2.0 for item in group) / len(group)
|
||||
if abs(word_center_y - group_center_y) <= y_tol:
|
||||
group.append(word)
|
||||
placed = True
|
||||
break
|
||||
if not placed:
|
||||
groups.append([word])
|
||||
|
||||
lines = []
|
||||
for group in groups:
|
||||
group.sort(key=lambda w: w["bbox"][0])
|
||||
line_text = " ".join((item.get("text") or "").strip() for item in group).strip()
|
||||
left = min(item["bbox"][0] for item in group)
|
||||
top = min(item["bbox"][1] for item in group)
|
||||
right = max(item["bbox"][2] for item in group)
|
||||
bottom = max(item["bbox"][3] for item in group)
|
||||
lines.append({
|
||||
"text": line_text,
|
||||
"bbox": [left, top, right, bottom],
|
||||
"confidence": None,
|
||||
"font_family_guess": "Helvetica",
|
||||
"font_size_guess": max(6.0, (bottom - top) * 0.75),
|
||||
"text_color_guess": "#000000",
|
||||
"words": group,
|
||||
})
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
def _layout_review_rebuild_page(page, fallback_page_number):
    """Normalize one posted layout-review page, or return ``None`` if it is unusable.

    A page is unusable when it is not a JSON object, its ``"words"`` value is
    not a list, or its page number / dimensions cannot be converted to
    numbers. Individual words that are not objects or lack a valid numeric
    4-element bbox are skipped; a word id that cannot be converted to ``int``
    falls back to the word's 1-based position.
    """
    if not isinstance(page, dict):
        return None

    posted_words = page.get("words", []) or []
    if not isinstance(posted_words, (list, tuple)):
        return None

    try:
        page_number = int(page.get("page") or fallback_page_number)
        page_width = float(page.get("page_width") or 1.0)
        page_height = float(page.get("page_height") or 1.0)
    except (TypeError, ValueError, OverflowError):
        return None

    words = []
    for word_idx, word in enumerate(posted_words, start=1):
        if not isinstance(word, dict):
            continue
        bbox = word.get("bbox") or [0, 0, 0, 0]
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            continue
        try:
            x1, y1, x2, y2 = (float(coord) for coord in bbox)
        except (TypeError, ValueError, OverflowError):
            continue
        try:
            word_id = int(word.get("id") or word_idx)
        except (TypeError, ValueError, OverflowError):
            word_id = word_idx

        words.append({
            "id": word_id,
            # str() keeps a non-string text value from crashing on .strip().
            "text": str(word.get("text") or "").strip(),
            "bbox": [x1, y1, x2, y2],
            "confidence": None,
        })

    return {
        "page": page_number,
        "page_width": page_width,
        "page_height": page_height,
        "image_width": page_width,
        "image_height": page_height,
        "words": words,
        "lines": _layout_review_group_words_into_lines(words),
    }


@router.post("/{document_id}/save-layout-review")
async def save_layout_review(document_id: str, request: Request, db: Session = Depends(get_db)):
    """Save the layout-review editor's words as a new current ``reviewed_ocr`` version.

    Reads the ``layout_review_json`` form field, rebuilds every posted page
    (words plus lines regrouped from those words), derives the text content
    from the regrouped lines and stores both on a new ``TextVersion``.

    Problems are reported by a 303 redirect back to the layout-review tab with
    an ``error`` code: missing payload, invalid JSON (including pages that
    cannot be normalized), no source version, or no pages. An unknown document
    yields a 404 response. On success the redirect carries
    ``success=saved_layout_review``.
    """

    def redirect_with(query_suffix):
        return RedirectResponse(
            url=f"/documents/{document_id}?tab=layout-review&{query_suffix}",
            status_code=303,
        )

    form = await request.form()
    payload_raw = form.get("layout_review_json")
    if not payload_raw:
        return redirect_with("error=layout_review_missing_payload")

    try:
        payload = json.loads(payload_raw)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; TypeError covers a non-text form value.
        return redirect_with("error=layout_review_invalid_json")

    document = (
        db.query(Document)
        .options(selectinload(Document.text_versions))
        .filter(Document.document_id == document_id)
        .first()
    )
    if document is None:
        return HTMLResponse(content="Document not found", status_code=404)

    text_versions = getattr(document, "text_versions", [])

    raw_ocr, reviewed_ocr = _get_current_text_versions(document)
    current_text_version = next(
        (
            tv for tv in sorted(
                text_versions,
                key=lambda x: (x.version_number, x.created_at),
                reverse=True,
            )
            if tv.is_current
        ),
        None,
    )
    source_version = reviewed_ocr or raw_ocr or current_text_version
    if source_version is None:
        return redirect_with("error=layout_review_no_source")

    posted_pages = payload.get("pages") if isinstance(payload, dict) else None
    if not isinstance(posted_pages, list) or not posted_pages:
        return redirect_with("error=layout_review_no_pages")

    rebuilt_pages = []
    rebuilt_text_lines = []
    for idx, page in enumerate(posted_pages, start=1):
        rebuilt_page = _layout_review_rebuild_page(page, idx)
        if rebuilt_page is None:
            # The payload parsed as JSON but a page has the wrong shape.
            return redirect_with("error=layout_review_invalid_json")
        rebuilt_pages.append(rebuilt_page)
        rebuilt_text_lines.extend((line.get("text") or "") for line in rebuilt_page["lines"])

    # Carry over the descriptive header keys from the source layout, if any.
    source_layout_json = getattr(source_version, "layout_json", None)
    new_layout_json = {}
    if isinstance(source_layout_json, dict):
        for key in ("schema_version", "analysis_type", "engine"):
            if key in source_layout_json:
                new_layout_json[key] = source_layout_json[key]

    new_layout_json.setdefault("schema_version", 1)
    new_layout_json.setdefault("analysis_type", "canonical")

    new_layout_json["pages"] = rebuilt_pages
    new_layout_json["layout_sync_status"] = "synced"
    new_layout_json["layout_sync_source"] = "layout_review"
    new_layout_json["layout_needs_review"] = False
    new_text_content = "\n".join(rebuilt_text_lines).strip()

    next_version_number = max(
        (getattr(tv, "version_number", 0) for tv in text_versions),
        default=0,
    ) + 1

    # Every existing version, whatever its type, stops being current.
    for tv in text_versions:
        tv.is_current = False

    new_version = TextVersion(
        document_id=document.id,
        version_number=next_version_number,
        version_type="reviewed_ocr",
        text_content=new_text_content,
        created_by="layout_review_editor",
        is_current=True,
        ocr_engine=getattr(source_version, "ocr_engine", None),
        ocr_engine_version=getattr(source_version, "ocr_engine_version", None),
        rerun_source="layout_review",
        quality_score=getattr(source_version, "quality_score", None),
        quality_flags=getattr(source_version, "quality_flags", None),
        quality_note=getattr(source_version, "quality_note", None),
        derived_from_version_id=getattr(source_version, "id", None),
        layout_json=new_layout_json,
    )
    db.add(new_version)
    db.commit()

    return redirect_with("success=saved_layout_review")
|
||||
# --- layout review save helpers end ---
|
||||
|
||||
@router.get("/{document_id}", response_class=HTMLResponse)
|
||||
def document_detail(document_id: str, request: Request, queue: str | None = None, viewer_source: str = "scan", db: Session = Depends(get_db)):
|
||||
current_user = getattr(request.state, "current_user", None)
|
||||
|
|
@ -1726,6 +2023,12 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
return HTMLResponse(content="Document not found", status_code=404)
|
||||
|
||||
raw_ocr, reviewed_ocr = _get_current_text_versions(document)
|
||||
layout_source_version = reviewed_ocr or raw_ocr
|
||||
layout_source_json = (
|
||||
layout_source_version.layout_json
|
||||
if layout_source_version and isinstance(getattr(layout_source_version, "layout_json", None), dict)
|
||||
else None
|
||||
)
|
||||
current_text_version = next(
|
||||
(
|
||||
tv for tv in sorted(
|
||||
|
|
@ -1741,7 +2044,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
editor_source = request.query_params.get("editor_source", "reviewed")
|
||||
review_text_value = _build_review_text_value(raw_ocr, reviewed_ocr, editor_source)
|
||||
|
||||
expected_line_count = _line_count_from_layout(raw_ocr.layout_json if raw_ocr else None)
|
||||
layout_source_version = reviewed_ocr or raw_ocr or current_text_version
|
||||
layout_source_json = (
|
||||
layout_source_version.layout_json
|
||||
if layout_source_version and isinstance(getattr(layout_source_version, "layout_json", None), dict)
|
||||
else None
|
||||
)
|
||||
|
||||
expected_line_count = _line_count_from_layout(layout_source_json)
|
||||
actual_line_count = len(review_text_value.splitlines()) if review_text_value else 0
|
||||
line_numbers = list(range(1, max(actual_line_count, expected_line_count) + 1))
|
||||
|
||||
|
|
@ -1750,32 +2060,26 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay")
|
||||
|
||||
overlay_page_data = []
|
||||
layout_review_pages = []
|
||||
try:
|
||||
current_text_version_for_overlay = next(
|
||||
(
|
||||
tv for tv in sorted(
|
||||
getattr(document, "text_versions", []),
|
||||
key=lambda x: (x.version_number, x.created_at),
|
||||
reverse=True,
|
||||
)
|
||||
if tv.is_current
|
||||
),
|
||||
None,
|
||||
)
|
||||
overlay_pages = ((current_text_version_for_overlay.layout_json or {}).get("pages", []) if current_text_version_for_overlay and current_text_version_for_overlay.layout_json else []) or []
|
||||
layout_json = layout_source_json or {}
|
||||
overlay_pages = layout_json.get("pages", []) if isinstance(layout_json, dict) else []
|
||||
|
||||
for page in overlay_pages:
|
||||
page_width = float(page.get("page_width") or page.get("image_width") or 1.0)
|
||||
page_height = float(page.get("page_height") or page.get("image_height") or 1.0)
|
||||
|
||||
words = []
|
||||
for word in page.get("words", []) or []:
|
||||
for idx, word in enumerate(page.get("words", []) or [], start=1):
|
||||
bbox = word.get("bbox") or [0, 0, 0, 0]
|
||||
if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
|
||||
continue
|
||||
words.append({
|
||||
word_row = {
|
||||
"id": idx,
|
||||
"text": (word.get("text") or "").strip(),
|
||||
"bbox": [float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])],
|
||||
})
|
||||
}
|
||||
words.append(word_row)
|
||||
|
||||
lines = []
|
||||
source_lines = []
|
||||
|
|
@ -1794,14 +2098,24 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
})
|
||||
|
||||
overlay_page_data.append({
|
||||
"page": page.get("page"),
|
||||
"page_width": page_width,
|
||||
"page_height": page_height,
|
||||
"words": [{"text": w["text"], "bbox": w["bbox"]} for w in words],
|
||||
"lines": lines,
|
||||
})
|
||||
|
||||
layout_review_pages.append({
|
||||
"page": page.get("page"),
|
||||
"page_width": page_width,
|
||||
"page_height": page_height,
|
||||
"words": words,
|
||||
"lines": lines,
|
||||
})
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
print("layout review build failed:", repr(e), flush=True)
|
||||
overlay_page_data = []
|
||||
layout_review_pages = []
|
||||
|
||||
scan_path = document.current_path
|
||||
replica_path = replica_clean_output.file_path if replica_clean_output and replica_clean_output.file_path else None
|
||||
|
|
@ -1823,6 +2137,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
|
||||
storage_available = _storage_available()
|
||||
file_url = _build_preview_url_for_path(request, document.document_id, preview_path)
|
||||
layout_review_image_url = str(request.url_for("document_preview_image", document_id=document.document_id)) + "?page=1"
|
||||
|
||||
app_url = str(request.url_for("document_detail", document_id=document.document_id))
|
||||
error = request.query_params.get("error")
|
||||
|
|
@ -1899,7 +2214,7 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
]
|
||||
|
||||
active_tab = request.query_params.get("tab", "ocr-review")
|
||||
if active_tab not in {"ocr-review", "extracted-fields", "additional-fields", "line-items", "versions", "raw-ocr", "source-options"}:
|
||||
if active_tab not in {"ocr-review", "layout-review", "extracted-fields", "additional-fields", "line-items", "versions", "raw-ocr", "source-options"}:
|
||||
active_tab = "ocr-review"
|
||||
|
||||
return templates.TemplateResponse(
|
||||
|
|
@ -1920,10 +2235,14 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
|||
"current_text_version": current_text_version,
|
||||
"review_text_value": review_text_value,
|
||||
"file_url": file_url,
|
||||
"layout_review_image_url": layout_review_image_url,
|
||||
"storage_available": storage_available,
|
||||
"viewer_source": effective_viewer_source,
|
||||
"overlay_page_data": overlay_page_data,
|
||||
"layout_review_pages": layout_review_pages,
|
||||
"replica_clean_output": replica_clean_output,
|
||||
"replica_scan_backed_output": replica_scan_backed_output,
|
||||
"replica_debug_overlay_output": replica_debug_overlay_output,
|
||||
"version_rows": version_rows,
|
||||
"current_line_item_version": current_line_item_version,
|
||||
"ocr_version_options": ocr_version_options,
|
||||
|
|
|
|||
|
|
@ -321,6 +321,7 @@ document.addEventListener("DOMContentLoaded", () => {
|
|||
<div class="card">
|
||||
<div class="right-pane-tabs">
|
||||
<button class="tab-button{% if active_tab in ['ocr-review', 'raw-ocr', 'source-options'] %} active{% endif %}" type="button" data-tab="ocr-review">OCR Review</button>
|
||||
<button class="tab-button{% if active_tab == 'layout-review' %} active{% endif %}" type="button" data-tab="layout-review">Layout Review</button>
|
||||
<button class="tab-button{% if active_tab == 'extracted-fields' %} active{% endif %}" type="button" data-tab="extracted-fields">Extracted Fields</button>
|
||||
<button class="tab-button{% if active_tab == 'additional-fields' %} active{% endif %}" type="button" data-tab="additional-fields">Additional Fields</button>
|
||||
<button class="tab-button{% if active_tab == 'line-items' %} active{% endif %}" type="button" data-tab="line-items">Line Items</button>
|
||||
|
|
@ -383,7 +384,360 @@ document.addEventListener("DOMContentLoaded", () => {
|
|||
</form>
|
||||
</div>
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields">
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'layout-review' %} active{% endif %}" data-panel="layout-review">
|
||||
<div class="ocr-review-header-row">
|
||||
<h2 class="card-title">Layout Review</h2>
|
||||
<div style="font-size:0.95rem; color:#666;">Browser-only scaffold editor</div>
|
||||
</div>
|
||||
|
||||
{% if layout_review_pages %}
|
||||
<style>
|
||||
@media (max-width: 900px) {
|
||||
#layout-review-root { grid-template-columns: 1fr !important; }
|
||||
#layout-review-canvas-wrap { min-height: 520px !important; }
|
||||
}
|
||||
#layout-review-stage, #layout-review-stage * {
|
||||
-webkit-user-select: none !important;
|
||||
user-select: none !important;
|
||||
-webkit-touch-callout: none !important;
|
||||
}
|
||||
#layout-review-image {
|
||||
display: block;
|
||||
width: 100%;
|
||||
height: auto;
|
||||
pointer-events: none;
|
||||
-webkit-user-drag: none;
|
||||
user-drag: none;
|
||||
}
|
||||
#layout-review-canvas {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
pointer-events: auto;
|
||||
touch-action: none;
|
||||
display: block;
|
||||
z-index: 5;
|
||||
background: transparent;
|
||||
}
|
||||
#layout-review-debug {
|
||||
position: absolute;
|
||||
left: 8px;
|
||||
bottom: 8px;
|
||||
z-index: 6;
|
||||
background: rgba(0,0,0,0.75);
|
||||
color: #fff;
|
||||
padding: 4px 8px;
|
||||
border-radius: 8px;
|
||||
font-size: 12px;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div id="layout-review-root" style="display:grid; grid-template-columns:minmax(0,1fr) 320px; gap:1rem; align-items:start;">
|
||||
<div>
|
||||
<div id="layout-review-canvas-wrap" style="position:relative; width:100%; min-height:900px; border:1px solid #ddd; border-radius:12px; overflow:hidden; background:#fafafa; -webkit-user-select:none; user-select:none; -webkit-touch-callout:none;" oncontextmenu="return false;">
|
||||
{% if layout_review_image_url %}
|
||||
<div id="layout-review-stage" style="position:relative; width:100%; display:block;">
|
||||
<img
|
||||
id="layout-review-image"
|
||||
src="{{ layout_review_image_url }}"
|
||||
alt="Layout review page"
|
||||
draggable="false"
|
||||
>
|
||||
<canvas id="layout-review-canvas"></canvas>
|
||||
<div id="layout-review-debug">boot</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="border:1px solid #ddd; border-radius:12px; padding:0.9rem; background:#fff;">
|
||||
<h3 style="margin-top:0;">Selected word</h3>
|
||||
<div style="display:grid; gap:0.65rem;">
|
||||
<div>
|
||||
<label for="layout-word-id">Word ID</label>
|
||||
<input id="layout-word-id" type="text" readonly style="width:100%;">
|
||||
</div>
|
||||
<div>
|
||||
<label for="layout-word-text">Text</label>
|
||||
<input id="layout-word-text" type="text" style="width:100%;">
|
||||
</div>
|
||||
<div style="display:grid; grid-template-columns:1fr 1fr; gap:0.5rem;">
|
||||
<div>
|
||||
<label for="layout-x1">x1</label>
|
||||
<input id="layout-x1" type="number" step="0.1" style="width:100%;">
|
||||
</div>
|
||||
<div>
|
||||
<label for="layout-y1">y1</label>
|
||||
<input id="layout-y1" type="number" step="0.1" style="width:100%;">
|
||||
</div>
|
||||
<div>
|
||||
<label for="layout-x2">x2</label>
|
||||
<input id="layout-x2" type="number" step="0.1" style="width:100%;">
|
||||
</div>
|
||||
<div>
|
||||
<label for="layout-y2">y2</label>
|
||||
<input id="layout-y2" type="number" step="0.1" style="width:100%;">
|
||||
</div>
|
||||
</div>
|
||||
<div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
|
||||
<button type="button" id="layout-apply-word">Apply</button>
|
||||
<button type="button" id="layout-nudge-left">←</button>
|
||||
<button type="button" id="layout-nudge-right">→</button>
|
||||
<button type="button" id="layout-nudge-up">↑</button>
|
||||
<button type="button" id="layout-nudge-down">↓</button>
|
||||
</div>
|
||||
<div style="font-size:0.9rem; color:#666;">
|
||||
Apply changes updates the layout editor only. Save layout review persists layout. Save reviewed OCR persists text and marks layout for review.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<form method="post" action="/documents/{{ document.document_id }}/save-layout-review" id="layout-review-save-form" style="margin-top:0.75rem;">
|
||||
<input type="hidden" name="layout_review_json" id="layout-review-json">
|
||||
<button type="submit" class="primary">Save layout review</button>
|
||||
</form>
|
||||
|
||||
<script id="layout-review-data" type="application/json">{{ layout_review_pages|tojson }}</script>
|
||||
<script>
|
||||
(function () {
|
||||
const dataTag = document.getElementById("layout-review-data");
|
||||
const canvas = document.getElementById("layout-review-canvas");
|
||||
const image = document.getElementById("layout-review-image");
|
||||
const debugEl = document.getElementById("layout-review-debug");
|
||||
const idInput = document.getElementById("layout-word-id");
|
||||
const textInput = document.getElementById("layout-word-text");
|
||||
const x1Input = document.getElementById("layout-x1");
|
||||
const y1Input = document.getElementById("layout-y1");
|
||||
const x2Input = document.getElementById("layout-x2");
|
||||
const y2Input = document.getElementById("layout-y2");
|
||||
|
||||
/**
 * Show a short status message in the on-page debug badge (when present)
 * and mirror it to the browser console.
 */
function debug(msg) {
  if (debugEl) {
    debugEl.textContent = msg;
  }
  try {
    console.log("[layout-review]", msg);
  } catch (ignored) {
    // Console logging is best-effort; a failure here must not stop the editor.
  }
}
|
||||
|
||||
if (!dataTag || !canvas || !image) {
|
||||
debug("missing-elements");
|
||||
return;
|
||||
}
|
||||
|
||||
let pages = [];
|
||||
try {
|
||||
pages = JSON.parse(dataTag.textContent || "[]");
|
||||
} catch (e) {
|
||||
debug("json-error");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Array.isArray(pages) || !pages.length) {
|
||||
debug("no-pages");
|
||||
return;
|
||||
}
|
||||
|
||||
const page = pages[0];
|
||||
let words = JSON.parse(JSON.stringify(page.words || []));
|
||||
let selectedId = null;
|
||||
const ctx = canvas.getContext("2d");
|
||||
|
||||
if (!ctx) {
|
||||
debug("no-ctx");
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Return the word whose id matches the current selection, or null.
 * Ids are compared as strings so numeric and string ids are interchangeable.
 */
function getSelectedWord() {
  const wantedKey = String(selectedId);
  for (const candidate of words) {
    if (String(candidate.id) === wantedKey) {
      return candidate || null;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Copy the selected word into the side-panel inputs, or blank every input
 * when nothing is selected. Inputs missing from the DOM are skipped.
 */
function syncEditor() {
  const w = getSelectedWord();
  const coordInputs = [x1Input, y1Input, x2Input, y2Input];

  if (!w) {
    [idInput, textInput, ...coordInputs].forEach((el) => {
      if (el) el.value = "";
    });
    return;
  }

  // Same fallback the renderer and hit-test use: a word without a bbox must
  // not throw here, it is simply shown as an all-zero box.
  const bbox = w.bbox || [0, 0, 0, 0];

  if (idInput) idInput.value = w.id;
  if (textInput) textInput.value = w.text || "";
  coordInputs.forEach((el, i) => {
    if (el) el.value = bbox[i];
  });
}
|
||||
|
||||
/**
 * Resize the overlay canvas to cover the displayed image.
 *
 * The backing store is scaled by devicePixelRatio and the context transform
 * is set so drawing code can keep working in CSS pixels.
 *
 * @returns {{width: number, height: number} | null} displayed size in CSS
 *   pixels, or null while the image has no measurable size.
 */
function sizeCanvasToImage() {
  const { width: cssWidth, height: cssHeight } = image.getBoundingClientRect();
  if (!cssWidth || !cssHeight) {
    return null;
  }

  const dpr = window.devicePixelRatio || 1;
  canvas.width = Math.round(cssWidth * dpr);
  canvas.height = Math.round(cssHeight * dpr);
  canvas.style.width = `${cssWidth}px`;
  canvas.style.height = `${cssHeight}px`;

  // Equivalent to resetting to identity and then scaling by dpr on both axes.
  ctx.setTransform(dpr, 0, 0, dpr, 0, 0);

  return { width: cssWidth, height: cssHeight };
}
|
||||
|
||||
/**
 * Redraw every word box on the overlay canvas.
 *
 * Word boxes are stored in page coordinates and scaled to the displayed
 * image size; the selected word is drawn in blue, all others in red.
 */
function renderCanvas() {
  const sized = sizeCanvasToImage();
  if (!sized) {
    debug("size-failed");
    return;
  }

  const { width: displayWidth, height: displayHeight } = sized;
  ctx.clearRect(0, 0, displayWidth, displayHeight);

  const scaleX = displayWidth / Number(page.page_width || 1);
  const scaleY = displayHeight / Number(page.page_height || 1);
  const selectedKey = String(selectedId);

  for (const word of words) {
    const box = word.bbox || [0, 0, 0, 0];
    const left = Number(box[0] || 0) * scaleX;
    const top = Number(box[1] || 0) * scaleY;
    const right = Number(box[2] || 0) * scaleX;
    const bottom = Number(box[3] || 0) * scaleY;
    // Never draw thinner than one pixel so degenerate boxes stay visible.
    const boxWidth = Math.max(1, right - left);
    const boxHeight = Math.max(1, bottom - top);
    const isSelected = String(word.id) === selectedKey;

    ctx.save();
    if (isSelected) {
      ctx.strokeStyle = "rgba(37,99,235,0.95)";
      ctx.lineWidth = 2;
      ctx.fillStyle = "rgba(37,99,235,0.12)";
    } else {
      ctx.strokeStyle = "rgba(220,38,38,0.85)";
      ctx.lineWidth = 1;
      ctx.fillStyle = "rgba(220,38,38,0.03)";
    }
    ctx.fillRect(left, top, boxWidth, boxHeight);
    ctx.strokeRect(left, top, boxWidth, boxHeight);
    ctx.restore();
  }

  debug("render words=" + words.length);
}
|
||||
|
||||
/**
 * Hit-test a viewport point against the word boxes.
 *
 * The point is converted from client coordinates to page coordinates using
 * the canvas' on-screen rectangle. Words are scanned from last to first, so
 * the word drawn last wins when boxes overlap.
 *
 * @returns the matching word object, or null when nothing is hit or the
 *   canvas has no measurable size.
 */
function pickWord(clientX, clientY) {
  const rect = canvas.getBoundingClientRect();
  if (!(rect.width && rect.height)) {
    return null;
  }

  const pageW = Number(page.page_width || 1);
  const pageH = Number(page.page_height || 1);
  const px = (clientX - rect.left) * (pageW / rect.width);
  const py = (clientY - rect.top) * (pageH / rect.height);

  let idx = words.length;
  while (idx-- > 0) {
    const candidate = words[idx];
    const box = candidate.bbox || [0, 0, 0, 0];
    const x1 = Number(box[0] || 0);
    const y1 = Number(box[1] || 0);
    const x2 = Number(box[2] || 0);
    const y2 = Number(box[3] || 0);

    const insideX = px >= x1 && px <= x2;
    const insideY = py >= y1 && py <= y2;
    if (insideX && insideY) {
      return candidate;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Write the side-panel inputs back into the selected word and redraw.
 *
 * A coordinate whose input is missing, blank or not a finite number keeps
 * the word's previous value. Previously such input became 0 (or NaN), which
 * silently moved a box edge to the page origin. This matters on save too,
 * because the submit handler calls this function.
 */
function applyEditorValues() {
  const w = getSelectedWord();
  if (!w) return;

  if (textInput) {
    w.text = textInput.value;
  }

  const previous = Array.isArray(w.bbox) ? w.bbox : [0, 0, 0, 0];
  const coordInputs = [x1Input, y1Input, x2Input, y2Input];

  w.bbox = coordInputs.map((input, i) => {
    const fallback = Number(previous[i]) || 0;
    if (!input || String(input.value).trim() === "") {
      return fallback;
    }
    const parsed = Number(input.value);
    return Number.isFinite(parsed) ? parsed : fallback;
  });

  // Refresh the inputs so any rejected entry visibly snaps back to the
  // value that was actually kept.
  syncEditor();
  renderCanvas();
}
|
||||
|
||||
/**
 * Translate the selected word's box by (dx, dy) page units, then refresh
 * the editor inputs and the canvas. Does nothing when no word is selected.
 *
 * A word without a bbox is treated as an all-zero box (the same fallback the
 * renderer and hit-test use) instead of throwing, and non-numeric
 * coordinates count as 0 so NaN never spreads into the saved layout.
 */
function nudge(dx, dy) {
  const w = getSelectedWord();
  if (!w) return;

  const box = w.bbox || [0, 0, 0, 0];
  // bbox is [x1, y1, x2, y2]; both corners move by the same offset.
  const deltas = [dx, dy, dx, dy];
  w.bbox = deltas.map((delta, i) => (Number(box[i]) || 0) + delta);

  syncEditor();
  renderCanvas();
}
|
||||
|
||||
/**
 * Serialize the edited words of the current page into the JSON string that
 * is posted by the save form.
 *
 * Each word is reduced to { id, text, bbox }. Missing ids fall back to the
 * word's 1-based position, and missing coordinates fall back to 0.
 * NOTE(review): ids are passed through Number(), so a non-numeric string id
 * would serialize as null. Confirm ids are always numeric.
 */
function buildLayoutReviewPayload() {
  const serializedWords = words.map((w, idx) => {
    const box = w.bbox || [0, 0, 0, 0];
    return {
      id: Number(w.id || (idx + 1)),
      text: w.text || "",
      bbox: [
        Number(box[0] || 0),
        Number(box[1] || 0),
        Number(box[2] || 0),
        Number(box[3] || 0),
      ],
    };
  });

  const pageEntry = {
    page: page.page || 1,
    page_width: page.page_width || 1,
    page_height: page.page_height || 1,
    words: serializedWords,
  };

  return JSON.stringify({ pages: [pageEntry] });
}
|
||||
|
||||
/**
 * Pointer/touch handler: select the word under the event position.
 * For touch events the first active touch point supplies the coordinates.
 */
function handlePick(ev) {
  ev.preventDefault();

  const hasTouch = ev.touches && ev.touches.length;
  const point = hasTouch ? ev.touches[0] : ev;
  const hit = pickWord(point.clientX, point.clientY);

  if (hit) {
    selectedId = hit.id;
    syncEditor();
    renderCanvas();
    debug("picked " + hit.id);
    return;
  }

  debug("pick-miss");
}
|
||||
|
||||
// Suppress native gestures (context menu, text selection, image drag, touch
// and mouse defaults) on the stage so they cannot interfere with picking.
const suppressDefault = (e) => { e.preventDefault(); };
["contextmenu", "selectstart", "dragstart", "touchstart", "touchend", "mousedown"].forEach((evt) => {
  canvas.addEventListener(evt, suppressDefault, { passive: false });
  image.addEventListener(evt, suppressDefault, { passive: false });
});

// Bind the pick handler to exactly one input path. Browsers with Pointer
// Events fire both pointerdown and touchstart for a single tap, so binding
// handlePick to both ran the pick/sync/render sequence twice per tap.
// touchstart is kept only as a fallback where Pointer Events are missing.
if (window.PointerEvent) {
  canvas.addEventListener("pointerdown", handlePick, { passive: false });
} else {
  canvas.addEventListener("touchstart", handlePick, { passive: false });
}
|
||||
|
||||
// Side-panel buttons: Apply writes the inputs back to the word; each arrow
// moves the selected box by one page unit. Buttons absent from the DOM are
// skipped.
const clickBindings = [
  ["layout-apply-word", applyEditorValues],
  ["layout-nudge-left", () => nudge(-1, 0)],
  ["layout-nudge-right", () => nudge(1, 0)],
  ["layout-nudge-up", () => nudge(0, -1)],
  ["layout-nudge-down", () => nudge(0, 1)],
];
for (const [buttonId, onClick] of clickBindings) {
  document.getElementById(buttonId)?.addEventListener("click", onClick);
}
|
||||
|
||||
// On save, apply any pending editor input first, then place the serialized
// layout into the hidden form field that is posted to the server.
const saveForm = document.getElementById("layout-review-save-form");
if (saveForm) {
  saveForm.addEventListener("submit", () => {
    applyEditorValues();
    const payloadField = document.getElementById("layout-review-json");
    if (!payloadField) {
      return;
    }
    payloadField.value = buildLayoutReviewPayload();
  });
}
|
||||
|
||||
// Keep the overlay aligned with the image whenever its displayed size can change.
window.addEventListener("resize", renderCanvas);
image.addEventListener("load", renderCanvas);

// Initial paint, plus two delayed repaints (300 ms and 1000 ms later).
// NOTE(review): the delayed repaints presumably cover late layout settling — confirm.
syncEditor();
renderCanvas();
[300, 1000].forEach((delayMs) => {
  setTimeout(renderCanvas, delayMs);
});
|
||||
})();
|
||||
</script>
|
||||
{% else %}
|
||||
<p class="empty-state">No layout review data available yet.</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
|
||||
<div class="tab-panel{% if active_tab == 'extracted-fields' %} active{% endif %}" data-panel="extracted-fields">
|
||||
<div class="extracted-fields-header-row">
|
||||
<h2 class="card-title">Extracted fields</h2>
|
||||
<form method="get" action="/documents/{{ document.document_id }}" class="extracted-autofill-inline-form">
|
||||
|
|
@ -1019,3 +1373,8 @@ document.addEventListener("DOMContentLoaded", () => {
|
|||
})();
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue