Add in-browser OCR overlay toggle controls

This commit is contained in:
Sean McElwain 2026-05-09 17:55:29 -05:00
parent 8c6e862799
commit 3f81d1a198
2 changed files with 176 additions and 1 deletions

View File

@ -1749,6 +1749,60 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
replica_scan_backed_output = _get_latest_replica_output(document, "scan_backed")
replica_debug_overlay_output = _get_latest_replica_output(document, "debug_overlay")
# Build per-page OCR geometry (words and lines with bounding boxes) from the
# current text version's layout_json, for the in-browser overlay toggles.
# Best-effort: overlay_page_data is always a list and is left empty when
# nothing usable is found.
overlay_page_data = []

def _overlay_items(raw_items):
    """Return ``{"text", "bbox"}`` dicts for entries with a usable bbox.

    Entries that are not dicts, whose bbox is not a 4-element list/tuple, or
    whose coordinates are not finite numbers are skipped one by one, so a
    single malformed entry no longer discards the whole overlay.
    """
    items = []
    for raw in raw_items or []:
        if not isinstance(raw, dict):
            continue
        # A missing/empty bbox still falls back to a zero box.
        bbox = raw.get("bbox") or [0, 0, 0, 0]
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            continue
        try:
            coords = [float(value) for value in bbox]
        except (TypeError, ValueError):
            continue
        # json.dumps emits NaN/Infinity by default, which the browser-side
        # JSON.parse rejects; abs(nan) < inf is False, so this drops both.
        if not all(abs(coord) < float("inf") for coord in coords):
            continue
        items.append({
            "text": str(raw.get("text") or "").strip(),
            "bbox": coords,
        })
    return items

def _overlay_dimension(page, primary_key, fallback_key):
    """Return a positive, finite page dimension, defaulting to 1.0."""
    try:
        value = float(page.get(primary_key) or page.get(fallback_key) or 1.0)
    except (TypeError, ValueError):
        return 1.0
    # The client divides by this value, so never hand it zero/negative/NaN.
    return value if 0 < value < float("inf") else 1.0

try:
    # Highest (version_number, created_at) among versions flagged current;
    # max() keeps the first of equal keys, matching a stable descending sort.
    current_text_version_for_overlay = max(
        (
            tv
            for tv in (getattr(document, "text_versions", None) or [])
            if tv.is_current
        ),
        key=lambda tv: (tv.version_number, tv.created_at),
        default=None,
    )
    overlay_layout = getattr(current_text_version_for_overlay, "layout_json", None) or {}
    overlay_pages = overlay_layout.get("pages") or []
    for page in overlay_pages:
        if not isinstance(page, dict):
            continue
        # Prefer lines nested under regions; fall back to page-level lines.
        source_lines = []
        for region in page.get("regions") or []:
            if isinstance(region, dict):
                source_lines.extend(region.get("lines") or [])
        if not source_lines:
            source_lines = page.get("lines") or []
        overlay_page_data.append({
            "page": page.get("page"),
            "page_width": _overlay_dimension(page, "page_width", "image_width"),
            "page_height": _overlay_dimension(page, "page_height", "image_height"),
            "words": _overlay_items(page.get("words")),
            "lines": _overlay_items(source_lines),
        })
except Exception:
    # Deliberate best-effort boundary: the overlay is optional, so any
    # unexpected layout shape must not break the document detail page.
    overlay_page_data = []
scan_path = document.current_path
replica_path = replica_clean_output.file_path if replica_clean_output and replica_clean_output.file_path else None
replica_scan_backed_path = replica_scan_backed_output.file_path if replica_scan_backed_output and replica_scan_backed_output.file_path else None

View File

@ -272,13 +272,40 @@ document.addEventListener("DOMContentLoaded", () => {
<a class="preview-source-link{% if viewer_source == 'replica_debug_overlay' %} active{% endif %}" href="/documents/{{ document.document_id }}?tab={{ active_tab }}&viewer_source=replica_debug_overlay">Replica (Debug)</a>
{% endif %}
</div>
{% if overlay_page_data %}
{# OCR overlay controls, rendered only when overlay_page_data is non-empty.
   The overlay script looks these inputs up by id (overlay-toggle-text,
   overlay-toggle-boxes, overlay-level-lines, overlay-level-words), so a
   rename here needs the matching change in that script. Both checkboxes
   start unchecked; the "Lines" radio is the default level. #}
<div class="preview-overlay-controls" style="display:flex; gap:0.5rem; flex-wrap:wrap; margin-top:0.75rem;">
<label style="display:flex; align-items:center; gap:0.35rem; font-size:0.95rem;">
<input type="checkbox" id="overlay-toggle-text">
<span>Show OCR text</span>
</label>
<label style="display:flex; align-items:center; gap:0.35rem; font-size:0.95rem;">
<input type="checkbox" id="overlay-toggle-boxes">
<span>Show boxes</span>
</label>
<label style="display:flex; align-items:center; gap:0.35rem; font-size:0.95rem;">
<input type="radio" name="overlay-level" id="overlay-level-lines" value="lines" checked>
<span>Lines</span>
</label>
<label style="display:flex; align-items:center; gap:0.35rem; font-size:0.95rem;">
<input type="radio" name="overlay-level" id="overlay-level-words" value="words">
<span>Words</span>
</label>
</div>
{% endif %}
</div>
{% if not storage_available %}
<p class="empty-state">Storage mount unavailable. Preview is temporarily unavailable.</p>
{% elif file_url %}
{% if document.mime_type == "application/pdf" %}
<embed class="preview-frame" src="{{ file_url }}" type="application/pdf">
<div class="preview-overlay-stack" style="position:relative;">
{# The wrapper is position:relative so the absolutely positioned overlay root
   (inset:0) spans the same box as the embed; pointer-events:none on the root
   lets clicks and scrolling reach the PDF viewer underneath. #}
{# NOTE(review): the embedded viewer presumably draws its own toolbar and
   handles zoom/scroll internally, so overlay boxes may not line up with the
   rendered page - confirm in the target browsers. #}
<embed class="preview-frame" id="preview-frame" src="{{ file_url }}" type="application/pdf">
{% if overlay_page_data %}
<div id="ocr-overlay-root" style="position:absolute; inset:0; pointer-events:none; overflow:hidden;"></div>
{# Overlay geometry is shipped as JSON in a non-executing script tag; the
   overlay script reads it via getElementById("ocr-overlay-data"). #}
<script id="ocr-overlay-data" type="application/json">{{ overlay_page_data|tojson }}</script>
{% endif %}
</div>
{% elif document.mime_type in ["image/jpeg", "image/png"] %}
<img class="preview-image" src="{{ file_url }}" alt="Document image">
{% else %}
@ -898,3 +925,97 @@ document.addEventListener("DOMContentLoaded", () => {
</script>
{% endblock %}
<script>
(function () {
  // OCR overlay renderer: draws word/line boxes and/or recognized text on
  // top of the preview, driven by the overlay toggle inputs. It exits
  // quietly when the overlay markup or its JSON payload is absent.
  //
  // NOTE(review): this <script> sits after the template's endblock tag. If
  // the template extends a base layout, markup outside a block is presumably
  // not rendered - confirm the script actually reaches the page.
  const dataTag = document.getElementById("ocr-overlay-data");
  const overlayRoot = document.getElementById("ocr-overlay-root");
  if (!dataTag || !overlayRoot) return;

  let overlayData;
  try {
    overlayData = JSON.parse(dataTag.textContent || "[]");
  } catch (e) {
    overlayData = [];
  }
  if (!Array.isArray(overlayData) || !overlayData.length) return;

  const textToggle = document.getElementById("overlay-toggle-text");
  const boxesToggle = document.getElementById("overlay-toggle-boxes");
  const levelLines = document.getElementById("overlay-level-lines");
  const levelWords = document.getElementById("overlay-level-words");

  // "words" only when the words radio is checked; "lines" otherwise.
  function currentLevel() {
    return levelWords && levelWords.checked ? "words" : "lines";
  }

  function clearOverlay() {
    overlayRoot.textContent = "";
  }

  // Coerce a page dimension to a positive finite number. It is used as a
  // divisor, so zero/NaN would turn every coordinate into Infinity/NaN.
  function positiveOr(value, fallback) {
    const n = Number(value);
    return Number.isFinite(n) && n > 0 ? n : fallback;
  }

  // Scale a [x1, y1, x2, y2] bbox from page units to overlay pixels.
  // Returns null for malformed or non-finite boxes so they are skipped
  // instead of producing "NaNpx" styles.
  function scaledBox(bbox, xScale, yScale) {
    if (!Array.isArray(bbox) || bbox.length !== 4) return null;
    const nums = bbox.map(Number);
    if (!nums.every(Number.isFinite)) return null;
    const left = nums[0] * xScale;
    const top = nums[1] * yScale;
    return {
      left: left,
      top: top,
      // Keep at least one pixel so degenerate boxes stay visible.
      width: Math.max(1, nums[2] * xScale - left),
      height: Math.max(1, nums[3] * yScale - top),
    };
  }

  function renderOcrOverlay() {
    clearOverlay();
    const showText = !!(textToggle && textToggle.checked);
    const showBoxes = !!(boxesToggle && boxesToggle.checked);
    if (!showText && !showBoxes) return;

    // Only the first page's geometry is drawn; it is stretched over the
    // whole overlay area.
    const page = overlayData[0];
    if (!page) return;

    const rootRect = overlayRoot.getBoundingClientRect();
    if (!rootRect.width || !rootRect.height) return;
    const xScale = rootRect.width / positiveOr(page.page_width, 1);
    const yScale = rootRect.height / positiveOr(page.page_height, 1);

    const items = currentLevel() === "words" ? (page.words || []) : (page.lines || []);
    // Build off-DOM and attach once to avoid a reflow per element.
    const fragment = document.createDocumentFragment();
    for (const item of items) {
      if (!item) continue;
      const text = item.text || "";
      // Nothing to draw: no box requested and no text to show.
      if (!showBoxes && !text) continue;
      const box = scaledBox(item.bbox, xScale, yScale);
      if (!box) continue;

      const el = document.createElement("div");
      const style = el.style;
      style.position = "absolute";
      style.left = box.left + "px";
      style.top = box.top + "px";
      style.width = box.width + "px";
      style.height = box.height + "px";
      style.boxSizing = "border-box";
      style.pointerEvents = "none";
      if (showBoxes) {
        style.border = "1px solid rgba(220,38,38,0.55)";
        style.background = "rgba(220,38,38,0.04)";
      }
      if (showText) {
        el.textContent = text;
        style.color = "rgba(220,38,38,0.92)";
        // Font tracks the box height, clamped to a readable 9-24px range.
        style.fontSize = Math.max(9, Math.min(24, box.height * 0.9)) + "px";
        style.lineHeight = box.height + "px";
        style.whiteSpace = "nowrap";
        style.overflow = "hidden";
      }
      fragment.appendChild(el);
    }
    overlayRoot.appendChild(fragment);
  }

  // Coalesce bursts of resize notifications into one render per frame.
  let framePending = false;
  function scheduleRender() {
    if (framePending) return;
    framePending = true;
    window.requestAnimationFrame(() => {
      framePending = false;
      renderOcrOverlay();
    });
  }

  [textToggle, boxesToggle, levelLines, levelWords].forEach((el) => {
    if (el) el.addEventListener("change", renderOcrOverlay);
  });
  window.addEventListener("resize", scheduleRender);
  // Also follow size changes of the overlay container itself (layout shifts
  // that are not window resizes). The overlay's children are absolutely
  // positioned inside an inset:0 box, so rendering does not resize it.
  if (typeof ResizeObserver === "function") {
    new ResizeObserver(scheduleRender).observe(overlayRoot);
  }
  // Delayed passes kept from the original as a fallback while the embedded
  // preview settles into its final size.
  setTimeout(renderOcrOverlay, 250);
  setTimeout(renderOcrOverlay, 900);
})();
</script>