Add backend vision analysis scaffold without editor overlay

This commit is contained in:
Sean McElwain 2026-05-30 13:47:54 -05:00
parent 6792a8629e
commit bec5a21650
5 changed files with 95 additions and 4 deletions

View File

@ -27,3 +27,4 @@ from app.models.document_replica_output import DocumentReplicaOutput
from app.models.document_replica_review_state import DocumentReplicaReviewState
import app.models.document_analysis_version
from app.models.document_analysis_version import DocumentAnalysisVersion
from app.models.document_vision_analysis_output import DocumentVisionAnalysisOutput

View File

@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON
from sqlalchemy.sql import func
from app.db.base import Base
class DocumentVisionAnalysisOutput(Base):
    """One stored vision-analysis result attached to a document.

    Each row references ``documents.id`` and carries the analysis payload as
    JSON, together with provenance columns (engine, model name, prompt
    version, creator) and a database-side creation timestamp.
    """

    __tablename__ = "document_vision_analysis_outputs"

    # Surrogate primary key.
    id = Column(
        Integer,
        index=True,
        primary_key=True,
    )

    # Owning document (integer primary key of the documents table).
    document_id = Column(
        Integer,
        ForeignKey("documents.id"),
        index=True,
        nullable=False,
    )

    # Provenance of the analysis: which engine/model/prompt produced it.
    engine = Column(
        String,
        default="placeholder",
        nullable=False,
    )
    model_name = Column(
        String,
        nullable=True,
    )
    prompt_version = Column(
        String,
        nullable=True,
    )

    # Kind of payload stored in analysis_json.
    output_type = Column(
        String,
        default="layout_suggestions",
        nullable=False,
    )

    # Analysis payload; ``dict`` is passed as a callable so every row gets
    # its own fresh empty mapping when no value is supplied.
    analysis_json = Column(
        JSON,
        default=dict,
        nullable=False,
    )

    # Name of the code path that created the row.
    created_by = Column(
        String,
        default="run_vision_analysis",
        nullable=False,
    )

    # Timestamp assigned by the database at insert time.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

View File

@ -47,6 +47,7 @@ from app.models.document_analysis_version import DocumentAnalysisVersion
from app.logic.document_analysis import build_layout_ocr_analysis_for_document
from app.logic.layout_ocr import run_layout_ocr
from app.models.document import Document
from app.models.document_vision_analysis_output import DocumentVisionAnalysisOutput
from app.models.document_line_item import DocumentLineItem
from app.models.document_line_item_set import DocumentLineItemSet
from app.models.document_line_item_set_version import DocumentLineItemSetVersion
@ -2191,6 +2192,7 @@ async def save_layout_review(document_id: str, request: Request, db: Session = D
# --- layout review save helpers end ---
@router.get("/{document_id}/run-vision-analysis", response_class=RedirectResponse)
@router.post("/{document_id}/run-vision-analysis", response_class=RedirectResponse)
def run_vision_analysis(document_id: str, db: Session = Depends(get_db)):
document = db.query(Document).filter(Document.document_id == document_id).first()
@ -2218,6 +2220,45 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
status_code=303,
)
current_user = getattr(request.state, "current_user", None)
# Side-effect branch of the document detail view: when the request carries
# ?run_vision=1, store a placeholder vision-analysis row for the document and
# redirect back to the layout-review tab.
# NOTE(review): this performs a database write based on a query parameter;
# the template's "Run Vision Analysis" link reaches it by plain navigation,
# so reloading or prefetching that URL would add another row. Consider
# moving the write behind a POST route — confirm intent.
if request.query_params.get("run_vision") == "1":
document_for_vision = db.query(Document).filter(Document.document_id == document_id).first()
# Only proceeds with the write when the lookup found a document.
if document_for_vision:
# Creates the table on demand; checkfirst=True skips the CREATE when it
# already exists.
# NOTE(review): request-time DDL — a migration would normally own this.
DocumentVisionAnalysisOutput.__table__.create(bind=db.get_bind(), checkfirst=True)
# Placeholder payload: every layer is empty except one hard-coded test
# region; no image analysis is performed here.
output = DocumentVisionAnalysisOutput(
document_id=document_for_vision.id,
engine="local_placeholder",
model_name="none",
prompt_version="vision_scaffold_v1",
output_type="layout_suggestions",
analysis_json={
"schema_version": "vision_analysis_v1",
"status": "placeholder",
"document_id": document_id,
"layers": {
"vision_boxes": [],
"vision_lines": [],
"vision_regions": [
{"bbox": [0, 0, 1200, 700], "label": "VISION TEST REGION"}
],
"vision_fields": [],
"vision_line_items": [],
},
"notes": [
"Vision analysis scaffold created.",
"Next step: render page image and populate this JSON from local CV/VLM output.",
],
},
created_by="layout_review_run_vision",
)
# Persist the row, then log its id.
db.add(output)
db.commit()
print(f"[vision-analysis] stored placeholder output id={output.id} for {document_id}", flush=True)
# 303 redirect to the same document without the run_vision parameter, with a
# success flag for the layout-review tab.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the redirect also runs when the document lookup fails — confirm.
return RedirectResponse(
url=f"/documents/{document_id}?tab=layout-review&success=vision_analysis_started",
status_code=303,
)
document = (
db.query(Document)
.options(
@ -2365,6 +2406,23 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
storage_available = _storage_available()
file_url = _build_preview_url_for_path(request, document.document_id, preview_path)
# Load the most recently stored vision output for this document. Both values
# stay None when no row exists or when loading fails; they are later passed
# to the template context under the same names.
latest_vision_output = None
vision_analysis_json = None
try:
# Creates the table on demand; checkfirst=True skips the CREATE when it
# already exists.
# NOTE(review): this runs on every render of the detail page — a migration
# would normally own table creation.
DocumentVisionAnalysisOutput.__table__.create(bind=db.get_bind(), checkfirst=True)
# Highest id first, so .first() yields the latest inserted row.
latest_vision_output = (
db.query(DocumentVisionAnalysisOutput)
.filter(DocumentVisionAnalysisOutput.document_id == document.id)
.order_by(DocumentVisionAnalysisOutput.id.desc())
.first()
)
if latest_vision_output:
vision_analysis_json = latest_vision_output.analysis_json
except Exception as e:
# Best-effort: any failure is printed and both values are reset to None so
# the rest of the view can continue.
# NOTE(review): there is no db.rollback() here; if the failure left the
# session's transaction in an error state, later queries on db may fail
# too — confirm.
print("[vision-analysis] load failed:", repr(e), flush=True)
latest_vision_output = None
vision_analysis_json = None
diagnostic_outputs = []
try:
diagnostic_outputs = list_candidate_outputs(db.connection(), document.id)
@ -2472,6 +2530,8 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
"storage_available": storage_available,
"viewer_source": effective_viewer_source,
"diagnostic_outputs": diagnostic_outputs,
"latest_vision_output": latest_vision_output,
"vision_analysis_json": vision_analysis_json,
"overlay_page_data": overlay_page_data,
"layout_review_pages": layout_review_pages,
"replica_clean_output": replica_clean_output,

View File

@ -6324,3 +6324,4 @@ table {
#layout-review-toolbar .word-ribbon-group:first-child .word-ribbon-row.layout-tool-row + .word-ribbon-row.layout-tool-row {
margin-top: 0.35rem !important;
}

View File

@ -1916,9 +1916,6 @@ document.addEventListener("DOMContentLoaded", () => {
<button type="button" class="layout-tool-btn" id="layout-tool-pan">Pan</button>
<button type="button" class="layout-tool-btn" id="layout-tool-add">Add</button>
<button type="button" class="layout-tool-btn danger" id="layout-delete-word">Delete</button>
<form method="post" action="/documents/{{ document.document_id }}/run-vision-analysis" style="display:inline;">
<button type="submit" class="layout-tool-btn">Run Vision</button>
</form>
</div>
<div class="word-ribbon-label">Tools</div>
</div>
@ -1994,6 +1991,7 @@ document.addEventListener("DOMContentLoaded", () => {
<div class="word-ribbon-label">Document</div>
</div>
</div>
<a class="layout-tool-btn" id="layout-run-vision" href="/documents/{{ document.document_id }}?tab=layout-review&run_vision=1">Run Vision Analysis</a>
<div id="layout-review-shell">
<div>
<div id="layout-review-canvas-wrap" oncontextmenu="return false;">
@ -3045,7 +3043,6 @@ function refreshSelectionUI(opts = {}) {
if (snapGuides?.x != null) drawPageGuideLine("x", snapGuides.x, "rgba(220,38,38,0.9)");
if (snapGuides?.y != null) drawPageGuideLine("y", snapGuides.y, "rgba(220,38,38,0.9)");
}
function renderCanvas() {
const sized = sizeCanvasToStage();
if (!sized) {
@ -3822,6 +3819,17 @@ function refreshSelectionUI(opts = {}) {
});
document.getElementById("layout-tool-pan")?.addEventListener("click", () => setTool("pan"));
document.getElementById("layout-tool-add")?.addEventListener("click", () => setTool("add"));
// NOTE(review): these lines read like the tail of an async click handler
// (redirect check, status update, catch, closing "});"), but no matching
// opener — an addEventListener(...) call, a "try {", or the fetch that
// assigns `res` — is visible in this hunk. Confirm the head of the handler
// was not dropped; a `catch` without a `try` would be a syntax error for
// the whole script block.
if (res.redirected) {
// Navigate the page to the URL the response was redirected to.
window.location.href = res.url;
return;
}
setStatus("Vision request sent");
} catch (err) {
// Failures are logged to the console and surfaced through the status line.
console.error("[vision-analysis] request failed", err);
setStatus("Vision request failed");
}
});
document.getElementById("layout-delete-word")?.addEventListener("click", deleteSelectedWord);
document.getElementById("layout-delete-word-inline")?.addEventListener("click", deleteSelectedWord);
@ -4802,3 +4810,4 @@ document.addEventListener("DOMContentLoaded", () => {
}
});
</script>