class DocumentVisionAnalysisOutput(Base):
    """ORM model for rows of the ``document_vision_analysis_outputs`` table.

    Every row references a ``documents`` row through ``document_id`` and
    stores a JSON payload in ``analysis_json`` alongside descriptive string
    columns (``engine``, ``model_name``, ``prompt_version``, ``output_type``)
    and creation metadata (``created_by``, ``created_at``).
    """

    __tablename__ = "document_vision_analysis_outputs"

    # Integer primary key.
    id = Column(
        Integer,
        primary_key=True,
        index=True,
    )

    # Required, indexed foreign key to documents.id.
    document_id = Column(
        Integer,
        ForeignKey("documents.id"),
        index=True,
        nullable=False,
    )

    # Required; falls back to "placeholder" when not supplied on insert.
    engine = Column(
        String,
        default="placeholder",
        nullable=False,
    )

    # Optional free-form strings; NULL is allowed for both.
    model_name = Column(
        String,
        nullable=True,
    )
    prompt_version = Column(
        String,
        nullable=True,
    )

    # Required; falls back to "layout_suggestions" when not supplied.
    output_type = Column(
        String,
        default="layout_suggestions",
        nullable=False,
    )

    # Required JSON payload. The default is the ``dict`` callable (not a
    # shared literal), so an omitted value becomes a fresh empty dict.
    analysis_json = Column(
        JSON,
        default=dict,
        nullable=False,
    )

    # Required; falls back to "run_vision_analysis" when not supplied.
    created_by = Column(
        String,
        default="run_vision_analysis",
        nullable=False,
    )

    # Timezone-aware timestamp filled in by the database (server-side
    # default of func.now()) rather than by Python code.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
import Document +from app.models.document_vision_analysis_output import DocumentVisionAnalysisOutput from app.models.document_line_item import DocumentLineItem from app.models.document_line_item_set import DocumentLineItemSet from app.models.document_line_item_set_version import DocumentLineItemSetVersion @@ -2191,6 +2192,7 @@ async def save_layout_review(document_id: str, request: Request, db: Session = D # --- layout review save helpers end --- +@router.get("/{document_id}/run-vision-analysis", response_class=RedirectResponse) @router.post("/{document_id}/run-vision-analysis", response_class=RedirectResponse) def run_vision_analysis(document_id: str, db: Session = Depends(get_db)): document = db.query(Document).filter(Document.document_id == document_id).first() @@ -2218,6 +2220,45 @@ def document_detail(document_id: str, request: Request, queue: str | None = None status_code=303, ) current_user = getattr(request.state, "current_user", None) + if request.query_params.get("run_vision") == "1": + document_for_vision = db.query(Document).filter(Document.document_id == document_id).first() + if document_for_vision: + DocumentVisionAnalysisOutput.__table__.create(bind=db.get_bind(), checkfirst=True) + output = DocumentVisionAnalysisOutput( + document_id=document_for_vision.id, + engine="local_placeholder", + model_name="none", + prompt_version="vision_scaffold_v1", + output_type="layout_suggestions", + analysis_json={ + "schema_version": "vision_analysis_v1", + "status": "placeholder", + "document_id": document_id, + "layers": { + "vision_boxes": [], + "vision_lines": [], + "vision_regions": [ + {"bbox": [0, 0, 1200, 700], "label": "VISION TEST REGION"} + ], + "vision_fields": [], + "vision_line_items": [], + }, + "notes": [ + "Vision analysis scaffold created.", + "Next step: render page image and populate this JSON from local CV/VLM output.", + ], + }, + created_by="layout_review_run_vision", + ) + db.add(output) + db.commit() + print(f"[vision-analysis] stored 
placeholder output id={output.id} for {document_id}", flush=True) + + return RedirectResponse( + url=f"/documents/{document_id}?tab=layout-review&success=vision_analysis_started", + status_code=303, + ) + document = ( db.query(Document) .options( @@ -2365,6 +2406,23 @@ def document_detail(document_id: str, request: Request, queue: str | None = None storage_available = _storage_available() file_url = _build_preview_url_for_path(request, document.document_id, preview_path) + latest_vision_output = None + vision_analysis_json = None + try: + DocumentVisionAnalysisOutput.__table__.create(bind=db.get_bind(), checkfirst=True) + latest_vision_output = ( + db.query(DocumentVisionAnalysisOutput) + .filter(DocumentVisionAnalysisOutput.document_id == document.id) + .order_by(DocumentVisionAnalysisOutput.id.desc()) + .first() + ) + if latest_vision_output: + vision_analysis_json = latest_vision_output.analysis_json + except Exception as e: + print("[vision-analysis] load failed:", repr(e), flush=True) + latest_vision_output = None + vision_analysis_json = None + diagnostic_outputs = [] try: diagnostic_outputs = list_candidate_outputs(db.connection(), document.id) @@ -2472,6 +2530,8 @@ def document_detail(document_id: str, request: Request, queue: str | None = None "storage_available": storage_available, "viewer_source": effective_viewer_source, "diagnostic_outputs": diagnostic_outputs, + "latest_vision_output": latest_vision_output, + "vision_analysis_json": vision_analysis_json, "overlay_page_data": overlay_page_data, "layout_review_pages": layout_review_pages, "replica_clean_output": replica_clean_output, diff --git a/app/static/app.css b/app/static/app.css index e24f0b7..62d4812 100644 --- a/app/static/app.css +++ b/app/static/app.css @@ -6324,3 +6324,4 @@ table { #layout-review-toolbar .word-ribbon-group:first-child .word-ribbon-row.layout-tool-row + .word-ribbon-row.layout-tool-row { margin-top: 0.35rem !important; } + diff --git a/app/templates/documents/detail.html 
b/app/templates/documents/detail.html index 2b55fbc..ca976d1 100644 --- a/app/templates/documents/detail.html +++ b/app/templates/documents/detail.html @@ -1916,9 +1916,6 @@ document.addEventListener("DOMContentLoaded", () => { -
- -
Tools
@@ -1994,6 +1991,7 @@ document.addEventListener("DOMContentLoaded", () => {
Document
+Run Vision Analysis
@@ -3045,7 +3043,6 @@ function refreshSelectionUI(opts = {}) { if (snapGuides?.x != null) drawPageGuideLine("x", snapGuides.x, "rgba(220,38,38,0.9)"); if (snapGuides?.y != null) drawPageGuideLine("y", snapGuides.y, "rgba(220,38,38,0.9)"); } - function renderCanvas() { const sized = sizeCanvasToStage(); if (!sized) { @@ -3822,6 +3819,17 @@ function refreshSelectionUI(opts = {}) { }); document.getElementById("layout-tool-pan")?.addEventListener("click", () => setTool("pan")); document.getElementById("layout-tool-add")?.addEventListener("click", () => setTool("add")); + if (res.redirected) { + window.location.href = res.url; + return; + } + setStatus("Vision request sent"); + } catch (err) { + console.error("[vision-analysis] request failed", err); + setStatus("Vision request failed"); + } + }); + document.getElementById("layout-delete-word")?.addEventListener("click", deleteSelectedWord); document.getElementById("layout-delete-word-inline")?.addEventListener("click", deleteSelectedWord); @@ -4802,3 +4810,4 @@ document.addEventListener("DOMContentLoaded", () => { } }); +