From 3aa2c78ac33b37f8cb0ff81ba24039ddc8f613b7 Mon Sep 17 00:00:00 2001
From: McElwain
Date: Sat, 30 May 2026 16:11:30 -0500
Subject: [PATCH] Wire vision candidate generation through backend module

---
 app/logic/vision_analysis.py | 62 ++++++++++++++++++++++++++++++++++++
 app/routes/documents.py      | 22 +++++--------
 2 files changed, 70 insertions(+), 14 deletions(-)
 create mode 100644 app/logic/vision_analysis.py

diff --git a/app/logic/vision_analysis.py b/app/logic/vision_analysis.py
new file mode 100644
index 0000000..f6b05c6
--- /dev/null
+++ b/app/logic/vision_analysis.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+
+def analyze_document_image(image_path: str | Path, *, model_name: str = "placeholder") -> dict[str, Any]:
+    """
+    Backend-only vision analysis entrypoint.
+
+    Current phase:
+    - normalizes the image path (the file is not opened or checked on disk)
+    - returns a structured empty vision result
+
+    Future phase:
+    - call local Ollama / CV model
+    - detect regions, line-item zones, tables, logos, checkboxes, signatures
+    - return normalized page-coordinate candidates
+
+    An empty image_path is recorded as "" instead of ".".
+    """
+    # Path("") normalizes to ".", which would read as a real location.
+    image_path_text = str(Path(image_path)) if str(image_path) else ""
+
+    return {
+        "schema_version": "vision_analysis_v1",
+        "engine": "local",
+        "model_name": model_name,
+        "status": "no_model_configured",
+        "image_path": image_path_text,
+        "layers": {
+            "vision_regions": [],
+            "vision_lines": [],
+            "vision_boxes": [],
+            "vision_fields": [],
+            "vision_line_items": [],
+        },
+        "notes": [
+            "Vision module scaffold is active.",
+            "No CV/Ollama model is connected yet.",
+        ],
+    }
+
+
+def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
+    """
+    Convert vision analysis into normal layout_json.
+
+    Current phase:
+    - preserves the current source layout (deep-copied, never mutated)
+    - tags it as vision-assisted
+    """
+    # Deep copy so nested structures are not shared with the caller's layout.
+    layout = deepcopy(dict(source_layout or {"pages": []}))
+    layout["vision_assisted"] = True
+    layout["vision_assisted_status"] = vision_result.get("status", "unknown")
+    layout["vision_engine"] = vision_result.get("engine")
+    layout["vision_model_name"] = vision_result.get("model_name")
+    layout["layout_sync_source"] = "vision_assisted"
+    layout["layout_needs_review"] = True
+    return layout
diff --git a/app/routes/documents.py b/app/routes/documents.py
index fd14b28..2b3bec2 100644
--- a/app/routes/documents.py
+++ b/app/routes/documents.py
@@ -27,6 +27,7 @@ from pdf2image import convert_from_path
 
 from app.core.storage_settings import get_default_save_root
 from app.db.deps import get_db
+from app.logic.vision_analysis import analyze_document_image, build_vision_assisted_layout
 from app.logic.document_outputs import (
     save_field_enriched_pdf_current,
     save_ocr_corrected_pdf_current,
@@ -2233,29 +2234,22 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
         .first()
     )
 
+    image_path = getattr(document_for_vision, "current_path", None) or getattr(document_for_vision, "source_path", None)
+    vision_result = analyze_document_image(image_path or "", model_name="placeholder")
     source_layout = source_version.layout_json if source_version and isinstance(source_version.layout_json, dict) else {"pages": []}
-    candidate_layout = deepcopy(source_layout)
-    candidate_layout["vision_assisted"] = True
-    candidate_layout["vision_assisted_status"] = "placeholder_copied_from_current_layout"
-    candidate_layout["layout_sync_source"] = "vision_assisted"
-    candidate_layout["layout_needs_review"] = True
+    candidate_layout = build_vision_assisted_layout(source_layout, vision_result)
 
     output = DocumentVisionAnalysisOutput(
         document_id=document_for_vision.id,
-        engine="local_placeholder",
-        model_name="none",
+        engine=vision_result.get("engine", "local"),
+        model_name=vision_result.get("model_name"),
         prompt_version="vision_candidate_v1",
         output_type="layout_candidate",
         analysis_json={
-            "schema_version": "vision_analysis_v1",
-            "status": "candidate_created_from_current_layout",
+            **vision_result,
             "document_id": document_id,
             "source_text_version_id": source_version.id if source_version else None,
-            "candidate_kind": "layout_json_copy_placeholder",
-            "notes": [
-                "Vision candidate scaffold created.",
-                "Next step: replace copied boxes with local CV/Ollama-derived boxes and merge scoring."
-            ],
+            "candidate_kind": "layout_json_from_vision_module",
         },
         created_by="layout_review_run_vision",
     )