# document-processor/app/logic/vision_analysis.py
#
# 58 lines
# 1.7 KiB
# Python

from __future__ import annotations
from pathlib import Path
from typing import Any
def analyze_document_image(image_path: str | Path, *, model_name: str = "placeholder") -> dict[str, Any]:
    """Return the placeholder vision-analysis payload for one document image.

    No model is invoked at this stage. The supplied path is only passed
    through ``Path`` and echoed back as a string; the file is neither
    opened nor checked for existence.

    Planned for a later phase (not implemented here):
    - call a local Ollama / CV model
    - detect regions, line-item zones, tables, logos, checkboxes, signatures
    - return normalized page-coordinate candidates

    Args:
        image_path: Location of the image, as a string or ``Path``.
        model_name: Label copied unchanged into the result's ``model_name``.

    Returns:
        A ``vision_analysis_v1`` dictionary with status
        ``"no_model_configured"``, five empty layer lists and two
        scaffold notes.
    """
    layer_names = (
        "vision_regions",
        "vision_lines",
        "vision_boxes",
        "vision_fields",
        "vision_line_items",
    )
    # A comprehension gives every layer its own list object, so a caller
    # appending to one layer cannot affect the others.
    empty_layers = {layer: [] for layer in layer_names}

    scaffold_notes = [
        "Vision module scaffold is active.",
        "No CV/Ollama model is connected yet.",
    ]

    result: dict[str, Any] = {}
    result["schema_version"] = "vision_analysis_v1"
    result["engine"] = "local"
    result["model_name"] = model_name
    result["status"] = "no_model_configured"
    result["image_path"] = str(Path(image_path))
    result["layers"] = empty_layers
    result["notes"] = scaffold_notes
    return result
def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
    """Produce a layout dictionary tagged as vision-assisted.

    The source layout's top-level keys are carried over into a new
    dictionary (a shallow copy, so nested values are shared with the
    input), and the vision-related tag keys are then set on that copy.
    The ``source_layout`` object itself is not modified.

    Args:
        source_layout: Existing layout mapping. ``None`` or an empty
            mapping is replaced by ``{"pages": []}``.
        vision_result: Vision-analysis result; its ``status``, ``engine``
            and ``model_name`` entries are read with ``.get``.

    Returns:
        The new layout dictionary, flagged with ``vision_assisted`` and
        ``layout_needs_review`` set to ``True``.
    """
    # Copy first so the caller's dictionary is never written to.
    tagged = dict(source_layout) if source_layout else {"pages": []}

    # update() overwrites existing keys in place and appends new ones in
    # the order listed here.
    tagged.update(
        {
            "vision_assisted": True,
            "vision_assisted_status": vision_result.get("status", "unknown"),
            "vision_engine": vision_result.get("engine"),
            "vision_model_name": vision_result.get("model_name"),
            "layout_sync_source": "vision_assisted",
            "layout_needs_review": True,
        }
    )
    return tagged