58 lines
1.7 KiB
Python
58 lines
1.7 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
def analyze_document_image(image_path: str | Path, *, model_name: str = "placeholder") -> dict[str, Any]:
    """Return the scaffold vision-analysis result for a document image.

    Backend-only entry point of the vision module. In the current phase no
    CV/Ollama model is connected: the image is never opened, and the given
    path is only normalized through ``Path`` and echoed back as a string.

    NOTE(review): an earlier description said this step "validates the image
    path", but no existence or file-type check is performed here -- confirm
    whether a real check is intended.

    Planned for a later phase:
    - call a local Ollama / CV model
    - detect regions, line-item zones, tables, logos, checkboxes, signatures
    - return normalized page-coordinate candidates

    Args:
        image_path: Location of the image, as a string or ``Path``.
        model_name: Label copied verbatim into the result (keyword-only).

    Returns:
        A ``vision_analysis_v1`` dict with status ``"no_model_configured"``,
        the stringified path, five empty detection layers, and two
        explanatory notes.
    """
    normalized = Path(image_path)

    # Each layer gets its own fresh list so callers can append to one layer
    # without affecting the others or later results.
    layer_names = (
        "vision_regions",
        "vision_lines",
        "vision_boxes",
        "vision_fields",
        "vision_line_items",
    )
    empty_layers = {layer: [] for layer in layer_names}

    scaffold_notes = [
        "Vision module scaffold is active.",
        "No CV/Ollama model is connected yet.",
    ]

    result = {
        "schema_version": "vision_analysis_v1",
        "engine": "local",
        "model_name": model_name,
        "status": "no_model_configured",
        "image_path": str(normalized),
        "layers": empty_layers,
        "notes": scaffold_notes,
    }
    return result
|
|
|
|
|
|
def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
    """Turn a vision-analysis result into a regular ``layout_json`` dict.

    In the current phase the vision result contributes metadata only: the
    existing layout is carried over unchanged and tagged as vision-assisted.

    Args:
        source_layout: Existing layout. ``None`` or an empty dict is replaced
            by ``{"pages": []}``.
        vision_result: Result dict whose ``status``, ``engine`` and
            ``model_name`` entries are read; a missing ``status`` becomes
            ``"unknown"``, the other two become ``None``.

    Returns:
        A new top-level dict; ``source_layout`` itself is not modified. The
        copy is shallow, so nested values are shared with the input.
    """
    # Copy first so the tagging below never writes into the caller's dict.
    tagged = dict(source_layout) if source_layout else {"pages": []}

    tagged.update(
        {
            "vision_assisted": True,
            "vision_assisted_status": vision_result.get("status", "unknown"),
            "vision_engine": vision_result.get("engine"),
            "vision_model_name": vision_result.get("model_name"),
            "layout_sync_source": "vision_assisted",
            # Vision-derived layouts are always flagged for review.
            "layout_needs_review": True,
        }
    )
    return tagged
|