Wire vision candidate generation through backend module
This commit is contained in:
parent
ff228b9481
commit
3aa2c78ac3
|
|
@ -0,0 +1,57 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_document_image(image_path: str | Path, *, model_name: str = "placeholder") -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Backend-only vision analysis entrypoint.
|
||||||
|
|
||||||
|
Current phase:
|
||||||
|
- validates the image path
|
||||||
|
- returns a structured empty vision result
|
||||||
|
|
||||||
|
Future phase:
|
||||||
|
- call local Ollama / CV model
|
||||||
|
- detect regions, line-item zones, tables, logos, checkboxes, signatures
|
||||||
|
- return normalized page-coordinate candidates
|
||||||
|
"""
|
||||||
|
path = Path(image_path)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"schema_version": "vision_analysis_v1",
|
||||||
|
"engine": "local",
|
||||||
|
"model_name": model_name,
|
||||||
|
"status": "no_model_configured",
|
||||||
|
"image_path": str(path),
|
||||||
|
"layers": {
|
||||||
|
"vision_regions": [],
|
||||||
|
"vision_lines": [],
|
||||||
|
"vision_boxes": [],
|
||||||
|
"vision_fields": [],
|
||||||
|
"vision_line_items": [],
|
||||||
|
},
|
||||||
|
"notes": [
|
||||||
|
"Vision module scaffold is active.",
|
||||||
|
"No CV/Ollama model is connected yet.",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Convert vision analysis into normal layout_json.
|
||||||
|
|
||||||
|
Current phase:
|
||||||
|
- preserves the current source layout
|
||||||
|
- tags it as vision-assisted
|
||||||
|
"""
|
||||||
|
layout = dict(source_layout or {"pages": []})
|
||||||
|
layout["vision_assisted"] = True
|
||||||
|
layout["vision_assisted_status"] = vision_result.get("status", "unknown")
|
||||||
|
layout["vision_engine"] = vision_result.get("engine")
|
||||||
|
layout["vision_model_name"] = vision_result.get("model_name")
|
||||||
|
layout["layout_sync_source"] = "vision_assisted"
|
||||||
|
layout["layout_needs_review"] = True
|
||||||
|
return layout
|
||||||
|
|
@ -27,6 +27,7 @@ from pdf2image import convert_from_path
|
||||||
|
|
||||||
from app.core.storage_settings import get_default_save_root
|
from app.core.storage_settings import get_default_save_root
|
||||||
from app.db.deps import get_db
|
from app.db.deps import get_db
|
||||||
|
from app.logic.vision_analysis import analyze_document_image, build_vision_assisted_layout
|
||||||
from app.logic.document_outputs import (
|
from app.logic.document_outputs import (
|
||||||
save_field_enriched_pdf_current,
|
save_field_enriched_pdf_current,
|
||||||
save_ocr_corrected_pdf_current,
|
save_ocr_corrected_pdf_current,
|
||||||
|
|
@ -2233,29 +2234,22 @@ def document_detail(document_id: str, request: Request, queue: str | None = None
|
||||||
.first()
|
.first()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
image_path = getattr(document_for_vision, "current_path", None) or getattr(document_for_vision, "source_path", None)
|
||||||
|
vision_result = analyze_document_image(image_path or "", model_name="placeholder")
|
||||||
source_layout = source_version.layout_json if source_version and isinstance(source_version.layout_json, dict) else {"pages": []}
|
source_layout = source_version.layout_json if source_version and isinstance(source_version.layout_json, dict) else {"pages": []}
|
||||||
candidate_layout = deepcopy(source_layout)
|
candidate_layout = build_vision_assisted_layout(source_layout, vision_result)
|
||||||
candidate_layout["vision_assisted"] = True
|
|
||||||
candidate_layout["vision_assisted_status"] = "placeholder_copied_from_current_layout"
|
|
||||||
candidate_layout["layout_sync_source"] = "vision_assisted"
|
|
||||||
candidate_layout["layout_needs_review"] = True
|
|
||||||
|
|
||||||
output = DocumentVisionAnalysisOutput(
|
output = DocumentVisionAnalysisOutput(
|
||||||
document_id=document_for_vision.id,
|
document_id=document_for_vision.id,
|
||||||
engine="local_placeholder",
|
engine=vision_result.get("engine", "local"),
|
||||||
model_name="none",
|
model_name=vision_result.get("model_name"),
|
||||||
prompt_version="vision_candidate_v1",
|
prompt_version="vision_candidate_v1",
|
||||||
output_type="layout_candidate",
|
output_type="layout_candidate",
|
||||||
analysis_json={
|
analysis_json={
|
||||||
"schema_version": "vision_analysis_v1",
|
**vision_result,
|
||||||
"status": "candidate_created_from_current_layout",
|
|
||||||
"document_id": document_id,
|
"document_id": document_id,
|
||||||
"source_text_version_id": source_version.id if source_version else None,
|
"source_text_version_id": source_version.id if source_version else None,
|
||||||
"candidate_kind": "layout_json_copy_placeholder",
|
"candidate_kind": "layout_json_from_vision_module",
|
||||||
"notes": [
|
|
||||||
"Vision candidate scaffold created.",
|
|
||||||
"Next step: replace copied boxes with local CV/Ollama-derived boxes and merge scoring."
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
created_by="layout_review_run_vision",
|
created_by="layout_review_run_vision",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue