From ff228b9481c41ef3cdf81f97b379da5bf7d69331 Mon Sep 17 00:00:00 2001 From: McElwain Date: Sat, 30 May 2026 15:46:12 -0500 Subject: [PATCH] Create vision-assisted layout candidate scaffold --- app/routes/documents.py | 63 +++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/app/routes/documents.py b/app/routes/documents.py index e908680..fd14b28 100644 --- a/app/routes/documents.py +++ b/app/routes/documents.py @@ -2224,35 +2224,68 @@ def document_detail(document_id: str, request: Request, queue: str | None = None document_for_vision = db.query(Document).filter(Document.document_id == document_id).first() if document_for_vision: DocumentVisionAnalysisOutput.__table__.create(bind=db.get_bind(), checkfirst=True) + + source_version = ( + db.query(TextVersion) + .filter(TextVersion.document_id == document_for_vision.id) + .filter(TextVersion.layout_json.isnot(None)) + .order_by(TextVersion.id.desc()) + .first() + ) + + source_layout = source_version.layout_json if source_version and isinstance(source_version.layout_json, dict) else {"pages": []} + candidate_layout = deepcopy(source_layout) + candidate_layout["vision_assisted"] = True + candidate_layout["vision_assisted_status"] = "placeholder_copied_from_current_layout" + candidate_layout["layout_sync_source"] = "vision_assisted" + candidate_layout["layout_needs_review"] = True + output = DocumentVisionAnalysisOutput( document_id=document_for_vision.id, engine="local_placeholder", model_name="none", - prompt_version="vision_scaffold_v1", - output_type="layout_suggestions", + prompt_version="vision_candidate_v1", + output_type="layout_candidate", analysis_json={ "schema_version": "vision_analysis_v1", - "status": "placeholder", + "status": "candidate_created_from_current_layout", "document_id": document_id, - "layers": { - "vision_boxes": [], - "vision_lines": [], - "vision_regions": [ - {"bbox": [0, 0, 1200, 700], "label": "VISION TEST REGION"} - ], - "vision_fields": [], - "vision_line_items": [], - }, + "source_text_version_id": source_version.id if source_version else None, + "candidate_kind": "layout_json_copy_placeholder", "notes": [ - "Vision analysis scaffold created.", - "Next step: render page image and populate this JSON from local CV/VLM output.", + "Vision candidate scaffold created.", + "Next step: replace copied boxes with local CV/Ollama-derived boxes and merge scoring." ], }, created_by="layout_review_run_vision", ) db.add(output) + db.flush() + + next_version_number = ( + (db.query(func.max(TextVersion.version_number)) + .filter(TextVersion.document_id == document_for_vision.id) + .scalar() or 0) + 1 + ) + + candidate_text = _canonical_layout_text(candidate_layout) + candidate = TextVersion( + document_id=document_for_vision.id, + version_type="vision_assisted_layout", + version_number=next_version_number, + text_content=candidate_text, + layout_json=candidate_layout, + created_by="vision_assisted_layout_candidate", + is_current=True, + rerun_source="vision_assisted", + derived_from_version_id=source_version.id if source_version else None, + ) + + db.query(TextVersion).filter(TextVersion.document_id == document_for_vision.id).update({"is_current": False}) + db.add(candidate) db.commit() - print(f"[vision-analysis] stored placeholder output id={output.id} for {document_id}", flush=True) + + print(f"[vision-analysis] stored output id={output.id} and candidate text_version for {document_id}", flush=True) return RedirectResponse( url=f"/documents/{document_id}?tab=layout-review&success=vision_analysis_started",