Derive candidate fields from vision crop OCR

This commit is contained in:
Sean McElwain 2026-05-30 21:03:41 -05:00
parent 5947cc0fe0
commit da538a99ee
1 changed files with 79 additions and 0 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from typing import Any
import hashlib
import tempfile
import re
try:
import fitz # PyMuPDF
@ -619,6 +620,82 @@ def classify_and_crop_unmatched_regions(
"classified_regions": classified,
}
# Patterns are compiled once at import time instead of on every call.
_VISION_MONEY_RE = re.compile(r"(?<!\d)(?:\$?\s*)\d+\.\d{2}[A-Z]?(?!\d)")
_VISION_TIME_RE = re.compile(r"\b\d{1,2}:\d{2}\s*(?:AM|PM)?\b", re.IGNORECASE)
_VISION_ITEM_COUNT_RE = re.compile(r"\b\d+\s+ITEMS?\b", re.IGNORECASE)


def _ocr_confidence_at_least(conf: Any, threshold: float) -> bool:
    """Return True when *conf* is numeric (or numeric-like) and >= *threshold*.

    Missing, falsy, or non-numeric confidences are treated as 0 rather than
    raising, so a malformed OCR result only lowers the candidate confidence.
    """
    try:
        return float(conf or 0) >= threshold
    except (TypeError, ValueError):
        return False


def build_vision_candidate_fields(classification: dict[str, Any]) -> list[dict[str, Any]]:
    """
    Convert crop OCR/classification results into lightweight structured field candidates.

    Args:
        classification: Mapping whose ``"classified_regions"`` entry is a list of
            region dicts. Each region is read for ``ocr_text``, ``ocr_confidence``,
            ``bbox``, ``crop_path`` and ``geometry_class``.

    Returns:
        A flat list of candidate dicts in region order. Every candidate carries
        the region provenance (``source``, ``source_region_index``,
        ``source_bbox``, ``source_crop_path``, ``ocr_confidence``,
        ``geometry_class``) plus ``candidate_type``, ``value`` and
        ``confidence``. One region may yield several candidates. Regions with
        empty OCR text, and entries that are not dicts, are skipped while
        keeping the original region index for the remaining ones.
    """
    fields: list[dict[str, Any]] = []
    regions = classification.get("classified_regions") or []

    for idx, region in enumerate(regions):
        if not isinstance(region, dict):
            continue

        text = str(region.get("ocr_text") or "").strip()
        if not text:
            continue

        conf = region.get("ocr_confidence")
        base = {
            "source": "vision_crop_ocr",
            "source_region_index": idx,
            "source_bbox": region.get("bbox"),
            "source_crop_path": region.get("crop_path"),
            "ocr_confidence": conf,
            "geometry_class": region.get("geometry_class"),
        }
        high_conf = _ocr_confidence_at_least(conf, 70)
        medium_conf = _ocr_confidence_at_least(conf, 50)

        # Collected per region so the noise fallback can tell whether anything
        # meaningful was already recognised in this crop.
        region_fields: list[dict[str, Any]] = []

        lower = text.lower()
        if "cvs" in lower or "pharmacy" in lower:
            region_fields.append({
                **base,
                "candidate_type": "merchant_or_header",
                "value": text,
                "confidence": 0.75 if high_conf else 0.45,
            })

        time_match = _VISION_TIME_RE.search(text)
        if time_match:
            region_fields.append({
                **base,
                "candidate_type": "transaction_time",
                # The pattern's optional ``\s*`` can swallow a trailing space
                # when no AM/PM follows; strip it from the reported value.
                "value": time_match.group(0).strip(),
                "raw_text": text,
                "confidence": 0.80 if high_conf else 0.50,
            })

        item_count_match = _VISION_ITEM_COUNT_RE.search(text)
        if item_count_match:
            region_fields.append({
                **base,
                "candidate_type": "item_count",
                "value": item_count_match.group(0).upper(),
                "raw_text": text,
                "confidence": 0.65 if medium_conf else 0.40,
            })

        # The optional ``\$?\s*`` prefix makes matches start on the preceding
        # whitespace (e.g. " 12.34"); normalise so values are comparable.
        money_matches = [m.strip() for m in _VISION_MONEY_RE.findall(text)]
        if money_matches:
            region_fields.append({
                **base,
                "candidate_type": "money_amounts",
                "value": money_matches,
                "raw_text": text,
                "confidence": 0.65 if medium_conf else 0.35,
            })

        # Capture low-value symbol/noise so later filtering can learn from it.
        # Only when nothing else matched: a short merchant hit such as "CVS"
        # must not be reported as noise as well.
        if len(text) <= 3 and not region_fields:
            region_fields.append({
                **base,
                "candidate_type": "symbol_or_noise",
                "value": text,
                "confidence": 0.20,
            })

        fields.extend(region_fields)

    return fields
def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
"""
Convert vision analysis into normal layout_json.
@ -638,6 +715,7 @@ def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_re
layout,
region_score,
)
candidate_fields = build_vision_candidate_fields(region_classification)
layout["vision_assisted"] = True
layout["vision_assisted_status"] = normalized_vision.get("status", "unknown")
@ -646,6 +724,7 @@ def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_re
layout["vision_coordinate_normalization"] = normalized_vision.get("coordinate_normalization")
layout["vision_region_score"] = region_score
layout["vision_region_classification"] = region_classification
layout["vision_candidate_fields"] = candidate_fields
layout["layout_sync_source"] = "vision_assisted"
layout["layout_needs_review"] = True
return layout