Derive candidate fields from vision crop OCR
This commit is contained in:
parent
5947cc0fe0
commit
da538a99ee
|
|
@ -4,6 +4,7 @@ from pathlib import Path
|
|||
from typing import Any
|
||||
import hashlib
|
||||
import tempfile
|
||||
import re
|
||||
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
|
|
@ -619,6 +620,82 @@ def classify_and_crop_unmatched_regions(
|
|||
"classified_regions": classified,
|
||||
}
|
||||
|
||||
|
||||
def _vision_confidence_tier(ocr_confidence: Any, threshold: float, high: float, low: float) -> float:
    """Return ``high`` when the OCR confidence reaches ``threshold``, otherwise ``low``.

    Missing (``None``) or non-numeric confidences are treated as 0 so that a
    confidence delivered as a string (e.g. ``"85"``) is compared numerically
    instead of raising ``TypeError``.
    """
    try:
        score = float(ocr_confidence or 0)
    except (TypeError, ValueError):
        score = 0.0
    return high if score >= threshold else low


def build_vision_candidate_fields(classification: dict[str, Any]) -> list[dict[str, Any]]:
    """
    Convert crop OCR/classification results into lightweight structured field candidates.

    Each entry of ``classification["classified_regions"]`` that has non-empty
    ``ocr_text`` is scanned and may yield several candidates, emitted in this order:

    * ``merchant_or_header`` - text mentions "cvs" or "pharmacy" (case-insensitive).
    * ``transaction_time``   - first ``H:MM`` / ``HH:MM`` match, optional AM/PM.
    * ``item_count``         - first ``<n> ITEM(S)`` match, upper-cased.
    * ``money_amounts``      - list of every ``d.dd`` amount (optional ``$`` prefix
      and single trailing capital letter), surrounding whitespace removed.
    * ``symbol_or_noise``    - text of at most 3 characters that produced no other
      candidate, kept so later filtering can learn from it.

    Every candidate carries the provenance of its region (``source_region_index``,
    ``source_bbox``, ``source_crop_path``, ``ocr_confidence``, ``geometry_class``)
    plus a heuristic ``confidence`` tiered on the OCR confidence.

    Args:
        classification: Mapping with an optional ``"classified_regions"`` list of
            region dicts.

    Returns:
        A flat list of candidate dicts; empty when there are no usable regions.
    """
    fields: list[dict[str, Any]] = []
    regions = classification.get("classified_regions") or []

    # NOTE(review): amounts with thousands separators ("1,234.56") only match
    # their trailing group ("234.56") with this pattern - confirm whether such
    # totals can occur in the scanned documents.
    money_re = re.compile(r"(?<!\d)(?:\$?\s*)\d+\.\d{2}[A-Z]?(?!\d)")
    time_re = re.compile(r"\b\d{1,2}:\d{2}\s*(?:AM|PM)?\b", re.IGNORECASE)
    item_count_re = re.compile(r"\b\d+\s+ITEMS?\b", re.IGNORECASE)

    for idx, region in enumerate(regions):
        text = str(region.get("ocr_text") or "").strip()
        if not text:
            continue

        conf = region.get("ocr_confidence")
        base = {
            "source": "vision_crop_ocr",
            "source_region_index": idx,
            "source_bbox": region.get("bbox"),
            "source_crop_path": region.get("crop_path"),
            "ocr_confidence": conf,
            "geometry_class": region.get("geometry_class"),
        }

        # Collected per region so the noise fallback can tell whether anything
        # meaningful was already recognised in this crop.
        region_fields: list[dict[str, Any]] = []

        lower = text.lower()
        if "cvs" in lower or "pharmacy" in lower:
            region_fields.append({
                **base,
                "candidate_type": "merchant_or_header",
                "value": text,
                "confidence": _vision_confidence_tier(conf, 70, 0.75, 0.45),
            })

        time_match = time_re.search(text)
        if time_match:
            region_fields.append({
                **base,
                "candidate_type": "transaction_time",
                # The pattern's optional "\s*" can swallow a trailing space
                # when no AM/PM follows; strip it from the reported value.
                "value": time_match.group(0).strip(),
                "raw_text": text,
                "confidence": _vision_confidence_tier(conf, 70, 0.80, 0.50),
            })

        item_match = item_count_re.search(text)
        if item_match:
            region_fields.append({
                **base,
                "candidate_type": "item_count",
                "value": item_match.group(0).upper(),
                "raw_text": text,
                "confidence": _vision_confidence_tier(conf, 50, 0.65, 0.40),
            })

        # The pattern consumes whitespace before the digits, so each match is
        # stripped to avoid values such as " 12.34".
        money_matches = [amount.strip() for amount in money_re.findall(text)]
        if money_matches:
            region_fields.append({
                **base,
                "candidate_type": "money_amounts",
                "value": money_matches,
                "raw_text": text,
                "confidence": _vision_confidence_tier(conf, 50, 0.65, 0.35),
            })

        # Capture low-value symbol/noise so later filtering can learn from it.
        # Only when nothing else matched: a short but meaningful crop such as
        # "CVS" must not be labelled as noise as well.
        if len(text) <= 3 and not region_fields:
            region_fields.append({
                **base,
                "candidate_type": "symbol_or_noise",
                "value": text,
                "confidence": 0.20,
            })

        fields.extend(region_fields)

    return fields
|
||||
|
||||
def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_result: dict[str, Any]) -> dict[str, Any]:
|
||||
"""
|
||||
Convert vision analysis into normal layout_json.
|
||||
|
|
@ -638,6 +715,7 @@ def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_re
|
|||
layout,
|
||||
region_score,
|
||||
)
|
||||
candidate_fields = build_vision_candidate_fields(region_classification)
|
||||
|
||||
layout["vision_assisted"] = True
|
||||
layout["vision_assisted_status"] = normalized_vision.get("status", "unknown")
|
||||
|
|
@ -646,6 +724,7 @@ def build_vision_assisted_layout(source_layout: dict[str, Any] | None, vision_re
|
|||
layout["vision_coordinate_normalization"] = normalized_vision.get("coordinate_normalization")
|
||||
layout["vision_region_score"] = region_score
|
||||
layout["vision_region_classification"] = region_classification
|
||||
layout["vision_candidate_fields"] = candidate_fields
|
||||
layout["layout_sync_source"] = "vision_assisted"
|
||||
layout["layout_needs_review"] = True
|
||||
return layout
|
||||
|
|
|
|||
Loading…
Reference in New Issue