feat: add training exports and dataset inspection tooling
This commit is contained in:
parent
5cc8b76270
commit
ffc3ddfe3b
|
|
@ -0,0 +1,24 @@
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
path = Path("/mnt/storage/document-processor/exports/document_training.jsonl")


def summarize_export(lines, sample_limit=3):
    """Count documents and approved documents in a JSONL training export.

    Each non-blank item of *lines* is parsed as one JSON object. For the
    first *sample_limit* documents a short sample (document id, normalized
    merchant, total, OCR text length) is printed to stdout.

    Args:
        lines: Iterable of strings, one JSON document per item (an open
            text file works).
        sample_limit: Number of leading documents to print as samples.

    Returns:
        A ``(count, approved)`` tuple: the number of documents parsed and
        how many of them have a truthy ``row["review"]["is_approved"]``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a document lacks ``review``/``is_approved``, or a
            sampled document lacks ``document``/``document_id``,
            ``extracted_fields`` or ``ocr_text``.
    """
    count = 0
    approved = 0
    for line in lines:
        # A blank line is not a JSON document; json.loads would raise on
        # it and abort the whole inspection.
        if not line.strip():
            continue
        row = json.loads(line)
        count += 1
        if row["review"]["is_approved"]:
            approved += 1

        if count <= sample_limit:
            print("\n--- SAMPLE ---")
            print("ID:", row["document"]["document_id"])
            print("Merchant:", row["extracted_fields"].get("merchant_normalized"))
            print("Total:", row["extracted_fields"].get("total"))
            # A null OCR text is reported as length 0 instead of raising
            # TypeError from len(None).
            print("OCR len:", len(row["ocr_text"] or ""))
    return count, approved


def main(export_path=path):
    """Print samples and summary counts for the export at *export_path*.

    Args:
        export_path: ``pathlib.Path`` of the JSONL export; defaults to the
            module-level ``path``.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with export_path.open(encoding="utf-8") as f:
        count, approved = summarize_export(f)

    print("\nTotal docs:", count)
    print("Approved:", approved)


if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue