feat: add training exports and dataset inspection tooling
This commit is contained in:
parent
5cc8b76270
commit
ffc3ddfe3b
|
|
@@ -0,0 +1,24 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
|
||||
path = Path("/mnt/storage/document-processor/exports/document_training.jsonl")

# Number of leading rows that are echoed as samples.
SAMPLE_LIMIT = 3


def inspect_export(export_path: Path, sample_limit: int = SAMPLE_LIMIT):
    """Scan a JSONL training export, print a few sample rows, and count rows.

    Each non-blank line is parsed as one JSON object. The first
    ``sample_limit`` rows are printed (document id, normalized merchant,
    total, and OCR text length).

    Args:
        export_path: Location of the JSONL export file.
        sample_limit: How many leading rows to print as samples.

    Returns:
        A ``(count, approved)`` tuple: the number of rows parsed and the
        number whose ``row["review"]["is_approved"]`` is truthy.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks ``review``/``is_approved``, or a sampled
            row lacks ``document``, ``extracted_fields`` or ``ocr_text``.
    """
    count = 0
    approved = 0

    # Explicit encoding so the result does not depend on the platform's
    # default locale. Assumes the export is UTF-8 (plain-ASCII output from
    # json.dumps is also valid UTF-8) -- TODO confirm against the exporter.
    with export_path.open(encoding="utf-8") as f:
        for line in f:
            # A blank line (e.g. an extra trailing newline) is not a record
            # and would otherwise make json.loads raise.
            if not line.strip():
                continue

            row = json.loads(line)
            count += 1
            if row["review"]["is_approved"]:
                approved += 1

            if count <= sample_limit:
                print("\n--- SAMPLE ---")
                print("ID:", row["document"]["document_id"])
                print("Merchant:", row["extracted_fields"].get("merchant_normalized"))
                print("Total:", row["extracted_fields"].get("total"))
                print("OCR len:", len(row["ocr_text"]))

    return count, approved


def main():
    """Inspect the default export at ``path`` and print the summary counts."""
    count, approved = inspect_export(path)
    print("\nTotal docs:", count)
    print("Approved:", approved)


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue