25 lines
681 B
Python
25 lines
681 B
Python
"""Summarize a document-training JSONL export.

Counts the rows in the export, counts how many are marked approved, and
prints a short sample of the first few rows for a quick visual check.
"""

import json
from pathlib import Path

path = Path("/mnt/storage/document-processor/exports/document_training.jsonl")


def _print_sample(row):
    """Print the sample fields of one parsed export row."""
    print("\n--- SAMPLE ---")
    print("ID:", row["document"]["document_id"])
    # Merchant and total use .get(), so a missing value prints as None.
    print("Merchant:", row["extracted_fields"].get("merchant_normalized"))
    print("Total:", row["extracted_fields"].get("total"))
    print("OCR len:", len(row["ocr_text"]))


def summarize_export(jsonl_path, sample_limit=3):
    """Count total and approved rows in a JSONL export, printing samples.

    Args:
        jsonl_path: Path (``str`` or ``Path``) of a file holding one JSON
            object per line.
        sample_limit: Number of leading rows to print as samples.

    Returns:
        A ``(count, approved)`` tuple: the number of rows parsed and the
        number of those whose ``row["review"]["is_approved"]`` is truthy.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks ``review``/``is_approved``, or a sampled
            row lacks one of the keys that are printed with direct indexing.
    """
    count = 0
    approved = 0

    # Explicit UTF-8: the default text encoding is locale-dependent and can
    # mis-decode or reject non-ASCII content in the export.
    with Path(jsonl_path).open(encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing empty line) are not rows; passing
            # them to json.loads would raise JSONDecodeError.
            if not line.strip():
                continue

            row = json.loads(line)
            count += 1

            if row["review"]["is_approved"]:
                approved += 1

            if count <= sample_limit:
                _print_sample(row)

    return count, approved


if __name__ == "__main__":
    count, approved = summarize_export(path)

    print("\nTotal docs:", count)
    print("Approved:", approved)