import json from pathlib import Path path = Path("/mnt/storage/document-processor/exports/document_training.jsonl") count = 0 approved = 0 with path.open() as f: for line in f: row = json.loads(line) count += 1 if row["review"]["is_approved"]: approved += 1 if count <= 3: print("\n--- SAMPLE ---") print("ID:", row["document"]["document_id"]) print("Merchant:", row["extracted_fields"].get("merchant_normalized")) print("Total:", row["extracted_fields"].get("total")) print("OCR len:", len(row["ocr_text"])) print("\nTotal docs:", count) print("Approved:", approved)