import re from pathlib import Path DEFAULT_OWNER_FILEPATH_NAME = "mcelwain_sean" def to_filepath_name(value: str) -> str: value = (value or "").strip().lower() value = value.replace("&", " and ") value = re.sub(r"[^\w\s-]+", "", value) value = re.sub(r"\s+", "-", value) value = re.sub(r"-{2,}", "-", value) return value.strip("-_") or "unknown" def to_owner_filepath_name(first_name: str = "", last_name: str = "") -> str: first = to_filepath_name(first_name) last = to_filepath_name(last_name) if first and last and first != "unknown" and last != "unknown": return f"{last}_{first}" return DEFAULT_OWNER_FILEPATH_NAME def _infer_extension(document) -> str: current_path = getattr(document, "current_path", "") or "" suffix = Path(current_path).suffix.strip() if suffix: return suffix mime_type = (getattr(document, "mime_type", "") or "").lower() if "pdf" in mime_type: return ".pdf" if "jpeg" in mime_type or "jpg" in mime_type: return ".jpg" if "png" in mime_type: return ".png" return "" def _latest_extracted(document): rows = list(getattr(document, "extracted_fields", []) or []) if not rows: return None return sorted(rows, key=lambda x: x.updated_at or x.created_at, reverse=True)[0] def _latest_additional(document): rows = list(getattr(document, "additional_fields", []) or []) if not rows: return None return sorted(rows, key=lambda x: x.updated_at or x.created_at, reverse=True)[0] def _split_person_name(full_name: str) -> tuple[str, str]: full_name = (full_name or "").strip() if not full_name: return "", "" if "," in full_name: last, first = [part.strip() for part in full_name.split(",", 1)] return first, last parts = full_name.split() if len(parts) == 1: return parts[0], "" return parts[0], parts[-1] def choose_owner_filepath_name(document, naming_row=None) -> str: # future naming-layer owner override can go first when added if naming_row: owner_first = getattr(naming_row, "owner_first_display_name", "") or "" owner_last = getattr(naming_row, "owner_last_display_name", "") or "" if owner_first or owner_last: return to_owner_filepath_name(owner_first, owner_last) additional = _latest_additional(document) if additional and getattr(additional, "owner_primary", None): first, last = _split_person_name(additional.owner_primary) owner_value = to_owner_filepath_name(first, last) if owner_value: return owner_value return DEFAULT_OWNER_FILEPATH_NAME def choose_entity_filepath_name(document, naming_row=None) -> str: if naming_row and getattr(naming_row, "naming_entity", None): return to_filepath_name(naming_row.naming_entity) extracted = _latest_extracted(document) if extracted: merchant = extracted.merchant_normalized or extracted.merchant_raw or "" if merchant: return to_filepath_name(merchant) return to_filepath_name(getattr(document, "document_type", "") or "document") def choose_type_folder(document, naming_row=None) -> str: raw = "" if naming_row and getattr(naming_row, "naming_type", None): raw = naming_row.naming_type elif getattr(document, "document_type", None): raw = document.document_type raw = to_filepath_name(raw or "document") mapping = { "receipt": "receipts", "statement": "statements", "invoice": "invoices", "deposit": "deposits", "withdrawal": "withdrawals", "transfer": "transfers", "payment-confirmation": "payment-confirmations", "check-image": "check-images", "prescription": "prescriptions", "eob": "eobs", "id-card": "id-cards", "business-card": "business-cards", "tax-return": "tax-returns", "tax-receipt": "tax-receipts", "tax-statement": "tax-statements", "notice": "notices", "agreement": "agreements", "outline": "outlines", "brief": "briefs", "notes": "notes", "email": "emails", "transcript": "transcripts", "audio": "audio", "photo": "photos", "document": "documents", "medical": "medical", "insurance": "insurance", "bank": "bank", } return mapping.get(raw, f"{raw}s" if not raw.endswith("s") else raw) def choose_type_singular(document, naming_row=None) -> str: raw = "" if naming_row and getattr(naming_row, "naming_type", None): raw = naming_row.naming_type elif getattr(document, "document_type", None): raw = document.document_type return to_filepath_name(raw or "document") def choose_year(document, naming_row=None) -> str: if naming_row and getattr(naming_row, "naming_date", None): return str(naming_row.naming_date).strip()[:4] extracted = _latest_extracted(document) if extracted and getattr(extracted, "transaction_date", None): return extracted.transaction_date.isoformat()[:4] created_at = getattr(document, "created_at", None) if created_at: return created_at.strftime("%Y") return "unknown" def choose_date_text(document, naming_row=None) -> str: if naming_row and getattr(naming_row, "naming_date", None): return to_filepath_name(str(naming_row.naming_date)) extracted = _latest_extracted(document) if extracted and getattr(extracted, "transaction_date", None): return extracted.transaction_date.isoformat() created_at = getattr(document, "created_at", None) if created_at: return created_at.strftime("%Y-%m-%d") return "unknown-date" def choose_description_filepath_name(document, naming_row=None) -> str: if naming_row and getattr(naming_row, "naming_description", None): return to_filepath_name(naming_row.naming_description) additional = _latest_additional(document) if additional and getattr(additional, "occasion_note", None): return to_filepath_name(additional.occasion_note) return "" def build_filename(document, naming_row=None, version_number: int | None = None) -> str: entity = choose_entity_filepath_name(document, naming_row=naming_row) type_singular = choose_type_singular(document, naming_row=naming_row) date_text = choose_date_text(document, naming_row=naming_row) description = choose_description_filepath_name(document, naming_row=naming_row) ext = _infer_extension(document) parts = [entity, type_singular, date_text] if description: parts.append(description) base = "_".join(parts) if version_number and version_number > 1: base = f"{base}_v{version_number}" return f"{base}{ext}" def build_proposed_storage_path(document, save_root: str, naming_row=None) -> str: save_root = str(save_root or "").strip() or "/mnt/svr-01/storage/records" owner = choose_owner_filepath_name(document, naming_row=naming_row) type_folder = choose_type_folder(document, naming_row=naming_row) year = choose_year(document, naming_row=naming_row) entity = choose_entity_filepath_name(document, naming_row=naming_row) target_dir = Path(save_root) / owner / type_folder / year / entity filename = build_filename(document, naming_row=naming_row, version_number=None) candidate = target_dir / filename if not candidate.exists(): return str(candidate) stem = candidate.stem suffix = candidate.suffix version = 2 while True: next_candidate = target_dir / f"{stem}_v{version}{suffix}" if not next_candidate.exists(): return str(next_candidate) version += 1