diff --git a/app/logic/id_gen.py b/app/logic/id_gen.py
new file mode 100644
index 0000000..0cb6fc0
--- /dev/null
+++ b/app/logic/id_gen.py
@@ -0,0 +1,37 @@
+import time
+from sqlalchemy.orm import Session
+from app.models.document import Document
+
+
+def generate_document_id(db: Session) -> str:
+    """Return the lowest unused ``doc_<ts>-<n>`` document ID.
+
+    ``<ts>`` is the current Unix time in whole seconds and ``<n>`` is a
+    counter starting at 0, both rendered as lowercase hex.
+
+    Args:
+        db: Session used to look up the IDs already stored for this second.
+
+    Returns:
+        The first candidate ID that has no matching ``Document`` row.
+
+    Note:
+        This function only reads; it does not reserve the ID. Two callers that
+        run before either has stored its document can receive the same value.
+    """
+    prefix = f"doc_{int(time.time()):x}-"
+
+    # Fetch every ID issued for this second in one query instead of probing
+    # the table once per candidate. autoescape keeps the "_" in the prefix
+    # from acting as a LIKE wildcard.
+    rows = (
+        db.query(Document.document_id)
+        .filter(Document.document_id.startswith(prefix, autoescape=True))
+        .all()
+    )
+    taken = {row[0] for row in rows}
+
+    n = 0
+    while f"{prefix}{n:x}" in taken:
+        n += 1
+    return f"{prefix}{n:x}"
diff --git a/app/logic/ingest.py b/app/logic/ingest.py
index bbd42ac..b40227f 100644
--- a/app/logic/ingest.py
+++ b/app/logic/ingest.py
@@ -9,7 +9,6 @@ import subprocess
 import tempfile
 from difflib import SequenceMatcher
 from pathlib import Path
-from uuid import uuid4
 
 from PIL import Image
 from sqlalchemy import func
@@ -19,6 +18,7 @@ from app.core.config import DOCUMENT_ARCHIVE_ROOT, INBOX_ROOT, UPLOAD_ROOT
 from app.models.document import Document
 from app.models.document_version import DocumentVersion
 from app.models.text_version import TextVersion
+from app.logic.id_gen import generate_document_id
 
 
 ALLOWED_EXTENSIONS = {".pdf", ".jpg", ".jpeg", ".png"}
@@ -304,7 +304,7 @@ def archive_document(
     if not is_supported_file(source):
         raise ValueError(f"Unsupported file type: {source.suffix}")
 
-    document_id = f"doc_{uuid4().hex[:12]}"
+    document_id = generate_document_id(db)
     current_path = build_storage_path(document_id, source)
     current_path.parent.mkdir(parents=True, exist_ok=True)
 
@@ -436,7 +436,8 @@ def ingest_uploaded_file(
     upload_root = Path(UPLOAD_ROOT)
     upload_root.mkdir(parents=True, exist_ok=True)
 
-    staged_name = f"{uuid4().hex[:12]}_{Path(filename).name}"
+    document_id = generate_document_id(db)
+    staged_name = f"{document_id}_{Path(filename).name}"
     staged_path = upload_root / staged_name
     staged_path.write_bytes(file_bytes)
 