refactor: use time-based hex document ids for new ingests
This commit is contained in:
parent
13590aeeca
commit
9ebaa6f99e
|
|
@ -0,0 +1,23 @@
|
|||
import time
|
||||
from sqlalchemy.orm import Session
|
||||
from app.models.document import Document
|
||||
|
||||
|
||||
def generate_document_id(db: Session) -> str:
    """Return a document id of the form ``doc_<ts>-<n>`` not yet present in the DB.

    ``<ts>`` is the current ``time.time()`` value truncated to an integer and
    rendered as lowercase hex; ``<n>`` is a hex counter that starts at 0 and
    is incremented until no ``Document`` row with that ``document_id`` is
    found through ``db``.

    Args:
        db: SQLAlchemy session used to look up existing ``Document`` rows.

    Returns:
        The first candidate id for which the lookup returned nothing.

    Note:
        The lookup only observes rows visible to ``db`` at query time and
        does not reserve the id; the returned value is not recorded anywhere
        by this function.
    """
    # The timestamp is sampled once, so every candidate shares the same prefix.
    prefix = f"doc_{int(time.time()):x}"

    counter = 0
    while True:
        document_id = f"{prefix}-{counter:x}"
        lookup = db.query(Document).filter(Document.document_id == document_id)
        match = lookup.first()
        if not match:
            return document_id
        # Id already taken: try the next counter value under the same prefix.
        counter += 1
|
||||
|
|
@ -9,7 +9,6 @@ import subprocess
|
|||
import tempfile
|
||||
from difflib import SequenceMatcher
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from PIL import Image
|
||||
from sqlalchemy import func
|
||||
|
|
@ -19,6 +18,7 @@ from app.core.config import DOCUMENT_ARCHIVE_ROOT, INBOX_ROOT, UPLOAD_ROOT
|
|||
from app.models.document import Document
|
||||
from app.models.document_version import DocumentVersion
|
||||
from app.models.text_version import TextVersion
|
||||
from app.logic.id_gen import generate_document_id
|
||||
|
||||
|
||||
ALLOWED_EXTENSIONS = {".pdf", ".jpg", ".jpeg", ".png"}
|
||||
|
|
@ -304,7 +304,7 @@ def archive_document(
|
|||
if not is_supported_file(source):
|
||||
raise ValueError(f"Unsupported file type: {source.suffix}")
|
||||
|
||||
document_id = f"doc_{uuid4().hex[:12]}"
|
||||
document_id = generate_document_id(db)
|
||||
current_path = build_storage_path(document_id, source)
|
||||
|
||||
current_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -436,7 +436,8 @@ def ingest_uploaded_file(
|
|||
upload_root = Path(UPLOAD_ROOT)
|
||||
upload_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
staged_name = f"{uuid4().hex[:12]}_{Path(filename).name}"
|
||||
document_id = generate_document_id(db)
|
||||
staged_name = f"{document_id}_{Path(filename).name}"
|
||||
staged_path = upload_root / staged_name
|
||||
staged_path.write_bytes(file_bytes)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue