# Review notes are the lines starting with "#" that sit between file sections,
# outside every hunk. The hunks themselves carry the original code unchanged.
#
# app/db/base.py
#   Removes the five app.models imports (and their comment) that followed Base,
#   leaving the module with only the declarative_base() call.
#   NOTE(review): anything that relied on importing app.db.base to get every
#   model registered on Base.metadata now has to import the models itself;
#   callers are not visible in this patch, so confirm none depend on it.
diff --git a/app/db/base.py b/app/db/base.py
index d5de347..59be703 100644
--- a/app/db/base.py
+++ b/app/db/base.py
@@ -1,10 +1,3 @@
 from sqlalchemy.orm import declarative_base
 
 Base = declarative_base()
-
-# Import models so Base.metadata knows about all tables
-from app.models.document import Document  # noqa: F401,E402
-from app.models.document_version import DocumentVersion  # noqa: F401,E402
-from app.models.text_version import TextVersion  # noqa: F401,E402
-from app.models.extracted_field import ExtractedField  # noqa: F401,E402
-from app.models.layer1_candidate import Layer1Candidate  # noqa: F401,E402
# app/db/deps.py (new file)
#   get_db() is a generator: it creates a session with SessionLocal(), yields it,
#   and closes it in a `finally` clause, so the session is closed whether or not
#   the consumer raised. It never commits; the route that writes
#   (test_ingest, later in this patch) calls db.commit() itself.
#   The routes in app/routes/documents.py receive it through Depends(get_db).
diff --git a/app/db/deps.py b/app/db/deps.py
new file mode 100644
index 0000000..007760f
--- /dev/null
+++ b/app/db/deps.py
@@ -0,0 +1,13 @@
+from collections.abc import Generator
+
+from sqlalchemy.orm import Session
+
+from app.db.session import SessionLocal
+
+
+def get_db() -> Generator[Session, None, None]:
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
# app/db/init_db.py
#   The model imports removed from base.py are added here, ahead of init_db(),
#   which calls Base.metadata.create_all(bind=engine). Per the in-code comment
#   they exist only so Base.metadata knows about all tables (hence noqa: F401).
diff --git a/app/db/init_db.py b/app/db/init_db.py
index ae4474c..77f3b73 100644
--- a/app/db/init_db.py
+++ b/app/db/init_db.py
@@ -1,6 +1,13 @@
 from app.db.base import Base
 from app.db.session import engine
 
+# Import models so Base.metadata knows about all tables
+from app.models.document import Document  # noqa: F401
+from app.models.document_version import DocumentVersion  # noqa: F401
+from app.models.text_version import TextVersion  # noqa: F401
+from app.models.extracted_field import ExtractedField  # noqa: F401
+from app.models.layer1_candidate import Layer1Candidate  # noqa: F401
+
 
 def init_db() -> None:
     Base.metadata.create_all(bind=engine)
# app/main.py
#   Gives the FastAPI app a title, registers documents_router after
#   health_router, and drops the trailing blank line at the end of the file.
#   root() is unchanged context: it returns a small JSON status payload.
diff --git a/app/main.py b/app/main.py
index 4392e1e..15ac75c 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,11 +1,14 @@
 from fastapi import FastAPI
+
+from app.routes.documents import router as documents_router
 from app.routes.health import router as health_router
 
-app = FastAPI()
+app = FastAPI(title="document-processor")
 
 app.include_router(health_router)
+app.include_router(documents_router)
+
 
 @app.get("/")
 def root():
     return {"app": "document-processor", "status": "running"}
-
# app/routes/documents.py (new file) -- router mounted under /documents
#
#   list_documents    GET /documents/
#     Loads every Document ordered by created_at descending and renders
#     documents/list.html with it as `documents`.
#
#   test_ingest       GET /documents/test-ingest
#     Inserts one Document with hard-coded placeholder metadata and a public id
#     of "doc_" plus 12 hex characters from uuid4(). db.flush() runs before the
#     child rows are built because they reference document.id (presumably
#     assigned on flush -- confirm against the model). It then adds a
#     DocumentVersion (version 1, "original") and a TextVersion ("raw_ocr",
#     is_current=True) holding sample receipt text, commits, and answers with a
#     303 redirect to /documents/{document_id}.
#     The route is declared before "/{document_id}", so the literal path
#     "test-ingest" is matched here rather than being taken as a document id.
#     NOTE(review): this is a GET route that writes a new row on every request;
#     consider POST, or enabling it only in development.
#
#   document_detail   GET /documents/{document_id}
#     Looks the document up by its public document_id with versions,
#     text_versions, extracted_fields and layer1_candidates eager-loaded via
#     selectinload. Returns the body "Document not found" with status 404 when
#     there is no match. Otherwise it sorts text_versions by created_at
#     descending, takes the first one whose version_type is "raw_ocr" (None if
#     there is none) and renders documents/detail.html with `document` and
#     `raw_ocr`.
#     NOTE(review): the sort key is evaluated for every text version, not only
#     raw_ocr ones, so a None created_at on any of them would raise TypeError --
#     confirm the column is non-nullable.
#     NOTE(review): the selection ignores is_current even though test_ingest
#     sets it -- confirm "newest raw_ocr" is the intended rule.
#     NOTE(review): only Document, DocumentVersion and TextVersion are imported
#     here, while the query touches Document.extracted_fields and
#     Document.layer1_candidates, and this patch removes the model imports from
#     app.db.base. If those relationships are declared by class-name string and
#     nothing else imports the two modules at request time, mapper
#     configuration may fail -- verify against app/models.
diff --git a/app/routes/documents.py b/app/routes/documents.py
new file mode 100644
index 0000000..e3b83d3
--- /dev/null
+++ b/app/routes/documents.py
@@ -0,0 +1,117 @@
+from pathlib import Path
+from uuid import uuid4
+
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.templating import Jinja2Templates
+from sqlalchemy.orm import Session, selectinload
+
+from app.db.deps import get_db
+from app.models.document import Document
+from app.models.document_version import DocumentVersion
+from app.models.text_version import TextVersion
+
+router = APIRouter(prefix="/documents", tags=["documents"])
+
+BASE_DIR = Path(__file__).resolve().parent.parent
+templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
+
+
+@router.get("/", response_class=HTMLResponse)
+def list_documents(request: Request, db: Session = Depends(get_db)):
+    documents = db.query(Document).order_by(Document.created_at.desc()).all()
+    return templates.TemplateResponse(
+        request=request,
+        name="documents/list.html",
+        context={"request": request, "documents": documents},
+    )
+
+
+@router.get("/test-ingest", response_class=RedirectResponse)
+def test_ingest(db: Session = Depends(get_db)):
+    public_id = f"doc_{uuid4().hex[:12]}"
+
+    document = Document(
+        document_id=public_id,
+        document_type="receipt",
+        source_path=f"/mnt/storage/documents/incoming/{public_id}.pdf",
+        original_path=f"/mnt/storage/documents/archive/originals/{public_id}.pdf",
+        current_path=f"/mnt/storage/documents/current/{public_id}.pdf",
+        original_filename=f"{public_id}.pdf",
+        canonical_filename=f"{public_id}.pdf",
+        mime_type="application/pdf",
+        file_size=245760,
+        page_count=1,
+        sha256_original="dummy_original_hash",
+        sha256_current="dummy_current_hash",
+        storage_status="ingested",
+        review_status="ocr_complete",
+    )
+    db.add(document)
+    db.flush()
+
+    version = DocumentVersion(
+        document_id=document.id,
+        version_number=1,
+        version_type="original",
+        file_path=document.original_path or document.source_path,
+        sha256=document.sha256_original,
+        created_by="system",
+        notes="Initial test ingest",
+    )
+    db.add(version)
+
+    raw_text = TextVersion(
+        document_id=document.id,
+        version_type="raw_ocr",
+        text_content=(
+            "CVS PHARMACY\n"
+            "Date: 2026-04-01\n"
+            "Total: 12.34 USD\n"
+            "Household supplies\n"
+        ),
+        created_by="system",
+        is_current=True,
+    )
+    db.add(raw_text)
+
+    db.commit()
+
+    return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
+
+
+@router.get("/{document_id}", response_class=HTMLResponse)
+def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)):
+    document = (
+        db.query(Document)
+        .options(
+            selectinload(Document.versions),
+            selectinload(Document.text_versions),
+            selectinload(Document.extracted_fields),
+            selectinload(Document.layer1_candidates),
+        )
+        .filter(Document.document_id == document_id)
+        .first()
+    )
+
+    if document is None:
+        return HTMLResponse(content="Document not found", status_code=404)
+
+    raw_ocr = next(
+        (
+            tv
+            for tv in sorted(document.text_versions, key=lambda x: x.created_at, reverse=True)
+            if tv.version_type == "raw_ocr"
+        ),
+        None,
+    )
+
+    return templates.TemplateResponse(
+        request=request,
+        name="documents/detail.html",
+        context={
+            "request": request,
+            "document": document,
+            "raw_ocr": raw_ocr,
+        },
+    )
# app/templates/documents/detail.html (new file)
#   NOTE(review): the template markup in this section appears to have been
#   stripped in this copy of the patch (only "+" markers and Jinja expressions
#   remain), so the hunk body below is kept exactly as found.
diff --git a/app/templates/documents/detail.html b/app/templates/documents/detail.html
new file mode 100644
index 0000000..ec56947
--- /dev/null
+++ b/app/templates/documents/detail.html
@@ -0,0 +1,52 @@
+ + + + + {{ document.document_id }} + + +

Back to documents

+ +

{{ document.document_id }}

+ +

Document metadata

+ + +

Document versions

+ {% if document.versions %} + + {% else %} +

No versions found.

+ {% endif %} + +

Raw OCR

+ {% if raw_ocr %} +
{{ raw_ocr.text_content }}
+ {% else %} +

No raw OCR text found.

+ {% endif %} + + diff --git a/app/templates/documents/list.html b/app/templates/documents/list.html new file mode 100644 index 0000000..2f8922c --- /dev/null +++ b/app/templates/documents/list.html @@ -0,0 +1,27 @@ + + + + + Documents + + +

Documents

+ +

Create test ingest

+ + {% if documents %} + + {% else %} +

No documents yet.

+ {% endif %} + +