feat: add Phase 3 reviewed OCR editing with versioned persistence

This commit is contained in:
Sean McElwain 2026-04-02 10:50:41 -05:00
parent 6cdf5d6dd9
commit 6ec58f848b
2 changed files with 79 additions and 7 deletions

View File

@ -1,7 +1,7 @@
from pathlib import Path
from uuid import uuid4
from fastapi import APIRouter, Depends, Request
from fastapi import APIRouter, Depends, Form, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session, selectinload
@ -80,6 +80,44 @@ def test_ingest(db: Session = Depends(get_db)):
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
@router.post("/{document_id}/review-text", response_class=RedirectResponse)
def save_reviewed_text(
    document_id: str,
    reviewed_text: str = Form(...),
    db: Session = Depends(get_db),
):
    """Store submitted text as the document's new current reviewed version.

    The document is looked up by its ``document_id`` column with its text
    versions loaded alongside it. If no such document exists, nothing is
    saved and the client is redirected (303) to the document list.

    Otherwise every text version of type ``"reviewed"`` that is flagged
    ``is_current`` has that flag cleared, a new ``"reviewed"`` ``TextVersion``
    holding ``reviewed_text`` is added as the current one, the document's
    ``review_status`` is set to ``"reviewed"``, and the changes are committed
    with a single ``db.commit()``. The client is then redirected (303) to the
    document's detail page.

    Args:
        document_id: Path parameter matched against ``Document.document_id``.
        reviewed_text: Required form field containing the edited text.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``RedirectResponse`` with status 303, pointing at the document list
        when the document is missing and at the detail page after a save.
    """
    lookup = (
        db.query(Document)
        .options(selectinload(Document.text_versions))
        .filter(Document.document_id == document_id)
    )
    document = lookup.first()
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    # Superseded reviewed versions are not deleted; only their flag is cleared
    # so that the version added below is the single current one.
    for version in document.text_versions:
        if version.version_type == "reviewed" and version.is_current:
            version.is_current = False

    db.add(
        TextVersion(
            document_id=document.id,
            version_type="reviewed",
            text_content=reviewed_text,
            created_by="mcelwain",
            is_current=True,
        )
    )
    document.review_status = "reviewed"
    db.commit()

    detail_url = f"/documents/{document.document_id}"
    return RedirectResponse(url=detail_url, status_code=303)
@router.get("/{document_id}", response_class=HTMLResponse)
def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)):
document = (
@ -97,15 +135,28 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
if document is None:
return HTMLResponse(content="Document not found", status_code=404)
sorted_text_versions = sorted(
document.text_versions,
key=lambda x: x.created_at,
reverse=True,
)
raw_ocr = next(
(
tv
for tv in sorted(document.text_versions, key=lambda x: x.created_at, reverse=True)
if tv.version_type == "raw_ocr"
),
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
None,
)
reviewed_ocr = next(
(tv for tv in sorted_text_versions if tv.version_type == "reviewed" and tv.is_current),
None,
)
review_text_value = (
reviewed_ocr.text_content
if reviewed_ocr is not None
else raw_ocr.text_content if raw_ocr is not None else ""
)
return templates.TemplateResponse(
request=request,
name="documents/detail.html",
@ -113,5 +164,7 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
"request": request,
"document": document,
"raw_ocr": raw_ocr,
"reviewed_ocr": reviewed_ocr,
"review_text_value": review_text_value,
},
)

View File

@ -5,7 +5,7 @@
<title>{{ document.document_id }}</title>
</head>
<body>
<p><a href="/documents">Back to documents</a></p>
<p><a href="/documents/">Back to documents</a></p>
<h1>{{ document.document_id }}</h1>
@ -48,5 +48,24 @@
{% else %}
<p>No raw OCR text found.</p>
{% endif %}
<h2>Reviewed OCR</h2>
{% if reviewed_ocr %}
<p>Current reviewed version saved at {{ reviewed_ocr.created_at }}</p>
{% else %}
<p>No reviewed OCR saved yet.</p>
{% endif %}
<form method="post" action="/documents/{{ document.document_id }}/review-text">
<div>
<label for="reviewed_text">Edit reviewed OCR text:</label>
</div>
<div>
<textarea id="reviewed_text" name="reviewed_text" rows="20" cols="100">{{ review_text_value }}</textarea>
</div>
<div style="margin-top: 1rem;">
<button type="submit">Save reviewed OCR</button>
</div>
</form>
</body>
</html>