feat: add Phase 3 reviewed OCR editing with versioned persistence
This commit is contained in:
parent
6cdf5d6dd9
commit
6ec58f848b
|
|
@ -1,7 +1,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Request
|
from fastapi import APIRouter, Depends, Form, Request
|
||||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
from sqlalchemy.orm import Session, selectinload
|
from sqlalchemy.orm import Session, selectinload
|
||||||
|
|
@ -80,6 +80,44 @@ def test_ingest(db: Session = Depends(get_db)):
|
||||||
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
|
return RedirectResponse(url=f"/documents/{document.document_id}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{document_id}/review-text", response_class=RedirectResponse)
def save_reviewed_text(
    document_id: str,
    reviewed_text: str = Form(...),
    db: Session = Depends(get_db),
):
    """Store submitted text as the document's new current "reviewed" version.

    Looks the document up by its ``document_id`` column with its text
    versions eagerly loaded. Any text version that is both of type
    "reviewed" and flagged current is flagged not-current, a new
    ``TextVersion`` holding ``reviewed_text`` is added as the current one,
    and the document's ``review_status`` is set to "reviewed". All changes
    are committed together.

    Returns a 303 redirect to the document's detail page, or to the
    ``/documents/`` listing when no matching document exists.
    """
    lookup = db.query(Document).options(selectinload(Document.text_versions))
    document = lookup.filter(Document.document_id == document_id).first()

    # Unknown document: nothing to save, send the user back to the listing.
    if document is None:
        return RedirectResponse(url="/documents/", status_code=303)

    # Retire every reviewed version that is still flagged as current so the
    # version added below is the only current reviewed one.
    for version in document.text_versions:
        if version.version_type == "reviewed" and version.is_current:
            version.is_current = False

    db.add(
        TextVersion(
            document_id=document.id,
            version_type="reviewed",
            text_content=reviewed_text,
            created_by="mcelwain",
            is_current=True,
        )
    )
    document.review_status = "reviewed"
    db.commit()

    detail_url = f"/documents/{document.document_id}"
    return RedirectResponse(url=detail_url, status_code=303)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{document_id}", response_class=HTMLResponse)
|
@router.get("/{document_id}", response_class=HTMLResponse)
|
||||||
def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)):
|
def document_detail(document_id: str, request: Request, db: Session = Depends(get_db)):
|
||||||
document = (
|
document = (
|
||||||
|
|
@ -97,15 +135,28 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
|
||||||
if document is None:
|
if document is None:
|
||||||
return HTMLResponse(content="Document not found", status_code=404)
|
return HTMLResponse(content="Document not found", status_code=404)
|
||||||
|
|
||||||
|
sorted_text_versions = sorted(
|
||||||
|
document.text_versions,
|
||||||
|
key=lambda x: x.created_at,
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
|
||||||
raw_ocr = next(
|
raw_ocr = next(
|
||||||
(
|
(tv for tv in sorted_text_versions if tv.version_type == "raw_ocr"),
|
||||||
tv
|
|
||||||
for tv in sorted(document.text_versions, key=lambda x: x.created_at, reverse=True)
|
|
||||||
if tv.version_type == "raw_ocr"
|
|
||||||
),
|
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
reviewed_ocr = next(
|
||||||
|
(tv for tv in sorted_text_versions if tv.version_type == "reviewed" and tv.is_current),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
|
review_text_value = (
|
||||||
|
reviewed_ocr.text_content
|
||||||
|
if reviewed_ocr is not None
|
||||||
|
else raw_ocr.text_content if raw_ocr is not None else ""
|
||||||
|
)
|
||||||
|
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request=request,
|
request=request,
|
||||||
name="documents/detail.html",
|
name="documents/detail.html",
|
||||||
|
|
@ -113,5 +164,7 @@ def document_detail(document_id: str, request: Request, db: Session = Depends(ge
|
||||||
"request": request,
|
"request": request,
|
||||||
"document": document,
|
"document": document,
|
||||||
"raw_ocr": raw_ocr,
|
"raw_ocr": raw_ocr,
|
||||||
|
"reviewed_ocr": reviewed_ocr,
|
||||||
|
"review_text_value": review_text_value,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
<title>{{ document.document_id }}</title>
|
<title>{{ document.document_id }}</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p><a href="/documents">Back to documents</a></p>
|
<p><a href="/documents/">Back to documents</a></p>
|
||||||
|
|
||||||
<h1>{{ document.document_id }}</h1>
|
<h1>{{ document.document_id }}</h1>
|
||||||
|
|
||||||
|
|
@ -48,5 +48,24 @@
|
||||||
{% else %}
|
{% else %}
|
||||||
<p>No raw OCR text found.</p>
|
<p>No raw OCR text found.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
<h2>Reviewed OCR</h2>
|
||||||
|
{% if reviewed_ocr %}
|
||||||
|
<p>Current reviewed version saved at {{ reviewed_ocr.created_at }}</p>
|
||||||
|
{% else %}
|
||||||
|
<p>No reviewed OCR saved yet.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<form method="post" action="/documents/{{ document.document_id }}/review-text">
|
||||||
|
<div>
|
||||||
|
<label for="reviewed_text">Edit reviewed OCR text:</label>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<textarea id="reviewed_text" name="reviewed_text" rows="20" cols="100">{{ review_text_value }}</textarea>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top: 1rem;">
|
||||||
|
<button type="submit">Save reviewed OCR</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue