from datetime import datetime, timezone
from decimal import Decimal

from sqlalchemy import (
    JSON,
    Boolean,
    DateTime,
    ForeignKey,
    Integer,
    Numeric,
    String,
    Text,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.db.base import Base


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow``, which is deprecated since
    Python 3.12. The tzinfo is stripped so the value stays naive, matching
    what ``datetime.utcnow`` produced for the timezone-less ``DateTime``
    column below.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class TextVersion(Base):
    """ORM model for one row of the ``text_versions`` table.

    Each row holds one version of a document's text together with OCR
    engine metadata, quality fields, and an optional link to the version
    it was derived from.
    """

    __tablename__ = "text_versions"

    id: Mapped[int] = mapped_column(primary_key=True, index=True)

    # Owning document; required and indexed.
    document_id: Mapped[int] = mapped_column(
        ForeignKey("documents.id"),
        nullable=False,
        index=True,
    )

    version_number: Mapped[int] = mapped_column(Integer, nullable=False)

    # Values noted in the original source: raw_ocr, reviewed.
    version_type: Mapped[str] = mapped_column(String(50), nullable=False)

    text_content: Mapped[str] = mapped_column(Text, nullable=False)
    created_by: Mapped[str | None] = mapped_column(String(100), nullable=True)

    # New rows default to True. Nothing in this model clears the flag on
    # older rows -- presumably the caller handles that; verify.
    is_current: Mapped[bool] = mapped_column(
        Boolean, default=True, nullable=False
    )

    # Optional OCR provenance.
    ocr_engine: Mapped[str | None] = mapped_column(String(100), nullable=True)
    ocr_engine_version: Mapped[str | None] = mapped_column(
        String(100), nullable=True
    )
    rerun_source: Mapped[str | None] = mapped_column(
        String(100), nullable=True
    )

    # Optional quality metadata. The score is stored as NUMERIC(5, 2),
    # i.e. at most five digits with two after the decimal point.
    quality_score: Mapped[Decimal | None] = mapped_column(
        Numeric(5, 2), nullable=True
    )
    quality_flags: Mapped[list | None] = mapped_column(JSON, nullable=True)
    quality_note: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Optional self-referential link to another row of this table.
    derived_from_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("text_versions.id"),
        nullable=True,
    )

    # Filled in Python at insert time with a naive UTC timestamp.
    created_at: Mapped[datetime] = mapped_column(
        DateTime, default=_utcnow, nullable=False
    )

    # Counterpart of the ``text_versions`` relationship on ``Document``.
    document: Mapped["Document"] = relationship(
        back_populates="text_versions"
    )