Improve replica text fitting and baseline alignment

This commit is contained in:
Sean McElwain 2026-05-24 21:36:30 -05:00
parent 09c2fcda5f
commit 01e081d45a
1 changed file with 123 additions and 13 deletions

View File

@ -66,6 +66,7 @@ from pypdf import PdfReader, PdfWriter
from reportlab.lib.utils import ImageReader
from reportlab.pdfbase.pdfmetrics import stringWidth
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from sqlalchemy import func
from sqlalchemy.orm import Session
@ -365,6 +366,53 @@ def _fit_font_size_for_bbox_text(text: str, box_width: float, box_height: float)
return min(approx, width_limited, box_height * 0.9)
def _safe_pdf_font_name(font_name: str | None) -> str:
candidate = (font_name or "Helvetica").strip()
try:
pdfmetrics.getFont(candidate)
return candidate
except Exception:
return "Helvetica"
def _font_size_for_box(text: str, font_name: str, box_width: float, box_height: float, saved_size: float | None = None) -> float:
    """Pick the font size used to draw ``text`` inside its detected box.

    The geometry-derived size from ``_fit_font_size_for_bbox_text`` acts as a
    ceiling: a positive ``saved_size`` is honoured only up to that ceiling.
    The result is never smaller than 1.0. ``font_name`` is accepted for
    signature parity with the other box helpers but is not consulted here.
    """
    geometry_size = float(_fit_font_size_for_bbox_text(text, box_width, box_height))
    honour_saved = bool(saved_size) and saved_size > 0
    # A saved editor size may shrink the text but can never outgrow the box fit.
    chosen = min(float(saved_size), geometry_size) if honour_saved else geometry_size
    return max(1.0, chosen)
def _baseline_for_box(font_name: str, font_size: float, pdf_box_bottom: float, box_height: float) -> float:
try:
ascent, descent = pdfmetrics.getAscentDescent(font_name, font_size)
except Exception:
ascent, descent = font_size * 0.718, -font_size * 0.207
glyph_height = ascent - descent
vertical_pad = max(0.0, (box_height - glyph_height) / 2.0)
return pdf_box_bottom + vertical_pad - descent
def _horizontal_scale_for_box(text: str, font_name: str, font_size: float, box_width: float) -> float:
try:
rendered_width = pdfmetrics.stringWidth(text, font_name, font_size)
except Exception:
rendered_width = 0
if rendered_width <= 0:
return 100.0
if rendered_width <= box_width:
return 100.0
# Compress long text to fit the detected box, but do not collapse it into unreadability.
return max(35.0, min(100.0, (box_width / rendered_width) * 100.0))
def _build_word_entries_for_page(page_layout: dict, page_h: float) -> list[dict]:
entries = []
for word in page_layout.get("words", []) or []:
@ -384,24 +432,28 @@ def _build_word_entries_for_page(page_layout: dict, page_h: float) -> list[dict]
box_width = max(1.0, right - left)
box_height = max(1.0, bottom - top)
source_font_size = word.get("font_size_guess")
try:
font_size = float(source_font_size)
except (TypeError, ValueError):
font_size = _fit_font_size_for_bbox_text(word_text, box_width, box_height)
font_name = _safe_pdf_font_name(word.get("font_family_guess") or "Helvetica")
if font_size <= 0:
font_size = _fit_font_size_for_bbox_text(word_text, box_width, box_height)
try:
saved_font_size = float(word.get("font_size_guess"))
except (TypeError, ValueError):
saved_font_size = None
font_size = _font_size_for_box(word_text, font_name, box_width, box_height, saved_font_size)
pdf_box_bottom = page_h - bottom
baseline_y = _baseline_for_box(font_name, font_size, pdf_box_bottom, box_height)
horizontal_scale = _horizontal_scale_for_box(word_text, font_name, font_size, box_width)
entries.append(
{
"text": word_text,
"pdf_x": left,
"pdf_y": page_h - bottom,
"pdf_y": baseline_y,
"box_width": box_width,
"box_height": box_height,
"font_family_guess": word.get("font_family_guess") or "Helvetica",
"font_family_guess": font_name,
"font_size_guess": font_size,
"horizontal_scale": horizontal_scale,
"text_color_guess": word.get("text_color_guess") or "#000000",
"text_render_mode_clean": word.get("text_render_mode_clean", 0),
"text_render_mode_scan_backed": word.get("text_render_mode_scan_backed", 3),
@ -411,6 +463,55 @@ def _build_word_entries_for_page(page_layout: dict, page_h: float) -> list[dict]
return entries
def _build_line_entries_for_page(page_layout: dict, page_h: float) -> list[dict]:
entries = []
for line in page_layout.get("lines", []) or []:
text_line = (line.get("text") or "").strip()
bbox = line.get("bbox")
if not text_line or not bbox or not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
continue
try:
left, top, right, bottom = [float(v) for v in bbox]
except (TypeError, ValueError):
continue
if right <= left or bottom <= top:
continue
box_width = max(1.0, right - left)
box_height = max(1.0, bottom - top)
font_name = _safe_pdf_font_name(line.get("font_family_guess") or "Helvetica")
try:
saved_font_size = float(line.get("font_size_guess"))
except (TypeError, ValueError):
saved_font_size = None
font_size = _font_size_for_box(text_line, font_name, box_width, box_height, saved_font_size)
baseline_y = _baseline_for_box(font_name, font_size, page_h - bottom, box_height)
horizontal_scale = _horizontal_scale_for_box(text_line, font_name, font_size, box_width)
entries.append({
"text": text_line,
"pdf_x": left,
"pdf_y": baseline_y,
"box_width": box_width,
"box_height": box_height,
"font_family_guess": font_name,
"font_size_guess": font_size,
"horizontal_scale": horizontal_scale,
"text_color_guess": line.get("text_color_guess") or "#000000",
"text_render_mode_clean": line.get("text_render_mode_clean", 0),
"text_render_mode_scan_backed": line.get("text_render_mode_scan_backed", 3),
"bbox_source": [left, top, right, bottom],
})
return entries
def _page_layout_line_entries(page_layout: dict) -> list[dict]:
region_lines = []
for region in page_layout.get("regions", []) or []:
@ -1056,9 +1157,11 @@ def _render_replica_pdf_from_layout(
page_layout = pages.get(page_num, {"lines": []})
render_entries = []
if page_layout.get("words"):
if page_layout.get("lines"):
render_entries = _build_line_entries_for_page(page_layout, page_h)
if not render_entries and page_layout.get("words"):
render_entries = _build_word_entries_for_page(page_layout, page_h)
else:
if not render_entries:
render_entries = _page_layout_line_entries(page_layout)
for line in render_entries:
@ -1072,8 +1175,15 @@ def _render_replica_pdf_from_layout(
else:
text_obj.setTextRenderMode(0)
text_obj.setFont(line.get("font_family_guess") or "Helvetica", float(line.get("font_size_guess") or 10))
text_obj.setTextOrigin(float(line["pdf_x"]), float(line["pdf_y"]) + 1)
font_size = float(line.get("font_size_guess") or 10)
font_name = _safe_pdf_font_name(line.get("font_family_guess") or "Helvetica")
text_obj.setFont(font_name, font_size)
horizontal_scale = float(line.get("horizontal_scale") or 100.0)
if horizontal_scale != 100.0:
text_obj.setHorizScale(horizontal_scale)
text_obj.setTextOrigin(float(line["pdf_x"]), float(line["pdf_y"]))
if mode == "debug_overlay":
c.setStrokeColorRGB(1, 0, 0)