utility-app/tools/doc_generator/logic/excel_mapper.py

212 lines
6.5 KiB
Python

import json
import re
import shutil
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path
from openpyxl import load_workbook
from tools.doc_generator.logic.calculations import apply_calculations
from tools.doc_generator.logic.core_fields import merge_core_fields
from tools.doc_generator.logic.document_types import get_document_type
BASE_DIR = Path(__file__).resolve().parent.parent
PROJECT_ROOT = BASE_DIR.parent.parent
CONTENT_DIR = BASE_DIR / "content"
EXCEL_MAPS_DIR = CONTENT_DIR / "excel_maps"
TEMPLATES_DIR = CONTENT_DIR / "templates"
EXCEL_TEMPLATE_DIR = TEMPLATES_DIR / "legacy_excel"
OLD_WORD_DOC_GENERATOR_DIR = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
EXPORTS_DIR = PROJECT_ROOT / "exports"
MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
ET.register_namespace("", MAIN_NS)
def safe_filename(value: str) -> str:
value = str(value or "legacy_export").strip()
value = re.sub(r"[^A-Za-z0-9._ -]+", "", value)
value = re.sub(r"\s+", "_", value)
return value or "legacy_export"
def get_excel_map_set_id(document_type_id: str) -> str:
document_type = get_document_type(document_type_id)
return document_type.get("excelMapSet") or document_type_id
def load_excel_map_set(document_type_id: str) -> dict:
map_set_id = get_excel_map_set_id(document_type_id)
path = EXCEL_MAPS_DIR / f"{map_set_id}.json"
if not path.exists():
raise FileNotFoundError(f"Excel map set not found: {path}")
return json.loads(path.read_text(encoding="utf-8"))
def load_excel_maps(document_type_id: str) -> list[dict]:
return load_excel_map_set(document_type_id).get("maps", [])
def load_excel_map(document_type_id: str, map_id: str) -> dict:
for item in load_excel_maps(document_type_id):
if item.get("id") == map_id:
return item
raise FileNotFoundError(f"Excel map not found for {document_type_id}: {map_id}")
def resolve_excel_template(template_name: str) -> Path:
template_name = str(template_name or "").strip()
template_basename = Path(template_name).name
# Prefer explicit profile-library template paths from the map file.
candidate_paths = [
TEMPLATES_DIR / template_name,
CONTENT_DIR / "templates" / template_name,
# Backward-compatible fallbacks.
EXCEL_TEMPLATE_DIR / template_basename,
OLD_WORD_DOC_GENERATOR_DIR / template_basename,
]
# Last-resort recursive lookups.
candidate_paths.extend(sorted(TEMPLATES_DIR.rglob(template_basename)))
candidate_paths.extend(sorted(OLD_WORD_DOC_GENERATOR_DIR.rglob(template_basename)))
template_path = next((candidate for candidate in candidate_paths if candidate.exists()), None)
if template_path is None:
searched = "\n".join(str(candidate) for candidate in candidate_paths[:30])
raise FileNotFoundError(f"Excel template not found: {template_name}. Searched:\n{searched}")
return template_path
def cell_parts(cell_ref: str):
col = "".join(ch for ch in cell_ref if ch.isalpha()).upper()
row = int("".join(ch for ch in cell_ref if ch.isdigit()))
return col, row
def col_number(col: str) -> int:
total = 0
for ch in col:
total = total * 26 + (ord(ch) - ord("A") + 1)
return total
def cell_sort_key(cell_ref: str):
col, row = cell_parts(cell_ref)
return row, col_number(col)
def find_or_create_row(sheet_data, row_number: int):
ns = f"{{{MAIN_NS}}}"
rows = sheet_data.findall(ns + "row")
for row in rows:
if int(row.attrib.get("r", "0")) == row_number:
return row
new_row = ET.Element(ns + "row", {"r": str(row_number)})
inserted = False
for index, row in enumerate(rows):
if int(row.attrib.get("r", "0")) > row_number:
sheet_data.insert(index, new_row)
inserted = True
break
if not inserted:
sheet_data.append(new_row)
return new_row
def find_or_create_cell(row, cell_ref: str):
ns = f"{{{MAIN_NS}}}"
cells = row.findall(ns + "c")
for cell in cells:
if cell.attrib.get("r") == cell_ref:
return cell
new_cell = ET.Element(ns + "c", {"r": cell_ref})
inserted = False
for index, cell in enumerate(cells):
if cell_sort_key(cell.attrib.get("r", "A1")) > cell_sort_key(cell_ref):
row.insert(index, new_cell)
inserted = True
break
if not inserted:
row.append(new_cell)
return new_cell
def set_cell_text(cell, value):
ns = f"{{{MAIN_NS}}}"
for child in list(cell):
cell.remove(child)
cell.attrib.pop("s", None)
cell.attrib["t"] = "inlineStr"
inline = ET.SubElement(cell, ns + "is")
text = ET.SubElement(inline, ns + "t")
text.text = "" if value is None else str(value)
def write_sheet1_values(xlsx_path: Path, output_path: Path, field_to_cell: dict, data: dict):
workbook = load_workbook(xlsx_path)
worksheet = workbook.worksheets[0]
for field, cell_ref in field_to_cell.items():
worksheet[cell_ref] = "" if data.get(field) is None else data.get(field, "")
# Ask Excel-compatible apps to recalculate formulas when the workbook opens.
try:
workbook.calculation.fullCalcOnLoad = True
workbook.calculation.forceFullCalc = True
except Exception:
pass
workbook.save(output_path)
def export_profile_excel(document_type_id: str, data: dict, map_id: str) -> Path:
document_type = get_document_type(document_type_id)
final_data = merge_core_fields(data)
final_data = apply_calculations(document_type, final_data)
excel_map = load_excel_map(document_type_id, map_id)
template_name = excel_map.get("template") or "template.xlsx"
template_path = resolve_excel_template(template_name)
EXPORTS_DIR.mkdir(parents=True, exist_ok=True)
case_number = safe_filename(final_data.get("caseNumber") or "no_case_number")
timestamp = safe_filename(final_data.get("timestamp_YYYY-MM-DD_HH-mm-ss") or "export")
output_path = EXPORTS_DIR / f"{document_type_id}_{map_id}_{case_number}_{timestamp}.xlsx"
write_sheet1_values(
xlsx_path=template_path,
output_path=output_path,
field_to_cell=excel_map.get("fields", {}),
data=final_data,
)
return output_path
# Backward-compatible function name while routes are transitioning.
def export_legacy_excel(document_type_id: str, data: dict, map_id: str = "legacy_datafile") -> Path:
return export_profile_excel(document_type_id, data, map_id)