"""Render DOCX documents from templates by filling ``{placeholder}`` tokens."""

import re
from pathlib import Path

from docx import Document

from tools.doc_generator.logic.core_fields import merge_core_fields
from tools.doc_generator.logic.document_types import get_document_type

# Directory layout, all derived from this file's resolved location.
# BASE_DIR is the parent of the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent.parent
CONTENT_DIR = BASE_DIR / "content"
TEMPLATES_DIR = CONTENT_DIR / "templates"
# PROJECT_ROOT sits two levels above BASE_DIR; generated files go to "exports".
PROJECT_ROOT = BASE_DIR.parent.parent
EXPORTS_DIR = PROJECT_ROOT / "exports"


def safe_filename(value: str) -> str:
    """Return *value* reduced to a conservative filename fragment.

    The value is converted with ``str()`` and stripped, every character
    outside ``A-Z a-z 0-9 . _ space -`` is removed, and each remaining run of
    spaces becomes a single underscore.

    Args:
        value: Any value; ``None`` is treated as missing.

    Returns:
        The sanitised text, or ``"document"`` when nothing usable is left.
    """
    # Only None means "missing". Testing truthiness here would turn legitimate
    # values such as the number 0 into the "document" fallback.
    text = "" if value is None else str(value).strip()
    text = re.sub(r"[^A-Za-z0-9._ -]+", "", text)
    text = re.sub(r"\s+", "_", text)
    return text or "document"


def render_filename(pattern: str, data: dict) -> str:
    """Fill ``{key}`` tokens in *pattern* from *data* and sanitise the result.

    Each value is passed through :func:`safe_filename` before insertion, and
    the finished name is sanitised once more, which also strips the braces of
    any token that had no matching key.

    Args:
        pattern: Filename pattern containing ``{key}`` tokens.
        data: Mapping of string keys to values.

    Returns:
        A sanitised filename (never empty).
    """
    filename = pattern
    for key, value in data.items():
        filename = filename.replace("{" + key + "}", safe_filename(value))
    return safe_filename(filename)


def _fill_placeholders(text: str, data: dict) -> str:
    """Return *text* with each ``{key}`` token replaced by its *data* value.

    Keys are applied one after another in mapping order; ``None`` values are
    rendered as an empty string, everything else via ``str()``.
    """
    for key, value in data.items():
        text = text.replace("{" + key + "}", "" if value is None else str(value))
    return text


def replace_placeholders_in_paragraph(paragraph, data: dict):
    """Replace ``{key}`` tokens in *paragraph* in place.

    Substitution is attempted run by run first, so runs keep their own
    formatting and untouched runs are not rewritten. Only when a token is
    split across several runs is the whole paragraph text written into the
    first run and the remaining runs emptied.

    Args:
        paragraph: A python-docx paragraph object.
        data: Mapping of string keys to replacement values.
    """
    runs = paragraph.runs
    original = "".join(run.text for run in runs)
    rendered = _fill_placeholders(original, data)
    if rendered == original:
        # Nothing to substitute; leave the paragraph untouched.
        return

    # Preferred path: every token lives inside a single run, so substituting
    # each run separately yields exactly the same paragraph text.
    per_run = [_fill_placeholders(run.text, data) for run in runs]
    if "".join(per_run) == rendered:
        for run, text in zip(runs, per_run):
            if text != run.text:
                run.text = text
        return

    # Fallback: at least one token straddles a run boundary. The text changed,
    # so it was non-empty and there is at least one run to write into.
    runs[0].text = rendered
    for run in runs[1:]:
        run.text = ""


def replace_placeholders_in_table(table, data: dict):
    """Replace ``{key}`` tokens in every cell of *table*, including nested tables.

    Args:
        table: A python-docx table object.
        data: Mapping of string keys to replacement values.
    """
    for row in table.rows:
        for cell in row.cells:
            for paragraph in cell.paragraphs:
                replace_placeholders_in_paragraph(paragraph, data)
            for nested_table in cell.tables:
                replace_placeholders_in_table(nested_table, data)


def generate_docx(document_type_id: str, data: dict) -> Path:
    """Render the template of a document type and save it under ``EXPORTS_DIR``.

    Only the document body (top-level paragraphs and tables) is processed;
    headers and footers are not touched. An existing file with the same name
    is overwritten.

    Args:
        document_type_id: Identifier passed to ``get_document_type``.
        data: Placeholder values; passed through ``merge_core_fields`` first.

    Returns:
        Path of the written ``.docx`` file.

    Raises:
        FileNotFoundError: If the template file of the document type is missing.
    """
    data = merge_core_fields(data)
    document_type = get_document_type(document_type_id)

    template_path = TEMPLATES_DIR / document_type["template"]
    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    EXPORTS_DIR.mkdir(parents=True, exist_ok=True)

    output_pattern = document_type.get("outputFilename", f"{document_type_id}.docx")
    output_filename = render_filename(output_pattern, data)
    if not output_filename.lower().endswith(".docx"):
        output_filename += ".docx"
    output_path = EXPORTS_DIR / output_filename

    document = Document(template_path)
    for paragraph in document.paragraphs:
        replace_placeholders_in_paragraph(paragraph, data)
    for table in document.tables:
        replace_placeholders_in_table(table, data)

    document.save(output_path)
    return output_path