"""Review a legacy word-doc-generator app and draft a field profile from it.

The script walks ``OLD_APP`` for JS/HTML/CSS/DOCX/XLSX files, collects
``{placeholder}`` tokens from JS/HTML sources and DOCX templates, looks for
substrings that hint at legacy features, and writes two files into
``OUT_DIR``: a draft JSON profile and a Markdown review report.
"""

import json
import re
import sys
from pathlib import Path

try:
    from docx import Document
except ImportError:
    # python-docx is optional: without it DOCX templates are still listed,
    # but each one reports zero placeholders.
    Document = None

OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
OUT_DIR = Path("diagnostics")

PLACEHOLDER_RE = re.compile(r"\{([A-Za-z0-9_:\-]+)\}")

# Directory names that hold dependencies / VCS data rather than app sources.
SKIPPED_DIR_NAMES = frozenset({"node_modules", ".git"})

# Section attributes python-docx exposes for headers and footers. Looked up
# with getattr() so releases that lack some of them are still supported.
HEADER_FOOTER_ATTRS = (
    "header",
    "footer",
    "first_page_header",
    "first_page_footer",
    "even_page_header",
    "even_page_footer",
)

# Feature label -> substrings whose presence in a source file signals it.
# NOTE: the calendar entry uses ".ics" / "text/calendar" rather than a bare
# "ics", which is a substring of "graphics", "statistics", "topics", ...
FEATURE_TERMS = {
    "DOCX generation": ["docx", "Docxtemplater", "generateDocument", "generateDoc"],
    "Excel generation": ["xlsx", "generateExcel", "template.xlsx"],
    "vCard generation": ["vcard", "vCard", "BEGIN:VCARD"],
    "Calendar / ICS generation": [".ics", "text/calendar", "BEGIN:VCALENDAR", "VEVENT"],
    "Client folder generation": ["generateClientFolder", "client folder"],
    "Settlement calculations": ["settlementPayment", "settlementInstallment", "remainingBalance"],
}

SECTION_ORDER = [
    "Date Fields",
    "Client Information",
    "Client 2 Information",
    "Case Information",
    "Discovery Information",
    "Settlement Information",
    "Fee / Payment Information",
    "Debt Collector Information",
    "Notes",
    "Other Fields",
]

# Sections rendered expanded and non-collapsible in the draft profile.
ALWAYS_OPEN_SECTIONS = frozenset({"Client Information", "Case Information"})


def read_text(path):
    """Return the UTF-8 text of *path*, or ``""`` if it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``), so only OS-level
    failures (missing file, permissions, path is a directory) reach the
    ``except`` clause.
    """
    try:
        return path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ""


def find_files(root, pattern):
    """Return the sorted files under *root* whose name matches *pattern*.

    Files below any directory named in ``SKIPPED_DIR_NAMES`` and Word lock
    files (names starting with ``~$``) are left out so that dependency code
    and editor artefacts do not pollute the review.
    """
    matches = []
    for path in root.rglob(pattern):
        parent_names = path.relative_to(root).parts[:-1]
        if SKIPPED_DIR_NAMES.intersection(parent_names):
            continue
        if path.name.startswith("~$"):
            continue
        matches.append(path)
    return sorted(matches)


def find_placeholders_in_text(text):
    """Return the sorted, de-duplicated ``{name}`` tokens found in *text*."""
    return sorted(set(PLACEHOLDER_RE.findall(text)))


def find_placeholders_in_docx(path):
    """Return the sorted placeholder names found in the DOCX file at *path*.

    Body paragraphs, tables (including nested tables) and every section's
    own headers/footers are scanned. Returns ``[]`` when python-docx is not
    installed or the file cannot be opened as a document.
    """
    if Document is None:
        return []
    try:
        # str() keeps this working with python-docx releases that only
        # special-case string paths.
        doc = Document(str(path))
    except Exception:
        # Best effort on purpose: a corrupt or non-DOCX file must not abort
        # the whole review.
        return []

    found = set()

    def scan_paragraphs(paragraphs):
        for paragraph in paragraphs:
            found.update(find_placeholders_in_text(paragraph.text))

    def scan_table(table):
        for row in table.rows:
            for cell in row.cells:
                scan_paragraphs(cell.paragraphs)
                for nested in cell.tables:
                    scan_table(nested)

    def scan_container(container):
        scan_paragraphs(container.paragraphs)
        for table in container.tables:
            scan_table(table)

    scan_container(doc)

    for section in doc.sections:
        for attr in HEADER_FOOTER_ATTRS:
            part = getattr(section, attr, None)
            # A linked header/footer has no content of its own (it reuses
            # the previous section's), so there is nothing new to scan.
            if part is None or part.is_linked_to_previous:
                continue
            scan_container(part)

    return sorted(found)


def detect_features(text):
    """Return the ``FEATURE_TERMS`` labels with at least one term in *text*."""
    return [
        label
        for label, terms in FEATURE_TERMS.items()
        if any(term in text for term in terms)
    ]


def categorize_field(name):
    """Map a placeholder *name* to the profile section heading it belongs to.

    Matching is case-insensitive and order-sensitive: ``client2...`` is
    tested before the more general ``client...`` prefix.
    """
    lower = name.lower()
    if lower.startswith("client2"):
        return "Client 2 Information"
    if lower.startswith("client") or lower in {"dob", "ssn", "ssnlastfour", "alias", "email"}:
        return "Client Information"
    if lower.startswith("case"):
        return "Case Information"
    if lower.startswith("settlement"):
        return "Settlement Information"
    if lower.startswith(("installment", "fee")) or lower in {
        "nameoncard",
        "cardnumber",
        "securitycode",
        "expiration",
        "billingaddress",
        "billingzip",
    }:
        return "Fee / Payment Information"
    if lower.startswith("debtcollector"):
        return "Debt Collector Information"
    if lower.startswith("disco"):
        return "Discovery Information"
    if lower in {"today", "currentdate", "currentdatemm-dd-yyyy"}:
        return "Date Fields"
    if lower == "notes":
        return "Notes"
    return "Other Fields"


def field_type(name):
    """Guess the form input type for *name* from substrings of its lowercase form.

    Checks run in priority order: textarea, date, email, tel, then ``"text"``.
    """
    lower = name.lower()
    if "notes" in lower or "appearanceinfo" in lower or "paymentoptions" in lower:
        return "textarea"
    if "date" in lower or lower in {"dob"}:
        return "date"
    if "email" in lower:
        return "email"
    if "phone" in lower or "fax" in lower:
        return "tel"
    return "text"


def make_label(name):
    """Turn a placeholder name into a label: split camelCase, drop underscores, title-case."""
    spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    return spaced.replace("_", " ").strip().title()


def make_sections(fields):
    """Group *fields* into profile sections, emitted in ``SECTION_ORDER``.

    Headings with no fields are omitted; fields inside a section are sorted
    by name. Each field entry carries ``name``, ``label``, ``type`` and
    ``required`` (always ``False``).
    """
    grouped = {}
    for name in fields:
        grouped.setdefault(categorize_field(name), []).append(name)

    sections = []
    for heading in SECTION_ORDER:
        names = grouped.get(heading)
        if not names:
            continue
        always_open = heading in ALWAYS_OPEN_SECTIONS
        sections.append({
            "heading": heading,
            "collapsible": not always_open,
            "defaultOpen": always_open,
            "fields": [
                {
                    "name": name,
                    "label": make_label(name),
                    "type": field_type(name),
                    "required": False,
                }
                for name in sorted(names)
            ],
        })
    return sections


def build_report(file_counts, function_hits, template_rows, all_fields, profile_path):
    """Return the Markdown review report as a list of lines.

    *file_counts* is a list of ``(label, count)`` pairs, *function_hits* a
    list of ``(feature_label, relative_path)`` pairs, and *template_rows*
    the per-template dicts that are also stored in the profile.
    """
    report = [
        "# Legacy Word Doc Generator Review",
        "",
        f"Source app: `{OLD_APP}`",
        "",
        "## Files Found",
        "",
    ]
    report.extend(f"- {label}: {count}" for label, count in file_counts)
    report.append("")

    report.append("## Legacy Features Detected")
    report.append("")
    if function_hits:
        # Each (label, file) pair occurs at most once: detect_features()
        # yields a label once per file and every file is visited once.
        for label, rel in function_hits:
            report.append(f"- {label}: `{rel}`")
    else:
        report.append("- No major legacy feature signatures detected.")
    report.append("")

    report.append("## Templates Found")
    report.append("")
    if template_rows:
        for row in template_rows:
            report.append(f"### `{row['template']}`")
            report.append(f"- Placeholder count: {row['placeholder_count']}")
            if row["placeholders"]:
                report.append("- Placeholders:")
                for name in row["placeholders"]:
                    # Two-space indent so the item nests under "- Placeholders:".
                    report.append(f"  - `{{{name}}}`")
            report.append("")
    else:
        report.append("- No DOCX templates found.")
        report.append("")

    report.append("## All Fields Detected")
    report.append("")
    for name in all_fields:
        report.append(f"- `{{{name}}}`")
    report.append("")

    report.append("## Draft Profile")
    report.append("")
    report.append(f"Generated: `{profile_path}`")
    report.append("")
    return report


def main():
    """Scan ``OLD_APP`` and write the draft profile and review report to ``OUT_DIR``."""
    if not OLD_APP.is_dir():
        # Keep going so the outputs are still produced, but make it obvious
        # why they are empty.
        print(f"Warning: source app directory not found: {OLD_APP}", file=sys.stderr)

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    js_files = find_files(OLD_APP, "*.js")
    html_files = find_files(OLD_APP, "*.html")
    css_files = find_files(OLD_APP, "*.css")
    docx_files = find_files(OLD_APP, "*.docx")
    xlsx_files = find_files(OLD_APP, "*.xlsx")

    all_text_placeholders = set()
    function_hits = []
    for path in js_files + html_files:
        text = read_text(path)
        all_text_placeholders.update(find_placeholders_in_text(text))
        rel = str(path.relative_to(OLD_APP))
        function_hits.extend((label, rel) for label in detect_features(text))

    template_rows = []
    all_template_placeholders = set()
    for path in docx_files:
        placeholders = find_placeholders_in_docx(path)
        all_template_placeholders.update(placeholders)
        template_rows.append({
            "template": str(path.relative_to(OLD_APP)),
            "placeholder_count": len(placeholders),
            "placeholders": placeholders,
        })

    all_fields = sorted(all_text_placeholders | all_template_placeholders)

    profile = {
        "id": "legacy_word_doc_generator",
        "name": "Legacy Word Doc Generator Profile",
        "description": "Draft profile generated from the legacy word-doc-generator app.",
        "template": "REPLACE_WITH_SELECTED_TEMPLATE.docx",
        "outputFilename": "legacy_document_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
        "sourceApp": str(OLD_APP),
        "sections": make_sections(all_fields),
        "legacyFeatures": sorted({label for label, _ in function_hits}),
        "templatesFound": template_rows,
    }

    profile_path = OUT_DIR / "legacy_word_doc_generator_profile_draft.json"
    profile_path.write_text(json.dumps(profile, indent=2), encoding="utf-8")

    file_counts = [
        ("JS files", len(js_files)),
        ("HTML files", len(html_files)),
        ("CSS files", len(css_files)),
        ("DOCX templates", len(docx_files)),
        ("XLSX files", len(xlsx_files)),
    ]
    report = build_report(file_counts, function_hits, template_rows, all_fields, profile_path)

    report_path = OUT_DIR / "legacy_word_doc_generator_review.md"
    report_path.write_text("\n".join(report), encoding="utf-8")

    print(f"Wrote {report_path}")
    print(f"Wrote {profile_path}")
    print(f"Detected {len(all_fields)} unique fields/placeholders")


if __name__ == "__main__":
    main()