utility-app/scripts/review-old-word-doc-generat...

252 lines
7.5 KiB
Python

import json
import re
from pathlib import Path
# Matches single-brace placeholders such as {clientName}; group 1 is the name
# (letters, digits, underscore, colon, hyphen).
PLACEHOLDER_RE = re.compile(r"\{([A-Za-z0-9_:\-]+)\}")

# Root directory of the legacy application that is scanned.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")

# Output directory for the generated report and draft profile (relative to the
# current working directory); created eagerly so later writes cannot fail on a
# missing folder.
OUT_DIR = Path("diagnostics")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# python-docx is optional: when it cannot be imported, Document is set to None
# and callers must check for that before using it.
try:
    from docx import Document
except Exception:
    Document = None
def read_text(path):
    """Return the contents of *path* decoded as UTF-8, or "" on any failure.

    Undecodable bytes are dropped (``errors="ignore"``). Any exception raised
    while reading (missing file, directory, permission problem, ...) results
    in an empty string instead of propagating.
    """
    content = ""
    try:
        content = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        # Best-effort read: an unreadable file is treated as empty.
        pass
    return content
def find_placeholders_in_text(text):
    """Return the sorted, de-duplicated placeholder names found in *text*.

    A placeholder is whatever PLACEHOLDER_RE matches; only the captured name
    (without the surrounding braces) is returned.
    """
    unique_names = {match.group(1) for match in PLACEHOLDER_RE.finditer(text)}
    return sorted(unique_names)
def find_placeholders_in_docx(path):
    """Return the sorted placeholder names found in the .docx file at *path*.

    Scans body paragraphs and tables (including nested tables) and, in
    addition, the header/footer variants of every section, so placeholders
    that only appear in a header or footer are no longer missed.

    Returns an empty list when python-docx is unavailable (``Document`` is
    None) or when the file cannot be opened as a document.
    """
    if Document is None:
        return []
    try:
        # python-docx documents a str path or a file-like object as input, so
        # Path objects are converted; anything else is passed through as-is.
        doc = Document(str(path) if isinstance(path, Path) else path)
    except Exception:
        return []

    found = set()

    def scan_paragraphs(paragraphs):
        for paragraph in paragraphs:
            found.update(find_placeholders_in_text(paragraph.text))

    def scan_table(table):
        for row in table.rows:
            for cell in row.cells:
                scan_paragraphs(cell.paragraphs)
                for nested in cell.tables:
                    scan_table(nested)

    def scan_container(container):
        # Works for anything exposing .paragraphs and .tables
        # (the document body, a header, or a footer).
        scan_paragraphs(container.paragraphs)
        for table in container.tables:
            scan_table(table)

    scan_container(doc)

    header_footer_attrs = (
        "header",
        "footer",
        "first_page_header",
        "first_page_footer",
        "even_page_header",
        "even_page_footer",
    )
    try:
        for section in doc.sections:
            for attr in header_footer_attrs:
                # getattr guards against python-docx versions that lack
                # some of these section attributes.
                part = getattr(section, attr, None)
                if part is None:
                    continue
                # A linked header/footer has no content of its own (it
                # inherits from the previous section), so there is nothing
                # new to scan.
                if getattr(part, "is_linked_to_previous", False):
                    continue
                scan_container(part)
    except Exception:
        # Header/footer scanning is best-effort, like opening the file:
        # keep whatever was collected instead of aborting the whole run.
        pass

    return sorted(found)
def categorize_field(name):
    """Return the section heading that the field *name* belongs to.

    Matching is case-insensitive. Rules are tried in order and the first
    match wins (so ``client2...`` is tested before ``client...``). A rule
    matches when the lower-cased name starts with one of its prefixes or is
    exactly one of its listed names. Unmatched names fall into
    "Other Fields".
    """
    # (prefixes, exact names, heading) -- order is significant.
    rules = (
        (("client2",), frozenset(), "Client 2 Information"),
        (
            ("client",),
            frozenset({"dob", "ssn", "ssnlastfour", "alias", "email"}),
            "Client Information",
        ),
        (("case",), frozenset(), "Case Information"),
        (("settlement",), frozenset(), "Settlement Information"),
        (
            ("installment", "fee"),
            frozenset({
                "nameoncard",
                "cardnumber",
                "securitycode",
                "expiration",
                "billingaddress",
                "billingzip",
            }),
            "Fee / Payment Information",
        ),
        (("debtcollector",), frozenset(), "Debt Collector Information"),
        (("disco",), frozenset(), "Discovery Information"),
        (
            (),
            frozenset({"today", "currentdate", "currentdatemm-dd-yyyy"}),
            "Date Fields",
        ),
        ((), frozenset({"notes"}), "Notes"),
    )

    key = name.lower()
    for prefixes, exact_names, heading in rules:
        # str.startswith with an empty tuple is always False.
        if key in exact_names or key.startswith(prefixes):
            return heading
    return "Other Fields"
def field_type(name):
    """Return the form input type for the field *name*.

    Matching is case-insensitive and based on substrings, tried in this
    order: "textarea" (notes / appearanceinfo / paymentoptions), "date"
    (contains "date", or the name is exactly "dob"), "email", "tel"
    (phone / fax). Anything else is "text".
    """
    key = name.lower()
    # "dob" contains none of the textarea substrings, so handling it up front
    # gives the same answer as testing it at the "date" step.
    if key == "dob":
        return "date"

    # (type, substrings) -- order is significant, first match wins.
    substring_rules = (
        ("textarea", ("notes", "appearanceinfo", "paymentoptions")),
        ("date", ("date",)),
        ("email", ("email",)),
        ("tel", ("phone", "fax")),
    )
    for kind, needles in substring_rules:
        for needle in needles:
            if needle in key:
                return kind
    return "text"
def make_sections(fields):
    """Group *fields* into profile sections.

    Each field name is assigned a heading via ``categorize_field``. Sections
    are emitted in a fixed heading order, skipping headings with no fields.
    "Client Information" and "Case Information" are non-collapsible and open
    by default; every other section is collapsible and closed. Within a
    section the fields are sorted by name, and each one carries a
    human-readable label, an input type from ``field_type`` and
    ``required: False``.
    """
    always_open = {"Client Information", "Case Information"}
    heading_order = (
        "Date Fields",
        "Client Information",
        "Client 2 Information",
        "Case Information",
        "Discovery Information",
        "Settlement Information",
        "Fee / Payment Information",
        "Debt Collector Information",
        "Notes",
        "Other Fields",
    )

    by_heading = {}
    for field_name in fields:
        heading = categorize_field(field_name)
        if heading not in by_heading:
            by_heading[heading] = []
        by_heading[heading].append(field_name)

    def describe(field_name):
        # camelCase -> "camel Case", underscores -> spaces, then Title Case.
        spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", field_name)
        return {
            "name": field_name,
            "label": spaced.replace("_", " ").strip().title(),
            "type": field_type(field_name),
            "required": False,
        }

    sections = []
    for heading in heading_order:
        members = by_heading.get(heading)
        if members:
            expanded = heading in always_open
            sections.append({
                "heading": heading,
                "collapsible": not expanded,
                "defaultOpen": expanded,
                "fields": [describe(member) for member in sorted(members)],
            })
    return sections
# Recursively collect the legacy app's files by extension, each list sorted
# by path.
js_files, html_files, css_files, docx_files, xlsx_files = (
    sorted(OLD_APP.rglob(pattern))
    for pattern in ("*.js", "*.html", "*.css", "*.docx", "*.xlsx")
)
# Placeholder names seen in any JS/HTML source file.
all_text_placeholders = set()
# (feature label, file path relative to OLD_APP) for every detected feature.
function_hits = []
# Case-sensitive search terms that indicate each legacy feature.
function_terms = {
    "DOCX generation": ["docx", "Docxtemplater", "generateDocument", "generateDoc"],
    "Excel generation": ["xlsx", "generateExcel", "template.xlsx"],
    "vCard generation": ["vcard", "vCard", "BEGIN:VCARD"],
    "Calendar / ICS generation": ["ics", "BEGIN:VCALENDAR", "VEVENT"],
    "Client folder generation": ["generateClientFolder", "client folder"],
    "Settlement calculations": ["settlementPayment", "settlementInstallment", "remainingBalance"],
}

# Terms short enough to occur inside unrelated words. As a raw substring,
# "ics" matches "analytics", "graphics", "metrics", ... and would falsely
# report calendar support, so these terms must match as whole words
# (".ics", "ics.createEvent" and "'ics'" still match).
whole_word_terms = {"ics"}


def _term_pattern(term):
    """Compile *term* into a case-sensitive literal search pattern."""
    escaped = re.escape(term)
    if term in whole_word_terms:
        return re.compile(rf"\b{escaped}\b")
    return re.compile(escaped)


# Compile once, outside the per-file loop.
function_patterns = {
    label: [_term_pattern(term) for term in terms]
    for label, terms in function_terms.items()
}

for path in js_files + html_files:
    text = read_text(path)
    all_text_placeholders.update(find_placeholders_in_text(text))
    rel = str(path.relative_to(OLD_APP))
    for label, patterns in function_patterns.items():
        if any(pattern.search(text) for pattern in patterns):
            function_hits.append((label, rel))
# One summary row per DOCX template, plus the union of all placeholder names
# found in any template.
template_rows = []
all_template_placeholders = set()
for docx_path in docx_files:
    template_names = find_placeholders_in_docx(docx_path)
    all_template_placeholders |= set(template_names)
    template_rows.append(
        {
            "template": str(docx_path.relative_to(OLD_APP)),
            "placeholder_count": len(template_names),
            "placeholders": template_names,
        }
    )
# Every placeholder seen in source text or in a template, sorted by name.
all_fields = sorted(all_text_placeholders.union(all_template_placeholders))

# Draft profile describing the legacy app; written as pretty-printed JSON.
profile = {
    "id": "legacy_word_doc_generator",
    "name": "Legacy Word Doc Generator Profile",
    "description": "Draft profile generated from the legacy word-doc-generator app.",
    "template": "REPLACE_WITH_SELECTED_TEMPLATE.docx",
    "outputFilename": "legacy_document_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "sourceApp": str(OLD_APP),
    "sections": make_sections(all_fields),
    # Distinct feature labels, regardless of how many files matched each one.
    "legacyFeatures": sorted({feature for feature, _ in function_hits}),
    "templatesFound": template_rows,
}
profile_path = OUT_DIR / "legacy_word_doc_generator_profile_draft.json"
profile_json = json.dumps(profile, indent=2)
profile_path.write_text(profile_json, encoding="utf-8")
# Markdown report, accumulated as a list of lines and joined when written.
report = [
    "# Legacy Word Doc Generator Review",
    "",
    f"Source app: `{OLD_APP}`",
    "",
    "## Files Found",
    "",
    f"- JS files: {len(js_files)}",
    f"- HTML files: {len(html_files)}",
    f"- CSS files: {len(css_files)}",
    f"- DOCX templates: {len(docx_files)}",
    f"- XLSX files: {len(xlsx_files)}",
    "",
]
# Report section: one bullet per distinct (feature, file) hit, in the order
# the hits were recorded.
report.extend(["## Legacy Features Detected", ""])
if not function_hits:
    report.append("- No major legacy feature signatures detected.")
else:
    # dict.fromkeys drops duplicate hits while preserving first-seen order.
    for feature, rel_path in dict.fromkeys(function_hits):
        report.append(f"- {feature}: `{rel_path}`")
report.append("")
# Report section: a sub-heading per DOCX template with its placeholder count
# and placeholder list, each followed by a blank line.
report += ["## Templates Found", ""]
if not template_rows:
    report += ["- No DOCX templates found.", ""]
for entry in template_rows:
    report.append(f"### `{entry['template']}`")
    report.append(f"- Placeholder count: {entry['placeholder_count']}")
    if entry["placeholders"]:
        report.append("- Placeholders:")
        report.extend(
            f" - `{{{placeholder}}}`" for placeholder in entry["placeholders"]
        )
    report.append("")
# Report section: every detected field, then a pointer to the draft profile.
report.extend(["## All Fields Detected", ""])
report.extend(f"- `{{{field_name}}}`" for field_name in all_fields)
report.extend(
    [
        "",
        "## Draft Profile",
        "",
        f"Generated: `{profile_path}`",
        "",
    ]
)

# Write the Markdown report next to the draft profile and print a summary.
report_path = OUT_DIR / "legacy_word_doc_generator_review.md"
report_markdown = "\n".join(report)
report_path.write_text(report_markdown, encoding="utf-8")
for written_path in (report_path, profile_path):
    print(f"Wrote {written_path}")
print(f"Detected {len(all_fields)} unique fields/placeholders")