import json import re import subprocess from pathlib import Path OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator") OLD_PUBLIC = OLD_APP / "public" OLD_HTML = OLD_PUBLIC / "index.html" OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json") TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy") TAG_RE = re.compile(r'<(input|select|textarea)\b[^>]*>', re.IGNORECASE | re.DOTALL) ATTR_RE = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)=["\']([^"\']*)["\']') LABEL_RE = re.compile( r']*for=["\']([^"\']+)["\'][^>]*>(.*?)', re.IGNORECASE | re.DOTALL ) EXCLUDE_FIELD_NAMES = { "letterTemplateFile", "discoTemplateFile", "excelFile", "csvFile", "templateFile", "file", "SSNLastFour", "SSN2LastFour", "caseAccLastFour", "casePlaintiffFileName", "caseAnswerDateString", "caseAnswerDateYYYY-MM-DD", "caseAnswerDateYyyyMmDd", "caseAnswerFiledDateString", "caseFilingDateString", "caseDispositionDateString", "discoCosDateString", "discoResponseCosDateString", } EXCLUDE_PATTERNS = [ r"^settlementPaymentDate\d{2}$", r"^settlementPaymentAmount\d{2}$", r"^settlementRemaingBalance\d{2}$", r"^settlementRemainingBalance\d{2}$", r"^debtCollector\d+AccLastFour$", ] def attrs_from_tag(tag): return dict(ATTR_RE.findall(tag)) def clean_html_label(value): value = re.sub(r"<[^>]+>", "", value) value = value.replace(":", "") value = re.sub(r"\s+", " ", value).strip() return value def nice_label(name): label = re.sub(r"([a-z])([A-Z])", r"\1 \2", name) label = label.replace("_", " ").replace("-", " ") label = label.title() label = label.replace("Ssn", "SSN") label = label.replace("Dob", "DOB") label = label.replace("Mm Dd Yyyy", "MM DD YYYY") return label def should_exclude(name): if not name: return True if name in EXCLUDE_FIELD_NAMES: return True if name.endswith("TemplateFile"): return True return any(re.match(pattern, name) for pattern in EXCLUDE_PATTERNS) def run_node_list_extractor(): js = f""" import {{ pathToFileURL }} from 'url'; const files = [ 'casePlaintiffInfo.js', 'opposingCounselInfo.js', 'judgeInfo.js', 'caseFilingAttorneyInfo.js', 'filingAttorneyInfo.js', 'debtCollectorInfo.js' ]; const base = {json.dumps(str(OLD_PUBLIC))}; const result = {{}}; for (const file of files) {{ try {{ const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href); for (const [exportName, value] of Object.entries(mod)) {{ if (value && typeof value === 'object' && !Array.isArray(value)) {{ result[exportName] = Object.keys(value).sort(); }} }} }} catch (err) {{}} }} console.log(JSON.stringify(result)); """ try: completed = subprocess.run( ["node", "--input-type=module", "-e", js], check=True, capture_output=True, text=True, ) return json.loads(completed.stdout) except Exception: return {} def field_type(name, tag_name, attrs): lower = name.lower() if tag_name.lower() == "textarea": return "textarea" html_type = attrs.get("type", "").lower() if html_type in {"date", "email", "tel", "number"}: return html_type if "date" in lower or lower == "dob": return "date" if "email" in lower: return "email" if "phone" in lower or "fax" in lower: return "tel" if list_name_for_field(name): return "autocomplete" return "text" def list_name_for_field(name): if name == "casePlaintiff": return "plaintiffs" if name == "caseOpposingCounsel": return "opposingCounsel" if name == "caseDivisionJudge": return "judges" if name == "caseFilingAttorney": return "filingAttorneys" if name in {"caseState", "homeState", "client2homeState"}: return "states" if name == "caseDesignation": return "caseDesignations" if re.fullmatch(r"debtCollector\d+Name", name): return "debtCollectors" return None def section_for(name): lower = name.lower() if lower.startswith("client2"): return "Client 2 Information" if lower.startswith("client") or lower in { "ssn", "dob", "alias", "email", "homeaddress", "homecity", "homestate", "homezip", "homecounty", "homephone", "cellphone" }: return "Client Information" if lower.startswith("case"): return "Case Information" if lower.startswith("disco"): return "Discovery Information" if lower.startswith("settlement"): return "Settlement Information" if lower.startswith("installment") or lower.startswith("fee") or lower in { "nameoncard", "cardnumber", "securitycode", "expiration", "billingaddress", "billingzip" }: return "Fee / Payment Information" if lower.startswith("debtcollector") or name == "numCollectors": return "Debt Collector Information" if lower == "notes": return "Notes" return "Other Fields" def discover_templates(): templates = [] for path in sorted(TEMPLATES_OUT.rglob("*.docx")): rel = path.relative_to(Path("tools/doc_generator/content/templates")).as_posix() template_id = re.sub(r"[^a-zA-Z0-9]+", "_", path.stem).strip("_").lower() label = path.relative_to(TEMPLATES_OUT).as_posix() label = label.replace(".docx", "") label = label.replace("/", " / ") label = label.replace("_", " ") templates.append({ "id": template_id, "label": label, "template": rel, "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx" }) return templates html = OLD_HTML.read_text(encoding="utf-8", errors="ignore") labels = { field_id: clean_html_label(label) for field_id, label in LABEL_RE.findall(html) } fields_seen = [] field_meta = {} for match in TAG_RE.finditer(html): tag_name = match.group(1) tag = match.group(0) attrs = attrs_from_tag(tag) name = attrs.get("name") or attrs.get("id") if should_exclude(name): continue if name not in fields_seen: fields_seen.append(name) field_meta[name] = (tag_name, attrs) grouped = {} for name in fields_seen: tag_name, attrs = field_meta[name] ftype = field_type(name, tag_name, attrs) field = { "name": name, "label": labels.get(name) or nice_label(name), "type": ftype, "required": False } list_name = list_name_for_field(name) if list_name: field["list"] = list_name grouped.setdefault(section_for(name), []).append(field) preferred_order = [ "Client Information", "Client 2 Information", "Case Information", "Discovery Information", "Settlement Information", "Fee / Payment Information", "Debt Collector Information", "Notes", "Other Fields", ] sections = [] for heading in preferred_order: fields = grouped.get(heading) if not fields: continue sections.append({ "heading": heading, "collapsible": heading not in {"Client Information", "Case Information"}, "defaultOpen": heading in {"Client Information", "Case Information"}, "fields": fields }) lists_raw = run_node_list_extractor() lists = { "plaintiffs": lists_raw.get("casePlaintiffInfo", []), "opposingCounsel": lists_raw.get("caseOpposingCounselInfo", []) or lists_raw.get("opposingCounselInfo", []), "judges": lists_raw.get("judgeInfo", []), "filingAttorneys": lists_raw.get("caseFilingAttorneyInfo", []) or lists_raw.get("filingAttorneyInfo", []), "debtCollectors": lists_raw.get("debtCollectorInfo", []), "states": ["MO", "KS"], "caseDesignations": [ "Associate Circuit", "Circuit", "Limited Actions", "Small Claims" ] } templates = discover_templates() profile = { "id": "legal_profile", "name": "Legal Profile", "description": "Consumer debt defense legal profile based on the legacy app form fields. Additional template fields are calculated at generation time.", "template": templates[0]["template"] if templates else "legacy/Canned-Emails.docx", "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx", "lists": lists, "templates": templates, "calculations": [ { "script": "legacy_legal", "runOn": "generate", "description": "Generate old-template compatible calculated fields.", "outputsDynamic": { "settlementSchedule": { "countField": "settlementInstallmentNo", "indexFormat": "decimal2", "maxCount": 120, "fields": [ "settlementPaymentDate", "settlementPaymentAmount", "settlementRemaingBalance", "settlementRemainingBalance" ] } } } ], "sections": sections } OUT_PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8") print(f"Wrote {OUT_PROFILE}") print(f"Visible HTML fields: {len(fields_seen)}") for section in sections: print(f"- {section['heading']}: {len(section['fields'])}")