utility-app/scripts/build-legal-profile-from-ol...

338 lines
9.3 KiB
Python

import json
import re
import subprocess
from pathlib import Path
# Location of the old word-doc-generator app whose form markup is read below.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
OLD_PUBLIC = OLD_APP.joinpath("public")
OLD_HTML = OLD_PUBLIC.joinpath("index.html")

# Where the generated profile is written and where .docx templates are scanned
# for; both are relative to the current working directory.
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")

# Opening tag of a form control; group 1 is the tag name.
TAG_RE = re.compile(r'<(input|select|textarea)\b[^>]*>', re.I | re.S)

# One quoted ``name="value"`` attribute; groups are (name, value).
ATTR_RE = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)=["\']([^"\']*)["\']')

# A <label for="..."> element; groups are (for-target, inner markup).
LABEL_RE = re.compile(
    r'<label[^>]*for=["\']([^"\']+)["\'][^>]*>(.*?)</label>',
    re.I | re.S,
)

# Exact field names that should_exclude() always rejects.
# NOTE(review): these look like upload controls and derived values -- confirm.
EXCLUDE_FIELD_NAMES = {
    "letterTemplateFile",
    "discoTemplateFile",
    "excelFile",
    "csvFile",
    "templateFile",
    "file",
    "SSNLastFour",
    "SSN2LastFour",
    "caseAccLastFour",
    "casePlaintiffFileName",
    "caseAnswerDateString",
    "caseAnswerDateYYYY-MM-DD",
    "caseAnswerDateYyyyMmDd",
    "caseAnswerFiledDateString",
    "caseFilingDateString",
    "caseDispositionDateString",
    "discoCosDateString",
    "discoResponseCosDateString",
}

# Regexes (applied with re.match) for numbered field names that are rejected
# as well; both spellings of "Remain(in)gBalance" are listed on purpose.
EXCLUDE_PATTERNS = [
    r"^settlementPaymentDate\d{2}$",
    r"^settlementPaymentAmount\d{2}$",
    r"^settlementRemaingBalance\d{2}$",
    r"^settlementRemainingBalance\d{2}$",
    r"^debtCollector\d+AccLastFour$",
]
def attrs_from_tag(tag):
    """Collect the quoted attributes of an HTML tag string into a dict.

    Only attributes matched by ``ATTR_RE`` are returned; when an attribute
    name occurs more than once, the last value is kept.
    """
    attributes = {}
    for attr_name, attr_value in ATTR_RE.findall(tag):
        attributes[attr_name] = attr_value
    return attributes
def clean_html_label(value):
    """Reduce the inner markup of a ``<label>`` to plain caption text.

    Nested tags are removed, HTML character references such as ``&amp;`` or
    ``&nbsp;`` are decoded, every colon is dropped, and runs of whitespace are
    collapsed to single spaces.

    Args:
        value: Raw markup found between ``<label ...>`` and ``</label>``.

    Returns:
        The cleaned caption with no leading or trailing whitespace.
    """
    # Imported locally because the module-level name ``html`` is rebound to
    # the page text further down in this script.
    from html import unescape

    text = re.sub(r"<[^>]+>", "", value)
    # Decode entities only after tag stripping so that an escaped "&lt;b&gt;"
    # is kept as literal text instead of being removed as a tag.
    text = unescape(text)
    text = text.replace(":", "")
    # \s also matches the non-breaking space produced by "&nbsp;".
    return re.sub(r"\s+", " ", text).strip()
def nice_label(name):
    """Derive a human-readable label from a camelCase/snake_case field name.

    Words are split at lower-to-upper transitions, at the end of an acronym
    run (``SSNLast`` -> ``SSN Last``) and at letter/digit boundaries
    (``client2home`` -> ``client 2 home``); underscores and hyphens become
    spaces and the result is title-cased. A few abbreviations are then
    restored to upper case.

    Args:
        name: Field name such as ``"client2homeState"``.

    Returns:
        The label text, e.g. ``"Client 2 Home State"``.
    """
    label = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", name)
    # Separate an acronym from the capitalised word that follows it.
    label = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", label)
    # Digits embedded in a name form their own word; without this,
    # str.title() yields results like "Client2Home".
    label = re.sub(r"([A-Za-z])(\d)", r"\1 \2", label)
    label = re.sub(r"(\d)([A-Za-z])", r"\1 \2", label)
    label = label.replace("_", " ").replace("-", " ")
    label = label.title()
    # title() lower-cases the tail of every word, so restore abbreviations.
    label = label.replace("Ssn", "SSN")
    label = label.replace("Dob", "DOB")
    label = label.replace("Mm Dd Yyyy", "MM DD YYYY")
    return label
def should_exclude(name):
    """Tell whether a form control should be left out of the profile.

    A control is rejected when it has no name, when its name is listed in
    ``EXCLUDE_FIELD_NAMES``, when the name ends in ``"TemplateFile"``, or when
    the name matches one of the regexes in ``EXCLUDE_PATTERNS``.

    Returns:
        ``True`` if the field must be skipped, ``False`` otherwise.
    """
    if not name or name in EXCLUDE_FIELD_NAMES or name.endswith("TemplateFile"):
        return True
    for pattern in EXCLUDE_PATTERNS:
        if re.match(pattern, name):
            return True
    return False
def run_node_list_extractor():
    """Read lookup-list keys from the legacy app's JS modules via Node.js.

    A small ES-module script is passed to ``node -e``. For each listed file
    under ``OLD_PUBLIC`` it imports the module and, for every export that is a
    non-array object, records the sorted list of that object's keys. Files
    that fail to import are skipped inside the JS (several alternative file
    names are tried on purpose).

    Returns:
        A dict mapping export name to a sorted list of keys. An empty dict is
        returned when ``node`` cannot be started, exits with an error, or
        prints something that is not valid JSON; in that case a warning is
        written to stderr so that empty lists in the profile can be traced.
    """
    import sys  # local import: only needed for the warning stream

    # The JS text is kept flush-left so the string passed to node is unchanged.
    js = f"""
import {{ pathToFileURL }} from 'url';
const files = [
'casePlaintiffInfo.js',
'opposingCounselInfo.js',
'judgeInfo.js',
'caseFilingAttorneyInfo.js',
'filingAttorneyInfo.js',
'debtCollectorInfo.js'
];
const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};
for (const file of files) {{
try {{
const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
for (const [exportName, value] of Object.entries(mod)) {{
if (value && typeof value === 'object' && !Array.isArray(value)) {{
result[exportName] = Object.keys(value).sort();
}}
}}
}} catch (err) {{}}
}}
console.log(JSON.stringify(result));
"""
    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
        return json.loads(completed.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as err:
        # OSError: node is not installed / not executable.
        # SubprocessError: non-zero exit status (check=True).
        # ValueError: stdout could not be decoded or is not JSON.
        # Extraction stays best-effort, but the failure is no longer silent.
        print(
            f"Warning: could not extract lookup lists with node ({err}); "
            "continuing with empty lists.",
            file=sys.stderr,
        )
        return {}
def field_type(name, tag_name, attrs):
    """Choose the profile field type for one form control.

    Precedence: a ``<textarea>`` tag, then an explicit HTML ``type`` of date,
    email, tel or number, then keywords found in the lower-cased field name,
    then ``"autocomplete"`` when ``list_name_for_field`` knows a list for the
    field, and finally ``"text"``.

    Args:
        name: Field name.
        tag_name: Tag the control came from (any letter case).
        attrs: Attribute dict of the tag.

    Returns:
        One of ``"textarea"``, ``"date"``, ``"email"``, ``"tel"``,
        ``"number"``, ``"autocomplete"`` or ``"text"``.
    """
    key = name.lower()
    if tag_name.lower() == "textarea":
        return "textarea"

    declared = attrs.get("type", "").lower()
    if declared in ("date", "email", "tel", "number"):
        return declared

    if key == "dob":
        return "date"

    # Checked in order: the first keyword group found in the name wins.
    keyword_rules = (
        ("date", ("date",)),
        ("email", ("email",)),
        ("tel", ("phone", "fax")),
    )
    for kind, needles in keyword_rules:
        if any(needle in key for needle in needles):
            return kind

    return "autocomplete" if list_name_for_field(name) else "text"
def list_name_for_field(name):
    """Return the key of the lookup list that backs a field, if any.

    Args:
        name: Field name.

    Returns:
        The list key (for example ``"plaintiffs"`` or ``"states"``), or
        ``None`` when the field has no associated list.
    """
    exact_matches = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseState": "states",
        "homeState": "states",
        "client2homeState": "states",
        "caseDesignation": "caseDesignations",
    }
    if name in exact_matches:
        return exact_matches[name]
    # Numbered debt-collector name fields all share one list.
    if re.fullmatch(r"debtCollector\d+Name", name):
        return "debtCollectors"
    return None
def section_for(name):
    """Return the heading of the form section a field belongs to.

    Matching is done on the lower-cased name, mostly by prefix; the only
    case-sensitive test is the exact name ``"numCollectors"``. Anything that
    matches no rule lands in ``"Other Fields"``.
    """
    key = name.lower()
    client_fields = {
        "ssn", "dob", "alias", "email",
        "homeaddress", "homecity", "homestate", "homezip", "homecounty",
        "homephone", "cellphone",
    }
    payment_fields = {
        "nameoncard", "cardnumber", "securitycode", "expiration",
        "billingaddress", "billingzip",
    }

    # "client2" must be tested before the more general "client" prefix.
    if key.startswith("client2"):
        heading = "Client 2 Information"
    elif key.startswith("client") or key in client_fields:
        heading = "Client Information"
    elif key.startswith("case"):
        heading = "Case Information"
    elif key.startswith("disco"):
        heading = "Discovery Information"
    elif key.startswith("settlement"):
        heading = "Settlement Information"
    elif key.startswith(("installment", "fee")) or key in payment_fields:
        heading = "Fee / Payment Information"
    elif key.startswith("debtcollector") or name == "numCollectors":
        heading = "Debt Collector Information"
    elif key == "notes":
        heading = "Notes"
    else:
        heading = "Other Fields"
    return heading
def discover_templates():
    """List the ``.docx`` templates found under ``TEMPLATES_OUT``.

    Files are visited in sorted path order. Word owner/lock files (names
    starting with ``~$``) are skipped because they are not real documents.

    Returns:
        A list of dicts with the keys ``id`` (slug of the file stem),
        ``label`` (path below ``TEMPLATES_OUT`` with separators spaced out and
        underscores replaced), ``template`` (POSIX path relative to the
        templates directory) and ``outputFilename`` (a pattern containing
        ``{caseNumber}`` and timestamp placeholders).
    """
    templates = []
    used_ids = set()
    for path in sorted(TEMPLATES_OUT.rglob("*.docx")):
        # "~$name.docx" files appear next to a document while it is open in
        # Word; they also match the glob and must not become templates.
        if path.name.startswith("~$"):
            continue
        rel = path.relative_to(Path("tools/doc_generator/content/templates")).as_posix()
        base_id = re.sub(r"[^a-zA-Z0-9]+", "_", path.stem).strip("_").lower()
        # The id is built from the file stem only, so equally named files in
        # different sub-folders would collide. The first one keeps the plain
        # id; later ones get a numeric suffix.
        template_id = base_id
        suffix = 2
        while template_id in used_ids:
            template_id = f"{base_id}_{suffix}"
            suffix += 1
        used_ids.add(template_id)
        label = path.relative_to(TEMPLATES_OUT).as_posix()
        label = label.replace(".docx", "")
        label = label.replace("/", " / ")
        label = label.replace("_", " ")
        templates.append({
            "id": template_id,
            "label": label,
            "template": rel,
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx"
        })
    return templates
# Load the legacy form markup; bytes that are not valid UTF-8 are dropped.
html = OLD_HTML.read_text(encoding="utf-8", errors="ignore")

# Caption text keyed by the ``for`` target of each <label>; a later label with
# the same target replaces an earlier one.
labels = {}
for field_id, label in LABEL_RE.findall(html):
    labels[field_id] = clean_html_label(label)

# fields_seen keeps document order; field_meta maps name -> (tag name, attrs).
# NOTE(review): the first occurrence of a name supplies its metadata -- the
# original nesting of these statements was lost, confirm this is intended.
fields_seen = []
field_meta = {}
for match in TAG_RE.finditer(html):
    tag, tag_name = match.group(0), match.group(1)
    attrs = attrs_from_tag(tag)
    # Prefer the name attribute and fall back to the id.
    name = attrs.get("name") or attrs.get("id")
    if should_exclude(name) or name in fields_seen:
        continue
    fields_seen.append(name)
    field_meta[name] = (tag_name, attrs)
# Build one field definition per collected control and bucket the definitions
# by section heading, preserving document order within each section.
grouped = {}
for name in fields_seen:
    tag_name, attrs = field_meta[name]
    ftype = field_type(name, tag_name, attrs)
    # <label for="..."> points at an element id, while ``name`` prefers the
    # name attribute. When the two differ, also try the control's id before
    # falling back to a label generated from the name.
    label_text = (
        labels.get(name)
        or labels.get(attrs.get("id"))
        or nice_label(name)
    )
    field = {
        "name": name,
        "label": label_text,
        "type": ftype,
        "required": False
    }
    # Fields backed by a lookup list carry the key of that list.
    list_name = list_name_for_field(name)
    if list_name:
        field["list"] = list_name
    grouped.setdefault(section_for(name), []).append(field)
# Display order of the sections; headings without any field are left out.
preferred_order = [
    "Client Information",
    "Client 2 Information",
    "Case Information",
    "Discovery Information",
    "Settlement Information",
    "Fee / Payment Information",
    "Debt Collector Information",
    "Notes",
    "Other Fields",
]
sections = []
for heading in preferred_order:
    fields = grouped.get(heading)
    if fields:
        # The two primary sections start open and cannot be collapsed.
        always_open = heading in {"Client Information", "Case Information"}
        sections.append({
            "heading": heading,
            "collapsible": not always_open,
            "defaultOpen": always_open,
            "fields": fields,
        })
# Lookup lists referenced by the "list" key of autocomplete fields.
lists_raw = run_node_list_extractor()

# Two export names are tried for these lists; the first non-empty one wins.
opposing_counsel = lists_raw.get("caseOpposingCounselInfo", [])
if not opposing_counsel:
    opposing_counsel = lists_raw.get("opposingCounselInfo", [])

filing_attorneys = lists_raw.get("caseFilingAttorneyInfo", [])
if not filing_attorneys:
    filing_attorneys = lists_raw.get("filingAttorneyInfo", [])

lists = {
    "plaintiffs": lists_raw.get("casePlaintiffInfo", []),
    "opposingCounsel": opposing_counsel,
    "judges": lists_raw.get("judgeInfo", []),
    "filingAttorneys": filing_attorneys,
    "debtCollectors": lists_raw.get("debtCollectorInfo", []),
    # The remaining lists are fixed values, not read from the legacy app.
    "states": ["MO", "KS"],
    "caseDesignations": [
        "Associate Circuit",
        "Circuit",
        "Limited Actions",
        "Small Claims",
    ],
}
# Assemble the complete profile document.
templates = discover_templates()

# The first discovered template is the default; a fixed name is used when
# none was found.
if templates:
    default_template = templates[0]["template"]
else:
    default_template = "legacy/Canned-Emails.docx"

# Numbered settlement fields produced by the calculation script at
# generation time.
settlement_schedule = {
    "countField": "settlementInstallmentNo",
    "indexFormat": "decimal2",
    "maxCount": 120,
    "fields": [
        "settlementPaymentDate",
        "settlementPaymentAmount",
        "settlementRemaingBalance",
        "settlementRemainingBalance",
    ],
}

legacy_calculation = {
    "script": "legacy_legal",
    "runOn": "generate",
    "description": "Generate old-template compatible calculated fields.",
    "outputsDynamic": {"settlementSchedule": settlement_schedule},
}

profile = {
    "id": "legal_profile",
    "name": "Legal Profile",
    "description": "Consumer debt defense legal profile based on the legacy app form fields. Additional template fields are calculated at generation time.",
    "template": default_template,
    "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "lists": lists,
    "templates": templates,
    "calculations": [legacy_calculation],
    "sections": sections,
}
# Serialise the profile, write it out, then print a short per-section summary.
serialized = json.dumps(profile, indent=2)
OUT_PROFILE.write_text(serialized, encoding="utf-8")

summary = [
    f"Wrote {OUT_PROFILE}",
    f"Visible HTML fields: {len(fields_seen)}",
]
summary.extend(
    f"- {section['heading']}: {len(section['fields'])}" for section in sections
)
print("\n".join(summary))