338 lines
9.3 KiB
Python
338 lines
9.3 KiB
Python
import json
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Legacy app whose HTML form and JS data modules are read by this script.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
OLD_PUBLIC = OLD_APP.joinpath("public")
OLD_HTML = OLD_PUBLIC.joinpath("index.html")

# Output locations; both are relative, so they resolve against the current
# working directory at run time.
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")
|
|
|
|
# Tag and label patterns are case-insensitive and may span several lines.
_HTML_FLAGS = re.IGNORECASE | re.DOTALL

# A whole <input>/<select>/<textarea> opening tag; group 1 is the tag name.
TAG_RE = re.compile(r'<(input|select|textarea)\b[^>]*>', _HTML_FLAGS)

# A quoted attribute: group 1 is the attribute name, group 2 its value.
ATTR_RE = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)=["\']([^"\']*)["\']')

# <label for="ID">TEXT</label>: group 1 is the target id, group 2 the inner markup.
LABEL_RE = re.compile(
    r'<label[^>]*for=["\']([^"\']+)["\'][^>]*>(.*?)</label>',
    _HTML_FLAGS,
)
|
|
|
|
# Exact field names that should_exclude() rejects. The grouping below is only
# for readability; the result is a single flat set.
EXCLUDE_FIELD_NAMES = set().union(
    # File-related names (presumably upload inputs -- confirm against the form).
    {
        "letterTemplateFile",
        "discoTemplateFile",
        "excelFile",
        "csvFile",
        "templateFile",
        "file",
    },
    # "...LastFour" and file-name variants.
    {
        "SSNLastFour",
        "SSN2LastFour",
        "caseAccLastFour",
        "casePlaintiffFileName",
    },
    # Alternate renderings of date fields.
    {
        "caseAnswerDateString",
        "caseAnswerDateYYYY-MM-DD",
        "caseAnswerDateYyyyMmDd",
        "caseAnswerFiledDateString",
        "caseFilingDateString",
        "caseDispositionDateString",
        "discoCosDateString",
        "discoResponseCosDateString",
    },
)

# Regexes for numbered field families that are rejected as well. Both the
# "Remaing" and "Remaining" spellings are listed on purpose.
EXCLUDE_PATTERNS = [
    r"^settlementPaymentDate\d{2}$",
    r"^settlementPaymentAmount\d{2}$",
    r"^settlementRemaingBalance\d{2}$",
    r"^settlementRemainingBalance\d{2}$",
    r"^debtCollector\d+AccLastFour$",
]
|
|
|
|
|
|
# One attribute: its name, optional whitespace around "=", then a value that is
# double-quoted, single-quoted, or unquoted. Exactly one of groups 2-4 takes
# part in a match, so a value may contain the *other* kind of quote.
_ATTR_PAIR_RE = re.compile(
    r"""([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))"""
)


def attrs_from_tag(tag):
    """Return the attributes of one HTML opening tag as a ``{name: value}`` dict.

    Args:
        tag: Text of a single tag, e.g. ``'<input type="text" name="x">'``.

    Returns:
        dict mapping lower-cased attribute names to their string values.
        Attributes written without a value (``required``) are omitted, and a
        repeated attribute keeps its last value.
    """
    attrs = {}
    for pair in _ATTR_PAIR_RE.finditer(tag):
        # Only the alternative that matched is not None; "" is a valid value.
        value = next(group for group in pair.group(2, 3, 4) if group is not None)
        # HTML attribute names are case-insensitive, so normalise the key to
        # let lookups such as attrs.get("name") find NAME="...".
        attrs[pair.group(1).lower()] = value
    return attrs
|
|
|
|
|
|
def clean_html_label(value):
    """Turn the inner markup of a ``<label>`` into plain display text.

    Steps, in order: drop nested tags, decode HTML entities, remove every
    colon, collapse runs of whitespace to one space and trim the ends.

    Args:
        value: Inner HTML of a label element.

    Returns:
        The cleaned label text (possibly an empty string).
    """
    # Imported here under its own name because the script later binds a
    # module-level variable called ``html``.
    from html import unescape

    text = re.sub(r"<[^>]+>", "", value)
    # Decode only after tags are gone, so an escaped "&lt;b&gt;" stays text
    # instead of being stripped as if it were markup.
    text = unescape(text)
    text = text.replace(":", "")
    # "\s" also matches the non-breaking space produced by "&nbsp;".
    return re.sub(r"\s+", " ", text).strip()
|
|
|
|
|
|
def nice_label(name):
    """Derive a human-readable label from a field name.

    A space is inserted at each lower-to-upper case boundary, underscores and
    hyphens become spaces, the result is title-cased, and a few tokens that
    title-casing spoils ("Ssn", "Dob", "Mm Dd Yyyy") are restored to capitals.
    """
    spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    pretty = spaced.replace("_", " ").replace("-", " ").title()

    # Applied in this order, each over the whole string.
    fixups = (
        ("Ssn", "SSN"),
        ("Dob", "DOB"),
        ("Mm Dd Yyyy", "MM DD YYYY"),
    )
    for wrong, right in fixups:
        pretty = pretty.replace(wrong, right)
    return pretty
|
|
|
|
|
|
def should_exclude(name):
    """Return True when a field name must be left out of the profile.

    A name is rejected when it is empty/None, listed in EXCLUDE_FIELD_NAMES,
    ends with "TemplateFile", or matches any regex in EXCLUDE_PATTERNS.
    """
    if not name or name in EXCLUDE_FIELD_NAMES or name.endswith("TemplateFile"):
        return True

    for pattern in EXCLUDE_PATTERNS:
        if re.match(pattern, name):
            return True
    return False
|
|
|
|
|
|
def run_node_list_extractor():
    """Read the key lists of the legacy app's JS data modules by running Node.

    A small ES-module script is passed to ``node --input-type=module -e``. It
    imports six files from OLD_PUBLIC and, for every export that is a
    non-array object, records that object's sorted keys under the export's
    name, then prints the collected mapping as JSON on stdout.

    This is best-effort: any failure (Node missing, non-zero exit, undecodable
    or invalid output) is reported on stderr and an empty dict is returned, so
    the caller can still build a profile with empty lists.

    Returns:
        dict mapping export name -> sorted list of keys, or ``{}`` on failure.
    """
    import sys  # local import: only needed for the diagnostics below

    js = f"""
import {{ pathToFileURL }} from 'url';

const files = [
  'casePlaintiffInfo.js',
  'opposingCounselInfo.js',
  'judgeInfo.js',
  'caseFilingAttorneyInfo.js',
  'filingAttorneyInfo.js',
  'debtCollectorInfo.js'
];

const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};

for (const file of files) {{
  try {{
    const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
    for (const [exportName, value] of Object.entries(mod)) {{
      if (value && typeof value === 'object' && !Array.isArray(value)) {{
        result[exportName] = Object.keys(value).sort();
      }}
    }}
  }} catch (err) {{
    // A file that fails to import is skipped, but say so on stderr
    // (stdout must carry nothing except the JSON result).
    console.error(`list extractor: skipped ${{file}}: ${{err}}`);
  }}
}}

console.log(JSON.stringify(result));
"""

    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # OSError: node cannot be started; SubprocessError: non-zero exit;
        # ValueError: output could not be decoded as text.
        print(f"warning: list extractor failed: {exc}", file=sys.stderr)
        node_stderr = getattr(exc, "stderr", None)
        if node_stderr:
            sys.stderr.write(node_stderr)
        return {}

    # Forward per-file "skipped" messages emitted by the script.
    if completed.stderr:
        sys.stderr.write(completed.stderr)

    try:
        result = json.loads(completed.stdout)
    except ValueError as exc:
        print(f"warning: list extractor printed invalid JSON: {exc}", file=sys.stderr)
        return {}

    # Callers use .get() on the result, so never hand back a non-dict.
    return result if isinstance(result, dict) else {}
|
|
|
|
|
|
def field_type(name, tag_name, attrs):
    """Choose the profile field type for one form control.

    Precedence: a ``<textarea>`` is always "textarea"; an explicit HTML
    ``type`` of date/email/tel/number is kept; otherwise the type is guessed
    from the field name (date, email, tel); a field that has a lookup list is
    "autocomplete"; everything else is "text".

    Args:
        name: Field name.
        tag_name: Tag the field came from (any letter case).
        attrs: Attribute dict of that tag.
    """
    key = name.lower()

    if tag_name.lower() == "textarea":
        return "textarea"

    declared = attrs.get("type", "").lower()
    if declared in {"date", "email", "tel", "number"}:
        return declared

    # Name-based guesses, checked in this order.
    if key == "dob" or "date" in key:
        return "date"
    if "email" in key:
        return "email"
    if any(token in key for token in ("phone", "fax")):
        return "tel"

    return "autocomplete" if list_name_for_field(name) else "text"
|
|
|
|
|
|
def list_name_for_field(name):
    """Return the lookup-list name that backs a field, or None if it has none.

    Matching is exact and case-sensitive, except for numbered debt-collector
    name fields (``debtCollector<digits>Name``).
    """
    exact_names = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseState": "states",
        "homeState": "states",
        "client2homeState": "states",
        "caseDesignation": "caseDesignations",
    }
    if name in exact_names:
        return exact_names[name]

    if re.fullmatch(r"debtCollector\d+Name", name):
        return "debtCollectors"
    return None
|
|
|
|
|
|
def section_for(name):
    """Return the form-section heading a field belongs to.

    Rules are tried in a fixed order and the first hit wins; all comparisons
    use the lower-cased name except the exact-case "numCollectors" check.
    Names that match nothing land in "Other Fields".
    """
    key = name.lower()

    client_names = {
        "ssn", "dob", "alias", "email",
        "homeaddress", "homecity", "homestate", "homezip", "homecounty",
        "homephone", "cellphone",
    }
    payment_names = {
        "nameoncard", "cardnumber", "securitycode", "expiration",
        "billingaddress", "billingzip",
    }

    # "client2" has to be tested before the more general "client" prefix.
    if key.startswith("client2"):
        return "Client 2 Information"
    if key.startswith("client") or key in client_names:
        return "Client Information"

    prefix_headings = (
        ("case", "Case Information"),
        ("disco", "Discovery Information"),
        ("settlement", "Settlement Information"),
    )
    for prefix, heading in prefix_headings:
        if key.startswith(prefix):
            return heading

    if key.startswith(("installment", "fee")) or key in payment_names:
        return "Fee / Payment Information"
    if key.startswith("debtcollector") or name == "numCollectors":
        return "Debt Collector Information"

    return "Notes" if key == "notes" else "Other Fields"
|
|
|
|
|
|
def discover_templates():
    """List every ``.docx`` file under TEMPLATES_OUT as a template entry.

    The directory is searched recursively and files are processed in sorted
    path order, so the result is deterministic.

    Returns:
        list of dicts, one per file, with keys:
          * ``id`` -- slug of the file stem (lower-case, runs of
            non-alphanumerics become "_"); when two files yield the same slug,
            later ones get a numeric suffix ("_2", "_3", ...) to stay unique.
          * ``label`` -- path relative to TEMPLATES_OUT without the extension,
            with "/" shown as " / " and "_" as a space.
          * ``template`` -- POSIX path relative to TEMPLATES_OUT's parent.
          * ``outputFilename`` -- output name pattern starting with the id.
    """
    templates_root = TEMPLATES_OUT.parent
    templates = []
    used_ids = set()

    for path in sorted(TEMPLATES_OUT.rglob("*.docx")):
        rel = path.relative_to(templates_root).as_posix()
        base_id = re.sub(r"[^a-zA-Z0-9]+", "_", path.stem).strip("_").lower()

        # The slug ignores the folder, so same-named files in different
        # sub-folders would otherwise share an id and an output file name.
        template_id = base_id
        suffix = 2
        while template_id in used_ids:
            template_id = f"{base_id}_{suffix}"
            suffix += 1
        used_ids.add(template_id)

        # Strip only the trailing extension, not every ".docx" in the path.
        label = path.relative_to(TEMPLATES_OUT).with_suffix("").as_posix()
        label = label.replace("/", " / ")
        label = label.replace("_", " ")

        templates.append({
            "id": template_id,
            "label": label,
            "template": rel,
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx",
        })

    return templates
|
|
|
|
|
|
# --- Scan the legacy form -------------------------------------------------

# Undecodable bytes are dropped rather than aborting the run.
html = OLD_HTML.read_text(encoding="utf-8", errors="ignore")

# Cleaned <label for="..."> text keyed by the "for" target, i.e. an element
# id. If an id has several labels, the last one wins.
labels = {
    field_id: clean_html_label(label)
    for field_id, label in LABEL_RE.findall(html)
}

fields_seen = []  # accepted field names, in order of first appearance
field_meta = {}   # field name -> (tag name, attribute dict) of that first appearance

for match in TAG_RE.finditer(html):
    tag_name = match.group(1)
    tag = match.group(0)
    attrs = attrs_from_tag(tag)

    # Prefer the submitted name; fall back to the id when there is none.
    name = attrs.get("name") or attrs.get("id")
    if should_exclude(name):
        continue

    # Dict membership keeps the first occurrence without rescanning a list.
    if name not in field_meta:
        fields_seen.append(name)
        field_meta[name] = (tag_name, attrs)

# --- Build one field record per name, grouped by section heading ----------

grouped = {}

for name in fields_seen:
    tag_name, attrs = field_meta[name]
    ftype = field_type(name, tag_name, attrs)

    # A label points at an element *id*, which can differ from the field
    # name, so try the id first, then the name, then a generated label.
    field_label = (
        labels.get(attrs.get("id"))
        or labels.get(name)
        or nice_label(name)
    )

    field = {
        "name": name,
        "label": field_label,
        "type": ftype,
        "required": False,
    }

    list_name = list_name_for_field(name)
    if list_name:
        field["list"] = list_name

    grouped.setdefault(section_for(name), []).append(field)
|
|
|
|
# Order in which section headings appear in the generated profile.
preferred_order = [
    "Client Information",
    "Client 2 Information",
    "Case Information",
    "Discovery Information",
    "Settlement Information",
    "Fee / Payment Information",
    "Debt Collector Information",
    "Notes",
    "Other Fields",
]

# These sections are not collapsible and start open; all others are the reverse.
_always_open = {"Client Information", "Case Information"}

# Headings with no fields are left out entirely.
sections = [
    {
        "heading": heading,
        "collapsible": heading not in _always_open,
        "defaultOpen": heading in _always_open,
        "fields": grouped[heading],
    }
    for heading in preferred_order
    if grouped.get(heading)
]
|
|
|
|
lists_raw = run_node_list_extractor()


def _export_keys(*export_names):
    """Return the first non-empty key list among the given export names.

    If none is non-empty, the value stored for the last name is returned,
    or [] when that name is absent.
    """
    found = []
    for export_name in export_names:
        found = lists_raw.get(export_name, [])
        if found:
            break
    return found


# Lookup lists offered to fields that carry a "list" entry. The first five
# come from the legacy JS modules; the last two are fixed.
lists = {
    "plaintiffs": _export_keys("casePlaintiffInfo"),
    "opposingCounsel": _export_keys("caseOpposingCounselInfo", "opposingCounselInfo"),
    "judges": _export_keys("judgeInfo"),
    "filingAttorneys": _export_keys("caseFilingAttorneyInfo", "filingAttorneyInfo"),
    "debtCollectors": _export_keys("debtCollectorInfo"),
    "states": ["MO", "KS"],
    "caseDesignations": [
        "Associate Circuit",
        "Circuit",
        "Limited Actions",
        "Small Claims",
    ],
}
|
|
|
|
templates = discover_templates()

# Use the first discovered template, or a fixed path when none were found.
_default_template = templates[0]["template"] if templates else "legacy/Canned-Emails.docx"

# Numbered settlement-schedule outputs; both the "Remaing" and "Remaining"
# spellings are listed.
_settlement_schedule = {
    "countField": "settlementInstallmentNo",
    "indexFormat": "decimal2",
    "maxCount": 120,
    "fields": [
        "settlementPaymentDate",
        "settlementPaymentAmount",
        "settlementRemaingBalance",
        "settlementRemainingBalance",
    ],
}

_legacy_calculation = {
    "script": "legacy_legal",
    "runOn": "generate",
    "description": "Generate old-template compatible calculated fields.",
    "outputsDynamic": {"settlementSchedule": _settlement_schedule},
}

# The complete document-type profile, in the key order it is serialised with.
profile = {
    "id": "legal_profile",
    "name": "Legal Profile",
    "description": "Consumer debt defense legal profile based on the legacy app form fields. Additional template fields are calculated at generation time.",
    "template": _default_template,
    "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "lists": lists,
    "templates": templates,
    "calculations": [_legacy_calculation],
    "sections": sections,
}
|
|
|
|
# Serialise the profile, then print a short summary: the output path, the
# number of accepted fields, and the field count of each section.
_serialized = json.dumps(profile, indent=2)
OUT_PROFILE.write_text(_serialized, encoding="utf-8")

_summary = [
    f"Wrote {OUT_PROFILE}",
    f"Visible HTML fields: {len(fields_seen)}",
]
_summary.extend(
    f"- {section['heading']}: {len(section['fields'])}" for section in sections
)
print("\n".join(_summary))
|