Add legal profile template library and helper scripts

This commit is contained in:
Sean McElwain 2026-06-10 17:33:58 -05:00
parent 1a877714e9
commit adcba89350
201 changed files with 1333 additions and 7 deletions

View File

@ -3,3 +3,4 @@ uvicorn[standard]
python-multipart python-multipart
python-docx python-docx
pendulum pendulum
openpyxl

View File

@ -0,0 +1,137 @@
import json
import re
from pathlib import Path
# Root of the legacy word-doc-generator checkout that this script mines.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
# Locations probed, in order, for the legacy constants.js file.
OLD_CONSTANTS_CANDIDATES = [
    OLD_APP / "public" / "constants.js",
    OLD_APP / "constants.js",
]
# Destination directory and file for the generated Excel map JSON.
OUT_DIR = Path("tools/doc_generator/content/excel_maps")
OUT_FILE = OUT_DIR / "legacy_excel_maps.json"
# Matches `fieldName: 'A1'` style pairs: an identifier key followed by a quoted
# cell reference made of 1-3 uppercase letters and 1-5 digits.
CELL_RE = re.compile(r"([A-Za-z_][A-Za-z0-9_]*)\s*:\s*['\"]([A-Z]{1,3}[0-9]{1,5})['\"]")
def find_constants_file():
    """Return the first path in ``OLD_CONSTANTS_CANDIDATES`` that exists.

    Raises:
        SystemExit: if none of the candidate paths exists.
    """
    existing = next(
        (candidate for candidate in OLD_CONSTANTS_CANDIDATES if candidate.exists()),
        None,
    )
    if existing is None:
        raise SystemExit("Could not find old constants.js")
    return existing
def extract_object_blocks(text):
    """Collect named JS object literals that contain Excel cell mappings.

    Looks for ``const|let|var NAME = {`` declarations (optionally prefixed
    with ``export``), finds the matching closing brace by counting ``{`` and
    ``}`` characters, and extracts ``key: 'CELL'`` pairs from the enclosed
    text with ``CELL_RE``.

    Braces are counted naively: braces inside JS strings or comments are not
    treated specially.

    Returns:
        A list of ``(name, cells)`` tuples, one per declaration that yielded
        at least one cell mapping. Declarations whose braces never balance
        are skipped.
    """
    declaration = re.compile(
        r"(?:export\s+)?(?:const|let|var)\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*\{",
        re.MULTILINE,
    )
    found = []
    for decl in declaration.finditer(text):
        # The pattern ends on the opening brace, so step back onto it.
        open_pos = decl.end() - 1
        close_pos = None
        nesting = 0
        pos = open_pos
        while pos < len(text):
            ch = text[pos]
            if ch == "{":
                nesting += 1
            elif ch == "}":
                nesting -= 1
                if nesting == 0:
                    close_pos = pos + 1
                    break
            pos += 1
        if not close_pos:
            continue
        mapping = dict(CELL_RE.findall(text[open_pos:close_pos]))
        if mapping:
            found.append((decl.group(1), mapping))
    return found
def label_from_name(name):
    """Turn a camelCase / snake_case / kebab-case name into a Title Case label."""
    spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    for separator in ("_", "-"):
        spaced = spaced.replace(separator, " ")
    return re.sub(r"\s+", " ", spaced).strip().title()
def normalize_id(name):
    """Convert *name* to a lower-case snake_case id; fall back to ``excel_map``."""
    snake = re.sub(r"([a-z])([A-Z])", r"\1_\2", name)
    snake = re.sub(r"[^A-Za-z0-9]+", "_", snake)
    snake = snake.strip("_").lower()
    if not snake:
        return "excel_map"
    return snake
def discover_excel_templates():
    """List every ``*.xlsx`` file under ``OLD_APP``, sorted by path.

    Files located under a ``.git`` or ``node_modules`` directory are skipped.

    Returns:
        A list of dicts with ``label`` (POSIX path relative to ``OLD_APP``),
        ``legacyPath`` (full path as a string) and ``filename``.
    """
    skipped_dirs = {".git", "node_modules"}
    return [
        {
            "label": workbook.relative_to(OLD_APP).as_posix(),
            "legacyPath": str(workbook),
            "filename": workbook.name,
        }
        for workbook in sorted(OLD_APP.rglob("*.xlsx"))
        if skipped_dirs.isdisjoint(workbook.parts)
    ]
def main():
    """Generate ``legacy_excel_maps.json`` from the legacy constants file.

    Reads the legacy ``constants.js``, extracts every object that contains
    Excel cell mappings, pairs each with the discovered ``*.xlsx`` templates
    and writes the result to ``OUT_FILE``. A summary is printed to stdout.

    Raises:
        SystemExit: if the legacy constants file cannot be found.
    """

    def cell_position(item):
        """Sort key for ``(field, cell)`` pairs: column first, then numeric row.

        Sorting the raw cell string would put ``B10`` before ``B2``; splitting
        the reference keeps rows in numeric order and single-letter columns
        ahead of multi-letter ones.
        """
        cell = item[1]
        column = cell.rstrip("0123456789")
        return (len(column), column, int(cell[len(column):]))

    constants_path = find_constants_file()
    # errors="ignore" keeps the scan going over stray non-UTF-8 bytes.
    text = constants_path.read_text(encoding="utf-8", errors="ignore")
    blocks = extract_object_blocks(text)
    excel_templates = discover_excel_templates()
    maps = []
    for name, cells in blocks:
        map_id = normalize_id(name)
        maps.append({
            "id": map_id,
            "sourceName": name,
            "label": label_from_name(name),
            "description": f"Generated from {constants_path.relative_to(OLD_APP)} object {name}.",
            # The first discovered workbook is used as the default template.
            "template": excel_templates[0]["filename"] if excel_templates else "",
            "legacyTemplateCandidates": excel_templates,
            "fields": dict(sorted(cells.items(), key=cell_position)),
        })
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    OUT_FILE.write_text(json.dumps({
        "id": "legacy_excel_maps",
        "source": str(constants_path),
        "maps": maps,
    }, indent=2), encoding="utf-8")
    print(f"Wrote {OUT_FILE}")
    print(f"Source: {constants_path}")
    print(f"Excel templates found: {len(excel_templates)}")
    for t in excel_templates:
        print(f"- {t['legacyPath']}")
    print(f"Maps found: {len(maps)}")
    for item in maps:
        print(f"- {item['id']}: {len(item['fields'])} fields")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,192 @@
import json
import re
import subprocess
from pathlib import Path
# Root of the legacy word-doc-generator checkout.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
# Draft profile consumed by this script; main() asks the user to run
# review-old-word-doc-generator.py first when it is missing.
DRAFT_PROFILE = Path("diagnostics/legacy_word_doc_generator_profile_draft.json")
# Final profile written by this script.
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
# Directory scanned for the *.docx templates offered by the profile.
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")
# Legacy directory holding the *Info.js lookup modules.
OLD_PUBLIC = OLD_APP / "public"
def run_node_list_extractor():
    """Extract autocomplete list keys from the legacy ``*Info.js`` modules.

    Runs a small ES-module script through ``node`` that imports each known
    info file from ``OLD_PUBLIC`` and, for every exported plain object,
    records its sorted keys under the export name.

    Extraction is best-effort: if ``node`` is missing, exits non-zero, or
    prints something that is not JSON, a warning goes to stderr and an empty
    dict is returned so the caller can continue with empty lists.

    Returns:
        dict mapping export name to a sorted list of keys, or ``{}`` on failure.
    """
    import sys  # local import: only needed for the failure warning

    js = f"""
import {{ pathToFileURL }} from 'url';
const files = [
'casePlaintiffInfo.js',
'opposingCounselInfo.js',
'judgeInfo.js',
'caseFilingAttorneyInfo.js',
'filingAttorneyInfo.js',
'debtCollectorInfo.js'
];
const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};
for (const file of files) {{
try {{
const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
for (const [exportName, value] of Object.entries(mod)) {{
if (value && typeof value === 'object' && !Array.isArray(value)) {{
result[exportName] = Object.keys(value).sort();
}}
}}
}} catch (err) {{
// Some optional info files may not exist.
}}
}}
console.log(JSON.stringify(result));
"""
    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
        return json.loads(completed.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # OSError: node not installed or not executable; SubprocessError:
        # non-zero exit; ValueError: output was not valid JSON.
        print(f"Warning: legacy list extraction failed: {exc}", file=sys.stderr)
        return {}
def nice_label(name):
    """Build a Title Case label from *name*, upper-casing ``SSN`` and ``DOB``."""
    words = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    words = words.replace("_", " ").replace("-", " ").title()
    # Only a standalone "Ssn" word is upper-cased; "Dob" is replaced anywhere.
    words = re.sub(r"\bSsn\b", "SSN", words)
    return words.replace("Dob", "DOB")
def apply_legal_field_metadata(field):
    """Fill in label, type and autocomplete list for one field dict, in place.

    The label is always regenerated from the field name. Fields with a known
    lookup list become ``autocomplete`` fields; a fixed set of long-text
    fields is then forced to ``textarea``.

    Args:
        field: dict with at least a ``name`` key; mutated in place.

    Returns:
        The same ``field`` dict.
    """
    name = field["name"]
    # Normalize generated labels.
    field["label"] = nice_label(name)

    # Autocomplete fields keyed by exact field name.
    lists_by_name = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseDesignation": "caseDesignations",
    }
    if name in lists_by_name:
        list_name = lists_by_name[name]
    elif re.fullmatch(r"debtCollector\d+Name", name):
        list_name = "debtCollectors"
    elif name.lower().endswith("state") or name in {"caseState", "homeState", "client2homeState"}:
        list_name = "states"
    else:
        list_name = None

    if list_name is not None:
        field["type"] = "autocomplete"
        field["list"] = list_name

    # Long text fields.
    long_text_names = {
        "notes",
        "caseAppearanceInfo",
        "paymentOptions",
        "paymentOptions1",
        "paymentOptions2",
        "paymentOptions3",
        "paymentOptions4",
        "paymentOptions5",
    }
    if name in long_text_names:
        field["type"] = "textarea"
    return field
def walk_sections(sections):
    """Apply section and field metadata to *sections* recursively, in place.

    "Client Information" and "Case Information" are marked non-collapsible
    and open by default; every other section is collapsible and closed.
    Each field goes through ``apply_legal_field_metadata`` and any
    ``subsections`` are processed the same way.
    """
    always_open = {"Client Information", "Case Information"}
    for section in sections:
        pinned = section.get("heading") in always_open
        section["collapsible"] = not pinned
        section["defaultOpen"] = pinned
        section["fields"] = [
            apply_legal_field_metadata(entry)
            for entry in section.get("fields", [])
        ]
        walk_sections(section.get("subsections", []))
def discover_templates():
    """Describe every ``*.docx`` under ``TEMPLATES_OUT``, sorted by path.

    Returns:
        A list of dicts with ``id`` (slug of the file stem), ``label``
        (path relative to ``TEMPLATES_OUT`` made human-readable),
        ``template`` (POSIX path relative to the templates root) and
        ``outputFilename`` (pattern with ``{caseNumber}`` and timestamp
        placeholders).
    """
    templates_root = Path("tools/doc_generator/content/templates")
    found = []
    for docx_path in sorted(TEMPLATES_OUT.rglob("*.docx")):
        template_id = re.sub(r"[^a-zA-Z0-9]+", "_", docx_path.stem).strip("_").lower()
        label = (
            docx_path.relative_to(TEMPLATES_OUT)
            .as_posix()
            .replace(".docx", "")
            .replace("/", " / ")
            .replace("_", " ")
        )
        found.append({
            "id": template_id,
            "label": label,
            "template": docx_path.relative_to(templates_root).as_posix(),
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx",
        })
    return found
def main():
    """Build ``legal_profile.json`` from the draft profile and legacy lists.

    Loads the draft profile, enriches its sections with legal field metadata,
    attaches the autocomplete lists and discovered templates, and writes the
    result to ``OUT_PROFILE``.

    Raises:
        SystemExit: if the draft profile file does not exist.
    """
    if not DRAFT_PROFILE.exists():
        raise SystemExit(f"Missing {DRAFT_PROFILE}. Run review-old-word-doc-generator.py first.")
    draft = json.loads(DRAFT_PROFILE.read_text(encoding="utf-8"))

    lists_raw = run_node_list_extractor()
    # Two export names are tried for opposing counsel and filing attorneys.
    opposing_counsel = lists_raw.get("caseOpposingCounselInfo", []) or lists_raw.get("opposingCounselInfo", [])
    filing_attorneys = lists_raw.get("caseFilingAttorneyInfo", []) or lists_raw.get("filingAttorneyInfo", [])
    lists = {
        "plaintiffs": lists_raw.get("casePlaintiffInfo", []),
        "opposingCounsel": opposing_counsel,
        "judges": lists_raw.get("judgeInfo", []),
        "filingAttorneys": filing_attorneys,
        "debtCollectors": lists_raw.get("debtCollectorInfo", []),
        "states": ["MO", "KS"],
        "caseDesignations": [
            "Associate Circuit",
            "Circuit",
            "Limited Actions",
            "Small Claims",
        ],
    }

    sections = draft["sections"]
    walk_sections(sections)

    templates = discover_templates()
    if templates:
        default_template = templates[0]["template"]
    else:
        default_template = "legacy/Canned-Emails.docx"

    profile = {
        "id": "legal_profile",
        "name": "Legal Profile",
        "description": "Consumer debt defense legal profile generated from the legacy word-doc-generator app.",
        "template": default_template,
        "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
        "lists": lists,
        "templates": templates,
        "sections": sections,
    }
    OUT_PROFILE.parent.mkdir(parents=True, exist_ok=True)
    OUT_PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8")

    list_summary = ", ".join(f"{k}={len(v)}" for k, v in lists.items())
    print(f"Wrote {OUT_PROFILE}")
    print(f"Lists: {list_summary}")
    print(f"Templates: {len(templates)}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,337 @@
import json
import re
import subprocess
from pathlib import Path
# Root of the legacy word-doc-generator checkout and the form it served.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
OLD_PUBLIC = OLD_APP / "public"
OLD_HTML = OLD_PUBLIC / "index.html"
# Profile written by this script and the directory scanned for templates.
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")
# Opening tags of form controls (input/select/textarea), any case, multi-line.
TAG_RE = re.compile(r'<(input|select|textarea)\b[^>]*>', re.IGNORECASE | re.DOTALL)
# name="value" / name='value' attribute pairs; unquoted values are not matched.
ATTR_RE = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)=["\']([^"\']*)["\']')
# <label for="id">text</label>, capturing the target id and the inner HTML.
LABEL_RE = re.compile(
    r'<label[^>]*for=["\']([^"\']+)["\'][^>]*>(.*?)</label>',
    re.IGNORECASE | re.DOTALL
)
# Field names that are never emitted as profile fields.
EXCLUDE_FIELD_NAMES = {
    "letterTemplateFile",
    "discoTemplateFile",
    "excelFile",
    "csvFile",
    "templateFile",
    "file",
    "SSNLastFour",
    "SSN2LastFour",
    "caseAccLastFour",
    "casePlaintiffFileName",
    "caseAnswerDateString",
    "caseAnswerDateYYYY-MM-DD",
    "caseAnswerDateYyyyMmDd",
    "caseAnswerFiledDateString",
    "caseFilingDateString",
    "caseDispositionDateString",
    "discoCosDateString",
    "discoResponseCosDateString",
}
# Regexes for numbered field names that are also excluded.
# NOTE(review): "settlementRemaingBalance" looks like a legacy misspelling kept
# alongside the corrected name — confirm against the old templates.
EXCLUDE_PATTERNS = [
    r"^settlementPaymentDate\d{2}$",
    r"^settlementPaymentAmount\d{2}$",
    r"^settlementRemaingBalance\d{2}$",
    r"^settlementRemainingBalance\d{2}$",
    r"^debtCollector\d+AccLastFour$",
]
def attrs_from_tag(tag):
    """Return the quoted attributes of an HTML *tag* as a name -> value dict."""
    return {attr_name: attr_value for attr_name, attr_value in ATTR_RE.findall(tag)}
def clean_html_label(value):
    """Strip tags and colons from label HTML and collapse its whitespace."""
    without_tags = re.sub(r"<[^>]+>", "", value)
    without_colons = without_tags.replace(":", "")
    return re.sub(r"\s+", " ", without_colons).strip()
def nice_label(name):
    """Build a Title Case label from *name* and restore known acronyms."""
    label = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    label = label.replace("_", " ").replace("-", " ").title()
    # title() lower-cases the tail of acronyms; put the known ones back.
    for lowered, restored in (
        ("Ssn", "SSN"),
        ("Dob", "DOB"),
        ("Mm Dd Yyyy", "MM DD YYYY"),
    ):
        label = label.replace(lowered, restored)
    return label
def should_exclude(name):
    """Return True when *name* must not become a profile field.

    Empty names, names in ``EXCLUDE_FIELD_NAMES``, names ending in
    ``TemplateFile`` and names matching any ``EXCLUDE_PATTERNS`` entry are
    excluded.
    """
    if not name:
        return True
    if name in EXCLUDE_FIELD_NAMES or name.endswith("TemplateFile"):
        return True
    for pattern in EXCLUDE_PATTERNS:
        if re.match(pattern, name):
            return True
    return False
def run_node_list_extractor():
    """Extract autocomplete list keys from the legacy ``*Info.js`` modules.

    Runs a small ES-module script through ``node`` that imports each known
    info file from ``OLD_PUBLIC`` and, for every exported plain object,
    records its sorted keys under the export name. Files that fail to import
    are skipped by the script itself.

    Extraction is best-effort: if ``node`` is missing, exits non-zero, or
    prints something that is not JSON, a warning goes to stderr and an empty
    dict is returned so the caller can continue with empty lists.

    Returns:
        dict mapping export name to a sorted list of keys, or ``{}`` on failure.
    """
    import sys  # local import: only needed for the failure warning

    js = f"""
import {{ pathToFileURL }} from 'url';
const files = [
'casePlaintiffInfo.js',
'opposingCounselInfo.js',
'judgeInfo.js',
'caseFilingAttorneyInfo.js',
'filingAttorneyInfo.js',
'debtCollectorInfo.js'
];
const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};
for (const file of files) {{
try {{
const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
for (const [exportName, value] of Object.entries(mod)) {{
if (value && typeof value === 'object' && !Array.isArray(value)) {{
result[exportName] = Object.keys(value).sort();
}}
}}
}} catch (err) {{}}
}}
console.log(JSON.stringify(result));
"""
    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
        return json.loads(completed.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # OSError: node not installed or not executable; SubprocessError:
        # non-zero exit; ValueError: output was not valid JSON.
        print(f"Warning: legacy list extraction failed: {exc}", file=sys.stderr)
        return {}
def field_type(name, tag_name, attrs):
    """Choose the profile field type for one HTML form control.

    Precedence: ``<textarea>`` tags, then an explicit HTML ``type`` of
    date/email/tel/number, then name-based hints (date, email, phone/fax),
    then ``autocomplete`` when the field has a lookup list, else ``text``.

    Args:
        name: field name taken from the control.
        tag_name: HTML tag name (any case).
        attrs: dict of the control's attributes.
    """
    if tag_name.lower() == "textarea":
        return "textarea"

    declared = attrs.get("type", "").lower()
    if declared in {"date", "email", "tel", "number"}:
        return declared

    lowered = name.lower()
    if lowered == "dob":
        return "date"
    for needle, inferred in (
        ("date", "date"),
        ("email", "email"),
        ("phone", "tel"),
        ("fax", "tel"),
    ):
        if needle in lowered:
            return inferred

    return "autocomplete" if list_name_for_field(name) else "text"
def list_name_for_field(name):
    """Return the autocomplete list name for field *name*, or None."""
    lists_by_name = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseState": "states",
        "homeState": "states",
        "client2homeState": "states",
        "caseDesignation": "caseDesignations",
    }
    known = lists_by_name.get(name)
    if known is not None:
        return known
    # Numbered debt collector name fields share a single list.
    if re.fullmatch(r"debtCollector\d+Name", name):
        return "debtCollectors"
    return None
def section_for(name):
    """Return the section heading that field *name* belongs under.

    Exact (case-insensitive) names are checked first, then name prefixes in
    priority order; unmatched names go to "Other Fields". The exact names do
    not start with any of the prefixes, so the lookup order between the two
    groups does not affect the result.
    """
    lowered = name.lower()

    client_names = (
        "ssn", "dob", "alias", "email",
        "homeaddress", "homecity", "homestate", "homezip", "homecounty",
        "homephone", "cellphone",
    )
    payment_names = (
        "nameoncard", "cardnumber", "securitycode", "expiration",
        "billingaddress", "billingzip",
    )
    exact_headings = {key: "Client Information" for key in client_names}
    exact_headings.update({key: "Fee / Payment Information" for key in payment_names})
    exact_headings["notes"] = "Notes"
    if lowered in exact_headings:
        return exact_headings[lowered]
    # This one name is matched case-sensitively.
    if name == "numCollectors":
        return "Debt Collector Information"

    prefix_headings = (
        ("client2", "Client 2 Information"),
        ("client", "Client Information"),
        ("case", "Case Information"),
        ("disco", "Discovery Information"),
        ("settlement", "Settlement Information"),
        ("installment", "Fee / Payment Information"),
        ("fee", "Fee / Payment Information"),
        ("debtcollector", "Debt Collector Information"),
    )
    for prefix, heading in prefix_headings:
        if lowered.startswith(prefix):
            return heading
    return "Other Fields"
def discover_templates():
    """Describe every ``*.docx`` under ``TEMPLATES_OUT``, sorted by path.

    Returns:
        A list of dicts with ``id`` (slug of the file stem), ``label``
        (path relative to ``TEMPLATES_OUT`` made human-readable),
        ``template`` (POSIX path relative to the templates root) and
        ``outputFilename`` (pattern with ``{caseNumber}`` and timestamp
        placeholders).
    """
    templates_root = Path("tools/doc_generator/content/templates")

    def describe(docx_path):
        # The id comes from the file stem only; the label keeps sub-folders.
        template_id = re.sub(r"[^a-zA-Z0-9]+", "_", docx_path.stem).strip("_").lower()
        readable = docx_path.relative_to(TEMPLATES_OUT).as_posix()
        for old, new in ((".docx", ""), ("/", " / "), ("_", " ")):
            readable = readable.replace(old, new)
        return {
            "id": template_id,
            "label": readable,
            "template": docx_path.relative_to(templates_root).as_posix(),
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx",
        }

    return [describe(docx_path) for docx_path in sorted(TEMPLATES_OUT.rglob("*.docx"))]
# --- Script body: build legal_profile.json from the legacy HTML form. ---

# errors="ignore" keeps the scan going over stray non-UTF-8 bytes.
html = OLD_HTML.read_text(encoding="utf-8", errors="ignore")

# Map each <label for="..."> target id to its cleaned label text.
labels = {
    field_id: clean_html_label(label)
    for field_id, label in LABEL_RE.findall(html)
}

# Collect form controls in document order; the first occurrence of a name
# defines its tag and attributes.
fields_seen = []
field_meta = {}
for match in TAG_RE.finditer(html):
    tag_name = match.group(1)
    tag = match.group(0)
    attrs = attrs_from_tag(tag)
    name = attrs.get("name") or attrs.get("id")
    if should_exclude(name):
        continue
    if name not in fields_seen:
        fields_seen.append(name)
        field_meta[name] = (tag_name, attrs)

# Group field definitions by section heading.
grouped = {}
for name in fields_seen:
    tag_name, attrs = field_meta[name]
    ftype = field_type(name, tag_name, attrs)
    field = {
        "name": name,
        # Prefer the label text from the HTML, else derive one from the name.
        "label": labels.get(name) or nice_label(name),
        "type": ftype,
        "required": False
    }
    list_name = list_name_for_field(name)
    if list_name:
        field["list"] = list_name
    grouped.setdefault(section_for(name), []).append(field)

preferred_order = [
    "Client Information",
    "Client 2 Information",
    "Case Information",
    "Discovery Information",
    "Settlement Information",
    "Fee / Payment Information",
    "Debt Collector Information",
    "Notes",
    "Other Fields",
]
sections = []
for heading in preferred_order:
    fields = grouped.get(heading)
    if not fields:
        continue
    sections.append({
        "heading": heading,
        "collapsible": heading not in {"Client Information", "Case Information"},
        "defaultOpen": heading in {"Client Information", "Case Information"},
        "fields": fields
    })

lists_raw = run_node_list_extractor()
lists = {
    "plaintiffs": lists_raw.get("casePlaintiffInfo", []),
    "opposingCounsel": lists_raw.get("caseOpposingCounselInfo", []) or lists_raw.get("opposingCounselInfo", []),
    "judges": lists_raw.get("judgeInfo", []),
    "filingAttorneys": lists_raw.get("caseFilingAttorneyInfo", []) or lists_raw.get("filingAttorneyInfo", []),
    "debtCollectors": lists_raw.get("debtCollectorInfo", []),
    "states": ["MO", "KS"],
    "caseDesignations": [
        "Associate Circuit",
        "Circuit",
        "Limited Actions",
        "Small Claims"
    ]
}
templates = discover_templates()
profile = {
    "id": "legal_profile",
    "name": "Legal Profile",
    "description": "Consumer debt defense legal profile based on the legacy app form fields. Additional template fields are calculated at generation time.",
    "template": templates[0]["template"] if templates else "legacy/Canned-Emails.docx",
    "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "lists": lists,
    "templates": templates,
    "calculations": [
        {
            "script": "legacy_legal",
            "runOn": "generate",
            "description": "Generate old-template compatible calculated fields.",
            "outputsDynamic": {
                "settlementSchedule": {
                    "countField": "settlementInstallmentNo",
                    "indexFormat": "decimal2",
                    "maxCount": 120,
                    "fields": [
                        "settlementPaymentDate",
                        "settlementPaymentAmount",
                        "settlementRemaingBalance",
                        "settlementRemainingBalance"
                    ]
                }
            }
        }
    ],
    "sections": sections
}
# Create the output directory first so a fresh checkout does not fail on write.
OUT_PROFILE.parent.mkdir(parents=True, exist_ok=True)
OUT_PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8")
print(f"Wrote {OUT_PROFILE}")
print(f"Visible HTML fields: {len(fields_seen)}")
for section in sections:
    print(f"- {section['heading']}: {len(section['fields'])}")

View File

@ -0,0 +1,106 @@
import argparse
import csv
import json
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path
# JSON catalogue of legacy Excel maps read by load_map().
MAP_FILE = Path("tools/doc_generator/content/excel_maps/legacy_excel_maps.json")
# XML namespace prefixes used in the ElementTree queries below.
NS = {
    "main": "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    "rel": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
}
def load_map(map_id):
    """Return the map entry with id *map_id* from ``MAP_FILE``.

    Raises:
        SystemExit: if no entry has that id.
    """
    catalog = json.loads(MAP_FILE.read_text(encoding="utf-8"))
    chosen = next(
        (entry for entry in catalog["maps"] if entry["id"] == map_id),
        None,
    )
    if chosen is None:
        raise SystemExit(f"Map not found: {map_id}")
    return chosen
def col_row(cell):
    """Split a cell reference into ``(column_letters, row_number)``.

    Characters that are neither letters nor digits are dropped.
    """
    letters = []
    digits = []
    for ch in cell:
        if ch.isalpha():
            letters.append(ch)
        if ch.isdigit():
            digits.append(ch)
    return "".join(letters), int("".join(digits))
def shared_strings(z):
    """Return the workbook's shared-string table as a list of plain strings.

    Args:
        z: open ``zipfile.ZipFile`` for the .xlsx workbook.

    Returns:
        One string per ``<si>`` entry, in table order, or ``[]`` when the
        workbook has no ``xl/sharedStrings.xml`` part.
    """
    try:
        xml = z.read("xl/sharedStrings.xml")
    except KeyError:
        return []
    text_tag = f"{{{NS['main']}}}t"
    run_tag = f"{{{NS['main']}}}r"
    values = []
    for si in ET.fromstring(xml).findall("main:si", NS):
        parts = []
        # Take only the entry's own text and its rich-text runs. A blanket
        # descendant search would also pick up phonetic-run (<rPh>) text,
        # which is a pronunciation hint and not part of the cell value.
        for child in si:
            if child.tag == text_tag:
                parts.append(child.text or "")
            elif child.tag == run_tag:
                parts.extend(t.text or "" for t in child.findall("main:t", NS))
        values.append("".join(parts))
    return values
def read_xlsx_cells(path):
    """Read the first worksheet of an .xlsx file into a ``{ref: value}`` dict.

    Shared-string cells (``t="s"``) are resolved through the shared-string
    table, inline-string cells (``t="inlineStr"``) take their text from the
    ``<is>`` element, and every other cell keeps the raw ``<v>`` text. Cells
    without a reference or without any value are omitted.

    Args:
        path: path to the workbook.

    Returns:
        dict mapping cell references such as ``"B12"`` to string values.
    """
    values = {}
    with zipfile.ZipFile(path) as z:
        strings = shared_strings(z)
        # MVP: first worksheet only.
        sheet_xml = z.read("xl/worksheets/sheet1.xml")
    root = ET.fromstring(sheet_xml)
    for cell in root.findall(".//main:c", NS):
        ref = cell.attrib.get("r")
        if not ref:
            continue
        cell_type = cell.attrib.get("t")
        if cell_type == "inlineStr":
            # Inline strings carry their text in <is>, not <v>.
            inline = cell.find("main:is", NS)
            if inline is not None:
                texts = inline.findall("main:t", NS) + inline.findall("main:r/main:t", NS)
                values[ref] = "".join(t.text or "" for t in texts)
                continue
        v = cell.find("main:v", NS)
        if v is None:
            continue
        raw = v.text or ""
        if cell_type == "s":
            try:
                values[ref] = strings[int(raw)]
            except (ValueError, IndexError):
                # Malformed or out-of-range index: keep the raw text.
                values[ref] = raw
        else:
            values[ref] = raw
    return values
def export_csv(map_id, xlsx_path, csv_path):
    """Write a one-row CSV with the mapped cell values of a legacy workbook.

    Args:
        map_id: id of the map in ``legacy_excel_maps.json``.
        xlsx_path: workbook to read.
        csv_path: CSV file to create; its columns are the map's field names.
    """
    field_cells = load_map(map_id)["fields"]
    workbook_cells = read_xlsx_cells(xlsx_path)
    # Cells missing from the workbook become empty strings.
    record = {
        field: workbook_cells.get(ref, "")
        for field, ref in field_cells.items()
    }
    with open(csv_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=list(field_cells.keys()))
        out.writeheader()
        out.writerow(record)
    print(f"Exported {csv_path}")
def main():
    """Parse the command line and export the requested workbook to CSV."""
    cli = argparse.ArgumentParser(description="Export legacy Excel workbook cells to new app CSV datafile.")
    for arg_name, help_text in (
        ("map_id", "Map id from legacy_excel_maps.json"),
        ("xlsx", "Legacy Excel workbook to read"),
        ("csv", "CSV datafile to write"),
    ):
        cli.add_argument(arg_name, help=help_text)
    opts = cli.parse_args()
    export_csv(opts.map_id, Path(opts.xlsx), Path(opts.csv))


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,112 @@
import json
import re
from pathlib import Path
# Profile JSON that main() reads and rewrites in place.
PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
# Template paths stored in the profile are relative to TEMPLATES_ROOT;
# only the "legacy" sub-tree is scanned.
TEMPLATES_ROOT = Path("tools/doc_generator/content/templates")
LEGACY_ROOT = TEMPLATES_ROOT / "legacy"
# (category, substrings) pairs checked in order against the lower-cased
# relative path; the first category with a matching substring wins.
CATEGORY_RULES = [
    ("discovery", ["disco", "discovery", "interrog", "request-for-production", "rfp", "admission"]),
    ("answers", ["answer", "entry-of-appearance"]),
    ("settlement", ["settlement", "stip", "payment"]),
    ("client", ["client", "engagement", "fee", "contract"]),
    ("motions", ["motion", "dismiss", "compel", "summary"]),
    ("letters", ["letter", "email", "canned"]),
    ("pleadings", ["petition", "complaint", "counterclaim"]),
]
def title_case(value):
    """Normalise separators in *value* and expand known abbreviations.

    Underscores and hyphens become spaces, whitespace is collapsed, every
    word is lower-cased, and words found in the abbreviation table are
    replaced by their expansion.

    NOTE(review): despite the name, the result is lower-case apart from the
    "Missouri" / "Kansas" expansions — confirm whether capitalisation was
    intended before changing it, since template labels depend on this output.
    """
    expansions = {
        "disco": "discovery",
        "rfp": "request for production",
        "cos": "certificate of service",
        "oc": "opposing counsel",
        "atty": "attorney",
        "mo": "Missouri",
        "ks": "Kansas",
    }
    normalized = value.replace("_", " ").replace("-", " ")
    normalized = re.sub(r"\s+", " ", normalized).strip()
    return " ".join(
        expansions.get(token.lower(), token.lower())
        for token in normalized.split()
    )
def slug(value):
    """Return a lower-case underscore slug of *value*, or ``template`` if empty."""
    expanded = title_case(value).lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", expanded).strip("_")
    return collapsed if collapsed else "template"
def category_for(relative_path):
    """Return the first category whose substrings match the path, else ``general``."""
    haystack = relative_path.as_posix().lower()
    return next(
        (
            category
            for category, needles in CATEGORY_RULES
            if any(needle in haystack for needle in needles)
        ),
        "general",
    )
def label_for(path):
    """Build a " / "-separated label from *path* relative to ``LEGACY_ROOT``.

    The file extension is dropped and each path segment is passed through
    ``title_case``.
    """
    relative = path.relative_to(LEGACY_ROOT)
    segments = [*relative.parts[:-1], Path(relative.parts[-1]).stem]
    return " / ".join(title_case(segment) for segment in segments)
def main():
    """Rebuild the profile's template list with categories and unique ids.

    Scans ``LEGACY_ROOT`` for ``*.docx`` files, assigns each a category,
    a unique id and a label, stores the list (sorted by category and label)
    in the profile, and makes the first entry the default template.
    """
    profile = json.loads(PROFILE.read_text(encoding="utf-8"))
    entries = []
    taken = set()
    for docx_path in sorted(LEGACY_ROOT.rglob("*.docx")):
        within_legacy = docx_path.relative_to(LEGACY_ROOT)
        category = category_for(within_legacy)
        stem_id = f"{category}_{slug(within_legacy.with_suffix('').as_posix())}"
        # Append _2, _3, ... until the id is unique.
        candidate = stem_id
        suffix = 2
        while candidate in taken:
            candidate = f"{stem_id}_{suffix}"
            suffix += 1
        taken.add(candidate)
        entries.append({
            "id": candidate,
            "category": category,
            "label": label_for(docx_path),
            "template": docx_path.relative_to(TEMPLATES_ROOT).as_posix(),
            "outputFilename": f"{candidate}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx",
        })
    entries.sort(key=lambda entry: (entry["category"], entry["label"]))

    profile["templates"] = entries
    if entries:
        first = entries[0]
        profile["defaultTemplateId"] = first["id"]
        profile["template"] = first["template"]
    PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8")

    print(f"Updated {PROFILE}")
    print(f"Templates: {len(entries)}")
    per_category = {}
    for entry in entries:
        per_category[entry["category"]] = per_category.get(entry["category"], 0) + 1
    for category in sorted(per_category):
        print(f"- {category}: {per_category[category]}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,251 @@
import json
import re
from pathlib import Path
# python-docx is optional: when it cannot be imported, Document is None and
# find_placeholders_in_docx() returns no placeholders.
try:
    from docx import Document
except Exception:
    Document = None
# Root of the legacy word-doc-generator checkout that is reviewed.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
# Output directory for the review report and draft profile; created at
# import time.
OUT_DIR = Path("diagnostics")
OUT_DIR.mkdir(parents=True, exist_ok=True)
# Matches {placeholder} tokens made of letters, digits, "_", ":" and "-".
PLACEHOLDER_RE = re.compile(r"\{([A-Za-z0-9_:\-]+)\}")
def read_text(path):
    """Return the UTF-8 text of *path*, or ``""`` when it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``), so the only expected
    failures are OS-level ones such as a missing file, a directory, or
    denied permissions. Those yield an empty string so that one unreadable
    file does not stop the scan; other errors are allowed to surface.
    """
    try:
        return path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ""
def find_placeholders_in_text(text):
    """Return the sorted, de-duplicated ``{placeholder}`` names found in *text*."""
    names = {hit.group(1) for hit in PLACEHOLDER_RE.finditer(text)}
    return sorted(names)
def find_placeholders_in_docx(path):
    """Return the sorted placeholder names found in a .docx file.

    Scans body paragraphs and all tables, including tables nested in cells.
    Returns ``[]`` when python-docx is unavailable or the file cannot be
    opened.
    """
    if Document is None:
        return []
    try:
        document = Document(path)
    except Exception:
        return []

    names = set()

    def collect(paragraphs):
        for paragraph in paragraphs:
            names.update(find_placeholders_in_text(paragraph.text))

    collect(document.paragraphs)
    # Walk tables with an explicit stack so nested tables are covered too;
    # visiting order is irrelevant because results go into a set.
    pending_tables = list(document.tables)
    while pending_tables:
        table = pending_tables.pop()
        for row in table.rows:
            for cell in row.cells:
                collect(cell.paragraphs)
                pending_tables.extend(cell.tables)
    return sorted(names)
def categorize_field(name):
    """Return the section heading for placeholder *name*.

    Exact (case-insensitive) names are looked up first, then name prefixes
    in priority order; anything else goes to "Other Fields". The exact names
    do not start with any of the prefixes, so the lookup order between the
    two groups does not affect the result.
    """
    lowered = name.lower()

    exact_headings = {}
    for heading, members in (
        ("Client Information", ("dob", "ssn", "ssnlastfour", "alias", "email")),
        (
            "Fee / Payment Information",
            ("nameoncard", "cardnumber", "securitycode", "expiration", "billingaddress", "billingzip"),
        ),
        ("Date Fields", ("today", "currentdate", "currentdatemm-dd-yyyy")),
        ("Notes", ("notes",)),
    ):
        for member in members:
            exact_headings[member] = heading
    if lowered in exact_headings:
        return exact_headings[lowered]

    prefix_headings = (
        ("client2", "Client 2 Information"),
        ("client", "Client Information"),
        ("case", "Case Information"),
        ("settlement", "Settlement Information"),
        ("installment", "Fee / Payment Information"),
        ("fee", "Fee / Payment Information"),
        ("debtcollector", "Debt Collector Information"),
        ("disco", "Discovery Information"),
    )
    for prefix, heading in prefix_headings:
        if lowered.startswith(prefix):
            return heading
    return "Other Fields"
def field_type(name):
    """Guess a form field type from substrings of the placeholder *name*."""
    lowered = name.lower()

    def mentions(*needles):
        return any(needle in lowered for needle in needles)

    if mentions("notes", "appearanceinfo", "paymentoptions"):
        return "textarea"
    if mentions("date") or lowered in {"dob"}:
        return "date"
    if mentions("email"):
        return "email"
    if mentions("phone", "fax"):
        return "tel"
    return "text"
def make_sections(fields):
    """Group placeholder names into ordered profile sections.

    Args:
        fields: iterable of placeholder names.

    Returns:
        A list of section dicts in a fixed heading order; headings without
        fields are omitted and the fields of each section are sorted by name.
    """
    by_heading = {}
    for field_name in fields:
        by_heading.setdefault(categorize_field(field_name), []).append(field_name)

    heading_order = (
        "Date Fields",
        "Client Information",
        "Client 2 Information",
        "Case Information",
        "Discovery Information",
        "Settlement Information",
        "Fee / Payment Information",
        "Debt Collector Information",
        "Notes",
        "Other Fields",
    )
    # These two sections are always expanded and cannot be collapsed.
    pinned = {"Client Information", "Case Information"}

    result = []
    for heading in heading_order:
        members = by_heading.get(heading)
        if members:
            result.append({
                "heading": heading,
                "collapsible": heading not in pinned,
                "defaultOpen": heading in pinned,
                "fields": [
                    {
                        "name": member,
                        "label": re.sub(r"([a-z])([A-Z])", r"\1 \2", member).replace("_", " ").strip().title(),
                        "type": field_type(member),
                        "required": False,
                    }
                    for member in sorted(members)
                ],
            })
    return result
# --- Script body: scan the legacy app and write the review + draft profile. ---

# Inventory of legacy files by extension.
js_files = sorted(OLD_APP.rglob("*.js"))
html_files = sorted(OLD_APP.rglob("*.html"))
css_files = sorted(OLD_APP.rglob("*.css"))
docx_files = sorted(OLD_APP.rglob("*.docx"))
xlsx_files = sorted(OLD_APP.rglob("*.xlsx"))
all_text_placeholders = set()
function_hits = []
# Feature label -> substrings whose presence in a JS/HTML file counts as a hit.
function_terms = {
    "DOCX generation": ["docx", "Docxtemplater", "generateDocument", "generateDoc"],
    "Excel generation": ["xlsx", "generateExcel", "template.xlsx"],
    "vCard generation": ["vcard", "vCard", "BEGIN:VCARD"],
    "Calendar / ICS generation": ["ics", "BEGIN:VCALENDAR", "VEVENT"],
    "Client folder generation": ["generateClientFolder", "client folder"],
    "Settlement calculations": ["settlementPayment", "settlementInstallment", "remainingBalance"],
}
# Collect {placeholder} names and feature hits from every JS and HTML file.
for path in js_files + html_files:
    text = read_text(path)
    all_text_placeholders.update(find_placeholders_in_text(text))
    for label, terms in function_terms.items():
        if any(term in text for term in terms):
            function_hits.append((label, str(path.relative_to(OLD_APP))))
# Collect placeholders per DOCX template.
template_rows = []
all_template_placeholders = set()
for path in docx_files:
    placeholders = find_placeholders_in_docx(path)
    all_template_placeholders.update(placeholders)
    template_rows.append({
        "template": str(path.relative_to(OLD_APP)),
        "placeholder_count": len(placeholders),
        "placeholders": placeholders,
    })
# Union of placeholders seen in source text and in templates.
all_fields = sorted(all_text_placeholders | all_template_placeholders)
profile = {
    "id": "legacy_word_doc_generator",
    "name": "Legacy Word Doc Generator Profile",
    "description": "Draft profile generated from the legacy word-doc-generator app.",
    "template": "REPLACE_WITH_SELECTED_TEMPLATE.docx",
    "outputFilename": "legacy_document_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "sourceApp": str(OLD_APP),
    "sections": make_sections(all_fields),
    "legacyFeatures": sorted(set(label for label, _ in function_hits)),
    "templatesFound": template_rows,
}
profile_path = OUT_DIR / "legacy_word_doc_generator_profile_draft.json"
profile_path.write_text(json.dumps(profile, indent=2), encoding="utf-8")
# Build the Markdown review report line by line.
report = []
report.append("# Legacy Word Doc Generator Review")
report.append("")
report.append(f"Source app: `{OLD_APP}`")
report.append("")
report.append("## Files Found")
report.append("")
report.append(f"- JS files: {len(js_files)}")
report.append(f"- HTML files: {len(html_files)}")
report.append(f"- CSS files: {len(css_files)}")
report.append(f"- DOCX templates: {len(docx_files)}")
report.append(f"- XLSX files: {len(xlsx_files)}")
report.append("")
report.append("## Legacy Features Detected")
report.append("")
if function_hits:
    # Emit each (feature, file) pair only once.
    seen = set()
    for label, rel in function_hits:
        key = (label, rel)
        if key in seen:
            continue
        seen.add(key)
        report.append(f"- {label}: `{rel}`")
else:
    report.append("- No major legacy feature signatures detected.")
report.append("")
report.append("## Templates Found")
report.append("")
# NOTE(review): the nesting of the blank-line appends in this if/else was
# reconstructed from a flattened listing — confirm against the original file.
if template_rows:
    for row in template_rows:
        report.append(f"### `{row['template']}`")
        report.append(f"- Placeholder count: {row['placeholder_count']}")
        if row["placeholders"]:
            report.append("- Placeholders:")
            for name in row["placeholders"]:
                report.append(f" - `{{{name}}}`")
        report.append("")
else:
    report.append("- No DOCX templates found.")
    report.append("")
report.append("## All Fields Detected")
report.append("")
for name in all_fields:
    report.append(f"- `{{{name}}}`")
report.append("")
report.append("## Draft Profile")
report.append("")
report.append(f"Generated: `{profile_path}`")
report.append("")
report_path = OUT_DIR / "legacy_word_doc_generator_review.md"
report_path.write_text("\n".join(report), encoding="utf-8")
print(f"Wrote {report_path}")
print(f"Wrote {profile_path}")
print(f"Detected {len(all_fields)} unique fields/placeholders")

Some files were not shown because too many files have changed in this diff Show More