Add legal profile template library and helper scripts
This commit is contained in:
parent
1a877714e9
commit
adcba89350
|
|
@ -3,3 +3,4 @@ uvicorn[standard]
|
|||
python-multipart
|
||||
python-docx
|
||||
pendulum
|
||||
openpyxl
|
||||
|
|
|
|||
|
|
@ -0,0 +1,137 @@
|
|||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
|
||||
OLD_CONSTANTS_CANDIDATES = [
|
||||
OLD_APP / "public" / "constants.js",
|
||||
OLD_APP / "constants.js",
|
||||
]
|
||||
|
||||
OUT_DIR = Path("tools/doc_generator/content/excel_maps")
|
||||
OUT_FILE = OUT_DIR / "legacy_excel_maps.json"
|
||||
|
||||
CELL_RE = re.compile(r"([A-Za-z_][A-Za-z0-9_]*)\s*:\s*['\"]([A-Z]{1,3}[0-9]{1,5})['\"]")
|
||||
|
||||
|
||||
def find_constants_file():
    """Return the first legacy ``constants.js`` candidate that exists on disk.

    Candidates are tried in the order listed in ``OLD_CONSTANTS_CANDIDATES``.

    Raises:
        SystemExit: if none of the candidate paths exists.
    """
    existing = next(
        (candidate for candidate in OLD_CONSTANTS_CANDIDATES if candidate.exists()),
        None,
    )
    if existing is None:
        raise SystemExit("Could not find old constants.js")
    return existing
|
||||
|
||||
|
||||
def extract_object_blocks(text):
    """Collect Excel cell mappings from JS object literals assigned to variables.

    Recognised declarations look like::

        const fieldToCellMap = { ... };
        export const fieldToCellMap = { ... };
        let someMap = { ... };

    The end of each object is found by counting ``{`` and ``}`` from the
    opening brace. Every brace character is counted, including any that sit
    inside JS strings or comments.

    Returns:
        A list of ``(variable_name, {field: cell_ref})`` tuples in source
        order. Objects without any ``field: 'A1'`` style entry are omitted, as
        are declarations whose closing brace is never reached.
    """
    declaration = re.compile(
        r"(?:export\s+)?(?:const|let|var)\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*\{",
        re.MULTILINE,
    )
    brace = re.compile(r"[{}]")

    found = []
    for decl in declaration.finditer(text):
        # The declaration pattern ends on the opening brace itself.
        opening = decl.end() - 1

        nesting = 0
        closing = None
        for hit in brace.finditer(text, opening):
            nesting += 1 if hit.group() == "{" else -1
            if nesting == 0:
                closing = hit.end()
                break

        if closing is None:
            continue

        mapping = dict(CELL_RE.findall(text[opening:closing]))
        if mapping:
            found.append((decl.group(1), mapping))

    return found
|
||||
|
||||
|
||||
def label_from_name(name):
    """Turn a JS identifier into a human-readable, title-cased label.

    A space is inserted at each lower-to-upper case boundary, underscores and
    hyphens become spaces, whitespace runs are collapsed, and the result is
    passed through ``str.title``.
    """
    spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    spaced = spaced.translate(str.maketrans("_-", "  "))
    collapsed = re.sub(r"\s+", " ", spaced)
    return collapsed.strip().title()
|
||||
|
||||
|
||||
def normalize_id(name):
    """Convert a JS identifier into a lower-case ``snake_case`` map id.

    Lower-to-upper case boundaries and runs of non-alphanumeric characters
    become single underscores. Falls back to ``"excel_map"`` when nothing
    usable is left.
    """
    snake = re.sub(r"([a-z])([A-Z])", r"\1_\2", name)
    snake = re.sub(r"[^A-Za-z0-9]+", "_", snake)
    snake = snake.strip("_").lower()
    if not snake:
        return "excel_map"
    return snake
|
||||
|
||||
|
||||
def discover_excel_templates():
    """List every ``.xlsx`` workbook under the legacy app, in sorted path order.

    Workbooks with a ``.git`` or ``node_modules`` path component are skipped.

    Returns:
        A list of dicts with ``label`` (POSIX path relative to ``OLD_APP``),
        ``legacyPath`` (full path as a string) and ``filename``.
    """
    skipped_dirs = (".git", "node_modules")
    workbooks = (
        workbook
        for workbook in sorted(OLD_APP.rglob("*.xlsx"))
        if not any(part in workbook.parts for part in skipped_dirs)
    )
    return [
        {
            "label": workbook.relative_to(OLD_APP).as_posix(),
            "legacyPath": str(workbook),
            "filename": workbook.name,
        }
        for workbook in workbooks
    ]
|
||||
|
||||
|
||||
def main():
    """Extract legacy Excel cell maps and write them to ``OUT_FILE`` as JSON.

    Reads the legacy ``constants.js``, turns every object literal containing
    cell references into a map entry, attaches the discovered ``.xlsx``
    workbooks as template candidates, and prints a short summary.
    """
    constants_path = find_constants_file()
    source_text = constants_path.read_text(encoding="utf-8", errors="ignore")

    blocks = extract_object_blocks(source_text)
    excel_templates = discover_excel_templates()

    maps = [
        {
            "id": normalize_id(name),
            "sourceName": name,
            "label": label_from_name(name),
            "description": f"Generated from {constants_path.relative_to(OLD_APP)} object {name}.",
            # The first discovered workbook is the default template.
            "template": excel_templates[0]["filename"] if excel_templates else "",
            "legacyTemplateCandidates": excel_templates,
            # Fields are ordered by their cell reference, compared as strings.
            "fields": dict(sorted(cells.items(), key=lambda item: item[1])),
        }
        for name, cells in blocks
    ]

    payload = {
        "id": "legacy_excel_maps",
        "source": str(constants_path),
        "maps": maps,
    }

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    OUT_FILE.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    print(f"Wrote {OUT_FILE}")
    print(f"Source: {constants_path}")

    print(f"Excel templates found: {len(excel_templates)}")
    for template in excel_templates:
        print(f"- {template['legacyPath']}")

    print(f"Maps found: {len(maps)}")
    for entry in maps:
        print(f"- {entry['id']}: {len(entry['fields'])} fields")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
|
||||
DRAFT_PROFILE = Path("diagnostics/legacy_word_doc_generator_profile_draft.json")
|
||||
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
|
||||
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")
|
||||
|
||||
OLD_PUBLIC = OLD_APP / "public"
|
||||
|
||||
|
||||
def run_node_list_extractor():
    """Ask Node.js for the keys of the legacy autocomplete data objects.

    A small ES-module script is passed to ``node -e``. It imports each listed
    info file from ``OLD_PUBLIC`` and, for every export that is a non-array
    object, records the sorted list of its keys. Files that fail to import are
    skipped by the script itself.

    Returns:
        A dict mapping export name to a sorted list of keys, or ``{}`` when
        node cannot be run, exits with an error, or prints invalid JSON. In
        that case a warning is written to stderr instead of failing silently.
    """
    import sys

    js = f"""
import {{ pathToFileURL }} from 'url';

const files = [
'casePlaintiffInfo.js',
'opposingCounselInfo.js',
'judgeInfo.js',
'caseFilingAttorneyInfo.js',
'filingAttorneyInfo.js',
'debtCollectorInfo.js'
];

const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};

for (const file of files) {{
try {{
const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
for (const [exportName, value] of Object.entries(mod)) {{
if (value && typeof value === 'object' && !Array.isArray(value)) {{
result[exportName] = Object.keys(value).sort();
}}
}}
}} catch (err) {{
// Some optional info files may not exist.
}}
}}

console.log(JSON.stringify(result));
"""
    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
        return json.loads(completed.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Best effort: the profile is still usable with empty lists, but the
        # operator should know why the lists came back empty.
        print(f"Warning: could not extract legacy lists with node: {exc}", file=sys.stderr)
        return {}
|
||||
|
||||
|
||||
def nice_label(name):
    """Build a display label from a camelCase / snake_case field name.

    Words are split at lower-to-upper case boundaries, underscores and hyphens
    become spaces, and the result is title-cased. A standalone ``Ssn`` word is
    rewritten as ``SSN`` and every ``Dob`` as ``DOB``.
    """
    words = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    words = words.translate(str.maketrans("_-", "  "))
    titled = re.sub(r"\bSsn\b", "SSN", words.title())
    return titled.replace("Dob", "DOB")
|
||||
|
||||
|
||||
def apply_legal_field_metadata(field):
    """Normalise one draft field dict in place and return it.

    The label is regenerated from the field name. Fields backed by a known
    lookup list become ``autocomplete`` fields with a ``list`` key. A fixed set
    of long-text fields is then forced to ``textarea``.
    """
    name = field["name"]

    # Normalize generated labels.
    field["label"] = nice_label(name)

    # Autocomplete fields: exact names first, then the pattern-based ones.
    exact_lists = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseDesignation": "caseDesignations",
    }
    list_name = exact_lists.get(name)
    if list_name is None:
        if re.fullmatch(r"debtCollector\d+Name", name):
            list_name = "debtCollectors"
        elif name.lower().endswith("state") or name in {"caseState", "homeState", "client2homeState"}:
            list_name = "states"

    if list_name is not None:
        field["type"] = "autocomplete"
        field["list"] = list_name

    # Long text fields.
    long_text_fields = {
        "notes",
        "caseAppearanceInfo",
        "paymentOptions",
        "paymentOptions1",
        "paymentOptions2",
        "paymentOptions3",
        "paymentOptions4",
        "paymentOptions5",
    }
    if name in long_text_fields:
        field["type"] = "textarea"

    return field
|
||||
|
||||
|
||||
def walk_sections(sections):
    """Apply section and field metadata to ``sections`` in place, recursively.

    "Client Information" and "Case Information" are non-collapsible and open
    by default; every other section is collapsible and closed. Each field is
    passed through ``apply_legal_field_metadata``, and any ``subsections`` are
    processed the same way.
    """
    always_open = {"Client Information", "Case Information"}

    for section in sections:
        pinned = section.get("heading") in always_open
        section["collapsible"] = not pinned
        section["defaultOpen"] = pinned

        section["fields"] = list(
            map(apply_legal_field_metadata, section.get("fields", []))
        )

        walk_sections(section.get("subsections", []))
|
||||
|
||||
|
||||
def discover_templates(root=None):
    """Describe every ``.docx`` template under the legacy template folder.

    Args:
        root: Folder to scan. Defaults to ``TEMPLATES_OUT``. Template paths in
            the result are expressed relative to this folder's parent.

    Returns:
        A list of dicts (``id``, ``label``, ``template``, ``outputFilename``)
        in sorted path order. Ids are derived from the file stem and made
        unique with a numeric suffix (``_2``, ``_3``, ...), so two files with
        the same name in different folders no longer share an id.
    """
    legacy_root = TEMPLATES_OUT if root is None else Path(root)
    templates_root = legacy_root.parent

    templates = []
    used_ids = set()

    for path in sorted(legacy_root.rglob("*.docx")):
        rel = path.relative_to(templates_root).as_posix()

        base_id = re.sub(r"[^a-zA-Z0-9]+", "_", path.stem).strip("_").lower() or "template"
        template_id = base_id
        suffix = 2
        while template_id in used_ids:
            template_id = f"{base_id}_{suffix}"
            suffix += 1
        used_ids.add(template_id)

        # Drop only the file extension; a ".docx" elsewhere in the path stays.
        label = path.relative_to(legacy_root).with_suffix("").as_posix()
        label = label.replace("/", " / ")
        label = label.replace("_", " ")

        templates.append({
            "id": template_id,
            "label": label,
            "template": rel,
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx"
        })

    return templates
|
||||
|
||||
|
||||
def main():
    """Build ``legal_profile.json`` from the draft profile and legacy lists.

    Raises:
        SystemExit: if the draft profile file does not exist.
    """
    if not DRAFT_PROFILE.exists():
        raise SystemExit(f"Missing {DRAFT_PROFILE}. Run review-old-word-doc-generator.py first.")

    draft = json.loads(DRAFT_PROFILE.read_text(encoding="utf-8"))

    lists_raw = run_node_list_extractor()

    def first_export(*export_names):
        # Return the keys of the first export that has any; otherwise whatever
        # the last lookup produced.
        keys = []
        for export_name in export_names:
            keys = lists_raw.get(export_name, [])
            if keys:
                break
        return keys

    lists = {
        "plaintiffs": first_export("casePlaintiffInfo"),
        "opposingCounsel": first_export("caseOpposingCounselInfo", "opposingCounselInfo"),
        "judges": first_export("judgeInfo"),
        "filingAttorneys": first_export("caseFilingAttorneyInfo", "filingAttorneyInfo"),
        "debtCollectors": first_export("debtCollectorInfo"),
        "states": ["MO", "KS"],
        "caseDesignations": [
            "Associate Circuit",
            "Circuit",
            "Limited Actions",
            "Small Claims"
        ]
    }

    sections = draft["sections"]
    walk_sections(sections)

    templates = discover_templates()
    default_template = templates[0]["template"] if templates else "legacy/Canned-Emails.docx"

    profile = {
        "id": "legal_profile",
        "name": "Legal Profile",
        "description": "Consumer debt defense legal profile generated from the legacy word-doc-generator app.",
        "template": default_template,
        "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
        "lists": lists,
        "templates": templates,
        "sections": sections
    }

    OUT_PROFILE.parent.mkdir(parents=True, exist_ok=True)
    OUT_PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8")

    list_summary = ", ".join(f"{key}={len(values)}" for key, values in lists.items())

    print(f"Wrote {OUT_PROFILE}")
    print(f"Lists: {list_summary}")
    print(f"Templates: {len(templates)}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,337 @@
|
|||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
|
||||
OLD_PUBLIC = OLD_APP / "public"
|
||||
OLD_HTML = OLD_PUBLIC / "index.html"
|
||||
|
||||
OUT_PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
|
||||
TEMPLATES_OUT = Path("tools/doc_generator/content/templates/legacy")
|
||||
|
||||
TAG_RE = re.compile(r'<(input|select|textarea)\b[^>]*>', re.IGNORECASE | re.DOTALL)
|
||||
ATTR_RE = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)=["\']([^"\']*)["\']')
|
||||
LABEL_RE = re.compile(
|
||||
r'<label[^>]*for=["\']([^"\']+)["\'][^>]*>(.*?)</label>',
|
||||
re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
|
||||
EXCLUDE_FIELD_NAMES = {
|
||||
"letterTemplateFile",
|
||||
"discoTemplateFile",
|
||||
"excelFile",
|
||||
"csvFile",
|
||||
"templateFile",
|
||||
"file",
|
||||
"SSNLastFour",
|
||||
"SSN2LastFour",
|
||||
"caseAccLastFour",
|
||||
"casePlaintiffFileName",
|
||||
"caseAnswerDateString",
|
||||
"caseAnswerDateYYYY-MM-DD",
|
||||
"caseAnswerDateYyyyMmDd",
|
||||
"caseAnswerFiledDateString",
|
||||
"caseFilingDateString",
|
||||
"caseDispositionDateString",
|
||||
"discoCosDateString",
|
||||
"discoResponseCosDateString",
|
||||
}
|
||||
|
||||
EXCLUDE_PATTERNS = [
|
||||
r"^settlementPaymentDate\d{2}$",
|
||||
r"^settlementPaymentAmount\d{2}$",
|
||||
r"^settlementRemaingBalance\d{2}$",
|
||||
r"^settlementRemainingBalance\d{2}$",
|
||||
r"^debtCollector\d+AccLastFour$",
|
||||
]
|
||||
|
||||
|
||||
def attrs_from_tag(tag):
    """Return the quoted ``name="value"`` attributes of an HTML tag as a dict.

    When an attribute appears more than once, the last value wins.
    """
    return {attr: value for attr, value in ATTR_RE.findall(tag)}
|
||||
|
||||
|
||||
def clean_html_label(value):
    """Reduce the inner HTML of a ``<label>`` to plain display text.

    Nested tags are removed, HTML entities (``&amp;``, ``&nbsp;``, ...) are
    decoded, colons are dropped, and whitespace runs are collapsed to single
    spaces.
    """
    from html import unescape

    value = re.sub(r"<[^>]+>", "", value)
    # Decode after stripping tags so an escaped "&lt;b&gt;" is kept as text.
    value = unescape(value)
    value = value.replace(":", "")
    value = re.sub(r"\s+", " ", value).strip()
    return value
|
||||
|
||||
|
||||
def nice_label(name):
    """Build a display label from a camelCase / snake_case field name.

    Words are split at lower-to-upper case boundaries, underscores and hyphens
    become spaces, and the result is title-cased. ``Ssn``, ``Dob`` and
    ``Mm Dd Yyyy`` are then upper-cased wherever they occur.
    """
    words = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
    titled = words.translate(str.maketrans("_-", "  ")).title()

    for titled_form, upper_form in (
        ("Ssn", "SSN"),
        ("Dob", "DOB"),
        ("Mm Dd Yyyy", "MM DD YYYY"),
    ):
        titled = titled.replace(titled_form, upper_form)

    return titled
|
||||
|
||||
|
||||
def should_exclude(name):
    """Return True when a form control must not become a profile field.

    A control is excluded when it has no name, is listed in
    ``EXCLUDE_FIELD_NAMES``, ends with ``TemplateFile``, or matches any of the
    regular expressions in ``EXCLUDE_PATTERNS``.
    """
    if not name or name in EXCLUDE_FIELD_NAMES or name.endswith("TemplateFile"):
        return True

    for pattern in EXCLUDE_PATTERNS:
        if re.match(pattern, name):
            return True
    return False
|
||||
|
||||
|
||||
def run_node_list_extractor():
    """Ask Node.js for the keys of the legacy autocomplete data objects.

    A small ES-module script is passed to ``node -e``. It imports each listed
    info file from ``OLD_PUBLIC`` and, for every export that is a non-array
    object, records the sorted list of its keys. Files that fail to import are
    skipped by the script itself.

    Returns:
        A dict mapping export name to a sorted list of keys, or ``{}`` when
        node cannot be run, exits with an error, or prints invalid JSON. In
        that case a warning is written to stderr instead of failing silently.
    """
    import sys

    js = f"""
import {{ pathToFileURL }} from 'url';

const files = [
'casePlaintiffInfo.js',
'opposingCounselInfo.js',
'judgeInfo.js',
'caseFilingAttorneyInfo.js',
'filingAttorneyInfo.js',
'debtCollectorInfo.js'
];

const base = {json.dumps(str(OLD_PUBLIC))};
const result = {{}};

for (const file of files) {{
try {{
const mod = await import(pathToFileURL(`${{base}}/${{file}}`).href);
for (const [exportName, value] of Object.entries(mod)) {{
if (value && typeof value === 'object' && !Array.isArray(value)) {{
result[exportName] = Object.keys(value).sort();
}}
}}
}} catch (err) {{}}
}}

console.log(JSON.stringify(result));
"""
    try:
        completed = subprocess.run(
            ["node", "--input-type=module", "-e", js],
            check=True,
            capture_output=True,
            text=True,
        )
        return json.loads(completed.stdout)
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Best effort: the profile is still usable with empty lists, but the
        # operator should know why the lists came back empty.
        print(f"Warning: could not extract legacy lists with node: {exc}", file=sys.stderr)
        return {}
|
||||
|
||||
|
||||
def field_type(name, tag_name, attrs):
    """Choose the profile field type for one HTML form control.

    Precedence: a ``<textarea>`` tag, then an explicit HTML ``type`` of
    date/email/tel/number, then keywords in the field name (date/dob, email,
    phone/fax), then membership in a lookup list (``autocomplete``), and
    finally plain ``text``.
    """
    lowered = name.lower()

    if tag_name.lower() == "textarea":
        return "textarea"

    declared = attrs.get("type", "").lower()
    if declared in ("date", "email", "tel", "number"):
        return declared

    if lowered == "dob" or "date" in lowered:
        return "date"
    if "email" in lowered:
        return "email"
    if any(keyword in lowered for keyword in ("phone", "fax")):
        return "tel"

    return "autocomplete" if list_name_for_field(name) else "text"
|
||||
|
||||
|
||||
def list_name_for_field(name):
    """Return the lookup list backing ``name``, or None if it has none.

    Exact field names are matched first; ``debtCollector<N>Name`` fields map
    to the ``debtCollectors`` list.
    """
    exact_lists = {
        "casePlaintiff": "plaintiffs",
        "caseOpposingCounsel": "opposingCounsel",
        "caseDivisionJudge": "judges",
        "caseFilingAttorney": "filingAttorneys",
        "caseState": "states",
        "homeState": "states",
        "client2homeState": "states",
        "caseDesignation": "caseDesignations",
    }
    if name in exact_lists:
        return exact_lists[name]

    if re.fullmatch(r"debtCollector\d+Name", name):
        return "debtCollectors"

    return None
|
||||
|
||||
|
||||
def section_for(name):
    """Return the profile section heading that a field name belongs to.

    Matching is on the lower-cased name, by prefix or by membership in a small
    fixed set; the one exception is ``numCollectors``, which is compared
    case-sensitively. Anything unrecognised lands in "Other Fields".
    """
    lowered = name.lower()

    client_fields = {
        "ssn", "dob", "alias", "email",
        "homeaddress", "homecity", "homestate", "homezip", "homecounty",
        "homephone", "cellphone",
    }
    payment_fields = {
        "nameoncard", "cardnumber", "securitycode", "expiration",
        "billingaddress", "billingzip",
    }

    # "client2" must be tested before the more general "client" prefix.
    if lowered.startswith("client2"):
        heading = "Client 2 Information"
    elif lowered.startswith("client") or lowered in client_fields:
        heading = "Client Information"
    elif lowered.startswith("case"):
        heading = "Case Information"
    elif lowered.startswith("disco"):
        heading = "Discovery Information"
    elif lowered.startswith("settlement"):
        heading = "Settlement Information"
    elif lowered.startswith(("installment", "fee")) or lowered in payment_fields:
        heading = "Fee / Payment Information"
    elif lowered.startswith("debtcollector") or name == "numCollectors":
        heading = "Debt Collector Information"
    elif lowered == "notes":
        heading = "Notes"
    else:
        heading = "Other Fields"

    return heading
|
||||
|
||||
|
||||
def discover_templates(root=None):
    """Describe every ``.docx`` template under the legacy template folder.

    Args:
        root: Folder to scan. Defaults to ``TEMPLATES_OUT``. Template paths in
            the result are expressed relative to this folder's parent.

    Returns:
        A list of dicts (``id``, ``label``, ``template``, ``outputFilename``)
        in sorted path order. Ids are derived from the file stem and made
        unique with a numeric suffix (``_2``, ``_3``, ...), so two files with
        the same name in different folders no longer share an id.
    """
    legacy_root = TEMPLATES_OUT if root is None else Path(root)
    templates_root = legacy_root.parent

    templates = []
    used_ids = set()

    for path in sorted(legacy_root.rglob("*.docx")):
        rel = path.relative_to(templates_root).as_posix()

        base_id = re.sub(r"[^a-zA-Z0-9]+", "_", path.stem).strip("_").lower() or "template"
        template_id = base_id
        suffix = 2
        while template_id in used_ids:
            template_id = f"{base_id}_{suffix}"
            suffix += 1
        used_ids.add(template_id)

        # Drop only the file extension; a ".docx" elsewhere in the path stays.
        label = path.relative_to(legacy_root).with_suffix("").as_posix()
        label = label.replace("/", " / ")
        label = label.replace("_", " ")

        templates.append({
            "id": template_id,
            "label": label,
            "template": rel,
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx"
        })

    return templates
|
||||
|
||||
|
||||
# Build the legal profile from the form controls of the legacy index.html.
html = OLD_HTML.read_text(encoding="utf-8", errors="ignore")

# Cleaned <label> text keyed by the label's "for" attribute.
labels = {
    field_id: clean_html_label(label)
    for field_id, label in LABEL_RE.findall(html)
}

# fields_seen keeps first-appearance order; field_meta holds the tag name and
# attributes of the first control seen for each field name.
fields_seen = []
field_meta = {}

for match in TAG_RE.finditer(html):
    tag_name = match.group(1)
    tag = match.group(0)
    attrs = attrs_from_tag(tag)

    name = attrs.get("name") or attrs.get("id")
    if should_exclude(name):
        continue

    # Dict membership avoids rescanning the fields_seen list for every tag.
    if name not in field_meta:
        fields_seen.append(name)
        field_meta[name] = (tag_name, attrs)

grouped = {}

for name in fields_seen:
    tag_name, attrs = field_meta[name]
    ftype = field_type(name, tag_name, attrs)

    field = {
        "name": name,
        # Prefer the label text from the HTML; fall back to a generated one.
        "label": labels.get(name) or nice_label(name),
        "type": ftype,
        "required": False
    }

    list_name = list_name_for_field(name)
    if list_name:
        field["list"] = list_name

    grouped.setdefault(section_for(name), []).append(field)

preferred_order = [
    "Client Information",
    "Client 2 Information",
    "Case Information",
    "Discovery Information",
    "Settlement Information",
    "Fee / Payment Information",
    "Debt Collector Information",
    "Notes",
    "Other Fields",
]

sections = []

for heading in preferred_order:
    fields = grouped.get(heading)
    if not fields:
        continue

    sections.append({
        "heading": heading,
        "collapsible": heading not in {"Client Information", "Case Information"},
        "defaultOpen": heading in {"Client Information", "Case Information"},
        "fields": fields
    })

lists_raw = run_node_list_extractor()

lists = {
    "plaintiffs": lists_raw.get("casePlaintiffInfo", []),
    "opposingCounsel": lists_raw.get("caseOpposingCounselInfo", []) or lists_raw.get("opposingCounselInfo", []),
    "judges": lists_raw.get("judgeInfo", []),
    "filingAttorneys": lists_raw.get("caseFilingAttorneyInfo", []) or lists_raw.get("filingAttorneyInfo", []),
    "debtCollectors": lists_raw.get("debtCollectorInfo", []),
    "states": ["MO", "KS"],
    "caseDesignations": [
        "Associate Circuit",
        "Circuit",
        "Limited Actions",
        "Small Claims"
    ]
}

templates = discover_templates()

profile = {
    "id": "legal_profile",
    "name": "Legal Profile",
    "description": "Consumer debt defense legal profile based on the legacy app form fields. Additional template fields are calculated at generation time.",
    "template": templates[0]["template"] if templates else "legacy/Canned-Emails.docx",
    "outputFilename": "legal_{caseNumber}_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "lists": lists,
    "templates": templates,
    "calculations": [
        {
            "script": "legacy_legal",
            "runOn": "generate",
            "description": "Generate old-template compatible calculated fields.",
            "outputsDynamic": {
                "settlementSchedule": {
                    "countField": "settlementInstallmentNo",
                    "indexFormat": "decimal2",
                    "maxCount": 120,
                    "fields": [
                        "settlementPaymentDate",
                        "settlementPaymentAmount",
                        "settlementRemaingBalance",
                        "settlementRemainingBalance"
                    ]
                }
            }
        }
    ],
    "sections": sections
}

# Create the output folder first so a fresh checkout does not fail on write.
OUT_PROFILE.parent.mkdir(parents=True, exist_ok=True)
OUT_PROFILE.write_text(json.dumps(profile, indent=2), encoding="utf-8")

print(f"Wrote {OUT_PROFILE}")
print(f"Visible HTML fields: {len(fields_seen)}")
for section in sections:
    print(f"- {section['heading']}: {len(section['fields'])}")
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
MAP_FILE = Path("tools/doc_generator/content/excel_maps/legacy_excel_maps.json")
|
||||
|
||||
NS = {
|
||||
"main": "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
|
||||
"rel": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
|
||||
}
|
||||
|
||||
|
||||
def load_map(map_id):
    """Return the map entry with id ``map_id`` from ``MAP_FILE``.

    Raises:
        SystemExit: if no entry in the file's ``maps`` list has that id.
    """
    catalogue = json.loads(MAP_FILE.read_text(encoding="utf-8"))
    wanted = next(
        (entry for entry in catalogue["maps"] if entry["id"] == map_id),
        None,
    )
    if wanted is None:
        raise SystemExit(f"Map not found: {map_id}")
    return wanted
|
||||
|
||||
|
||||
def col_row(cell):
    """Split a cell reference such as ``"AB12"`` into ``("AB", 12)``.

    Letters and digits are collected independently; any other characters are
    ignored. Raises ``ValueError`` when the reference contains no digits.
    """
    letters = "".join(filter(str.isalpha, cell))
    digits = "".join(filter(str.isdigit, cell))
    return letters, int(digits)
|
||||
|
||||
|
||||
def shared_strings(z):
    """Return the workbook's shared-string table as a list of strings.

    ``z`` is an open ``zipfile.ZipFile`` for the ``.xlsx``. Each ``<si>`` item
    contributes one string, built by concatenating the text of every ``<t>``
    element beneath it. Returns ``[]`` when the archive has no
    ``xl/sharedStrings.xml`` member.
    """
    try:
        payload = z.read("xl/sharedStrings.xml")
    except KeyError:
        return []

    table = ET.fromstring(payload)
    return [
        "".join(node.text or "" for node in item.findall(".//main:t", NS))
        for item in table.findall("main:si", NS)
    ]
|
||||
|
||||
|
||||
def read_xlsx_cells(path):
    """Read the cell values of the first worksheet of an ``.xlsx`` file.

    Returns:
        A dict mapping cell references (for example ``"B7"``) to string
        values. Shared-string cells (``t="s"``) are resolved through the
        shared-string table, inline-string cells (``t="inlineStr"``) take the
        text of their ``<is>`` element, and every other cell yields the raw
        text of its ``<v>`` element. Cells with no value are omitted.
    """
    with zipfile.ZipFile(path) as z:
        strings = shared_strings(z)

        # MVP: first worksheet only.
        sheet_xml = z.read("xl/worksheets/sheet1.xml")

    root = ET.fromstring(sheet_xml)
    values = {}

    for cell in root.findall(".//main:c", NS):
        ref = cell.attrib.get("r")
        if not ref:
            continue

        cell_type = cell.attrib.get("t")
        v = cell.find("main:v", NS)

        if v is None:
            # Inline strings keep their text in <is><t>...</t></is>, not <v>.
            inline = cell.find("main:is", NS)
            if cell_type == "inlineStr" and inline is not None:
                values[ref] = "".join(
                    t.text or "" for t in inline.findall(".//main:t", NS)
                )
            continue

        raw = v.text or ""

        if cell_type == "s":
            try:
                values[ref] = strings[int(raw)]
            except (ValueError, IndexError):
                # Malformed or out-of-range index: keep the raw text.
                values[ref] = raw
        else:
            values[ref] = raw

    return values
|
||||
|
||||
|
||||
def export_csv(map_id, xlsx_path, csv_path):
    """Write the mapped cells of a legacy workbook as a one-row CSV file.

    The header row lists the map's field names in map order. The single data
    row holds each field's cell value, or an empty string when the workbook
    has no value for that cell.
    """
    field_to_cell = load_map(map_id)["fields"]
    workbook_cells = read_xlsx_cells(xlsx_path)

    record = {
        field: workbook_cells.get(cell_ref, "")
        for field, cell_ref in field_to_cell.items()
    }

    with open(csv_path, "w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=list(field_to_cell))
        out.writeheader()
        out.writerow(record)

    print(f"Exported {csv_path}")
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the workbook-to-CSV export."""
    parser = argparse.ArgumentParser(description="Export legacy Excel workbook cells to new app CSV datafile.")
    for argument, help_text in (
        ("map_id", "Map id from legacy_excel_maps.json"),
        ("xlsx", "Legacy Excel workbook to read"),
        ("csv", "CSV datafile to write"),
    ):
        parser.add_argument(argument, help=help_text)

    options = parser.parse_args()
    export_csv(options.map_id, Path(options.xlsx), Path(options.csv))


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
PROFILE = Path("tools/doc_generator/content/document_types/legal_profile.json")
|
||||
TEMPLATES_ROOT = Path("tools/doc_generator/content/templates")
|
||||
LEGACY_ROOT = TEMPLATES_ROOT / "legacy"
|
||||
|
||||
CATEGORY_RULES = [
|
||||
("discovery", ["disco", "discovery", "interrog", "request-for-production", "rfp", "admission"]),
|
||||
("answers", ["answer", "entry-of-appearance"]),
|
||||
("settlement", ["settlement", "stip", "payment"]),
|
||||
("client", ["client", "engagement", "fee", "contract"]),
|
||||
("motions", ["motion", "dismiss", "compel", "summary"]),
|
||||
("letters", ["letter", "email", "canned"]),
|
||||
("pleadings", ["petition", "complaint", "counterclaim"]),
|
||||
]
|
||||
|
||||
|
||||
def title_case(value):
    """Normalise a file or folder name into space-separated words.

    Underscores and hyphens become spaces, whitespace is collapsed, every word
    is lower-cased, and known abbreviations are expanded (``disco`` becomes
    ``discovery``, ``mo`` becomes ``Missouri``, ...).

    Note: despite the name, the result is lower-case apart from the
    capitalised ``Missouri`` and ``Kansas`` expansions.
    """
    expansions = {
        "disco": "discovery",
        "rfp": "request for production",
        "cos": "certificate of service",
        "oc": "opposing counsel",
        "atty": "attorney",
        "mo": "Missouri",
        "ks": "Kansas",
    }

    spaced = value.replace("_", " ").replace("-", " ")
    spaced = re.sub(r"\s+", " ", spaced).strip()

    tokens = (word.lower() for word in spaced.split())
    return " ".join(expansions.get(token, token) for token in tokens)
|
||||
|
||||
|
||||
def slug(value):
    """Return a lower-case, underscore-separated identifier for ``value``.

    The value is first normalised with ``title_case``; runs of characters
    other than ``a-z`` and ``0-9`` then collapse to one underscore. Falls back
    to ``"template"`` when nothing is left.
    """
    lowered = title_case(value).lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered).strip("_")
    return collapsed if collapsed else "template"
|
||||
|
||||
|
||||
def category_for(relative_path):
    """Return the template category for a path relative to the legacy root.

    The lower-cased POSIX path is searched for each rule's substrings in
    ``CATEGORY_RULES`` order; the first rule with a hit wins. Paths matching
    no rule are ``"general"``.
    """
    haystack = relative_path.as_posix().lower()
    for category, needles in CATEGORY_RULES:
        for needle in needles:
            if needle in haystack:
                return category
    return "general"
|
||||
|
||||
|
||||
def label_for(path):
    """Build a display label such as ``"folder / file name"`` for a template.

    Each component of the path relative to ``LEGACY_ROOT`` (with the file
    extension removed from the last one) is normalised with ``title_case`` and
    the pieces are joined with ``" / "``.
    """
    parts = path.relative_to(LEGACY_ROOT).parts
    stem = Path(parts[-1]).stem
    return " / ".join(title_case(piece) for piece in (*parts[:-1], stem))
|
||||
|
||||
|
||||
def main():
    """Rebuild the ``templates`` list of the legal profile from disk.

    Every ``.docx`` under ``LEGACY_ROOT`` becomes a template entry with a
    category, a unique id, a label and an output filename pattern. Entries are
    sorted by category then label; when any exist, the first one becomes the
    profile's default template. The profile file is rewritten in place.
    """
    data = json.loads(PROFILE.read_text(encoding="utf-8"))

    templates = []
    used_ids = set()

    for path in sorted(LEGACY_ROOT.rglob("*.docx")):
        rel_from_legacy = path.relative_to(LEGACY_ROOT)
        category = category_for(rel_from_legacy)

        # Ids must be unique; append _2, _3, ... on collision.
        base_id = f"{category}_{slug(rel_from_legacy.with_suffix('').as_posix())}"
        template_id = base_id
        attempt = 2
        while template_id in used_ids:
            template_id = f"{base_id}_{attempt}"
            attempt += 1
        used_ids.add(template_id)

        templates.append({
            "id": template_id,
            "category": category,
            "label": label_for(path),
            "template": path.relative_to(TEMPLATES_ROOT).as_posix(),
            "outputFilename": f"{template_id}_{{caseNumber}}_{{timestamp_YYYY-MM-DD_HH-mm-ss}}.docx"
        })

    templates.sort(key=lambda item: (item["category"], item["label"]))

    data["templates"] = templates

    if templates:
        default = templates[0]
        data["defaultTemplateId"] = default["id"]
        data["template"] = default["template"]

    PROFILE.write_text(json.dumps(data, indent=2), encoding="utf-8")

    print(f"Updated {PROFILE}")
    print(f"Templates: {len(templates)}")

    per_category = {}
    for entry in templates:
        per_category[entry["category"]] = per_category.get(entry["category"], 0) + 1
    for category in sorted(per_category):
        print(f"- {category}: {per_category[category]}")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,251 @@
|
|||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from docx import Document
|
||||
except Exception:
|
||||
Document = None
|
||||
|
||||
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")
|
||||
OUT_DIR = Path("diagnostics")
|
||||
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
PLACEHOLDER_RE = re.compile(r"\{([A-Za-z0-9_:\-]+)\}")
|
||||
|
||||
|
||||
def read_text(path):
    """Return the UTF-8 text of ``path``, or ``""`` if it cannot be read.

    Undecodable bytes are dropped. Only I/O failures (missing file, permission
    error, path is a directory, ...) are treated as "no text"; other errors
    propagate to the caller.
    """
    try:
        return path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ""
|
||||
|
||||
|
||||
def find_placeholders_in_text(text):
    """Return the sorted, de-duplicated ``{placeholder}`` names found in ``text``."""
    unique_names = {hit.group(1) for hit in PLACEHOLDER_RE.finditer(text)}
    return sorted(unique_names)
|
||||
|
||||
|
||||
def find_placeholders_in_docx(path):
    """Return the sorted ``{placeholder}`` names used in a ``.docx`` template.

    The document body, its tables (including nested tables), and the headers
    and footers of every section are scanned. Returns ``[]`` when python-docx
    is not installed or the file cannot be opened.
    """
    if Document is None:
        return []

    try:
        doc = Document(path)
    except Exception:
        # Deliberately broad: this is a best-effort survey, so a template
        # that cannot be opened simply contributes no placeholders.
        return []

    found = set()

    def scan_container(container):
        # Works for anything exposing .paragraphs and .tables.
        for paragraph in container.paragraphs:
            found.update(find_placeholders_in_text(paragraph.text))
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    scan_container(cell)

    scan_container(doc)

    # Headers and footers are not part of doc.paragraphs / doc.tables.
    header_footer_attrs = (
        "header",
        "footer",
        "first_page_header",
        "first_page_footer",
        "even_page_header",
        "even_page_footer",
    )
    for section in doc.sections:
        for attr in header_footer_attrs:
            # getattr guards python-docx versions without these attributes.
            part = getattr(section, attr, None)
            if part is not None:
                scan_container(part)

    return sorted(found)
|
||||
|
||||
|
||||
def categorize_field(name):
    """Return the draft-profile section heading for a placeholder name.

    Matching is on the lower-cased name, by prefix or by membership in a small
    fixed set. Unrecognised names go to "Other Fields".
    """
    lowered = name.lower()

    payment_fields = {
        "nameoncard", "cardnumber", "securitycode", "expiration",
        "billingaddress", "billingzip",
    }

    # "client2" must be tested before the more general "client" prefix.
    if lowered.startswith("client2"):
        heading = "Client 2 Information"
    elif lowered.startswith("client") or lowered in {"dob", "ssn", "ssnlastfour", "alias", "email"}:
        heading = "Client Information"
    elif lowered.startswith("case"):
        heading = "Case Information"
    elif lowered.startswith("settlement"):
        heading = "Settlement Information"
    elif lowered.startswith(("installment", "fee")) or lowered in payment_fields:
        heading = "Fee / Payment Information"
    elif lowered.startswith("debtcollector"):
        heading = "Debt Collector Information"
    elif lowered.startswith("disco"):
        heading = "Discovery Information"
    elif lowered in {"today", "currentdate", "currentdatemm-dd-yyyy"}:
        heading = "Date Fields"
    elif lowered == "notes":
        heading = "Notes"
    else:
        heading = "Other Fields"

    return heading
|
||||
|
||||
|
||||
def field_type(name):
    """Guess a form field type from keywords in the placeholder name.

    Checked in order on the lower-cased name: notes / appearanceinfo /
    paymentoptions give ``textarea``; date or ``dob`` gives ``date``; email
    gives ``email``; phone or fax gives ``tel``; anything else is ``text``.
    """
    lowered = name.lower()

    keyword_rules = (
        ("textarea", ("notes", "appearanceinfo", "paymentoptions")),
        ("date", ("date",)),
        ("email", ("email",)),
        ("tel", ("phone", "fax")),
    )
    for kind, keywords in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return kind
        # "dob" contains no "date" keyword but is still a date field.
        if kind == "date" and lowered == "dob":
            return kind

    return "text"
|
||||
|
||||
|
||||
def make_sections(fields):
    """Group placeholder names into ordered profile sections.

    Names are bucketed with ``categorize_field``; sections are emitted in a
    fixed preferred order and empty ones are skipped. Within a section the
    fields are sorted by name, typed with ``field_type`` and never required.
    "Client Information" and "Case Information" are non-collapsible and open
    by default.
    """
    preferred_order = [
        "Date Fields",
        "Client Information",
        "Client 2 Information",
        "Case Information",
        "Discovery Information",
        "Settlement Information",
        "Fee / Payment Information",
        "Debt Collector Information",
        "Notes",
        "Other Fields",
    ]
    always_open = {"Client Information", "Case Information"}

    by_heading = {}
    for name in fields:
        by_heading.setdefault(categorize_field(name), []).append(name)

    def describe(name):
        # Label: split camelCase, turn underscores into spaces, title-case.
        spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", name).replace("_", " ")
        return {
            "name": name,
            "label": spaced.strip().title(),
            "type": field_type(name),
            "required": False
        }

    sections = []
    for heading in preferred_order:
        names = by_heading.get(heading)
        if not names:
            continue

        pinned = heading in always_open
        sections.append({
            "heading": heading,
            "collapsible": not pinned,
            "defaultOpen": pinned,
            "fields": [describe(name) for name in sorted(names)]
        })

    return sections
|
||||
|
||||
|
||||
js_files = sorted(OLD_APP.rglob("*.js"))
|
||||
html_files = sorted(OLD_APP.rglob("*.html"))
|
||||
css_files = sorted(OLD_APP.rglob("*.css"))
|
||||
docx_files = sorted(OLD_APP.rglob("*.docx"))
|
||||
xlsx_files = sorted(OLD_APP.rglob("*.xlsx"))
|
||||
|
||||
all_text_placeholders = set()
|
||||
function_hits = []
|
||||
|
||||
function_terms = {
|
||||
"DOCX generation": ["docx", "Docxtemplater", "generateDocument", "generateDoc"],
|
||||
"Excel generation": ["xlsx", "generateExcel", "template.xlsx"],
|
||||
"vCard generation": ["vcard", "vCard", "BEGIN:VCARD"],
|
||||
"Calendar / ICS generation": ["ics", "BEGIN:VCALENDAR", "VEVENT"],
|
||||
"Client folder generation": ["generateClientFolder", "client folder"],
|
||||
"Settlement calculations": ["settlementPayment", "settlementInstallment", "remainingBalance"],
|
||||
}
|
||||
|
||||
for path in js_files + html_files:
|
||||
text = read_text(path)
|
||||
all_text_placeholders.update(find_placeholders_in_text(text))
|
||||
|
||||
for label, terms in function_terms.items():
|
||||
if any(term in text for term in terms):
|
||||
function_hits.append((label, str(path.relative_to(OLD_APP))))
|
||||
|
||||
# Scan every DOCX template once, keyed by its path relative to OLD_APP.
# find_placeholders_in_docx is a helper defined outside this section.
placeholders_by_template = {
    str(docx_path.relative_to(OLD_APP)): find_placeholders_in_docx(docx_path)
    for docx_path in docx_files
}

# One summary row per template, in the same order as docx_files.
template_rows = [
    {
        "template": relative_name,
        "placeholder_count": len(found),
        "placeholders": found,
    }
    for relative_name, found in placeholders_by_template.items()
]

# Union of the placeholders seen across all templates.
all_template_placeholders = set().union(*placeholders_by_template.values())
|
||||
|
||||
# Every placeholder seen in JS/HTML text or in a DOCX template, sorted.
all_fields = sorted(all_text_placeholders.union(all_template_placeholders))

# Distinct feature labels that matched in at least one file.
detected_features = sorted({label for label, _ in function_hits})

# Draft profile describing the legacy app. The "template" value is a stand-in
# that names no real file; make_sections (defined earlier in this file)
# builds the form sections from the detected fields.
profile = {
    "id": "legacy_word_doc_generator",
    "name": "Legacy Word Doc Generator Profile",
    "description": "Draft profile generated from the legacy word-doc-generator app.",
    "template": "REPLACE_WITH_SELECTED_TEMPLATE.docx",
    "outputFilename": "legacy_document_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
    "sourceApp": str(OLD_APP),
    "sections": make_sections(all_fields),
    "legacyFeatures": detected_features,
    "templatesFound": template_rows,
}

# Serialize the draft into OUT_DIR as indented JSON.
profile_path = OUT_DIR / "legacy_word_doc_generator_profile_draft.json"
profile_json = json.dumps(profile, indent=2)
profile_path.write_text(profile_json, encoding="utf-8")
|
||||
|
||||
# Build the Markdown review report as a list of lines, write it into OUT_DIR,
# and print a short summary of what was produced.
report = [
    "# Legacy Word Doc Generator Review",
    "",
    f"Source app: `{OLD_APP}`",
    "",
    "## Files Found",
    "",
    f"- JS files: {len(js_files)}",
    f"- HTML files: {len(html_files)}",
    f"- CSS files: {len(css_files)}",
    f"- DOCX templates: {len(docx_files)}",
    f"- XLSX files: {len(xlsx_files)}",
    "",
    "## Legacy Features Detected",
    "",
]

if function_hits:
    # dict.fromkeys drops repeated (label, path) pairs while keeping the
    # order in which they were first seen.
    report.extend(
        f"- {label}: `{rel}`" for label, rel in dict.fromkeys(function_hits)
    )
else:
    report.append("- No major legacy feature signatures detected.")

report.extend(["", "## Templates Found", ""])

if template_rows:
    for row in template_rows:
        report.append(f"### `{row['template']}`")
        report.append(f"- Placeholder count: {row['placeholder_count']}")
        if row["placeholders"]:
            report.append("- Placeholders:")
            # Two-space indent: a child item must be indented to the parent
            # item's content column, otherwise Markdown renders it as a
            # sibling of "- Placeholders:" instead of nesting it.
            report.extend(f"  - `{{{name}}}`" for name in row["placeholders"])
        # Blank line separates one template's section from the next.
        report.append("")
else:
    report.append("- No DOCX templates found.")

report.extend(["", "## All Fields Detected", ""])
report.extend(f"- `{{{name}}}`" for name in all_fields)

report.extend(["", "## Draft Profile", "", f"Generated: `{profile_path}`", ""])

report_path = OUT_DIR / "legacy_word_doc_generator_review.md"
report_path.write_text("\n".join(report), encoding="utf-8")

print(f"Wrote {report_path}")
print(f"Wrote {profile_path}")
print(f"Detected {len(all_fields)} unique fields/placeholders")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
tools/doc_generator/content/templates/legacy/Discovery/BlittKS-disco - Copy.docx
Executable file
BIN
tools/doc_generator/content/templates/legacy/Discovery/BlittKS-disco - Copy.docx
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
tools/doc_generator/content/templates/legacy/Discovery/Burns-Walsh-KS-DB-CC.docx
Executable file
BIN
tools/doc_generator/content/templates/legacy/Discovery/Burns-Walsh-KS-DB-CC.docx
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
tools/doc_generator/content/templates/legacy/Discovery/Southlaw-MO-OC-disco.docx
Executable file
BIN
tools/doc_generator/content/templates/legacy/Discovery/Southlaw-MO-OC-disco.docx
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue