# File viewer header (not part of the script): 252 lines, 7.5 KiB, Python
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from docx import Document
|
|
except Exception:
|
|
Document = None
|
|
|
|
# Root directory of the legacy word-doc-generator app that this script scans.
OLD_APP = Path("/mnt/storage/sftp/mcelwain/repository/word-doc-generator")

# Output directory for the generated files. The path is relative, so it is
# resolved against the current working directory.
OUT_DIR = Path("diagnostics")

# Runs at import time: create the output directory (and any missing parents);
# an already-existing directory is not an error.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Matches `{name}` placeholders and captures `name`, which may consist of
# ASCII letters, digits, `_`, `:` and `-`.
PLACEHOLDER_RE = re.compile(r"\{([A-Za-z0-9_:\-]+)\}")
|
|
|
|
|
|
def read_text(path):
    """Return the contents of *path* decoded as UTF-8, or "" if unreadable.

    Args:
        path: A ``pathlib.Path``, ``str`` or other path-like object. Strings
            are accepted so callers do not silently get "" just because they
            did not wrap the path in ``Path`` first.

    Returns:
        The file's text with undecodable bytes dropped, or the empty string
        when the file is missing, is a directory, or cannot be read.
    """
    try:
        # errors="ignore" drops invalid bytes, so decoding itself never raises.
        return Path(path).read_text(encoding="utf-8", errors="ignore")
    except (OSError, ValueError):
        # OSError: missing file, permissions, directories, other I/O failures.
        # ValueError: malformed paths (e.g. an embedded NUL byte).
        # This is a best-effort scan, so an unreadable file counts as empty.
        return ""
|
|
|
|
|
|
def find_placeholders_in_text(text):
    """Return the distinct placeholder names in *text*, sorted ascending.

    A placeholder is whatever ``PLACEHOLDER_RE`` matches; only the captured
    name (without the surrounding braces) is returned.
    """
    unique_names = {match.group(1) for match in PLACEHOLDER_RE.finditer(text)}
    return sorted(unique_names)
|
|
|
|
|
|
def find_placeholders_in_docx(path):
    """Return the sorted, distinct placeholder names found in a .docx file.

    Scans the document body (paragraphs and tables, including tables nested
    inside cells) and, in addition, the header and footer of every section,
    so placeholders that only appear in page headers/footers are reported too.

    Args:
        path: Location of the .docx file, passed straight to ``Document``.

    Returns:
        A sorted list of placeholder names. Empty when python-docx is not
        installed (``Document is None``) or the file cannot be opened.
    """
    if Document is None:
        return []

    try:
        doc = Document(path)
    except Exception:
        # Best-effort scan: a corrupt or non-DOCX file yields no placeholders
        # instead of aborting the whole run.
        return []

    found = set()

    def scan_container(container):
        # The document, table cells, headers and footers all expose
        # `.paragraphs` and `.tables`, so one recursive walker covers them.
        for paragraph in container.paragraphs:
            found.update(find_placeholders_in_text(paragraph.text))
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    scan_container(cell)

    scan_container(doc)

    for section in doc.sections:
        for part_name in ("header", "footer"):
            # getattr: python-docx releases without header/footer support
            # simply skip this extra pass.
            part = getattr(section, part_name, None)
            if part is None:
                continue
            try:
                scan_container(part)
            except Exception:
                # Header/footer scanning is an additional best-effort pass;
                # a malformed part must not discard the body results.
                continue

    return sorted(found)
|
|
|
|
|
|
def categorize_field(name):
    """Return the section heading that the field *name* belongs to.

    Matching is case-insensitive. Rules are tried top to bottom and the first
    hit wins, so order matters ("client2" must be tested before "client").
    A rule hits when the lowered name starts with one of its prefixes or
    equals one of its exact names. Unmatched names fall into "Other Fields".
    """
    key = name.lower()

    # (prefixes, exact names, heading) -- evaluated in this order.
    rules = (
        (("client2",), (), "Client 2 Information"),
        (
            ("client",),
            ("dob", "ssn", "ssnlastfour", "alias", "email"),
            "Client Information",
        ),
        (("case",), (), "Case Information"),
        (("settlement",), (), "Settlement Information"),
        (
            ("installment", "fee"),
            (
                "nameoncard",
                "cardnumber",
                "securitycode",
                "expiration",
                "billingaddress",
                "billingzip",
            ),
            "Fee / Payment Information",
        ),
        (("debtcollector",), (), "Debt Collector Information"),
        (("disco",), (), "Discovery Information"),
        ((), ("today", "currentdate", "currentdatemm-dd-yyyy"), "Date Fields"),
        ((), ("notes",), "Notes"),
    )

    for prefixes, exact_names, heading in rules:
        # str.startswith with an empty tuple is always False.
        if key.startswith(prefixes) or key in exact_names:
            return heading

    return "Other Fields"
|
|
|
|
|
|
def field_type(name):
    """Return the form input type ("textarea", "date", "email", "tel" or
    "text") guessed from the field *name*.

    Matching is case-insensitive and substring-based, except "dob", which
    must match the whole name. Checks run in the listed order, so a name that
    contains both "notes" and "date" is a "textarea".
    """
    key = name.lower()

    def mentions(*fragments):
        return any(fragment in key for fragment in fragments)

    if mentions("notes", "appearanceinfo", "paymentoptions"):
        return "textarea"
    if key == "dob" or mentions("date"):
        return "date"
    if mentions("email"):
        return "email"
    if mentions("phone", "fax"):
        return "tel"
    return "text"
|
|
|
|
|
|
def make_sections(fields):
    """Group field names into the ordered section list used by the profile.

    Each name is assigned a heading by ``categorize_field``. Sections are
    emitted in a fixed display order, headings without any field are left
    out, and the fields inside a section are sorted by name.

    Args:
        fields: Iterable of field/placeholder names.

    Returns:
        A list of section dicts with the keys "heading", "collapsible",
        "defaultOpen" and "fields"; every field is a dict with "name",
        "label", "type" and "required" (always False).
    """
    display_order = (
        "Date Fields",
        "Client Information",
        "Client 2 Information",
        "Case Information",
        "Discovery Information",
        "Settlement Information",
        "Fee / Payment Information",
        "Debt Collector Information",
        "Notes",
        "Other Fields",
    )
    # These two sections are not collapsible and start open.
    always_open = {"Client Information", "Case Information"}

    def describe(field_name):
        # camelCase -> "camel Case", underscores -> spaces, then Title Case.
        spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", field_name)
        return {
            "name": field_name,
            "label": spaced.replace("_", " ").strip().title(),
            "type": field_type(field_name),
            "required": False,
        }

    by_heading = {}
    for field_name in fields:
        heading = categorize_field(field_name)
        if heading not in by_heading:
            by_heading[heading] = []
        by_heading[heading].append(field_name)

    return [
        {
            "heading": heading,
            "collapsible": heading not in always_open,
            "defaultOpen": heading in always_open,
            "fields": [describe(member) for member in sorted(by_heading[heading])],
        }
        for heading in display_order
        if by_heading.get(heading)
    ]
|
|
|
|
|
|
# Recursively collect the legacy app's files by extension, each list sorted.
js_files, html_files, css_files, docx_files, xlsx_files = (
    sorted(OLD_APP.rglob(pattern))
    for pattern in ("*.js", "*.html", "*.css", "*.docx", "*.xlsx")
)
|
|
|
|
# Placeholder names seen anywhere in the legacy JS/HTML sources.
all_text_placeholders = set()
# One (feature label, path relative to OLD_APP) pair per feature signature hit.
function_hits = []

# Feature label -> source snippets whose presence suggests that feature.
function_terms = {
    "DOCX generation": ["docx", "Docxtemplater", "generateDocument", "generateDoc"],
    "Excel generation": ["xlsx", "generateExcel", "template.xlsx"],
    "vCard generation": ["vcard", "vCard", "BEGIN:VCARD"],
    "Calendar / ICS generation": ["ics", "BEGIN:VCALENDAR", "VEVENT"],
    "Client folder generation": ["generateClientFolder", "client folder"],
    "Settlement calculations": ["settlementPayment", "settlementInstallment", "remainingBalance"],
}

# Terms too short to be matched as raw substrings. A plain `"ics" in text`
# test also fires on words such as "analytics" or "diagnostics", which would
# flag calendar generation for almost every file, so these terms must occur
# as standalone words (e.g. "file.ics", "'ics'", "ics.createEvent").
_whole_word_terms = {
    term: re.compile(rf"\b{re.escape(term)}\b") for term in ("ics",)
}


def _term_in_text(term, text):
    """Return True if *term* occurs in *text* (whole-word for ambiguous terms)."""
    pattern = _whole_word_terms.get(term)
    if pattern is not None:
        return pattern.search(text) is not None
    return term in text


for path in js_files + html_files:
    text = read_text(path)
    all_text_placeholders.update(find_placeholders_in_text(text))

    # A file is recorded at most once per feature, however many terms match.
    for label, terms in function_terms.items():
        if any(_term_in_text(term, text) for term in terms):
            function_hits.append((label, str(path.relative_to(OLD_APP))))
|
|
|
|
# Union of placeholder names across all DOCX templates.
all_template_placeholders = set()
# One summary row per DOCX template, in the (sorted) order of docx_files.
template_rows = []

for docx_path in docx_files:
    names = find_placeholders_in_docx(docx_path)
    all_template_placeholders |= set(names)
    template_rows.append(
        dict(
            template=str(docx_path.relative_to(OLD_APP)),
            placeholder_count=len(names),
            placeholders=names,
        )
    )
|
|
|
|
# Every placeholder name seen in either the JS/HTML sources or the templates.
all_fields = sorted(all_text_placeholders.union(all_template_placeholders))

# Static metadata first, then the parts derived from the scan; the key order
# of the resulting JSON follows insertion order.
profile = {
    "id": "legacy_word_doc_generator",
    "name": "Legacy Word Doc Generator Profile",
    "description": "Draft profile generated from the legacy word-doc-generator app.",
    "template": "REPLACE_WITH_SELECTED_TEMPLATE.docx",
    "outputFilename": "legacy_document_{timestamp_YYYY-MM-DD_HH-mm-ss}.docx",
}
profile.update(
    {
        "sourceApp": str(OLD_APP),
        "sections": make_sections(all_fields),
        # Feature labels only, de-duplicated across files.
        "legacyFeatures": sorted({hit[0] for hit in function_hits}),
        "templatesFound": template_rows,
    }
)

profile_path = OUT_DIR / "legacy_word_doc_generator_profile_draft.json"
profile_json = json.dumps(profile, indent=2)
profile_path.write_text(profile_json, encoding="utf-8")
|
|
|
|
# Build the Markdown review report line by line; the lines are joined with
# "\n" when written out.
report = [
    "# Legacy Word Doc Generator Review",
    "",
    f"Source app: `{OLD_APP}`",
    "",
    "## Files Found",
    "",
    f"- JS files: {len(js_files)}",
    f"- HTML files: {len(html_files)}",
    f"- CSS files: {len(css_files)}",
    f"- DOCX templates: {len(docx_files)}",
    f"- XLSX files: {len(xlsx_files)}",
    "",
    "## Legacy Features Detected",
    "",
]

if function_hits:
    # dict.fromkeys drops repeated (label, path) pairs but keeps first-seen order.
    for label, rel in dict.fromkeys(function_hits):
        report.append(f"- {label}: `{rel}`")
else:
    report.append("- No major legacy feature signatures detected.")

report += ["", "## Templates Found", ""]

if template_rows:
    for row in template_rows:
        report.append(f"### `{row['template']}`")
        report.append(f"- Placeholder count: {row['placeholder_count']}")
        if row["placeholders"]:
            report.append("- Placeholders:")
            for name in row["placeholders"]:
                # Two-space indent: with a single space Markdown treats the
                # item as a sibling of "- Placeholders:" instead of a child.
                report.append(f"  - `{{{name}}}`")
        # Blank line separates one template's block from the next.
        report.append("")
else:
    report.append("- No DOCX templates found.")

report += ["", "## All Fields Detected", ""]
report.extend(f"- `{{{name}}}`" for name in all_fields)

report += ["", "## Draft Profile", "", f"Generated: `{profile_path}`", ""]

report_path = OUT_DIR / "legacy_word_doc_generator_review.md"
report_path.write_text("\n".join(report), encoding="utf-8")

print(f"Wrote {report_path}")
print(f"Wrote {profile_path}")
print(f"Detected {len(all_fields)} unique fields/placeholders")
|