"""Export mapped cells from a legacy Excel workbook into a one-row CSV datafile.

The workbook is read directly from the ``.xlsx`` zip container with the
standard library only; which cell feeds which CSV column is described by an
entry in ``legacy_excel_maps.json``.
"""

import argparse
import csv
import json
import posixpath
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path

MAP_FILE = Path("tools/doc_generator/content/excel_maps/legacy_excel_maps.json")

NS = {
    "main": "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    "rel": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
}

# Namespace used by the package relationship parts (``*.rels``) in the zip.
_PKG_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"

# Worksheet used when the workbook metadata cannot be resolved.
_DEFAULT_SHEET = "xl/worksheets/sheet1.xml"

# Fully-qualified tags of the text (``t``) and rich-text run (``r``) elements.
_T_TAG = f"{{{NS['main']}}}t"
_R_TAG = f"{{{NS['main']}}}r"


def load_map(map_id):
    """Return the entry of ``MAP_FILE`` whose ``"id"`` equals *map_id*.

    Raises:
        SystemExit: if no entry in ``data["maps"]`` has that id.
    """
    data = json.loads(MAP_FILE.read_text(encoding="utf-8"))
    for item in data["maps"]:
        if item["id"] == map_id:
            return item
    raise SystemExit(f"Map not found: {map_id}")


def col_row(cell):
    """Split a cell reference such as ``"AB12"`` into ``("AB", 12)``.

    Non-alphanumeric characters (for example ``$``) are ignored.

    Raises:
        ValueError: if *cell* contains no digits.
    """
    col = "".join(ch for ch in cell if ch.isalpha())
    row = "".join(ch for ch in cell if ch.isdigit())
    return col, int(row)


def _rich_text(node):
    """Return the visible text of a string item (``si``) or inline string (``is``).

    Only the direct ``t`` child and the ``t`` children of rich-text runs
    (``r``) are used. Phonetic runs (``rPh``) also contain ``t`` elements, but
    they hold pronunciation hints rather than cell content, so they are
    skipped.
    """
    parts = []
    for child in node:
        if child.tag == _T_TAG:
            parts.append(child.text or "")
        elif child.tag == _R_TAG:
            parts.extend(t.text or "" for t in child.findall("main:t", NS))
    return "".join(parts)


def shared_strings(z):
    """Return the workbook's shared-string table as a list of strings.

    Args:
        z: an open ``zipfile.ZipFile`` for the workbook.

    Returns:
        One string per ``si`` entry, in file order; an empty list when the
        workbook has no ``xl/sharedStrings.xml`` part.
    """
    try:
        xml = z.read("xl/sharedStrings.xml")
    except KeyError:
        return []
    root = ET.fromstring(xml)
    return [_rich_text(si) for si in root.findall("main:si", NS)]


def _first_sheet_path(z):
    """Return the zip member name of the first worksheet in workbook order.

    The sheet order comes from ``xl/workbook.xml`` and the file behind each
    sheet from ``xl/_rels/workbook.xml.rels``. If either part is missing or
    unparsable, or the resolved member is not in the archive, the conventional
    ``xl/worksheets/sheet1.xml`` is returned instead.
    """
    try:
        workbook = ET.fromstring(z.read("xl/workbook.xml"))
        rels = ET.fromstring(z.read("xl/_rels/workbook.xml.rels"))
    except (KeyError, ET.ParseError):
        return _DEFAULT_SHEET

    # Relationship id -> target, restricted to worksheets so that chart
    # sheets and dialog sheets listed first are not picked up.
    targets = {}
    for rel in rels.findall(f"{{{_PKG_REL_NS}}}Relationship"):
        if rel.attrib.get("Type", "").endswith("/worksheet"):
            targets[rel.attrib.get("Id")] = rel.attrib.get("Target", "")

    rid_attr = f"{{{NS['rel']}}}id"
    for sheet in workbook.findall("main:sheets/main:sheet", NS):
        target = targets.get(sheet.attrib.get(rid_attr))
        if not target:
            continue
        if target.startswith("/"):
            # Absolute target: relative to the package root.
            member = target.lstrip("/")
        else:
            # Relative target: relative to the folder of workbook.xml.
            member = posixpath.normpath(posixpath.join("xl", target))
        return member if member in z.namelist() else _DEFAULT_SHEET
    return _DEFAULT_SHEET


def read_xlsx_cells(path):
    """Read the cell values of the first worksheet of an ``.xlsx`` workbook.

    Args:
        path: anything ``zipfile.ZipFile`` accepts (path or binary file object).

    Returns:
        Dict mapping cell reference (e.g. ``"B4"``) to its value as a string.
        Shared-string and inline-string cells are resolved to their text; all
        other cells keep the raw text of their ``v`` element. Cells without a
        reference or without any stored value are omitted.
    """
    values = {}
    with zipfile.ZipFile(path) as z:
        strings = shared_strings(z)
        # MVP: first worksheet only.
        sheet_xml = z.read(_first_sheet_path(z))

    root = ET.fromstring(sheet_xml)
    for cell in root.findall(".//main:c", NS):
        ref = cell.attrib.get("r")
        if not ref:
            continue
        cell_type = cell.attrib.get("t")

        if cell_type == "inlineStr":
            # Inline strings keep their text in <is>, not in <v>.
            inline = cell.find("main:is", NS)
            if inline is not None:
                values[ref] = _rich_text(inline)
                continue

        v = cell.find("main:v", NS)
        if v is None:
            continue
        raw = v.text or ""
        if cell_type == "s":
            # <v> is an index into the shared-string table; keep the raw text
            # if the index is malformed or out of range.
            try:
                values[ref] = strings[int(raw)]
            except (ValueError, IndexError):
                values[ref] = raw
        else:
            values[ref] = raw
    return values


def export_csv(map_id, xlsx_path, csv_path):
    """Write a header row and one data row built from the mapped workbook cells.

    Args:
        map_id: id of the map entry to use (see ``load_map``).
        xlsx_path: workbook to read.
        csv_path: CSV file to create or overwrite.

    Fields whose mapped cell has no value are written as empty strings.
    """
    mapping = load_map(map_id)
    cells = read_xlsx_cells(xlsx_path)
    fields = mapping["fields"]
    row = {field: cells.get(cell, "") for field, cell in fields.items()}
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(fields))
        writer.writeheader()
        writer.writerow(row)
    print(f"Exported {csv_path}")


def main():
    """Parse the command line and run the export."""
    parser = argparse.ArgumentParser(
        description="Export legacy Excel workbook cells to new app CSV datafile."
    )
    parser.add_argument("map_id", help="Map id from legacy_excel_maps.json")
    parser.add_argument("xlsx", help="Legacy Excel workbook to read")
    parser.add_argument("csv", help="CSV datafile to write")
    args = parser.parse_args()
    export_csv(args.map_id, Path(args.xlsx), Path(args.csv))


if __name__ == "__main__":
    main()