# utility-app/scripts/legacy-excel-datafile.py
#
# 107 lines
# 2.7 KiB
# Python

import argparse
import csv
import json
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path
# JSON document that lists the legacy Excel maps. The path is relative, so it
# is resolved against the current working directory at read time.
MAP_FILE = Path("tools/doc_generator/content/excel_maps/legacy_excel_maps.json")

# Namespace prefixes passed to ElementTree find/findall calls on workbook XML.
NS = {
    "main": "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    "rel": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
}
def load_map(map_id):
    """Return the map entry whose ``"id"`` equals *map_id*.

    Entries are taken from the ``"maps"`` list of the JSON document stored at
    ``MAP_FILE``; the first matching entry wins.

    Raises:
        SystemExit: If no entry carries the requested id.
    """
    document = json.loads(MAP_FILE.read_text(encoding="utf-8"))
    found = next(
        (entry for entry in document["maps"] if entry["id"] == map_id),
        None,
    )
    if found is None:
        raise SystemExit(f"Map not found: {map_id}")
    return found
def col_row(cell):
    """Split a cell reference such as ``"AB12"`` into ``("AB", 12)``.

    Alphabetic characters make up the column part and digit characters make
    up the row number; every other character (for example ``$``) is ignored.

    Raises:
        ValueError: If *cell* contains no digits that form an integer.
    """
    letters = []
    digits = []
    for ch in cell:
        if ch.isalpha():
            letters.append(ch)
        if ch.isdigit():
            digits.append(ch)
    return "".join(letters), int("".join(digits))
def shared_strings(z):
    """Return the workbook's shared-string table as a list of plain strings.

    Args:
        z: An open ``zipfile.ZipFile`` for the workbook.

    Returns:
        One string per ``<si>`` entry of ``xl/sharedStrings.xml``, in document
        order, so a shared-string cell value can be used as an index into the
        list. An empty list when the archive has no such member.
    """
    try:
        payload = z.read("xl/sharedStrings.xml")
    except KeyError:
        # ZipFile.read raises KeyError for a member that is not in the archive.
        return []
    root = ET.fromstring(payload)
    values = []
    for si in root.findall("main:si", NS):
        # An entry holds its text either in a direct <t> child or in rich-text
        # runs (<r><t>...</t></r>). Phonetic runs (<rPh>) also contain <t>
        # elements, but those are reading hints rather than cell text, so a
        # descendant search (".//main:t") would wrongly append them.
        texts = si.findall("main:t", NS) + si.findall("main:r/main:t", NS)
        values.append("".join(t.text or "" for t in texts))
    return values
def read_xlsx_cells(path):
    """Read cell values from the ``sheet1.xml`` worksheet of a workbook.

    Args:
        path: Workbook location, passed straight to ``zipfile.ZipFile``.

    Returns:
        A dict mapping each cell reference (the ``r`` attribute, e.g. ``"B3"``)
        to its value as a string. Shared-string cells are resolved through the
        shared-string table, inline-string cells yield their inline text, and
        every other cell yields the raw ``<v>`` text. Cells without a
        reference or without any value are omitted.

    Raises:
        KeyError: If the archive has no ``xl/worksheets/sheet1.xml`` member.
    """
    values = {}
    with zipfile.ZipFile(path) as z:
        strings = shared_strings(z)
        # MVP: first worksheet only.
        sheet_xml = z.read("xl/worksheets/sheet1.xml")
    root = ET.fromstring(sheet_xml)
    for cell in root.findall(".//main:c", NS):
        ref = cell.attrib.get("r")
        if not ref:
            continue
        cell_type = cell.attrib.get("t")

        if cell_type == "inlineStr":
            # Inline strings keep their text under <is>, not <v>; without this
            # branch such cells would be dropped. Phonetic runs (<rPh>) are
            # left out because only <t> and <r>/<t> carry the cell text.
            inline = cell.find("main:is", NS)
            if inline is not None:
                texts = (
                    inline.findall("main:t", NS)
                    + inline.findall("main:r/main:t", NS)
                )
                values[ref] = "".join(t.text or "" for t in texts)
                continue

        v = cell.find("main:v", NS)
        if v is None:
            continue
        raw = v.text or ""

        if cell_type == "s":
            # Best effort: an unparsable or out-of-range index falls back to
            # the raw text. Negative indexes are rejected explicitly so they
            # cannot wrap around to the end of the table.
            try:
                index = int(raw)
            except ValueError:
                index = -1
            values[ref] = strings[index] if 0 <= index < len(strings) else raw
        else:
            values[ref] = raw
    return values
def export_csv(map_id, xlsx_path, csv_path):
    """Write the mapped workbook cells to a one-record CSV file.

    The map named *map_id* supplies a ``"fields"`` dict of field name to cell
    reference. Each field becomes a CSV column holding the value of its cell
    in *xlsx_path*, or an empty string when the cell was not read. The file
    at *csv_path* is overwritten with a header row and one data row, and a
    confirmation line is printed.
    """
    mapping = load_map(map_id)
    cells = read_xlsx_cells(xlsx_path)
    field_cells = mapping["fields"]
    record = {name: cells.get(ref, "") for name, ref in field_cells.items()}
    with open(csv_path, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=list(field_cells))
        writer.writeheader()
        writer.writerow(record)
    print(f"Exported {csv_path}")
def main():
    """Parse the command-line arguments and run the export."""
    cli = argparse.ArgumentParser(
        description="Export legacy Excel workbook cells to new app CSV datafile."
    )
    # Positional arguments, in the order they appear on the command line.
    positionals = (
        ("map_id", "Map id from legacy_excel_maps.json"),
        ("xlsx", "Legacy Excel workbook to read"),
        ("csv", "CSV datafile to write"),
    )
    for name, help_text in positionals:
        cli.add_argument(name, help=help_text)
    options = cli.parse_args()
    export_csv(options.map_id, Path(options.xlsx), Path(options.csv))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()