# Python source file (107 lines, 2.7 KiB)
import argparse
|
|
import csv
|
|
import json
|
|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
from pathlib import Path
|
|
|
|
# JSON document holding the legacy Excel field maps (read by load_map).
MAP_FILE = Path("tools/doc_generator/content/excel_maps/legacy_excel_maps.json")

# Namespace prefixes passed to ElementTree lookups on the workbook XML parts.
NS = dict(
    main="http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    rel="http://schemas.openxmlformats.org/officeDocument/2006/relationships",
)
|
|
|
|
|
|
def load_map(map_id, map_file=None):
    """Return the map entry whose ``"id"`` equals *map_id*.

    Args:
        map_id: Identifier to look for among the entries listed under the
            top-level ``"maps"`` key of the JSON map file.
        map_file: Optional path to the JSON map file. When omitted, the
            module-level ``MAP_FILE`` is used.

    Returns:
        The first entry of ``"maps"`` whose ``"id"`` matches *map_id*.

    Raises:
        SystemExit: If no entry has the requested id.
    """
    # MAP_FILE is only consulted when the caller did not supply a path.
    source = MAP_FILE if map_file is None else Path(map_file)
    data = json.loads(source.read_text(encoding="utf-8"))
    for item in data["maps"]:
        if item["id"] == map_id:
            return item
    raise SystemExit(f"Map not found: {map_id}")
|
|
|
|
|
|
def col_row(cell):
    """Split a cell reference such as ``"B12"`` into column letters and row number.

    Characters that are neither letters nor digits (for example the ``$``
    of an absolute reference like ``"$B$12"``) are ignored.

    Args:
        cell: Cell reference string.

    Returns:
        A ``(column, row)`` tuple: the letters as a string and the digits
        as an ``int``.

    Raises:
        ValueError: If *cell* contains no digits that form a valid integer.
    """
    col = "".join(ch for ch in cell if ch.isalpha())
    row = "".join(ch for ch in cell if ch.isdigit())
    try:
        return col, int(row)
    except ValueError:
        # int("") would otherwise surface as an error that never mentions
        # the offending reference.
        raise ValueError(f"Cell reference has no valid row number: {cell!r}") from None
|
|
|
|
|
|
def shared_strings(z):
    """Return the workbook's shared-string table as a list of strings.

    Args:
        z: An open ``zipfile.ZipFile`` for the ``.xlsx`` archive.

    Returns:
        One string per ``<si>`` entry of ``xl/sharedStrings.xml``, in
        document order, or ``[]`` when the archive has no such member.
    """
    try:
        xml = z.read("xl/sharedStrings.xml")
    except KeyError:
        # ZipFile.read raises KeyError when the member is absent.
        return []

    root = ET.fromstring(xml)
    values = []

    for si in root.findall("main:si", NS):
        # A plain entry stores its text in a direct <t>; a rich-text entry
        # stores it in <r>/<t> runs. Matching only these two paths keeps
        # phonetic hint text (<rPh>/<t>) out of the cell value, which a
        # blanket ".//main:t" search would concatenate in.
        pieces = si.findall("main:t", NS) + si.findall("main:r/main:t", NS)
        values.append("".join(t.text or "" for t in pieces))

    return values
|
|
|
|
|
|
def _inline_string_text(cell):
    """Return the text of a cell's inline string (``<is>``), or None if absent."""
    holder = cell.find("main:is", NS)
    if holder is None:
        return None
    # Inline strings use the same layout as shared-string items: a direct
    # <t>, or rich-text <r>/<t> runs.
    runs = holder.findall("main:t", NS) + holder.findall("main:r/main:t", NS)
    return "".join(t.text or "" for t in runs)


def read_xlsx_cells(path):
    """Read the cell values of a workbook's ``sheet1.xml`` worksheet.

    Args:
        path: Path of the ``.xlsx`` file to open.

    Returns:
        A dict mapping each cell's ``r`` attribute (e.g. ``"B12"``) to its
        value as a string. Shared-string cells (``t="s"``) are resolved
        through the shared-string table, inline-string cells
        (``t="inlineStr"``) yield their embedded text, and every other cell
        yields the raw text of its ``<v>`` element. Cells without a
        reference or without any value are skipped.
    """
    values = {}

    with zipfile.ZipFile(path) as z:
        strings = shared_strings(z)

        # MVP: only the member named sheet1.xml is read.
        # NOTE(review): this assumes sheet1.xml is the first worksheet;
        # the workbook's own sheet order is not consulted.
        sheet_xml = z.read("xl/worksheets/sheet1.xml")
        root = ET.fromstring(sheet_xml)

    for cell in root.findall(".//main:c", NS):
        ref = cell.attrib.get("r")
        if not ref:
            continue

        cell_type = cell.attrib.get("t")

        if cell_type == "inlineStr":
            # Inline strings live in <is>, not <v>; without this branch
            # such cells would be dropped by the <v> check below.
            text = _inline_string_text(cell)
            if text is not None:
                values[ref] = text
                continue

        v = cell.find("main:v", NS)
        if v is None:
            continue

        raw = v.text or ""

        if cell_type == "s":
            try:
                values[ref] = strings[int(raw)]
            except (ValueError, IndexError):
                # Best effort: keep the raw index text when it is not a
                # usable position in the shared-string table.
                values[ref] = raw
        else:
            values[ref] = raw

    return values
|
|
|
|
|
|
def export_csv(map_id, xlsx_path, csv_path):
    """Export the mapped cells of a workbook as a single-row CSV file.

    Args:
        map_id: Id of the map entry to load via ``load_map``.
        xlsx_path: Workbook to read via ``read_xlsx_cells``.
        csv_path: Destination CSV file; written as UTF-8 with a header
            row followed by one data row.
    """
    mapping = load_map(map_id)
    workbook_cells = read_xlsx_cells(xlsx_path)

    field_to_cell = mapping["fields"]
    # Cells missing from the worksheet become empty strings.
    record = {
        name: workbook_cells.get(cell_ref, "")
        for name, cell_ref in field_to_cell.items()
    }

    with open(csv_path, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=list(field_to_cell))
        writer.writeheader()
        writer.writerow(record)

    print(f"Exported {csv_path}")
|
|
|
|
|
|
def main():
    """Parse the command-line arguments and run the CSV export."""
    cli = argparse.ArgumentParser(
        description="Export legacy Excel workbook cells to new app CSV datafile."
    )
    # Three positional arguments, registered in the order they are expected.
    positionals = (
        ("map_id", "Map id from legacy_excel_maps.json"),
        ("xlsx", "Legacy Excel workbook to read"),
        ("csv", "CSV datafile to write"),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    opts = cli.parse_args()
    export_csv(opts.map_id, Path(opts.xlsx), Path(opts.csv))
|
|
|
|
|
|
# Run the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|