r"""
MS Project XML -> Watchdog-TPB JSON Konverter

Liest eine MS-Project-XML (MSPDI) und schreibt eine gefilterte JSON im
Format des bestehenden terminprogramm_data.json - inkl. Bonus-Feldern
(critical, milestone, outline_level, predecessors), die die TPB-Extension
fuer kritischen Pfad und Abhaengigkeits-Pfeile auswerten kann.

Aufruf:
  python mpp_xml_to_tpb.py "Pfad\zur\Datei.xml" [--von YYYY-MM-DD] [--bis YYYY-MM-DD]
                          [--levels 3-5] [--out Pfad\zur\out.json]
                          [--include-milestones]

Default: Datumsfenster = heute bis +28 Tage, OutlineLevels 3-5,
output ins selbe Verzeichnis als <basename>_tpb.json
"""
import sys
import os
import re
import json
import argparse
import xml.etree.ElementTree as ET
from datetime import date, datetime, timedelta


def parse_iso_date(s):
    """Parse the leading 'YYYY-MM-DD' of an MSPDI timestamp to a date.

    MSPDI writes timestamps such as '2024-05-01T08:00:00'; only the date
    part is relevant here.  Returns None for empty/missing *or malformed*
    input instead of raising, so callers can simply skip tasks that have
    no usable date.
    """
    if not s:
        return None
    try:
        return date.fromisoformat(s[:10])
    except ValueError:
        # Malformed date string: treat like "no date" rather than aborting
        # the whole conversion run.
        return None


def parse_duration_days(s):
    """Convert an ISO-8601 duration string to working days (8h/day basis).

    MSPDI normally encodes durations as 'PT8H0M0S' (hours/minutes/seconds
    only), but the full ISO form may carry a day component ('P2DT4H');
    both are accepted.  Returns 0 for empty or unrecognized input.
    """
    if not s:
        return 0
    # Optional day part, then an optional time part after 'T'.
    m = re.match(r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", s)
    if not m:
        return 0
    d = int(m.group(1) or 0)
    h = int(m.group(2) or 0)
    mi = int(m.group(3) or 0)
    se = int(m.group(4) or 0)
    total_h = h + mi / 60 + se / 3600
    # One 'D' counts as one working day, consistent with the 8h/day basis.
    return round(d + total_h / 8, 2)


def parse_levels(spec):
    """Expand an outline-level spec into a list of ints.

    Accepts single values, ranges, and comma-separated mixes of both:
    '3-5' -> [3, 4, 5]; '4' -> [4]; '3,5' -> [3, 5]; '1,3-5' -> [1, 3, 4, 5].

    Raises ValueError for non-numeric parts.
    """
    levels = []
    for part in spec.split(","):
        part = part.strip()
        if "-" in part:
            a, b = part.split("-", 1)
            levels.extend(range(int(a), int(b) + 1))
        else:
            levels.append(int(part))
    return levels


def main():
    """CLI entry point: read an MSPDI XML file, filter its tasks by date
    window and outline level, and write the TPB-compatible JSON output.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("xml_path")
    ap.add_argument("--von", help="ISO-Datum, default: heute")
    ap.add_argument("--bis", help="ISO-Datum, default: heute + 28 Tage")
    ap.add_argument("--levels", default="3-5", help="OutlineLevels, z.B. '3-5'")
    ap.add_argument("--out", help="Output-JSON-Pfad")
    ap.add_argument("--include-milestones", action="store_true",
                    help="Meilensteine mit auflisten")
    args = ap.parse_args()

    if not os.path.isfile(args.xml_path):
        print(f"Datei nicht gefunden: {args.xml_path}")
        sys.exit(1)

    # Default date window: today .. today + 28 days.
    von = parse_iso_date(args.von) if args.von else date.today()
    bis = parse_iso_date(args.bis) if args.bis else (date.today() + timedelta(days=28))
    levels = parse_levels(args.levels)

    print(f"Lese: {args.xml_path}")
    print(f"Filter: {von} -> {bis}, Levels {levels}, Milestones={args.include_milestones}")

    tree = ET.parse(args.xml_path)
    root = tree.getroot()
    # MSPDI files use a default XML namespace; derive the "{uri}" prefix from
    # the root tag so every findall/findtext below can be qualified with it.
    nsp = "{" + root.tag.split("}")[0].strip("{") + "}" if "}" in root.tag else ""

    proj_title = root.findtext(nsp + "Title", "")

    # Resource UID -> resource name.
    res_map = {}
    for r in root.findall(nsp + "Resources/" + nsp + "Resource"):
        uid = r.findtext(nsp + "UID", "")
        name = r.findtext(nsp + "Name", "")
        if uid and name:
            res_map[uid] = name

    # Task UID -> list of assigned resource names, resolved via Assignments.
    task_res = {}  # TaskUID -> [Resource Names]
    for a in root.findall(nsp + "Assignments/" + nsp + "Assignment"):
        tuid = a.findtext(nsp + "TaskUID", "")
        ruid = a.findtext(nsp + "ResourceUID", "")
        if tuid and ruid in res_map:
            task_res.setdefault(tuid, []).append(res_map[ruid])

    # Read all tasks linearly (document order, which MSPDI emits in
    # outline order -- the parent-stack below relies on that).
    raw = []
    for t in root.findall(nsp + "Tasks/" + nsp + "Task"):
        raw.append({
            "uid": t.findtext(nsp + "UID", ""),
            "id": t.findtext(nsp + "ID", ""),
            "name": (t.findtext(nsp + "Name", "") or "").strip(),
            "outline_level": int(t.findtext(nsp + "OutlineLevel", "0") or 0),
            "outline_number": t.findtext(nsp + "OutlineNumber", ""),
            "start": t.findtext(nsp + "Start", ""),
            "finish": t.findtext(nsp + "Finish", ""),
            "duration": t.findtext(nsp + "Duration", ""),
            "summary": t.findtext(nsp + "Summary", "0") == "1",
            "milestone": t.findtext(nsp + "Milestone", "0") == "1",
            "critical": t.findtext(nsp + "Critical", "0") == "1",
            "predecessors": [
                {
                    "uid": pl.findtext(nsp + "PredecessorUID", ""),
                    "type": pl.findtext(nsp + "Type", ""),
                    "lag": pl.findtext(nsp + "LinkLag", "0"),
                }
                for pl in t.findall(nsp + "PredecessorLink")
            ],
        })

    # Parent map via stack: for each task, pop back to the first task with a
    # smaller outline level; whatever remains on the stack is its ancestry.
    parent_chain = {}  # uid -> [parents, ordered from outline level 1 upward]
    stack = []  # list of (level, task)
    for t in raw:
        while stack and stack[-1]["outline_level"] >= t["outline_level"]:
            stack.pop()
        parent_chain[t["uid"]] = list(stack)
        stack.append(t)

    def bereich_for(t):
        """Build the 'bereich' (area) label from the task's summary-task
        ancestors: 'L2 / L3' when both exist, else the best single name.
        """
        chain = parent_chain.get(t["uid"], [])
        if not chain:
            return ""
        # Prefer the level-2 and level-3 parent names; otherwise fall back
        # to whichever exists, and finally to the nearest ancestor.
        l2 = next((p["name"] for p in chain if p["outline_level"] == 2), "")
        l3 = next((p["name"] for p in chain if p["outline_level"] == 3), "")
        if l2 and l3:
            return f"{l2} / {l3}"
        return l3 or l2 or chain[-1]["name"]

    # Filter: drop summary rows, optionally drop milestones, keep only the
    # requested outline levels, and keep only tasks active in the window.
    out_rows = []
    for t in raw:
        if t["summary"]:
            continue
        if t["milestone"] and not args.include_milestones:
            continue
        # Milestones bypass the level filter when --include-milestones is set.
        if t["outline_level"] not in levels and not (t["milestone"] and args.include_milestones):
            continue
        sd = parse_iso_date(t["start"])
        ed = parse_iso_date(t["finish"])
        if not sd or not ed:
            continue
        # Date-window intersection: keep any task whose span overlaps the range.
        if ed < von or sd > bis:
            continue
        out_rows.append({
            # NOTE(review): non-numeric IDs stay strings here; the sort below
            # compares "nr" only within equal (start, bereich) pairs, so mixed
            # int/str values could raise TypeError -- confirm IDs are numeric.
            "nr": int(t["id"]) if t["id"].isdigit() else t["id"],
            "bereich": bereich_for(t),
            "name": t["name"],
            "start": sd.isoformat(),
            "ende": ed.isoformat(),
            # Fall back to the calendar span when the XML has no work
            # duration (e.g. milestones, where Duration is 0/empty).
            "dauer_tage": parse_duration_days(t["duration"]) or (ed - sd).days + 1,
            "ressource": " | ".join(task_res.get(t["uid"], [])),
            "outline_level": t["outline_level"],
            "outline_number": t["outline_number"],
            "milestone": t["milestone"],
            "critical": t["critical"],
            "predecessors": [p["uid"] for p in t["predecessors"]],
        })

    out_rows.sort(key=lambda r: (r["start"], r["bereich"], r["nr"]))

    out = {
        "meta": {
            "projekt": proj_title,
            "quelle": os.path.basename(args.xml_path),
            "stand": date.today().isoformat(),
            "datumsfenster": {"von": von.isoformat(), "bis": bis.isoformat()},
            "outline_levels": levels,
            "include_milestones": args.include_milestones,
        },
        "vorgaenge": out_rows,
    }

    # Default output path: <input basename>_tpb.json next to the input file.
    out_path = args.out or os.path.splitext(args.xml_path)[0] + "_tpb.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)

    # Console summary: totals, critical-path/milestone counts, and the ten
    # largest areas by task count.
    print(f"\nGeschrieben: {out_path}")
    print(f"  -> {len(out_rows)} Vorgaenge im Datumsfenster")
    crit = sum(1 for r in out_rows if r.get("critical"))
    ms = sum(1 for r in out_rows if r.get("milestone"))
    print(f"     davon kritischer Pfad: {crit}, Meilensteine: {ms}")
    bereiche = {}
    for r in out_rows:
        bereiche[r["bereich"]] = bereiche.get(r["bereich"], 0) + 1
    print("  Bereiche:")
    for b, n in sorted(bereiche.items(), key=lambda x: -x[1])[:10]:
        print(f"    {n:3d}  {b}")


# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
