#!/usr/bin/env python3
"""
One-shot backfill of team_members + 2026-onward time entries from the
old spreadsheet at .planning/Misc/Time Sheet _ Time Log _ Hours _ Tingang.id _ Adi.xlsx.

Run from /srv/apps/work after the migration has been applied:
    python3 scripts/backfill_time_tracking.py

Idempotent on team_members (upsert by org+email).
On time_entries it BULK INSERTS — if you've already run it and want to
redo, delete the rows from the previous backfill first:
    delete from public.time_entries where import_id is null;
"""

from __future__ import annotations

import datetime as dt
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path

import openpyxl

# ---------- Config -----------------------------------------------------------

PROJECT_ROOT = Path(__file__).resolve().parent.parent
XLSX_PATH = (
    PROJECT_ROOT
    / ".planning/Misc/Time Sheet _ Time Log _ Hours _ Tingang.id _ Adi.xlsx"
)
ORG_SLUG = "bowden-works"
BACKFILL_CUTOFF = dt.date(2026, 1, 1)
ADI_EMAIL = "info@adipramono.com"
CONSOLIDATED_LABEL = "Tingang"
CHUNK = 500


# ---------- .env loader (no extra deps) --------------------------------------


def load_env(path: Path) -> None:
    if not path.exists():
        return
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        os.environ.setdefault(k.strip(), v.strip())


load_env(PROJECT_ROOT / ".env")

SUPABASE_URL = os.environ.get("NEXT_PUBLIC_SUPABASE_URL", "").rstrip("/")
SECRET_KEY = os.environ.get("SUPABASE_SECRET_KEY", "")

if not SUPABASE_URL or "REPLACE_WITH" in SUPABASE_URL:
    print("ERROR: NEXT_PUBLIC_SUPABASE_URL not set in .env", file=sys.stderr)
    sys.exit(1)
if not SECRET_KEY or "REPLACE_WITH" in SECRET_KEY:
    print("ERROR: SUPABASE_SECRET_KEY not set in .env", file=sys.stderr)
    sys.exit(1)


# ---------- Supabase REST client ---------------------------------------------


def supabase_request(
    method: str,
    path: str,
    *,
    body: object | None = None,
    params: dict[str, str] | None = None,
    prefer: str | None = None,
) -> object:
    url = f"{SUPABASE_URL}/rest/v1{path}"
    if params:
        url += "?" + urllib.parse.urlencode(params)
    data = None
    headers = {
        "apikey": SECRET_KEY,
        "Authorization": f"Bearer {SECRET_KEY}",
        "Content-Type": "application/json",
    }
    if prefer:
        headers["Prefer"] = prefer
    if body is not None:
        data = json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req) as resp:
            raw = resp.read()
            if not raw:
                return None
            return json.loads(raw)
    except urllib.error.HTTPError as e:
        msg = e.read().decode(errors="replace")
        raise RuntimeError(
            f"{method} {path} failed: {e.code} {msg[:400]}"
        ) from None


# ---------- Cell coercion ----------------------------------------------------


def cell_str(v: object) -> str | None:
    if v is None:
        return None
    if isinstance(v, str):
        v = v.strip()
        return v if v else None
    return str(v).strip() or None


def cell_email(v: object) -> str | None:
    s = cell_str(v)
    return s.lower() if s else None


def cell_bool(v: object) -> bool | None:
    if v is None:
        return None
    if isinstance(v, bool):
        return v
    s = str(v).strip().lower()
    if s in ("yes", "true", "1", "y"):
        return True
    if s in ("no", "false", "0", "n"):
        return False
    return None


def coerce_time(v: object) -> dt.time | None:
    """Excel times in openpyxl come back as datetime.time or datetime.timedelta."""
    if v is None:
        return None
    if isinstance(v, dt.time):
        return v
    if isinstance(v, dt.timedelta):
        total = int(v.total_seconds())
        h = (total // 3600) % 24
        m = (total // 60) % 60
        s = total % 60
        return dt.time(h, m, s)
    if isinstance(v, dt.datetime):
        return v.time()
    return None


def coerce_date(v: object) -> dt.date | None:
    if v is None:
        return None
    if isinstance(v, dt.datetime):
        return v.date()
    if isinstance(v, dt.date):
        return v
    return None


def coerce_seconds(v: object) -> int | None:
    """Duration cells in openpyxl: timedelta or datetime.time."""
    if v is None:
        return None
    if isinstance(v, dt.timedelta):
        return int(round(v.total_seconds()))
    if isinstance(v, dt.time):
        return v.hour * 3600 + v.minute * 60 + v.second
    if isinstance(v, (int, float)):
        # Likely a fraction of a day (Excel's native time storage)
        return int(round(float(v) * 86400))
    return None


def naive_ts(d: dt.date | None, t: dt.time | None) -> str | None:
    if not d or not t:
        return None
    return f"{d.isoformat()} {t.strftime('%H:%M:%S')}"


def derive_name(email: str) -> str:
    """Best-effort: 'firstname.surname@…' -> 'Firstname Surname'."""
    local = email.split("@", 1)[0]
    # Strip trailing digits (e.g. "bayu071002" -> "bayu")
    cleaned = local.rstrip("0123456789")
    # Split on common separators
    parts = []
    for chunk in cleaned.replace("_", ".").split("."):
        if chunk:
            parts.append(chunk.capitalize())
    return " ".join(parts) if parts else local


# ---------- Step 1: org lookup -----------------------------------------------


def get_org_id() -> str:
    rows = supabase_request(
        "GET",
        "/organizations",
        params={"slug": f"eq.{ORG_SLUG}", "select": "id"},
    )
    if not rows or not isinstance(rows, list):
        raise RuntimeError(f"Organization '{ORG_SLUG}' not found")
    return rows[0]["id"]


# ---------- Step 2: seed team_members ----------------------------------------


def seed_team_members(wb: openpyxl.Workbook, org_id: str) -> None:
    ws = wb["Rate Lookup"]
    seen: set[str] = set()
    payload: list[dict[str, object]] = []

    for i, row in enumerate(ws.iter_rows(values_only=True), start=1):
        if i == 1:
            continue
        email = cell_email(row[0])
        proportion = row[1]
        rate = row[2]
        if not email or proportion is None:
            continue
        if email in seen:
            continue
        seen.add(email)

        is_adi = email == ADI_EMAIL
        payload.append(
            {
                "org_id": org_id,
                "email": email,
                "display_name": "Adi Pramono" if is_adi else derive_name(email),
                "rate_proportion": float(proportion),
                "hourly_rate_usd": (
                    round(float(rate), 2) if isinstance(rate, (int, float)) else None
                ),
                "consolidate_as": None if is_adi else CONSOLIDATED_LABEL,
                "is_active": True,
            }
        )

    if not payload:
        print("  no team members found in Rate Lookup")
        return

    supabase_request(
        "POST",
        "/team_members",
        body=payload,
        prefer="resolution=merge-duplicates,return=minimal",
    )
    print(f"  upserted {len(payload)} team members")


# ---------- Step 3: backfill time_entries ------------------------------------


def fetch_team_lookup(org_id: str) -> dict[str, str]:
    rows = supabase_request(
        "GET",
        "/team_members",
        params={
            "org_id": f"eq.{org_id}",
            "select": "id,email",
        },
    )
    return {r["email"].lower(): r["id"] for r in (rows or [])}


def backfill_time_entries(wb: openpyxl.Workbook, org_id: str) -> None:
    ws = wb["Converted Entries"]
    lookup = fetch_team_lookup(org_id)

    # Converted Entries columns:
    # A:User  B:Email  C:Client  D:Project  E:Task  F:Description  G:Billable
    # H:Start date  I:Start time  J:End date  K:End time  L:Duration
    # M:Tags  N:Transfered

    rows_to_insert: list[dict[str, object]] = []
    skipped_pre_2026 = 0
    skipped_no_start = 0
    transferred_count = 0
    unmatched = 0

    for i, row in enumerate(ws.iter_rows(values_only=True), start=1):
        if i == 1:
            continue
        start_d = coerce_date(row[7])
        if not start_d:
            skipped_no_start += 1
            continue
        if start_d < BACKFILL_CUTOFF:
            skipped_pre_2026 += 1
            continue

        start_t = coerce_time(row[8])
        end_d = coerce_date(row[9])
        end_t = coerce_time(row[10])
        start_at = naive_ts(start_d, start_t)
        if not start_at:
            skipped_no_start += 1
            continue
        end_at = naive_ts(end_d, end_t)

        source_email = cell_email(row[12])
        team_member_id = lookup.get(source_email) if source_email else None
        if not team_member_id:
            unmatched += 1

        transfered = row[13] is True or (
            isinstance(row[13], str) and row[13].strip().lower() in ("true", "yes", "1")
        )
        if transfered:
            transferred_count += 1

        converted_user = cell_str(row[0])
        duration_seconds = coerce_seconds(row[11])

        rows_to_insert.append(
            {
                "org_id": org_id,
                "import_id": None,
                "source_user_email": source_email,
                "source_user_name": None,
                "client": cell_str(row[2]),
                "project": cell_str(row[3]),
                "description": cell_str(row[5]),
                "billable": cell_bool(row[6]),
                "start_at": start_at,
                "end_at": end_at,
                "duration_seconds": None,
                "source_rate_usd": None,
                "source_amount_usd": None,
                "team_member_id": team_member_id,
                "converted_user": converted_user,
                "converted_duration_seconds": duration_seconds,
                "transferred_at": (
                    dt.datetime.now(dt.timezone.utc).isoformat()
                    if transfered
                    else None
                ),
                "transferred_by": None,
                "transfer_batch_id": None,
            }
        )

    print(
        f"  read {len(rows_to_insert)} rows  "
        f"(skipped pre-2026: {skipped_pre_2026}, no-start: {skipped_no_start})"
    )
    print(
        f"  of those: {transferred_count} were already transferred, "
        f"{unmatched} have unknown source email"
    )

    if not rows_to_insert:
        return

    for start in range(0, len(rows_to_insert), CHUNK):
        batch = rows_to_insert[start : start + CHUNK]
        supabase_request(
            "POST",
            "/time_entries",
            body=batch,
            prefer="return=minimal",
        )
        print(f"    inserted {start + len(batch)} / {len(rows_to_insert)}")


# ---------- Main -------------------------------------------------------------


def main() -> None:
    if not XLSX_PATH.exists():
        print(f"ERROR: spreadsheet not found at {XLSX_PATH}", file=sys.stderr)
        sys.exit(1)

    print(f"Reading {XLSX_PATH.name}...")
    wb = openpyxl.load_workbook(XLSX_PATH, data_only=True, read_only=True)

    print("Resolving organization...")
    org_id = get_org_id()
    print(f"  org_id = {org_id}")

    print("Seeding team_members from Rate Lookup...")
    seed_team_members(wb, org_id)

    print("Backfilling time_entries from Converted Entries (>= 2026-01-01)...")
    backfill_time_entries(wb, org_id)

    print("Done.")


if __name__ == "__main__":
    main()
