#!/usr/bin/env python3
"""
One-shot restore for operator / client / project on backfilled rows.

Background: a bulk "edit fields" action incorrectly updated every backfilled
row's operator / client / project. The fix walks the original spreadsheet
(.planning/Misc/Time Sheet ....xlsx, Converted Entries tab) and matches
each row to its time_entries row by:

  start_at + source_user_email + description

For each match, restores:
  - operator   = the spreadsheet's "Client" column (raw)
  - client     = parsed left of " : " in the spreadsheet's "Project" column
  - project    = parsed right of " : "  (or whole string if no delimiter)
  - billout_cost_usd = recomputed from converted_duration_seconds × base_rate
    (so the locked cost stays consistent if any rows were also touched)

Run from /srv/apps/work:
    python3 scripts/restore_client_project_operator.py

The script is idempotent — re-running matches the same rows and writes the
same values. It only touches rows belonging to the "Initial 2026 migration"
clockify_imports batch.
"""

from __future__ import annotations

import datetime as dt
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path

import openpyxl

PROJECT_ROOT = Path(__file__).resolve().parent.parent
XLSX_PATH = (
    PROJECT_ROOT
    / ".planning/Misc/Time Sheet _ Time Log _ Hours _ Tingang.id _ Adi.xlsx"
)
ORG_SLUG = "bowden-works"
BACKFILL_CUTOFF = dt.date(2026, 1, 1)

# Load .env
for line in (PROJECT_ROOT / ".env").read_text().splitlines():
    line = line.strip()
    if line and not line.startswith("#") and "=" in line:
        k, v = line.split("=", 1)
        os.environ.setdefault(k.strip(), v.strip())

SUPABASE_URL = os.environ["NEXT_PUBLIC_SUPABASE_URL"].rstrip("/")
SECRET_KEY = os.environ["SUPABASE_SECRET_KEY"]


def sb(
    method: str,
    path: str,
    *,
    body: object | None = None,
    params: dict[str, str] | None = None,
    prefer: str | None = None,
) -> object:
    url = f"{SUPABASE_URL}/rest/v1{path}"
    if params:
        url += "?" + urllib.parse.urlencode(params)
    headers = {
        "apikey": SECRET_KEY,
        "Authorization": f"Bearer {SECRET_KEY}",
        "Content-Type": "application/json",
    }
    if prefer:
        headers["Prefer"] = prefer
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req) as resp:
            raw = resp.read()
            return json.loads(raw) if raw else None
    except urllib.error.HTTPError as e:
        msg = e.read().decode(errors="replace")
        raise RuntimeError(f"{method} {path} failed: {e.code} {msg[:400]}")


def cell_str(v):
    if v is None:
        return None
    if isinstance(v, str):
        v = v.strip()
        return v or None
    return str(v).strip() or None


def cell_email(v):
    s = cell_str(v)
    return s.lower() if s else None


def coerce_time(v):
    if isinstance(v, dt.time):
        return v
    if isinstance(v, dt.timedelta):
        total = int(v.total_seconds())
        return dt.time((total // 3600) % 24, (total // 60) % 60, total % 60)
    if isinstance(v, dt.datetime):
        return v.time()
    return None


def coerce_date(v):
    if isinstance(v, dt.datetime):
        return v.date()
    if isinstance(v, dt.date):
        return v
    return None


def coerce_seconds(v):
    if isinstance(v, dt.timedelta):
        return int(round(v.total_seconds()))
    if isinstance(v, dt.time):
        return v.hour * 3600 + v.minute * 60 + v.second
    if isinstance(v, (int, float)):
        return int(round(float(v) * 86400))
    return None


def naive_ts(d, t):
    if not d or not t:
        return None
    return f"{d.isoformat()}T{t.strftime('%H:%M:%S')}"


def split_client_project(raw):
    sep = " : "
    i = raw.find(sep)
    if i == -1:
        return (None, raw.strip())
    return (raw[:i].strip() or None, raw[i + len(sep):].strip())


def main():
    if not XLSX_PATH.exists():
        print(f"ERROR: spreadsheet not found at {XLSX_PATH}", file=sys.stderr)
        sys.exit(1)

    print("Resolving org + base rate...")
    org = sb("GET", "/organizations", params={
        "slug": f"eq.{ORG_SLUG}",
        "select": "id,default_hourly_rate_usd",
    })
    if not org:
        raise RuntimeError("Bowden Works org not found")
    org_id = org[0]["id"]
    base_rate = float(org[0]["default_hourly_rate_usd"])
    print(f"  org_id = {org_id}  base_rate = ${base_rate}/hr")

    print("Resolving 'Initial 2026 migration' batch...")
    batch = sb("GET", "/clockify_imports", params={
        "org_id": f"eq.{org_id}",
        "name": "eq.Initial 2026 migration",
        "select": "id",
    })
    if not batch:
        raise RuntimeError("Initial 2026 migration batch not found")
    batch_id = batch[0]["id"]
    print(f"  batch_id = {batch_id}")

    print(f"Reading {XLSX_PATH.name}...")
    wb = openpyxl.load_workbook(XLSX_PATH, data_only=True, read_only=True)
    ws = wb["Converted Entries"]

    matched = 0
    unmatched = 0
    multi_matched = 0
    updated = 0
    skipped_pre_2026 = 0

    print("Walking Converted Entries...")
    for i, row in enumerate(ws.iter_rows(values_only=True), start=1):
        if i == 1:
            continue
        start_d = coerce_date(row[7])
        if not start_d:
            continue
        if start_d < BACKFILL_CUTOFF:
            skipped_pre_2026 += 1
            continue

        start_t = coerce_time(row[8])
        start_at = naive_ts(start_d, start_t)
        if not start_at:
            continue

        source_email = cell_email(row[12])  # Tags column
        description = cell_str(row[5])
        operator_raw = cell_str(row[2])     # spreadsheet "Client" column
        project_raw = cell_str(row[3])      # spreadsheet "Project" column
        duration_seconds = coerce_seconds(row[11])

        if not source_email or not description or not project_raw:
            continue

        client_parsed, project_parsed = split_client_project(project_raw)

        # Find the matching time_entries row
        params = {
            "org_id": f"eq.{org_id}",
            "import_id": f"eq.{batch_id}",
            "start_at": f"eq.{start_at}",
            "source_user_email": f"eq.{source_email}",
            "description": f"eq.{description}",
            "select": "id,converted_duration_seconds",
        }
        candidates = sb("GET", "/time_entries", params=params)

        if not candidates:
            unmatched += 1
            continue
        if len(candidates) > 1:
            multi_matched += 1

        # Recompute billout_cost_usd for safety (current proportion/rate)
        for c in candidates:
            cd = c.get("converted_duration_seconds")
            if cd is not None:
                cost = round((cd / 3600.0) * base_rate, 4)
            else:
                cost = None
            sb(
                "PATCH",
                f"/time_entries",
                params={"id": f"eq.{c['id']}"},
                body={
                    "operator": operator_raw,
                    "client": client_parsed,
                    "project": project_parsed,
                    "billout_cost_usd": cost,
                },
                prefer="return=minimal",
            )
            updated += 1

        matched += 1
        if matched % 250 == 0:
            print(f"  matched {matched} rows...")

    print()
    print(f"Done.")
    print(f"  matched (spreadsheet rows): {matched}")
    print(f"  multi-matched (same start+email+desc): {multi_matched}")
    print(f"  unmatched: {unmatched}")
    print(f"  skipped pre-2026: {skipped_pre_2026}")
    print(f"  total DB rows updated: {updated}")


if __name__ == "__main__":
    main()
