#!/usr/bin/env python3
"""
Re-geocode the Regent Grand starter CSV using Nominatim (OpenStreetMap),
constrained to a Providenciales bounding box so we don't land on
similarly-named places elsewhere in the world.

For each row:
  1. Query "<title>, Providenciales" with the viewbox bounded.
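  2. If no result, retry "<title>, Turks and Caicos" without bounding, and
     accept the hit only if it still falls inside a slightly looser
     Providenciales box.
  3. If there is still no usable result, keep the original coordinates and
     record the row as KEPT or REJECTED in the report.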

Writes:
  - regent-grand-starter-geocoded.csv   (new CSV with updated coords)
  - regent-grand-geocode-report.txt     (line-by-line diff with status)
"""
import csv
import json
import sys
import time
import urllib.parse
import urllib.request

# Absolute paths: the starter CSV that is read, and the two files that are
# (re)written on every run.
INPUT  = "/srv/apps/bw-plugins/regent-grand-starter.csv"
OUTPUT = "/srv/apps/bw-plugins/regent-grand-starter-geocoded.csv"
REPORT = "/srv/apps/bw-plugins/regent-grand-geocode-report.txt"

# Providenciales island bounding box (lon_min, lat_max, lon_max, lat_min),
# sent verbatim as the `viewbox` query parameter of every Nominatim request.
VIEWBOX = "-72.45,21.90,-72.05,21.70"

# HTTP headers attached to every Nominatim request.
HEADERS = {
    "User-Agent": "BW-Map-Magnet/1.2 (regent-grand-starter geocode)",
    "Accept-Language": "en-US,en;q=0.8",
}


def nominatim(q, bounded=True):
    """Geocode *q* with the Nominatim search API and return its top hit.

    VIEWBOX is sent with every request. When *bounded* is true the
    ``bounded=1`` flag is added as well, which asks Nominatim to restrict
    results to the viewbox instead of merely preferring it.

    Returns a 2-tuple:
      - ``((lat, lng), display_name)`` on success, with float coordinates;
      - ``(None, reason)`` on failure, where *reason* is ``"network: <exc>"``,
        ``"no result"`` or ``"bad result"``.

    Never raises for request or response problems, so one failed lookup
    cannot abort a whole batch.
    """
    params = {
        "format": "json",
        "limit": "1",
        "q": q,
        "viewbox": VIEWBOX,
    }
    if bounded:
        params["bounded"] = "1"
    url = "https://nominatim.openstreetmap.org/search?" + urllib.parse.urlencode(params)
    req = urllib.request.Request(url, headers=HEADERS)
    try:
        with urllib.request.urlopen(req, timeout=12) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except Exception as e:
        # Deliberately broad: timeouts, HTTP errors, truncated bodies and
        # invalid JSON all mean "this request gave us nothing usable".
        return None, f"network: {e}"
    if not data:
        return None, "no result"
    # A successful search returns a JSON array of objects; anything else
    # (for example an error object) must not be indexed as a hit.
    if not isinstance(data, list) or not isinstance(data[0], dict):
        return None, "bad result"
    hit = data[0]
    try:
        lat = float(hit["lat"])
        lng = float(hit["lon"])
    except (KeyError, TypeError, ValueError):
        # TypeError covers a JSON null where a number was expected.
        return None, "bad result"
    # Rejects NaN/inf as well as values that cannot be real coordinates.
    if not (-90.0 <= lat <= 90.0 and -180.0 <= lng <= 180.0):
        return None, "bad result"
    # Callers slice the name, so always hand back a string.
    name = str(hit.get("display_name") or "")
    return (lat, lng), name


def in_provo(lat, lng):
    """Return True when (lat, lng) lies inside the accepted Providenciales box."""
    # The box is slightly wider than the viewbox on every side, so hits from
    # the unbounded retry that sit just outside the viewbox are still accepted.
    if not 21.65 <= lat <= 21.95:
        return False
    return -72.50 <= lng <= -72.00


def lookup(title):
    """Resolve *title* to coordinates: bounded query first, unbounded fallback.

    Stage 1 queries "<title>, Providenciales" with ``bounded=True``.
    Stage 2 runs only when stage 1 yields no coordinates; it queries
    "<title>, Turks and Caicos" unbounded and accepts the hit only when
    in_provo() approves it.

    Returns ``(lat, lng, source, info)`` where *source* is one of:
      - ``"bounded"`` / ``"unbounded-in-provo"``: lat/lng are floats and
        *info* is the matched display name;
      - ``"unbounded-rejected-out-of-range"``: lat/lng are None and *info*
        holds the rejected coordinates followed by the display name;
      - ``"not-found"``: lat/lng are None and *info* is the failure reason
        of the last query (or ``"empty title"``).
    """
    # A blank title would reduce the query to ", Providenciales", which can
    # match the island itself and overwrite the row with a bogus position.
    # Bail out before spending any requests (or sleeps) on it.
    if title is None or not str(title).strip():
        return (None, None, "not-found", "empty title")

    q1 = f"{title}, Providenciales"
    coords, info = nominatim(q1, bounded=True)
    time.sleep(1.1)  # Be polite — Nominatim asks for ≤ 1 req/s.
    if coords:
        return (coords[0], coords[1], "bounded", info)

    q2 = f"{title}, Turks and Caicos"
    coords, info = nominatim(q2, bounded=False)
    time.sleep(1.1)
    if coords and in_provo(coords[0], coords[1]):
        return (coords[0], coords[1], "unbounded-in-provo", info)
    if coords:
        # Found something, but outside the island: report where it landed.
        return (None, None, "unbounded-rejected-out-of-range", f"{coords[0]:.4f},{coords[1]:.4f} {info}")
    return (None, None, "not-found", info)


def _parse_coord(value):
    """Return *value* as a finite float, or None if it is blank or not numeric."""
    import math

    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if math.isfinite(number) else None


def main():
    """Re-geocode every row of INPUT, then write OUTPUT and REPORT.

    Each row's ``title`` is passed to lookup(). When coordinates come back,
    the row's ``latitude``/``longitude`` are replaced (6 decimal places);
    otherwise the row is written unchanged. REPORT receives one entry per
    row (UPDATED / REJECTED / KEPT) preceded by a summary line, and progress
    plus the final totals go to stderr.

    Exits with an error message if INPUT lacks the ``title``, ``latitude``
    or ``longitude`` column.
    """
    report_lines = []
    updated, kept, rejected = 0, 0, 0

    # Read the whole input first: it gives a real total for the progress
    # counter and keeps OUTPUT untouched when INPUT is missing or malformed.
    with open(INPUT, newline="", encoding="utf-8") as fin:
        reader = csv.DictReader(fin)
        fieldnames = reader.fieldnames
        rows = list(reader)

    missing = {"title", "latitude", "longitude"} - set(fieldnames or ())
    if missing:
        sys.exit(f"{INPUT}: missing required column(s): {', '.join(sorted(missing))}")

    total = len(rows)
    width = max(2, len(str(total)))

    with open(OUTPUT, "w", newline="", encoding="utf-8") as fout:
        writer = csv.DictWriter(fout, fieldnames=fieldnames)
        writer.writeheader()

        for n, row in enumerate(rows, start=1):
            title = row["title"]
            raw_lat, raw_lng = row["latitude"], row["longitude"]
            # Blank or malformed old coordinates must not abort the run;
            # they only mean no distance can be reported for this row.
            old_lat = _parse_coord(raw_lat)
            old_lng = _parse_coord(raw_lng)

            print(f"[{n:>{width}}/{total}] {title} ...", file=sys.stderr, flush=True)
            new_lat, new_lng, source, info = lookup(title)

            if new_lat is not None:
                if old_lat is not None and old_lng is not None:
                    dist_km = haversine(old_lat, old_lng, new_lat, new_lng)
                    old_text = f"{old_lat:.6f},{old_lng:.6f}"
                    moved = f"~{dist_km:.2f} km from old"
                else:
                    old_text = f"unparseable ({raw_lat!r},{raw_lng!r})"
                    moved = "old coords unparseable"
                row["latitude"] = f"{new_lat:.6f}"
                row["longitude"] = f"{new_lng:.6f}"
                updated += 1
                report_lines.append(
                    f"UPDATED  {title}\n"
                    f"    old: {old_text}\n"
                    f"    new: {new_lat:.6f},{new_lng:.6f}  ({source}, {moved})\n"
                    f"    match: {info[:100]}\n"
                )
            elif source.startswith("unbounded-rejected"):
                rejected += 1
                report_lines.append(
                    f"REJECTED {title} (Nominatim returned {info[:100]} — outside Provo, keeping old coords)\n"
                )
            else:
                kept += 1
                # Only a clean empty answer means "not found"; network or
                # parse failures are reported as such so they can be retried.
                if info == "no result":
                    reason = "not found in Nominatim"
                else:
                    reason = f"lookup failed: {info[:100]}"
                report_lines.append(
                    f"KEPT     {title} ({reason})\n"
                )

            writer.writerow(row)

    # Explicit UTF-8: the report contains "—" and arbitrary place names.
    with open(REPORT, "w", encoding="utf-8") as f:
        f.write(f"Geocode summary: updated={updated}, kept-old={kept}, rejected-out-of-range={rejected}\n\n")
        f.writelines(report_lines)

    print(f"\nDone. updated={updated}, kept={kept}, rejected={rejected}", file=sys.stderr)
    print(f"  Output: {OUTPUT}", file=sys.stderr)
    print(f"  Report: {REPORT}", file=sys.stderr)


def haversine(lat1, lng1, lat2, lng2):
    """Return the great-circle distance in km between two points.

    All four arguments are in decimal degrees. The Earth is treated as a
    sphere of radius 6371 km.
    """
    import math

    R = 6371.0  # Earth radius in km
    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlng = math.radians(lng2 - lng1)
    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlng / 2) ** 2
    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal
    # points, which would make asin raise ValueError; clamp it first.
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))


# Run the geocoding pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
