# mtg-builder/hydrate.py

#!/usr/bin/env python3
"""
Scryfall decklist hydrator for EDH decks.
Fetches card data from Scryfall API and organizes decklists by card type.
"""

import argparse
import json
import os
import re
import sys
import time
from pathlib import Path
from typing import Optional
import urllib.request
import urllib.error
import urllib.parse


# Base URL that fetch_card prefixes to the Scryfall endpoint path.
SCRYFALL_API = "https://api.scryfall.com"
# Seconds hydrate_decklist sleeps after a successful (non-cached) API fetch.
RATE_LIMIT_DELAY = 0.1  # 100ms between requests

# Top-level keys that extract_card_info copies out of a Scryfall card object.
# A key missing from the response is stored as None in the extracted record.
FIELDS = [
    "name",
    "mana_cost",
    "cmc",
    "colors",
    "color_identity",
    "type_line",
    "oracle_text",
    "power",
    "toughness",
    "loyalty",
]


def parse_decklist(filepath: str) -> list[dict]:
    """Parse a decklist file into a list of ``{"count", "name"}`` dicts.

    Every non-blank line that does not start with ``#`` is expected to look
    like ``<count> <name>`` or ``<count>x <name>`` (the ``x`` is matched
    case-insensitively).  Lines that do not fit that shape are skipped.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark is
    consumed by the decoder.  With plain ``utf-8`` the BOM stays attached to
    the first line, the count no longer sits at the start of the line, and the
    first card is silently dropped.

    Args:
        filepath: Path to the decklist text file.

    Returns:
        One dict per matching line, in file order, holding an ``int``
        ``count`` and the whitespace-stripped ``name`` text exactly as written
        (any set code or other suffix is left in place).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once instead of being looked up again for every line.
    entry_re = re.compile(r"^(\d+)x?\s+(.+)$", re.IGNORECASE)

    cards = []
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            match = entry_re.match(line)
            if match:
                cards.append(
                    {"count": int(match.group(1)), "name": match.group(2).strip()}
                )
    return cards


def strip_set_code(name: str) -> str:
    """Remove a trailing set code, collector number and foil marker.

    Examples::

        "Keep Out (ECL) 19"      -> "Keep Out"
        "Keep Out (ECL) 19 *F*"  -> "Keep Out"
        "Sol Ring *F*"           -> "Sol Ring"

    The set-code pattern is anchored to the end of the string, so a ``*F*``
    marker written after the collector number has to be removed first;
    otherwise the set code is never matched and stays in the name.  The marker
    is stripped once more afterwards so that it is also removed when it was
    written before the set code.

    Args:
        name: Card name as it appears on a decklist line (without the count).

    Returns:
        The name with those suffixes removed and surrounding whitespace
        trimmed.  A parenthesised suffix that is not followed by digits is
        left alone.
    """
    foil_suffix = r"\s*\*F\*\s*$"
    set_suffix = r"\s*\([^)]+\)\s*\d+ *$"

    name = re.sub(foil_suffix, "", name)
    name = re.sub(set_suffix, "", name)
    name = re.sub(foil_suffix, "", name)
    return name.strip()


def fetch_card(name: str, retry_count: int = 3) -> Optional[dict]:
    """Fetch card data from the Scryfall API using a fuzzy name search.

    The name is cleaned with ``strip_set_code`` before the request is built.
    Cleaning is repeated until the name stops changing, because suffixes can
    be stacked and a single pass may only remove the outermost one.  Since the
    query already uses the fully cleaned name, a 404 is final: asking again
    with the same URL cannot produce a different answer.

    Retry policy (at most ``retry_count`` requests in total):

    * HTTP 404 -> report and return ``None`` immediately.
    * HTTP 429 -> sleep for the ``Retry-After`` header value (60s when the
      header is missing or not a plain number of seconds), then retry.
    * other HTTP errors -> exponential backoff (1s, 2s, 4s, ...), then retry.
    * network errors and undecodable responses -> report and return ``None``
      without retrying.

    No sleep is performed after the final attempt; the error is reported
    instead.

    Args:
        name: Card name as written in the decklist (may carry a set code).
        retry_count: Maximum number of requests to issue.

    Returns:
        The decoded JSON object for the card, or ``None`` on any failure.
        Failures are reported on stderr and never raised to the caller.
    """
    original_name = name
    name = strip_set_code(name)
    while True:
        restripped = strip_set_code(name)
        if restripped == name:
            break
        name = restripped

    encoded = urllib.parse.quote(name)
    url = f"{SCRYFALL_API}/cards/named?fuzzy={encoded}"

    for attempt in range(retry_count):
        is_last_attempt = attempt >= retry_count - 1
        try:
            req = urllib.request.Request(
                url, headers={"User-Agent": "EDHDeckBuilder/1.0", "Accept": "*/*"}
            )
            with urllib.request.urlopen(req, timeout=30) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            if e.code == 404:
                print(f"  Error fetching '{original_name}': Not found", file=sys.stderr)
                return None
            if is_last_attempt:
                print(
                    f"  Error fetching '{original_name}': HTTP {e.code}",
                    file=sys.stderr,
                )
                return None
            if e.code == 429:
                # Retry-After may also be an HTTP-date; fall back to the
                # default wait rather than crashing on int().
                try:
                    retry_after = max(0, int(e.headers.get("Retry-After", 60)))
                except (TypeError, ValueError):
                    retry_after = 60
                print(f"  Rate limited. Waiting {retry_after}s...", file=sys.stderr)
                time.sleep(retry_after)
            else:
                wait_time = 2**attempt
                print(f"  HTTP {e.code}, retrying in {wait_time}s...", file=sys.stderr)
                time.sleep(wait_time)
        except urllib.error.URLError as e:
            print(f"  Error fetching '{original_name}': {e.reason}", file=sys.stderr)
            return None
        except OSError as e:
            # Timeouts/resets raised while reading the body are not wrapped in
            # URLError; without this handler they would abort the whole run.
            print(f"  Error fetching '{original_name}': {e}", file=sys.stderr)
            return None
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"  Error parsing response for '{original_name}'", file=sys.stderr)
            return None

    return None


def extract_card_info(card_data: dict) -> dict:
    """Extract the fields listed in ``FIELDS`` from a Scryfall card object.

    The result always contains ``scryfall_uri`` (empty string when absent)
    plus one entry per name in ``FIELDS``; a field the card does not carry is
    stored as ``None``.

    When the card has ``card_faces``, per-face attributes that are missing at
    the top level are filled in from the front face (index 0).  That covers
    ``loyalty`` and ``colors`` as well as the text fields; without the
    ``colors`` fallback a multi-faced card whose colors are stored per face
    would be recorded as colorless.

    ``colors`` and ``color_identity`` are always lists in the result, never
    ``None``.

    Args:
        card_data: Decoded JSON object for one card.

    Returns:
        A new dict; ``card_data`` is not modified.
    """
    result = {"scryfall_uri": card_data.get("scryfall_uri", "")}

    for field in FIELDS:
        result[field] = card_data.get(field)

    faces = card_data.get("card_faces")
    front = faces[0] if faces else None

    if front is not None:
        for field in (
            "mana_cost",
            "type_line",
            "oracle_text",
            "power",
            "toughness",
            "loyalty",
        ):
            if result.get(field) is None:
                result[field] = front.get(field)

    # Resolve colors explicitly so the outcome does not depend on the
    # contents of FIELDS: top level first, then the front face, then [].
    colors = card_data.get("colors")
    if colors is None and front is not None:
        colors = front.get("colors")
    result["colors"] = colors or []
    result["color_identity"] = card_data.get("color_identity") or []

    return result


def categorize_by_type(cards: list[dict]) -> dict[str, list[dict]]:
    """Group cards into buckets according to their ``type_line``.

    The first card whose type line contains "legendary" together with
    "creature" or "planeswalker" is placed in the ``commander`` bucket; any
    later such card is categorized like every other card.

    All remaining cards go into the bucket of the first keyword found in
    their lower-cased type line, checked in this order: creature, instant,
    sorcery, artifact, enchantment, planeswalker, land.  Cards matching none
    of them land in ``other``.  This includes cards with a missing or
    ``None`` type line, for example lookup failures.

    Args:
        cards: Card dicts; only the ``type_line`` key is inspected.

    Returns:
        Mapping of bucket name to the cards in it, in input order.  Empty
        buckets are omitted; bucket order is commander, creatures, instants,
        sorceries, artifacts, enchantments, planeswalkers, lands, other.
    """
    # (keyword, bucket) pairs in priority order: the first keyword present in
    # the type line decides, so "Artifact Creature" counts as a creature.
    keyword_buckets = (
        ("creature", "creatures"),
        ("instant", "instants"),
        ("sorcery", "sorceries"),
        ("artifact", "artifacts"),
        ("enchantment", "enchantments"),
        ("planeswalker", "planeswalkers"),
        ("land", "lands"),
    )

    categories = {
        "commander": [],
        **{bucket: [] for _, bucket in keyword_buckets},
        "other": [],
    }

    for card in cards:
        # `or ""` also covers a key that is present but holds None, which
        # .get()'s default argument does not.
        type_line = (card.get("type_line") or "").lower()

        commander_candidate = "legendary" in type_line and (
            "creature" in type_line or "planeswalker" in type_line
        )
        if commander_candidate and not categories["commander"]:
            categories["commander"].append(card)
            continue

        bucket = next(
            (target for keyword, target in keyword_buckets if keyword in type_line),
            "other",
        )
        categories[bucket].append(card)

    return {k: v for k, v in categories.items() if v}


def hydrate_decklist(
    input_file: str, output_dir: str, cache_file: Optional[str] = None
) -> None:
    """Hydrate a decklist with card data and write the results as JSON.

    Steps:

    1. Load the card cache from ``cache_file`` when it exists.
    2. Parse ``input_file`` and resolve every entry, from the cache when the
       decklist name is a cache key, otherwise via ``fetch_card``.  Entries
       that cannot be fetched become ``{"name", "error"}`` placeholders and
       are not cached.
    3. Write the cache back, creating its parent directory if needed.
    4. Write ``full_collection.json`` plus one ``<category>.json`` per
       non-empty category into ``output_dir`` and print a summary.

    Each hydrated record is a *copy* of the card info with ``count`` added.
    Writing ``count`` into the shared dict would leak it into the cache file
    and make repeated decklist lines for the same card overwrite each other's
    count.

    Args:
        input_file: Path to the decklist text file.
        output_dir: Directory for the JSON output (created if missing).
        cache_file: Optional path of the JSON card cache; falsy disables
            caching.
    """
    cache = {}
    if cache_file and os.path.exists(cache_file):
        with open(cache_file, "r", encoding="utf-8") as f:
            cache = json.load(f)
        print(f"Loaded {len(cache)} cached cards")

    print(f"Parsing decklist: {input_file}")
    entries = parse_decklist(input_file)
    print(f"Found {len(entries)} unique card entries")

    hydrated = []
    for i, entry in enumerate(entries, 1):
        name = entry["name"]
        count = entry["count"]

        if name in cache:
            card_info = cache[name]
            print(f"[{i}/{len(entries)}] {name} (cached)")
        else:
            print(f"[{i}/{len(entries)}] Fetching: {name}...")
            card_data = fetch_card(name)
            if card_data:
                card_info = extract_card_info(card_data)
                cache[name] = card_info
                time.sleep(RATE_LIMIT_DELAY)
            else:
                card_info = {"name": name, "error": "not found"}

        # Copy before adding the per-line count so cache entries stay clean.
        hydrated.append({**card_info, "count": count})

    if cache_file:
        # The cache path may point into a directory that does not exist yet;
        # create it so the fetched data is not lost at the very last step.
        cache_dir = os.path.dirname(cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2)
        print(f"Cached {len(cache)} cards to {cache_file}")

    categories = categorize_by_type(hydrated)

    os.makedirs(output_dir, exist_ok=True)

    all_cards_path = os.path.join(output_dir, "full_collection.json")
    with open(all_cards_path, "w", encoding="utf-8") as f:
        json.dump(hydrated, f, indent=2)
    print(f"Wrote full collection to {all_cards_path}")

    for category, cards in categories.items():
        cat_path = os.path.join(output_dir, f"{category}.json")
        with open(cat_path, "w", encoding="utf-8") as f:
            json.dump(cards, f, indent=2)
        print(f"  {category}: {len(cards)} cards -> {cat_path}")

    print(f"\nDeck summary:")
    print(f"  Total cards: {sum(c.get('count', 1) for c in hydrated)}")
    print(f"  Unique cards: {len(hydrated)}")


def create_deck(deck_name: str, base_dir: str = "decks") -> str:
    """Create a deck folder containing a template ``deck.json``.

    The folder ``<base_dir>/<deck_name>`` is created if necessary.  The
    template is written only when ``deck.json`` does not exist yet.  Calling
    this for an existing deck therefore leaves its file untouched instead of
    replacing it with an empty template.

    Args:
        deck_name: Name of the deck; used as folder name and in the template.
        base_dir: Directory under which deck folders live.

    Returns:
        Path of the deck folder.
    """
    deck_path = os.path.join(base_dir, deck_name)
    os.makedirs(deck_path, exist_ok=True)

    template = {"name": deck_name, "commander": None, "cards": []}
    deck_file = os.path.join(deck_path, "deck.json")

    try:
        # Mode "x" fails if the file exists, so existing data is never lost.
        with open(deck_file, "x", encoding="utf-8") as f:
            json.dump(template, f, indent=2)
    except FileExistsError:
        print(f"Deck already exists, keeping {deck_file}")
        return deck_path

    print(f"Created deck: {deck_path}")
    return deck_path


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the ``hydrate`` and ``new`` commands."""
    root = argparse.ArgumentParser(
        description="Hydrate MTG decklists with Scryfall data"
    )
    commands = root.add_subparsers(dest="command", help="Commands")

    # hydrate <input> [-o DIR] [-c FILE]
    hydrate_cmd = commands.add_parser(
        "hydrate", help="Hydrate a decklist with Scryfall data"
    )
    hydrate_cmd.add_argument("input", help="Input decklist file")
    hydrate_cmd.add_argument(
        "-o", "--output", default="output/hydrated", help="Output directory"
    )
    hydrate_cmd.add_argument(
        "-c",
        "--cache",
        default="cache/card_cache.json",
        help="Cache file for card data",
    )

    # new <name> [-d DIR]
    new_cmd = commands.add_parser("new", help="Create a new deck folder")
    new_cmd.add_argument("name", help="Deck name")
    new_cmd.add_argument(
        "-d", "--dir", default="data/decks", help="Base directory for decks"
    )

    return root


def main():
    """Parse the command line and run the selected command.

    ``hydrate`` forwards to ``hydrate_decklist`` and ``new`` forwards to
    ``create_deck``.  When no command is given the usage help is printed.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "hydrate":
        hydrate_decklist(args.input, args.output, args.cache)
        return

    if args.command == "new":
        create_deck(args.name, args.dir)
        return

    parser.print_help()


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()