# Source code for pytyche.visual_confidence

"""Visual confidence payload for generator verification.

Provides a tested payload function that backs all panels of the visual
confidence notebook. The notebook renders from the payload; unit tests
assert on the payload directly without notebook rendering.

Public API
----------
- ``VisualConfidencePayload`` — frozen dataclass with required sections.
- ``build_visual_confidence_payload(bundle, bootstrap_seed,
  n_bootstrap=200) -> VisualConfidencePayload``.
"""

from __future__ import annotations

import dataclasses

import numpy as np

from pytyche.contracts import CalibrationBundle, MetricFamily
from pytyche.summarize import summarize_hurdle_components

# ---------------------------------------------------------------------------
# Truth-leakage column names — observed DataFrames must not contain these.
# ---------------------------------------------------------------------------

#: Names reserved for ground truth.  They occur in TruthResult /
#: CalibrationTruth internals, so finding one of them as a column of a
#: VariantData.visitors DataFrame means truth has leaked into observed data.
#: Listed alphabetically; membership is all that matters for a frozenset.
_TRUTH_COLUMN_NAMES: frozenset[str] = frozenset(
    (
        "aov",
        "aov_per_visitor",
        "cate",
        "cate_per_visitor",
        "conv",
        "conv_per_visitor",
        "effect",
        "effect_components",
        "m0",
        "m1",
        "p0",
        "p1",
        "tau",
        "tau_i",
        "true_cate",
        "true_effect",
    )
)


# ---------------------------------------------------------------------------
# Payload type
# ---------------------------------------------------------------------------



# [docs]
@dataclasses.dataclass(frozen=True)
class VisualConfidencePayload:
    """Typed payload backing all panels of the visual confidence notebook.

    All four fields are required — there are no optional sections.  Future
    analyzer/BCF panels extend this type by subclassing or by adding fields
    to a derived dataclass without breaking existing sections (open/closed
    principle for the payload contract).

    ``frozen=True`` only prevents rebinding the four attributes; the dicts
    they reference are ordinary mutable dicts.

    Fields:
        invariants:    Name → bool map of generator contract checks.
        truth_summary: Population-level truth statistics.
        data_summary:  Per-variant empirical summaries (variant name → stats).
        recovery:      Empirical recovery comparison with planted truth and
                       bootstrap SE on the empirical lift.
    """

    # As populated by build_visual_confidence_payload, the keys are:
    # observed_truth_boundary, cate_alignment, decomposition_identity,
    # effect_equals_mean_cate, cross_arm_visitor_id_uniqueness.
    invariants: dict[str, bool]
    # Keys: effect, effect_components, cate_mean, cate_std, cate_min, cate_max.
    truth_summary: dict[str, object]
    # Outer key is the variant name; inner keys are n_visitors and
    # conversion_rate, plus mean_revenue for hurdle metrics.
    data_summary: dict[str, dict[str, object]]
    # Keys: planted_effect, empirical_lift, bootstrap_se; hurdle metrics add
    # planted_conv_effect, planted_aov_effect and the hurdle component summary.
    recovery: dict[str, object]



# ---------------------------------------------------------------------------
# Invariant checks
# ---------------------------------------------------------------------------


def _check_observed_truth_boundary(bundle: CalibrationBundle) -> bool:
    """Report whether the observed data is free of truth-leakage columns.

    Every variant's ``visitors`` column labels are compared against
    ``_TRUTH_COLUMN_NAMES``.  The result is False as soon as one variant
    shares a label with that set, and True otherwise (including when the
    bundle has no variants at all).
    """
    return all(
        _TRUTH_COLUMN_NAMES.isdisjoint(arm.visitors.columns)
        for arm in bundle.observed.variants
    )


def _check_cate_alignment(bundle: CalibrationBundle) -> bool:
    """True if cate_per_visitor length equals total observed visitors.

    The total is the sum of ``n_visitors`` over every variant in
    ``bundle.observed.variants``; it is compared with
    ``len(bundle.truth.cate_per_visitor.values)``.

    Raises:
        AssertionError: If ``bundle.truth.cate_per_visitor`` is None.
    """
    cate = bundle.truth.cate_per_visitor
    # visual_confidence is K=2-only: the scalar cate_per_visitor is populated
    # only for binary arms (at K>=3 the heterogeneity lives in the per-contrast
    # list). Fail loudly rather than degrade if handed a multi-arm bundle.
    # Raised explicitly instead of via ``assert`` so the guard is not stripped
    # under ``python -O``; the exception type and message are unchanged.
    if cate is None:
        raise AssertionError(
            "visual_confidence requires a K=2 bundle (cate_per_visitor populated)"
        )
    total_visitors = sum(v.n_visitors for v in bundle.observed.variants)
    return len(cate.values) == total_visitors


def _check_decomposition_identity(bundle: CalibrationBundle, tol: float = 1e-10) -> bool:
    """Check that the planted effect splits into its hurdle components.

    Binary metrics pass unconditionally, since there is only one component.
    For any other metric family the check is
    ``|effect - (conv_effect + aov_effect)| <= tol``, where a component
    missing from ``effect_components`` counts as 0.0.
    """
    truth = bundle.truth
    if truth.metric_family == MetricFamily.BINARY:
        return True

    parts = truth.effect_components
    component_sum = parts.get("conv_effect", 0.0) + parts.get("aov_effect", 0.0)
    residual = truth.effect - component_sum
    return abs(residual) <= tol


def _check_effect_equals_mean_cate(bundle: CalibrationBundle, tol: float = 1e-10) -> bool:
    """True if bundle.truth.effect equals mean(cate_per_visitor.values).

    Equality is judged with an absolute tolerance:
    ``|effect - mean(cate)| <= tol``.

    Raises:
        AssertionError: If ``bundle.truth.cate_per_visitor`` is None.
    """
    cate = bundle.truth.cate_per_visitor
    # Explicit raise rather than ``assert`` so the K=2 guard is not stripped
    # under ``python -O``; the exception type and message are unchanged.
    if cate is None:
        raise AssertionError(
            "visual_confidence requires a K=2 bundle (cate_per_visitor populated)"
        )
    mean_cate = float(np.mean(cate.values))
    return abs(bundle.truth.effect - mean_cate) <= tol


def _check_cross_arm_visitor_id_uniqueness(bundle: CalibrationBundle) -> bool:
    """Report whether every visitor_id belongs to at most one variant.

    Variants are scanned in order while accumulating the ids already
    claimed by earlier variants.  Repeats inside a single variant are not
    considered — only overlap between different variants fails the check.
    """
    claimed: set[str] = set()
    for arm in bundle.observed.variants:
        arm_ids = set(arm.visitors["visitor_id"].tolist())
        if not claimed.isdisjoint(arm_ids):
            return False
        claimed |= arm_ids
    return True


# ---------------------------------------------------------------------------
# Section builders
# ---------------------------------------------------------------------------


def _build_invariants(bundle: CalibrationBundle) -> dict[str, bool]:
    """Run the five generator contract checks against ``bundle``.

    Returns a dict keyed by check name, in the order the checks are listed
    below; each value is that check's boolean verdict.  Exceptions raised
    by an individual check propagate unchanged.
    """
    checks = (
        ("observed_truth_boundary", _check_observed_truth_boundary),
        ("cate_alignment", _check_cate_alignment),
        ("decomposition_identity", _check_decomposition_identity),
        ("effect_equals_mean_cate", _check_effect_equals_mean_cate),
        ("cross_arm_visitor_id_uniqueness", _check_cross_arm_visitor_id_uniqueness),
    )
    return {label: check(bundle) for label, check in checks}


def _build_truth_summary(bundle: CalibrationBundle) -> dict[str, object]:
    """Extract population-level truth statistics from bundle.truth.

    Returns a dict with:
        effect:            ``bundle.truth.effect``, passed through.
        effect_components: ``bundle.truth.effect_components``, passed through
                           (the same object, not a copy).
        cate_mean, cate_min, cate_max: summary of ``cate_per_visitor.values``.
        cate_std:          sample standard deviation (``ddof=1``) of the same
                           values.

    Raises:
        AssertionError: If ``bundle.truth.cate_per_visitor`` is None.
    """
    cate = bundle.truth.cate_per_visitor
    # Explicit raise rather than ``assert`` so the K=2 guard is not stripped
    # under ``python -O``; the exception type and message are unchanged.
    if cate is None:
        raise AssertionError(
            "visual_confidence requires a K=2 bundle (cate_per_visitor populated)"
        )
    cate_values = cate.values
    return {
        "effect": bundle.truth.effect,
        "effect_components": bundle.truth.effect_components,
        "cate_mean": float(np.mean(cate_values)),
        "cate_std": float(np.std(cate_values, ddof=1)),
        "cate_min": float(np.min(cate_values)),
        "cate_max": float(np.max(cate_values)),
    }


def _build_data_summary(
    bundle: CalibrationBundle,
) -> dict[str, dict[str, object]]:
    """Summarise each observed variant empirically.

    Every variant contributes an entry keyed by its name containing
    ``n_visitors`` and ``conversion_rate``; when the metric family is
    ``HURDLE_REAL`` the entry also carries ``mean_revenue``.  Both rates are
    a column total divided by the variant's ``n_visitors``, and are reported
    as 0.0 for a variant with no visitors.
    """

    def _per_visitor(frame, column, count):
        # Column total over the declared visitor count. The column is not
        # touched at all for an empty arm.
        if count > 0:
            return frame[column].sum().item() / count
        return 0.0

    include_revenue = bundle.truth.metric_family == MetricFamily.HURDLE_REAL
    summaries: dict[str, dict[str, object]] = {}
    for arm in bundle.observed.variants:
        frame = arm.visitors
        count = arm.n_visitors
        stats: dict[str, object] = {
            "n_visitors": count,
            "conversion_rate": _per_visitor(frame, "converted", count),
        }
        if include_revenue:
            stats["mean_revenue"] = _per_visitor(frame, "revenue", count)
        summaries[arm.name] = stats
    return summaries


def _get_metric_values(bundle: CalibrationBundle) -> tuple[np.ndarray, np.ndarray]:
    """Return the primary metric per visitor as ``(control, treatment)`` arrays.

    The control arm is ``variants[0]`` and the treatment arm is
    ``variants[1]`` by positional convention; any further variants are
    ignored.  Binary metrics read the ``converted`` column and every other
    metric family reads ``revenue`` (presumably 0.0 for non-converters —
    not verified here).  Both arrays are cast to float.
    """
    arms = bundle.observed.variants
    control_arm = arms[0]
    treatment_arm = arms[1]

    # A single column name drives both arms; only the metric family decides it.
    if bundle.truth.metric_family == MetricFamily.BINARY:
        column = "converted"
    else:
        column = "revenue"

    return (
        control_arm.visitors[column].to_numpy(dtype=float),
        treatment_arm.visitors[column].to_numpy(dtype=float),
    )


def _build_recovery(
    bundle: CalibrationBundle,
    bootstrap_seed: int,
    n_bootstrap: int,
) -> dict[str, object]:
    """Compare the planted effect with what the observed data recovers.

    Returned keys:
        planted_effect:  ``bundle.truth.effect``.
        empirical_lift:  treatment mean minus control mean of the primary
                         metric.
        bootstrap_se:    sample standard deviation (``ddof=1``) of
                         ``n_bootstrap`` resampled lifts.

    For ``HURDLE_REAL`` metrics the dict additionally holds
    ``planted_conv_effect`` and ``planted_aov_effect`` and is then merged
    with whatever ``summarize_hurdle_components(bundle.observed)`` returns.

    The bootstrap is reproducible for a given ``bootstrap_seed``: each
    replicate resamples the control arm and then the treatment arm, with
    replacement and at the arm's original size.
    """
    control, treated = _get_metric_values(bundle)
    observed_lift = float(treated.mean() - control.mean())

    generator = np.random.default_rng(bootstrap_seed)

    def _resampled_lift() -> float:
        # Draw order (control first, then treatment) fixes the RNG stream.
        control_draw = generator.choice(control, size=len(control), replace=True)
        treated_draw = generator.choice(treated, size=len(treated), replace=True)
        return float(treated_draw.mean() - control_draw.mean())

    replicate_lifts = [_resampled_lift() for _ in range(n_bootstrap)]
    standard_error = float(np.std(replicate_lifts, ddof=1))

    report: dict[str, object] = {
        "planted_effect": bundle.truth.effect,
        "empirical_lift": observed_lift,
        "bootstrap_se": standard_error,
    }

    is_hurdle = bundle.truth.metric_family == MetricFamily.HURDLE_REAL
    if not is_hurdle:
        return report

    hurdle_parts = summarize_hurdle_components(bundle.observed)
    report["planted_conv_effect"] = bundle.truth.effect_components["conv_effect"]
    report["planted_aov_effect"] = bundle.truth.effect_components["aov_effect"]
    # Merged last, so the summary's keys win on any name collision.
    report.update(hurdle_parts)
    return report


# ---------------------------------------------------------------------------
# Public entrypoint
# ---------------------------------------------------------------------------



# [docs]
def build_visual_confidence_payload(
    bundle: CalibrationBundle,
    bootstrap_seed: int,
    n_bootstrap: int = 200,
) -> VisualConfidencePayload:
    """Assemble the notebook payload for one CalibrationBundle.

    The four sections are built in this order:

    - ``invariants``: 5 generator contract checks (all bool).
    - ``truth_summary``: planted effect, components, and per-visitor CATE stats.
    - ``data_summary``: per-variant empirical summaries (n_visitors, rates, revenue).
    - ``recovery``: planted effect, empirical lift, and bootstrap SE on lift.

    Parameters
    ----------
    bundle:
        CalibrationBundle from ``generate_v2_core()``.
    bootstrap_seed:
        Seed for the bootstrap RNG — controls SE reproducibility.
    n_bootstrap:
        Number of bootstrap resamples.  Default 200.  Must be an ``int``
        of at least 2.

    Returns
    -------
    VisualConfidencePayload
        Frozen payload with all four required sections populated.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is not an ``int`` or is smaller than 2.  This is
        checked before ``bundle`` is inspected.
    """
    resample_count_ok = isinstance(n_bootstrap, int) and n_bootstrap >= 2
    if not resample_count_ok:
        raise ValueError(
            f"n_bootstrap must be an int >= 2, got {n_bootstrap!r}"
        )

    invariants = _build_invariants(bundle)
    truth_summary = _build_truth_summary(bundle)
    data_summary = _build_data_summary(bundle)
    recovery = _build_recovery(bundle, bootstrap_seed, n_bootstrap)
    return VisualConfidencePayload(
        invariants=invariants,
        truth_summary=truth_summary,
        data_summary=data_summary,
        recovery=recovery,
    )