# Source code for pytyche.visual_confidence

"""Visual confidence payload for generator verification.

Provides a tested payload function that backs all panels of the visual
confidence notebook. The notebook renders from the payload; unit tests
assert on the payload directly without notebook rendering.

Public API
----------
- ``VisualConfidencePayload`` — frozen dataclass with required sections.
- ``build_visual_confidence_payload(bundle, bootstrap_seed,
  n_bootstrap=200) -> VisualConfidencePayload``.
"""

from __future__ import annotations

import dataclasses

import numpy as np

from pytyche.contracts import CalibrationBundle, MetricFamily
from pytyche.summarize import summarize_hurdle_components

# ---------------------------------------------------------------------------
# Truth-leakage column names — observed DataFrames must not contain these.
# ---------------------------------------------------------------------------

#: Column names that indicate ground-truth leakage into observed data.
#: These names appear in TruthResult / CalibrationTruth internals but must
#: never appear as columns in VariantData.visitors DataFrames.
# Kept in alphabetical order so additions are easy to spot in review; the
# resulting set is order-independent.
_TRUTH_COLUMN_NAMES: frozenset[str] = frozenset(
    [
        "aov",
        "aov_per_visitor",
        "cate",
        "cate_per_visitor",
        "conv",
        "conv_per_visitor",
        "effect",
        "effect_components",
        "m0",
        "m1",
        "p0",
        "p1",
        "tau",
        "tau_i",
        "true_cate",
        "true_effect",
    ]
)


# ---------------------------------------------------------------------------
# Payload type
# ---------------------------------------------------------------------------


@dataclasses.dataclass(frozen=True)
class VisualConfidencePayload:
    """Typed payload backing all panels of the visual confidence notebook.

    All four fields are required — there are no optional sections. Future
    analyzer/BCF panels extend this type by subclassing or by adding fields
    to a derived dataclass without breaking existing sections (open/closed
    principle for the payload contract).

    The dataclass is frozen, so fields cannot be rebound after construction.
    The dict values themselves are ordinary mutable dicts.

    Fields:
        invariants: Name → bool map of generator contract checks.
        truth_summary: Population-level truth statistics.
        data_summary: Per-variant empirical summaries (variant name → stats).
        recovery: Empirical recovery comparison with planted truth and
            bootstrap SE on the empirical lift.
    """

    invariants: dict[str, bool]
    truth_summary: dict[str, object]
    data_summary: dict[str, dict[str, object]]
    recovery: dict[str, object]


# ---------------------------------------------------------------------------
# Invariant checks
# ---------------------------------------------------------------------------


def _require_cate(bundle: CalibrationBundle):
    """Return ``bundle.truth.cate_per_visitor``, raising if it is ``None``.

    visual_confidence is K=2-only: the scalar cate_per_visitor is populated
    only for binary arms (at K>=3 the heterogeneity lives in the per-contrast
    list). Fail loudly rather than degrade if handed a multi-arm bundle.

    Raises:
        AssertionError: If ``cate_per_visitor`` is ``None``. Raised explicitly
            (instead of via an ``assert`` statement) so the check is still
            enforced when Python runs with ``-O``.
    """
    cate = bundle.truth.cate_per_visitor
    if cate is None:
        raise AssertionError(
            "visual_confidence requires a K=2 bundle (cate_per_visitor populated)"
        )
    return cate


def _check_observed_truth_boundary(bundle: CalibrationBundle) -> bool:
    """True if no observed variant DataFrame contains truth-leakage columns."""
    for variant in bundle.observed.variants:
        cols = set(variant.visitors.columns)
        if cols & _TRUTH_COLUMN_NAMES:
            return False
    return True


def _check_cate_alignment(bundle: CalibrationBundle) -> bool:
    """True if cate_per_visitor length equals total observed visitors.

    Raises:
        AssertionError: If the bundle has no ``cate_per_visitor``.
    """
    cate = _require_cate(bundle)
    total_visitors = sum(v.n_visitors for v in bundle.observed.variants)
    return len(cate.values) == total_visitors


def _check_decomposition_identity(
    bundle: CalibrationBundle, tol: float = 1e-10
) -> bool:
    """True if hurdle decomposition satisfies effect = conv_effect + aov_effect.

    For binary metrics, always returns True (single component trivially
    equals the total effect).

    Args:
        bundle: Bundle whose ``truth.effect`` and ``truth.effect_components``
            are compared.
        tol: Maximum absolute difference still counted as equal.
    """
    if bundle.truth.metric_family == MetricFamily.BINARY:
        return True
    ec = bundle.truth.effect_components
    # A component missing from the mapping is treated as contributing 0.0.
    conv_effect = ec.get("conv_effect", 0.0)
    aov_effect = ec.get("aov_effect", 0.0)
    return abs(bundle.truth.effect - (conv_effect + aov_effect)) <= tol


def _check_effect_equals_mean_cate(
    bundle: CalibrationBundle, tol: float = 1e-10
) -> bool:
    """True if bundle.truth.effect equals mean(cate_per_visitor.values).

    Args:
        bundle: Bundle whose planted effect is compared with the mean CATE.
        tol: Maximum absolute difference still counted as equal.

    Raises:
        AssertionError: If the bundle has no ``cate_per_visitor``.
    """
    cate = _require_cate(bundle)
    mean_cate = float(np.mean(cate.values))
    return abs(bundle.truth.effect - mean_cate) <= tol


def _check_cross_arm_visitor_id_uniqueness(bundle: CalibrationBundle) -> bool:
    """True if no visitor_id appears in more than one variant.

    Duplicates *within* a single variant are not detected: each variant's ids
    are collapsed to a set before being compared with earlier variants.
    """
    seen: set[str] = set()
    for variant in bundle.observed.variants:
        ids = set(variant.visitors["visitor_id"].tolist())
        if ids & seen:
            return False
        seen.update(ids)
    return True


# ---------------------------------------------------------------------------
# Section builders
# ---------------------------------------------------------------------------


def _build_invariants(bundle: CalibrationBundle) -> dict[str, bool]:
    """Compute all 5 invariant checks and return as a name → bool dict.

    Raises:
        AssertionError: If the bundle has no ``cate_per_visitor`` (propagated
            from the CATE-based checks).
    """
    return {
        "observed_truth_boundary": _check_observed_truth_boundary(bundle),
        "cate_alignment": _check_cate_alignment(bundle),
        "decomposition_identity": _check_decomposition_identity(bundle),
        "effect_equals_mean_cate": _check_effect_equals_mean_cate(bundle),
        "cross_arm_visitor_id_uniqueness": _check_cross_arm_visitor_id_uniqueness(
            bundle
        ),
    }


def _build_truth_summary(bundle: CalibrationBundle) -> dict[str, object]:
    """Extract population-level truth statistics from bundle.truth.

    Returns the planted effect and its components together with the mean,
    sample standard deviation (``ddof=1``), minimum and maximum of the
    per-visitor CATE values.

    Raises:
        AssertionError: If the bundle has no ``cate_per_visitor``.
    """
    cate_values = _require_cate(bundle).values
    return {
        "effect": bundle.truth.effect,
        "effect_components": bundle.truth.effect_components,
        "cate_mean": float(np.mean(cate_values)),
        "cate_std": float(np.std(cate_values, ddof=1)),
        "cate_min": float(np.min(cate_values)),
        "cate_max": float(np.max(cate_values)),
    }


def _build_data_summary(
    bundle: CalibrationBundle,
) -> dict[str, dict[str, object]]:
    """Compute per-variant empirical summaries.

    For binary metrics: n_visitors, conversion_rate.
    For hurdle metrics: n_visitors, conversion_rate, mean_revenue.

    A variant with zero visitors reports 0.0 for its rates instead of
    dividing by zero.
    """
    is_hurdle = bundle.truth.metric_family == MetricFamily.HURDLE_REAL
    result: dict[str, dict[str, object]] = {}
    for variant in bundle.observed.variants:
        df = variant.visitors
        n = variant.n_visitors
        conversion_rate = df["converted"].sum().item() / n if n > 0 else 0.0
        entry: dict[str, object] = {
            "n_visitors": n,
            "conversion_rate": conversion_rate,
        }
        if is_hurdle:
            entry["mean_revenue"] = (
                df["revenue"].sum().item() / n if n > 0 else 0.0
            )
        result[variant.name] = entry
    return result


def _get_metric_values(
    bundle: CalibrationBundle,
) -> tuple[np.ndarray, np.ndarray]:
    """Extract per-visitor metric values for control and treatment arms.

    Returns (ctrl_values, treat_values) for the primary metric.
    For binary: conversion indicator (0/1).
    For hurdle: revenue per visitor (0.0 for non-converters).
    """
    # Convention: variants[0] = control, variants[1] = treatment.
    ctrl_variant = bundle.observed.variants[0]
    treat_variant = bundle.observed.variants[1]

    if bundle.truth.metric_family == MetricFamily.BINARY:
        ctrl_values = ctrl_variant.visitors["converted"].to_numpy(dtype=float)
        treat_values = treat_variant.visitors["converted"].to_numpy(dtype=float)
    else:
        # Hurdle: revenue per visitor.
        ctrl_values = ctrl_variant.visitors["revenue"].to_numpy(dtype=float)
        treat_values = treat_variant.visitors["revenue"].to_numpy(dtype=float)

    return ctrl_values, treat_values


def _build_recovery(
    bundle: CalibrationBundle,
    bootstrap_seed: int,
    n_bootstrap: int,
) -> dict[str, object]:
    """Compute recovery comparison with bootstrap SE on empirical lift.

    Fields:
        planted_effect: bundle.truth.effect (ground truth).
        empirical_lift: treatment mean minus control mean for the primary
            metric.
        bootstrap_se: deterministic bootstrap SE on empirical lift.

    For hurdle metrics the planted conv/aov effects are added, followed by
    whatever keys ``summarize_hurdle_components`` returns for the observed
    data.
    """
    ctrl_values, treat_values = _get_metric_values(bundle)
    empirical_lift = float(treat_values.mean() - ctrl_values.mean())

    # Deterministic bootstrap SE using the provided seed.
    # The draws stay in this exact order (control, then treatment, once per
    # replicate): reordering or batching them would consume the RNG stream
    # differently and change the SE produced for a given seed.
    rng = np.random.default_rng(bootstrap_seed)
    lifts: list[float] = []
    for _ in range(n_bootstrap):
        ctrl_sample = rng.choice(ctrl_values, size=len(ctrl_values), replace=True)
        treat_sample = rng.choice(
            treat_values, size=len(treat_values), replace=True
        )
        lifts.append(float(treat_sample.mean() - ctrl_sample.mean()))
    bootstrap_se = float(np.std(lifts, ddof=1))

    recovery: dict[str, object] = {
        "planted_effect": bundle.truth.effect,
        "empirical_lift": empirical_lift,
        "bootstrap_se": bootstrap_se,
    }

    if bundle.truth.metric_family == MetricFamily.HURDLE_REAL:
        components = summarize_hurdle_components(bundle.observed)
        recovery["planted_conv_effect"] = bundle.truth.effect_components[
            "conv_effect"
        ]
        recovery["planted_aov_effect"] = bundle.truth.effect_components[
            "aov_effect"
        ]
        recovery.update(components)

    return recovery


# ---------------------------------------------------------------------------
# Public entrypoint
# ---------------------------------------------------------------------------


def build_visual_confidence_payload(
    bundle: CalibrationBundle,
    bootstrap_seed: int,
    n_bootstrap: int = 200,
) -> VisualConfidencePayload:
    """Build a VisualConfidencePayload from a CalibrationBundle.

    Computes all panel data from the bundle:

    - ``invariants``: 5 generator contract checks (all bool).
    - ``truth_summary``: planted effect, components, and per-visitor CATE stats.
    - ``data_summary``: per-variant empirical summaries (n_visitors, rates,
      revenue).
    - ``recovery``: planted effect, empirical lift, and bootstrap SE on lift.

    Parameters
    ----------
    bundle:
        CalibrationBundle from ``generate_v2_core()``.
    bootstrap_seed:
        Seed for the bootstrap RNG — controls SE reproducibility.
    n_bootstrap:
        Number of bootstrap resamples. Default 200.

    Returns
    -------
    VisualConfidencePayload
        Frozen payload with all four required sections populated.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is not an ``int`` or is smaller than 2. The check
        runs before the bundle is touched.
    """
    # At least two resamples are required: the bootstrap SE is a sample
    # standard deviation (ddof=1) over the resampled lifts.
    if not isinstance(n_bootstrap, int) or n_bootstrap < 2:
        raise ValueError(
            f"n_bootstrap must be an int >= 2, got {n_bootstrap!r}"
        )

    return VisualConfidencePayload(
        invariants=_build_invariants(bundle),
        truth_summary=_build_truth_summary(bundle),
        data_summary=_build_data_summary(bundle),
        recovery=_build_recovery(bundle, bootstrap_seed, n_bootstrap),
    )