Source code for pytyche.summarize

"""Empirical (non-Bayesian) summarization of observed experiment data.

Pure functions computing counts, rates, and lifts from
``ObservedExperimentData``.  No posterior inference — just arithmetic
on the observed data.  Serves as the first consumer of v2 contracts.

Format parity invariant: ``summarize_v2`` accepts both generator output
and production-loaded data identically, since both produce
``ObservedExperimentData`` conforming to ``VISITOR_SCHEMA``.
"""

from __future__ import annotations

import dataclasses
from typing import cast

import pandas as pd

from pytyche.contracts import (
    BetweenRule,
    ComparisonRule,
    EqRule,
    InRule,
    ObservedExperimentData,
    RuleClause,
    SegmentRule,
)
from pytyche.validation import validate_observed_data, validate_rule

# ---------------------------------------------------------------------------
# Hurdle component decomposition
# ---------------------------------------------------------------------------


def summarize_hurdle_components(observed: ObservedExperimentData) -> dict[str, float]:
    """Split the observed RPV difference into conversion and AOV parts.

    The first variant is treated as control (subscript 0) and the second
    as treatment (subscript 1).  The split mirrors the additive
    decomposition used by CalibrationTruth::

        conv_effect = (p1_hat - p0_hat) * m0_hat
        aov_effect  = p1_hat * (m1_hat - m0_hat)
        total       = conv_effect + aov_effect

    ``p_hat`` is an arm's conversion rate (``n_conversions / n_visitors``,
    or 0.0 for an arm with no visitors).  ``m_hat`` is the mean ``revenue``
    over rows whose ``converted`` flag is set (0.0 when an arm has no such
    rows).

    Returns
    -------
    dict[str, float]
        Keys ``empirical_conv_effect``, ``empirical_aov_effect`` and
        ``empirical_total_effect``, all in RPV units.

    Raises
    ------
    ValueError
        If ``observed.metric`` is not ``"revenue_per_visitor"`` or the
        experiment does not have exactly 2 variants.
    """
    if observed.metric != "revenue_per_visitor":
        raise ValueError(
            f"summarize_hurdle_components requires metric='revenue_per_visitor', "
            f"got {observed.metric!r}"
        )

    arm_count = len(observed.variants)
    if arm_count != 2:
        raise ValueError(
            f"summarize_hurdle_components requires exactly 2 variants, "
            f"got {arm_count}"
        )

    def _conversion_rate(arm):
        # Guard against division by zero for an arm without traffic.
        if arm.n_visitors > 0:
            return arm.n_conversions / arm.n_visitors
        return 0.0

    def _mean_order_value(arm):
        # Average revenue restricted to converting visitors only.
        buyers = arm.visitors[arm.visitors["converted"]]
        if len(buyers) > 0:
            return buyers["revenue"].mean().item()
        return 0.0

    control, treatment = observed.variants[0], observed.variants[1]

    rate_control = _conversion_rate(control)
    rate_treatment = _conversion_rate(treatment)
    aov_control = _mean_order_value(control)
    aov_treatment = _mean_order_value(treatment)

    from_conversion = (rate_treatment - rate_control) * aov_control
    from_aov = rate_treatment * (aov_treatment - aov_control)

    return {
        "empirical_conv_effect": from_conversion,
        "empirical_aov_effect": from_aov,
        "empirical_total_effect": from_conversion + from_aov,
    }
# ---------------------------------------------------------------------------
# Payload types (frozen dataclasses, local to this module)
# ---------------------------------------------------------------------------
@dataclasses.dataclass(frozen=True)
class VariantSummary:
    """Immutable record of the observed totals and rates for one variant.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Variant label.
    name: str
    # Number of visitor rows summarized.
    n_visitors: int
    # Number of those visitors that converted.
    n_conversions: int
    # Conversions per visitor.
    conversion_rate: float
    # Revenue summed over all visitors.
    total_revenue: float
    # Revenue per visitor.
    revenue_per_visitor: float
@dataclasses.dataclass(frozen=True)
class LiftSummary:
    """Immutable comparison of one metric between a baseline and a comparison variant.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Name of the baseline variant.
    baseline: str
    # Name of the variant compared against the baseline.
    comparison: str
    # Name of the metric the values below refer to.
    metric: str
    # Metric value observed for the baseline variant.
    baseline_value: float
    # Metric value observed for the comparison variant.
    comparison_value: float
    # comparison_value minus baseline_value.
    absolute_lift: float
    # absolute_lift as a fraction of baseline_value; may be None
    # (the type allows a missing value, e.g. when no ratio is defined).
    relative_lift: float | None
@dataclasses.dataclass(frozen=True)
class SegmentSummary:
    """Immutable breakdown of one segment: size, per-variant stats and lift.

    Instances are frozen, but the ``variants`` list itself is an ordinary
    mutable list.
    """

    # The rule that defines which visitors belong to this segment.
    rule: SegmentRule
    # Number of visitors matched by the rule.
    n_visitors: int
    # Matched visitors as a fraction of all visitors.
    pct_of_total: float
    # One summary per variant, restricted to the segment.
    variants: list[VariantSummary]
    # Lift computed from the segment-restricted variant summaries.
    lift: LiftSummary
@dataclasses.dataclass(frozen=True)
class EmpiricalSummary:
    """Immutable top-level result: experiment-wide stats plus segment breakdowns.

    Instances are frozen, but the ``variants`` and ``segments`` lists
    themselves are ordinary mutable lists.
    """

    # Identifier of the summarized experiment.
    experiment_id: str
    # Name of the experiment's primary metric.
    metric: str
    # One summary per variant over all visitors.
    variants: list[VariantSummary]
    # Lift between the variants on the primary metric.
    lift: LiftSummary
    # One entry per requested segment rule (empty when none were requested).
    segments: list[SegmentSummary]
# ---------------------------------------------------------------------------
# apply_rule — boolean mask from SegmentRule
# ---------------------------------------------------------------------------


def _apply_clause(df: pd.DataFrame, clause: RuleClause) -> pd.Series:
    """Evaluate one rule clause against ``df`` and return a row mask.

    The column named by ``clause.feature`` is looked up first, then tested
    according to the clause type:

    * ``EqRule``         -- equal to ``clause.value``
    * ``InRule``         -- member of ``clause.values``
    * ``ComparisonRule`` -- ``gt`` / ``gte`` / ``lt`` / ``lte`` against
      ``clause.threshold``
    * ``BetweenRule``    -- within ``[clause.low, clause.high]``, both ends
      inclusive

    NaN semantics: any missing entry left in the mask is replaced by
    ``False`` before returning, so missing data never matches a clause.

    Raises
    ------
    ValueError
        If a ``ComparisonRule`` carries an unrecognized operator.
    TypeError
        If ``clause`` is not one of the supported clause types.
    """
    values = cast(pd.Series, df[clause.feature])

    if isinstance(clause, EqRule):
        hits = values == clause.value
    elif isinstance(clause, InRule):
        hits = values.isin(clause.values)
    elif isinstance(clause, ComparisonRule):
        # Operator name -> elementwise comparison against the threshold.
        comparators = {
            "gt": lambda series, bound: series > bound,
            "gte": lambda series, bound: series >= bound,
            "lt": lambda series, bound: series < bound,
            "lte": lambda series, bound: series <= bound,
        }
        if clause.operator not in comparators:
            raise ValueError(f"Unknown comparison operator: {clause.operator!r}")
        hits = comparators[clause.operator](values, clause.threshold)
    elif isinstance(clause, BetweenRule):
        at_or_above_low = values >= clause.low
        at_or_below_high = values <= clause.high
        hits = at_or_above_low & at_or_below_high
    else:
        raise TypeError(f"Unknown clause type: {type(clause).__name__}")

    return hits.fillna(False)
def apply_rule(df: pd.DataFrame, rule: SegmentRule) -> pd.Series:
    """Return a boolean mask over ``df`` selecting rows that satisfy ``rule``.

    Every clause in ``rule.clauses`` is evaluated separately and the
    results are AND-combined, so a row is selected only when it matches
    all clauses.  A rule without clauses selects every row.

    Rows with NaN in a referenced feature column are never selected,
    because a clause evaluates to False on missing data.

    The returned Series shares ``df``'s index.
    """
    per_clause = [_apply_clause(df, clause) for clause in rule.clauses]

    # Start from "everything matches" and narrow down clause by clause.
    selected = pd.Series(True, index=df.index)
    for clause_mask in per_clause:
        selected = selected & clause_mask
    return selected
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _variant_summary(name: str, df: pd.DataFrame) -> VariantSummary:
    """Build a ``VariantSummary`` for ``name`` from the visitor rows in ``df``.

    Conversions are the sum of the ``converted`` column and revenue is the
    sum of the ``revenue`` column; both rates divide by the row count.
    An empty frame yields an all-zero summary instead of dividing by zero.
    """
    visitor_count = len(df)

    if visitor_count == 0:
        conversions = 0
        revenue = 0.0
        rate = 0.0
        per_visitor = 0.0
    else:
        conversions = int(df["converted"].sum().item())
        revenue = float(df["revenue"].sum().item())
        rate = conversions / visitor_count
        per_visitor = revenue / visitor_count

    return VariantSummary(
        name=name,
        n_visitors=visitor_count,
        n_conversions=conversions,
        conversion_rate=rate,
        total_revenue=revenue,
        revenue_per_visitor=per_visitor,
    )


def _lift_summary(
    baseline: VariantSummary,
    comparison: VariantSummary,
    metric: str,
) -> LiftSummary:
    """Build the ``LiftSummary`` of ``comparison`` over ``baseline`` for ``metric``.

    ``metric`` must be ``"conversion_rate"`` or ``"revenue_per_visitor"``;
    the value of the same-named field is read from each summary.  The
    relative lift is ``None`` when the baseline value is zero, since the
    ratio is undefined there.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the two supported names.
    """
    if metric not in ("conversion_rate", "revenue_per_visitor"):
        raise ValueError(f"Unknown metric for lift: {metric!r}")

    # Each supported metric name is also the VariantSummary field holding it.
    reference = getattr(baseline, metric)
    observed_value = getattr(comparison, metric)

    difference = observed_value - reference
    ratio = None if reference == 0.0 else difference / reference

    return LiftSummary(
        baseline=baseline.name,
        comparison=comparison.name,
        metric=metric,
        baseline_value=reference,
        comparison_value=observed_value,
        absolute_lift=difference,
        relative_lift=ratio,
    )


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def summarize_v2(
    observed: ObservedExperimentData,
    segments: list[SegmentRule] | None = None,
    *,
    strict: bool = True,
) -> EmpiricalSummary:
    """Summarize observed experiment data with plain arithmetic.

    ``observed`` is validated before anything else is computed
    (fail-closed).  The variant at index 0 is the baseline and the variant
    at index 1 is the comparison for every lift.

    Parameters
    ----------
    observed:
        The experiment data to summarize.
    segments:
        Optional segment rules.  Each rule is validated against
        ``observed`` and yields one ``SegmentSummary``.  ``None`` or an
        empty list yields no segment breakdown.
    strict:
        Forwarded to ``validate_observed_data``.  Set ``False`` to allow
        asymmetric feature columns across variants.

    Returns
    -------
    EmpiricalSummary
        Per-variant stats, the overall lift, and one entry per segment.

    Raises
    ------
    ValueError
        If the experiment does not have exactly 2 variants.
    SchemaViolation
        If observed data fails validation.
    """
    validate_observed_data(observed, strict=strict)

    arms = observed.variants
    if len(arms) != 2:
        raise ValueError(
            f"summarize_v2 requires exactly 2 variants, "
            f"got {len(observed.variants)}"
        )

    # Experiment-wide numbers: one summary per arm, then baseline vs comparison.
    overall = [_variant_summary(arm.name, arm.visitors) for arm in arms]
    overall_lift = _lift_summary(overall[0], overall[1], observed.metric)

    breakdowns: list[SegmentSummary] = []
    if segments:
        # Rules are applied once to the pooled visitors of all arms; the
        # matching rows are split back per arm via the "variant" column.
        pooled = pd.concat([arm.visitors for arm in arms], ignore_index=True)
        pooled_count = len(pooled)

        for rule in segments:
            validate_rule(rule, observed)

            in_segment = pooled[apply_rule(pooled, rule)]
            segment_size = len(in_segment)
            share = segment_size / pooled_count if pooled_count > 0 else 0.0

            per_arm = [
                _variant_summary(
                    arm.name,
                    cast(pd.DataFrame, in_segment[in_segment["variant"] == arm.name]),
                )
                for arm in arms
            ]

            breakdowns.append(
                SegmentSummary(
                    rule=rule,
                    n_visitors=segment_size,
                    pct_of_total=share,
                    variants=per_arm,
                    lift=_lift_summary(per_arm[0], per_arm[1], observed.metric),
                )
            )

    return EmpiricalSummary(
        experiment_id=observed.experiment_id,
        metric=observed.metric,
        variants=overall,
        lift=overall_lift,
        segments=breakdowns,
    )