"""Empirical (non-Bayesian) summarization of observed experiment data.
Pure functions computing counts, rates, and lifts from
``ObservedExperimentData``. No posterior inference — just arithmetic
on the observed data. Serves as the first consumer of v2 contracts.
Format parity invariant: ``summarize_v2`` accepts both generator output
and production-loaded data identically, since both produce
``ObservedExperimentData`` conforming to ``VISITOR_SCHEMA``.
"""
from __future__ import annotations
import dataclasses
from typing import cast
import pandas as pd
from pytyche.contracts import (
BetweenRule,
ComparisonRule,
EqRule,
InRule,
ObservedExperimentData,
RuleClause,
SegmentRule,
)
from pytyche.validation import validate_observed_data, validate_rule
# ---------------------------------------------------------------------------
# Hurdle component decomposition
# ---------------------------------------------------------------------------
[docs]
def summarize_hurdle_components(observed: ObservedExperimentData) -> dict[str, float]:
    """Split the observed RPV difference into conversion and AOV parts.

    The first variant is treated as control (index 0) and the second as
    treatment (index 1). With ``p`` the conversion rate of an arm and ``m``
    the mean revenue among that arm's converters, the additive split is
    (described upstream as the same decomposition used by
    ``CalibrationTruth``)::

        conv_effect = (p1 - p0) * m0
        aov_effect  = p1 * (m1 - m0)
        total       = conv_effect + aov_effect

    Parameters
    ----------
    observed:
        Experiment data with metric ``"revenue_per_visitor"`` and exactly
        two variants. Each variant must expose ``n_visitors``,
        ``n_conversions`` and a ``visitors`` frame with a boolean
        ``converted`` column and a ``revenue`` column.

    Returns
    -------
    dict[str, float]
        Keys ``empirical_conv_effect``, ``empirical_aov_effect`` and
        ``empirical_total_effect``.

    Raises
    ------
    ValueError
        If the metric is not ``"revenue_per_visitor"`` or the experiment
        does not have exactly 2 variants.
    """
    if observed.metric != "revenue_per_visitor":
        raise ValueError(
            f"summarize_hurdle_components requires metric='revenue_per_visitor', "
            f"got {observed.metric!r}"
        )

    arm_count = len(observed.variants)
    if arm_count != 2:
        raise ValueError(
            f"summarize_hurdle_components requires exactly 2 variants, "
            f"got {arm_count}"
        )

    def _conversion_rate(arm):
        # An arm without visitors contributes a rate of 0.0 instead of
        # dividing by zero.
        if arm.n_visitors > 0:
            return arm.n_conversions / arm.n_visitors
        return 0.0

    def _mean_converter_revenue(arm):
        # The boolean "converted" column selects the converting rows; the
        # mean is defined as 0.0 when nobody converted.
        buyers = arm.visitors[arm.visitors["converted"]]
        if len(buyers) > 0:
            return buyers["revenue"].mean().item()
        return 0.0

    control, treatment = observed.variants[0], observed.variants[1]

    rate_control = _conversion_rate(control)
    rate_treatment = _conversion_rate(treatment)
    aov_control = _mean_converter_revenue(control)
    aov_treatment = _mean_converter_revenue(treatment)

    from_conversion = (rate_treatment - rate_control) * aov_control
    from_aov = rate_treatment * (aov_treatment - aov_control)

    return {
        "empirical_conv_effect": from_conversion,
        "empirical_aov_effect": from_aov,
        "empirical_total_effect": from_conversion + from_aov,
    }
# ---------------------------------------------------------------------------
# Payload types (frozen dataclasses, local to this module)
# ---------------------------------------------------------------------------
[docs]
@dataclasses.dataclass(frozen=True)
class VariantSummary:
    """Immutable record of the raw outcome numbers for one variant.

    Instances are produced by ``_variant_summary`` from a visitor frame;
    when that frame is empty every numeric field is zero.
    """

    # Variant label.
    name: str
    # Number of visitor rows summarized.
    n_visitors: int
    # Sum of the "converted" column.
    n_conversions: int
    # n_conversions / n_visitors.
    conversion_rate: float
    # Sum of the "revenue" column.
    total_revenue: float
    # total_revenue / n_visitors.
    revenue_per_visitor: float
[docs]
@dataclasses.dataclass(frozen=True)
class LiftSummary:
    """Immutable comparison of one metric between two variants.

    Instances are produced by ``_lift_summary``.
    """

    # Name of the baseline variant.
    baseline: str
    # Name of the variant compared against the baseline.
    comparison: str
    # Metric the values refer to ("conversion_rate" or "revenue_per_visitor").
    metric: str
    # Metric value observed for the baseline variant.
    baseline_value: float
    # Metric value observed for the comparison variant.
    comparison_value: float
    # comparison_value - baseline_value.
    absolute_lift: float
    # absolute_lift / baseline_value; None when baseline_value is zero.
    relative_lift: float | None
[docs]
@dataclasses.dataclass(frozen=True)
class SegmentSummary:
    """Immutable breakdown of one segment: size, per-variant stats and lift.

    Instances are produced by ``summarize_v2``, one per requested rule.
    """

    # Rule that defines the segment.
    rule: SegmentRule
    # Number of visitors, pooled over all variants, matching the rule.
    n_visitors: int
    # n_visitors divided by the pooled visitor count: a fraction in
    # [0, 1], not a percentage.
    pct_of_total: float
    # Per-variant summaries restricted to the segment, in experiment order.
    variants: list[VariantSummary]
    # Lift of the second variant over the first within the segment.
    lift: LiftSummary
[docs]
@dataclasses.dataclass(frozen=True)
class EmpiricalSummary:
    """Immutable top-level result returned by ``summarize_v2``."""

    # Identifier copied from the observed experiment data.
    experiment_id: str
    # Primary metric copied from the observed experiment data.
    metric: str
    # One summary per variant, in experiment order.
    variants: list[VariantSummary]
    # Lift of the second variant over the first on the primary metric.
    lift: LiftSummary
    # One entry per requested segment rule; empty when none were requested.
    segments: list[SegmentSummary]
# ---------------------------------------------------------------------------
# apply_rule — boolean mask from SegmentRule
# ---------------------------------------------------------------------------
def _apply_clause(df: pd.DataFrame, clause: RuleClause) -> pd.Series:
    """Evaluate one rule clause against ``df`` and return a boolean mask.

    The column named by ``clause.feature`` is compared according to the
    clause type:

    * ``EqRule``         -- equal to ``clause.value``
    * ``InRule``         -- member of ``clause.values``
    * ``ComparisonRule`` -- ``gt`` / ``gte`` / ``lt`` / ``lte`` against
      ``clause.threshold``
    * ``BetweenRule``    -- within ``[clause.low, clause.high]``, both ends
      inclusive

    Any missing entries left in the comparison result are replaced by
    ``False``, so rows with missing feature values never match.

    Raises
    ------
    ValueError
        If a ``ComparisonRule`` carries an unrecognized operator.
    TypeError
        If ``clause`` is none of the supported clause types.
    """
    # Look the column up first so a missing feature fails before any
    # clause-type dispatch happens.
    column = cast(pd.Series, df[clause.feature])

    if isinstance(clause, EqRule):
        return (column == clause.value).fillna(False)

    if isinstance(clause, InRule):
        return column.isin(clause.values).fillna(False)

    if isinstance(clause, ComparisonRule):
        comparators = {
            "gt": lambda series, bound: series > bound,
            "gte": lambda series, bound: series >= bound,
            "lt": lambda series, bound: series < bound,
            "lte": lambda series, bound: series <= bound,
        }
        compare = comparators.get(clause.operator)
        if compare is None:
            raise ValueError(f"Unknown comparison operator: {clause.operator!r}")
        return compare(column, clause.threshold).fillna(False)

    if isinstance(clause, BetweenRule):
        at_or_above_low = column >= clause.low
        at_or_below_high = column <= clause.high
        return (at_or_above_low & at_or_below_high).fillna(False)

    raise TypeError(f"Unknown clause type: {type(clause).__name__}")
[docs]
def apply_rule(df: pd.DataFrame, rule: SegmentRule) -> pd.Series:
    """Return a boolean mask selecting rows that satisfy every clause of ``rule``.

    Each clause is evaluated independently and the results are AND-combined.
    A rule with no clauses therefore matches every row. Rows with missing
    values in a referenced feature column fail that clause and so do not
    match the rule.

    Parameters
    ----------
    df:
        Visitor rows to test; the result shares its index.
    rule:
        Segment rule whose ``clauses`` are applied.

    Returns
    -------
    pd.Series
        Boolean mask indexed like ``df``.
    """
    clause_masks = [_apply_clause(df, clause) for clause in rule.clauses]

    # Start from all-True so that AND-ing in each clause narrows the match.
    combined = pd.Series(True, index=df.index)
    for clause_mask in clause_masks:
        combined = combined & clause_mask
    return combined
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _variant_summary(name: str, df: pd.DataFrame) -> VariantSummary:
    """Build the ``VariantSummary`` for the visitor rows in ``df``.

    Conversions are the sum of the ``converted`` column and revenue is the
    sum of the ``revenue`` column; both rates divide by the row count. An
    empty frame yields an all-zero summary rather than dividing by zero.
    """
    row_count = len(df)

    if row_count == 0:
        conversions = 0
        revenue = 0.0
        rate = 0.0
        per_visitor = 0.0
    else:
        # .item() unwraps the scalar returned by sum() into a plain
        # Python number before the int/float conversion.
        conversions = int(df["converted"].sum().item())
        revenue = float(df["revenue"].sum().item())
        rate = conversions / row_count
        per_visitor = revenue / row_count

    return VariantSummary(
        name=name,
        n_visitors=row_count,
        n_conversions=conversions,
        conversion_rate=rate,
        total_revenue=revenue,
        revenue_per_visitor=per_visitor,
    )
def _lift_summary(
    baseline: VariantSummary,
    comparison: VariantSummary,
    metric: str,
) -> LiftSummary:
    """Build the ``LiftSummary`` of ``comparison`` over ``baseline`` for ``metric``.

    Parameters
    ----------
    baseline, comparison:
        Variant summaries to compare.
    metric:
        Either ``"conversion_rate"`` or ``"revenue_per_visitor"``.

    Returns
    -------
    LiftSummary
        Absolute lift is ``comparison - baseline``; relative lift is the
        absolute lift divided by the baseline value, or ``None`` when the
        baseline value is zero.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the two supported names.
    """
    supported_metrics = ("conversion_rate", "revenue_per_visitor")
    if metric not in supported_metrics:
        raise ValueError(f"Unknown metric for lift: {metric!r}")

    # Each supported metric name is also the VariantSummary attribute that
    # stores its value.
    reference_value = getattr(baseline, metric)
    candidate_value = getattr(comparison, metric)

    difference = candidate_value - reference_value
    if reference_value != 0.0:
        ratio = difference / reference_value
    else:
        # Relative lift is undefined against a zero baseline.
        ratio = None

    return LiftSummary(
        baseline=baseline.name,
        comparison=comparison.name,
        metric=metric,
        baseline_value=reference_value,
        comparison_value=candidate_value,
        absolute_lift=difference,
        relative_lift=ratio,
    )
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def summarize_v2(
    observed: ObservedExperimentData,
    segments: list[SegmentRule] | None = None,
    *,
    strict: bool = True,
) -> EmpiricalSummary:
    """Summarize a two-variant experiment, optionally broken down by segment.

    ``observed`` is validated before anything else is computed. The first
    variant is the baseline and the second the comparison for every lift.

    Parameters
    ----------
    observed:
        The experiment data to summarize.
    segments:
        Optional segment rules. Each rule is validated against ``observed``
        and produces one ``SegmentSummary``; ``None`` or an empty list
        yields no breakdown.
    strict:
        Forwarded to ``validate_observed_data``. The original documentation
        describes ``False`` as allowing asymmetric feature columns across
        variants.

    Returns
    -------
    EmpiricalSummary
        Per-variant stats, the overall lift, and one entry per segment rule.

    Raises
    ------
    ValueError
        If the experiment does not have exactly 2 variants.
    SchemaViolation
        Documented upstream as the error raised when validation fails;
        whatever ``validate_observed_data`` or ``validate_rule`` raise
        propagates unchanged.
    """
    validate_observed_data(observed, strict=strict)

    variant_count = len(observed.variants)
    if variant_count != 2:
        raise ValueError(
            f"summarize_v2 requires exactly 2 variants, "
            f"got {variant_count}"
        )

    # Whole-experiment numbers: one summary per variant, then index 1 vs 0.
    overall = [
        _variant_summary(variant.name, variant.visitors)
        for variant in observed.variants
    ]
    overall_lift = _lift_summary(overall[0], overall[1], observed.metric)

    breakdowns: list[SegmentSummary] = []
    if segments:
        # Rules are evaluated on the pooled rows of all variants; the
        # "variant" column is used afterwards to split matches per variant.
        pooled = pd.concat(
            [variant.visitors for variant in observed.variants],
            ignore_index=True,
        )
        pooled_count = len(pooled)

        for rule in segments:
            validate_rule(rule, observed)

            matched = pooled[apply_rule(pooled, rule)]
            matched_count = len(matched)
            share = matched_count / pooled_count if pooled_count > 0 else 0.0

            per_variant = [
                _variant_summary(
                    variant.name,
                    cast(pd.DataFrame, matched[matched["variant"] == variant.name]),
                )
                for variant in observed.variants
            ]

            breakdowns.append(
                SegmentSummary(
                    rule=rule,
                    n_visitors=matched_count,
                    pct_of_total=share,
                    variants=per_variant,
                    lift=_lift_summary(
                        per_variant[0], per_variant[1], observed.metric
                    ),
                )
            )

    return EmpiricalSummary(
        experiment_id=observed.experiment_id,
        metric=observed.metric,
        variants=overall,
        lift=overall_lift,
        segments=breakdowns,
    )