"""Act-now recommendation summary for one treatment-vs-control contrast.
Implementation behind ``posterior.recommendation_summary(...)`` on the three
posterior result types. The treatment's metric-native contrast draws are
scoped (all visitors, or one segment's members), reduced to a per-draw mean
lift vector, and summarized under the same SHIP / STOP / CONTINUE priority
rule as the legacy ``compare.variants.recommendation_summary`` — extended
with the closed-form ``expected_value_of_one_more_round`` (preposterior
EVSI; see ``docs/concepts/decision-theoretic-inputs.md``).
"""
from __future__ import annotations
import math
import numpy as np
from scipy.stats import norm
from pytyche.analysis._contrasts import (
AnyBCFResult,
concat_visitors,
contrast_samples,
require_observed,
segment_member_mask,
)
from pytyche.contracts import (
Decision,
DecisionThresholds,
DiscoveredSegment,
RecommendationSummary,
)
[docs]
def recommendation_summary(
    posterior: AnyBCFResult,
    treatment: str,
    segment: DiscoveredSegment | None = None,
    *,
    thresholds: DecisionThresholds | None = None,
    min_practical_effect: float = 0.02,
) -> RecommendationSummary:
    """Summarize whether to SHIP, STOP, or CONTINUE one treatment right now.

    The contrast draws for *treatment* are narrowed to the requested scope,
    averaged over that scope's visitors into one mean lift per draw, and
    that lift vector is turned into two expected losses and three
    probabilities. The decision is the first rule that fires, in this
    order: SHIP when the expected loss of committing to the treatment is
    below tolerance and both probability gates are cleared; STOP when the
    probability of harm exceeds its threshold; STOP when the probability of
    a practical win falls below the futility threshold; CONTINUE otherwise.
    This mirrors the priority order of the legacy
    ``compare.variants.recommendation_summary``.

    v0.2 raw scope: every probability and expected loss is taken from the
    raw draws, even for a calibrated posterior — interval corrections are
    applied where intervals are built, not in this function.

    Args:
        posterior: One of the three posterior result types; it must carry
            observed data.
        treatment: Name of the treatment variant. Its position in
            ``observed.treatment_names`` selects the contrast column.
        segment: ``None`` averages over all visitors (global snapshot); a
            ``DiscoveredSegment`` averages over that rule's members only.
        thresholds: Decision thresholds. A default ``DecisionThresholds()``
            is used when ``None``.
        min_practical_effect: Smallest lift considered meaningful; the
            cut-off for ``probability_better`` (lift above it) and
            ``probability_harmful`` (lift below its negative).

    Returns:
        A ``RecommendationSummary`` holding the decision, the expected
        losses and probabilities behind it, the threshold values that were
        applied, and ``expected_value_of_one_more_round`` (closed-form
        preposterior EVSI; see
        ``docs/concepts/decision-theoretic-inputs.md``).

    Raises:
        ValueError: When ``posterior.observed`` is ``None``; when
            *treatment* is not one of the observed treatment names; or
            when the segment's rule matches zero visitors.
        TypeError: When *posterior* is not an accepted result type.
    """
    observed = require_observed(posterior)
    names = observed.treatment_names
    if treatment not in names:
        raise ValueError(
            f"unknown treatment {treatment!r}; valid treatment names: "
            f"{list(names)}"
        )
    limits = DecisionThresholds() if thresholds is None else thresholds

    # Pick this treatment's contrast column: (n, S) = visitors x draws.
    column = names.index(treatment)
    draws = contrast_samples(posterior)[:, :, column]
    if segment is not None:
        # Keep only the visitors matched by the segment's rule.
        in_segment = segment_member_mask(concat_visitors(observed), segment)
        draws = draws[in_segment]

    # One mean lift per posterior draw over the scoped visitors: (S,).
    mean_lift = draws.mean(axis=0)

    p_positive = float(np.mean(mean_lift > 0.0))
    p_better = float(np.mean(mean_lift > min_practical_effect))
    p_harmful = float(np.mean(mean_lift < -min_practical_effect))
    # Expected loss of each commitment: the upside forgone by staying on
    # the baseline, and the downside incurred by moving to the treatment.
    loss_if_baseline = float(np.maximum(mean_lift, 0.0).mean())
    loss_if_treatment = float(np.maximum(-mean_lift, 0.0).mean())

    # Same priority order as compare.variants.recommendation_summary:
    # SHIP gates first, then harm, then futility, else keep collecting.
    clears_ship_gates = (
        loss_if_treatment < limits.expected_loss_tolerance
        and p_positive > limits.p_positive_threshold
        and p_better > limits.p_better_threshold
    )
    if clears_ship_gates:
        verdict = Decision.SHIP
    elif (
        p_harmful > limits.harm_threshold
        or p_better < limits.futility_threshold
    ):
        verdict = Decision.STOP
    else:
        verdict = Decision.CONTINUE

    applied_thresholds = {
        "expected_loss_tolerance": limits.expected_loss_tolerance,
        "p_positive_threshold": limits.p_positive_threshold,
        "p_better_threshold": limits.p_better_threshold,
        "futility_threshold": limits.futility_threshold,
        "harm_threshold": limits.harm_threshold,
    }
    return RecommendationSummary(
        treatment=treatment,
        decision=verdict,
        expected_loss_baseline=loss_if_baseline,
        expected_loss_comparison=loss_if_treatment,
        probability_positive=p_positive,
        probability_better=p_better,
        probability_harmful=p_harmful,
        thresholds=applied_thresholds,
        expected_value_of_one_more_round=(
            _expected_value_of_one_more_round(mean_lift)
        ),
    )
def _expected_value_of_one_more_round(lift: np.ndarray) -> float:
    """Return the closed-form preposterior EVSI of one more same-size round.

    The lift posterior is approximated as normal with mean
    ``lift.mean()`` and standard deviation ``lift.std(ddof=1)``. A further
    round at the same per-round n doubles the data, so the preposterior
    standard deviation of the future posterior mean is
    ``s = sigma / sqrt(2)``. The Raiffa–Schlaifer two-action value of
    sample information is then ``s * (pdf(z) - z * cdf(-z))`` with
    ``z = |mu| / s`` (see ``docs/concepts/decision-theoretic-inputs.md``).

    Args:
        lift: Per-draw mean lift values.

    Returns:
        The EVSI as a Python float. Exactly ``0.0`` when fewer than two
        draws are supplied or when the sample standard deviation is zero
        or non-finite, so this helper does not return NaN for those
        degenerate inputs.
    """
    # With fewer than two draws there is no spread to estimate; bail out
    # before ddof=1 makes numpy emit RuntimeWarnings on its way to the NaN
    # that the next guard would catch anyway.
    if lift.size < 2:
        return 0.0

    posterior_mean = float(lift.mean())
    posterior_sd = float(lift.std(ddof=1))
    # Degenerate spread (zero, NaN, or infinite) carries no usable value.
    if not (math.isfinite(posterior_sd) and posterior_sd != 0.0):
        return 0.0

    # Preposterior sd of the future posterior mean after doubling the data.
    preposterior_sd = posterior_sd / math.sqrt(2.0)
    z = abs(posterior_mean) / preposterior_sd
    # Unit-normal linear loss evaluated at z.
    unit_loss = norm.pdf(z) - z * norm.cdf(-z)
    return float(preposterior_sd * unit_loss)