"""Thompson allocation over discovered segments.
Implementation behind ``posterior.thompson_allocation(...)`` on the three
posterior result types. Per segment, each arm's allocation weight is the
posterior probability that it is the segment's best arm — the win
frequency of the shared best-arm rule
(:func:`pytyche.analysis._contrasts.best_arms`) over draws of the
segment-mean contrast vector, control included. The epsilon floor is an
exact iterative floor-clip — inert when no raw frequency is below
``epsilon / K``.
"""
from __future__ import annotations
from collections.abc import Sequence
import numpy as np
from pytyche.analysis._contrasts import (
AnyBCFResult,
best_arms,
concat_visitors,
contrast_samples,
require_observed,
segment_member_mask,
)
from pytyche.contracts import DiscoveredSegment
[docs]
def thompson_allocation(
    posterior: AnyBCFResult,
    segments: Sequence[DiscoveredSegment],
    epsilon: float = 0.02,
) -> dict[int, dict[str, float]]:
    """Per-segment traffic split weighted by each arm's win probability.

    Each arm's weight is the posterior probability that it is the
    segment's best arm. This is Thompson sampling at segment granularity.
    Each posterior draw casts one vote per segment: the draw's
    treatment-vs-control contrasts are averaged over the segment's
    members, and the vote goes to the treatment with the largest
    contrast — or to control, when no contrast is positive. An arm's
    weight is the fraction of draws it wins. Where the posterior is
    confident, traffic concentrates on the winner; where arms are still
    close, traffic stays spread across the contenders, so the next round
    collects evidence exactly where the decision is open.

    Args:
        posterior: One of the three posterior result types, carrying
            observed data (raises otherwise).
        segments: Segments to allocate over — typically
            ``fit_policy_tree(...).segments``. Only ``id`` and ``rule``
            are consumed. Membership is each segment's rule applied to
            the concatenated visitor rows (all of ``variants[0]``'s rows,
            then ``variants[1]``'s, and so on — the same row order every
            per-visitor sample array uses).
        epsilon: Safety-net exploration floor: arms whose win frequency
            falls below ``epsilon / K`` are raised to exactly
            ``epsilon / K`` and the rest rescaled to preserve sum-to-1,
            iterating until stable — so no arm's traffic is starved to
            zero. Inert when every arm is already above the floor;
            ``0.0`` returns raw win frequencies verbatim. This is NOT
            the dial for how much traffic stays on control — that is
            ``min_control_weight`` / ``min_explore_weight`` on
            ``pt.sequential_experiment``. In the canonical
            Control + Explore + Optimized cell structure this floor is
            mostly redundant; rarely worth overriding.

    Returns:
        ``{segment.id: {variant_name: weight}}`` — inner dicts in variant
        order (control first), control included, summing to 1.

    Raises:
        ValueError: When ``posterior.observed`` is ``None``; when
            *epsilon* is outside ``[0, 1]`` (the floor-clip is only
            well-defined on that range — ``epsilon > 1`` would demand
            per-arm floors that cannot sum to 1); or when a segment's
            rule matches zero visitors (an empty mean over members would
            silently produce NaN weights).
        TypeError: When *posterior* is not an accepted result type.
    """
    # The chained comparison is False for NaN as well, so NaN is rejected.
    if not 0.0 <= epsilon <= 1.0:
        raise ValueError(
            f"thompson_allocation: epsilon must be in [0, 1], got {epsilon}"
        )

    observed = require_observed(posterior)
    contrasts = contrast_samples(posterior)  # (n, S, K-1)
    variant_names = (observed.control_name, *observed.treatment_names)
    k = len(variant_names)

    # The pinned extraction row order: variant-list concat.
    visitors = concat_visitors(observed)

    allocation: dict[int, dict[str, float]] = {}
    for seg in segments:
        members = segment_member_mask(visitors, seg)
        member_contrasts = contrasts[members]
        # Enforce the documented zero-member error here rather than relying
        # on the helper (which may already reject it — not visible from this
        # module). Averaging an empty selection yields NaN, which would
        # otherwise flow into the vote count unnoticed.
        if member_contrasts.shape[0] == 0:
            raise ValueError(
                f"thompson_allocation: segment {seg.id} matches zero "
                "visitors; cannot average contrasts over an empty segment"
            )
        mean_contrasts = member_contrasts.mean(axis=0)  # (S, K-1)
        raw = arm_win_frequencies(mean_contrasts, k)
        weights = iterative_floor_clip(raw, epsilon)
        # strict=True: a weight vector whose length differs from the arm
        # count is an error, never a silent truncation.
        allocation[seg.id] = {
            name: float(w)
            for name, w in zip(variant_names, weights, strict=True)
        }
    return allocation
def arm_win_frequencies(mean_contrasts: np.ndarray, n_arms: int) -> np.ndarray:
    """Share of posterior draws won by each arm under the shared best-arm rule.

    Every row of *mean_contrasts* is one posterior draw's segment-mean
    contrast vector. The winner of each draw is decided by
    :func:`pytyche.analysis._contrasts.best_arms` (control included), and
    the winners are tallied per arm. Both ``thompson_allocation`` and
    ``fit_policy_tree``'s per-leaf ``arm_best_probabilities`` go through
    this function so that the two surfaces agree draw-for-draw.

    Args:
        mean_contrasts: Per-draw contrast vectors, shape ``(S, K - 1)``.
        n_arms: Total arm count K (control plus treatments); used as the
            minimum length of the tally so arms that never win still get
            an explicit zero entry.

    Returns:
        Array of win frequencies summing to 1, indexed by arm. Its length
        is ``n_arms`` as long as every winner index is below ``n_arms``.
    """
    # One integer winner index per draw, tallied into per-arm counts.
    tallies = np.bincount(best_arms(mean_contrasts), minlength=n_arms)
    total_draws = tallies.sum()
    # True division turns the integer tallies into float frequencies.
    return tallies / total_draws
def iterative_floor_clip(raw: np.ndarray, epsilon: float) -> np.ndarray:
    """Exact iterative floor-clip of *raw* (sums to 1) at ``epsilon / K``.

    Arms below the floor are raised to exactly ``epsilon / K`` and fixed;
    the remaining arms are rescaled so the total stays 1; iterate until no
    unfixed arm is below the floor (converges in <= K passes for
    ``epsilon <= 1``, since every pass fixes at least one more arm).
    INERT when nothing is below the floor — *raw* is returned unchanged
    (no mixing toward uniform; exploration belongs to the experiment's
    Explore cell, not this safety net).

    Args:
        raw: 1-D array of per-arm weights, expected to sum to 1. Never
            modified in place.
        epsilon: Total floor mass; each arm's floor is ``epsilon / K``
            where ``K = raw.shape[0]``.

    Returns:
        On the inert path, the *raw* object itself (not a copy).
        Otherwise a new float array in which every arm is at least
        ``epsilon / K``.
    """
    k = raw.shape[0]
    floor = epsilon / k
    if not np.any(raw < floor):
        return raw

    # Work on a float copy so the caller's array is left untouched.
    weights = raw.astype(float).copy()
    fixed = np.zeros(k, dtype=bool)
    while True:
        below = ~fixed & (weights < floor)
        if not below.any():
            return weights
        weights[below] = floor
        fixed |= below
        free = ~fixed
        if not free.any():
            # Every arm is pinned at the floor, so there is nothing left to
            # rescale. Returning here avoids dividing by the sum of an
            # empty selection (0.0), which would emit a NumPy
            # RuntimeWarning while changing no weight.
            return weights
        # Mass left for the unfixed arms once the fixed ones hold the floor.
        free_target = 1.0 - floor * int(fixed.sum())
        weights[free] *= free_target / weights[free].sum()