Source code for pytyche.analysis._thompson

"""Thompson allocation over discovered segments.

Implementation behind ``posterior.thompson_allocation(...)`` on the three
posterior result types.  Per segment, each arm's allocation weight is the
posterior probability that it is the segment's best arm — the win
frequency of the shared best-arm rule
(:func:`pytyche.analysis._contrasts.best_arms`) over draws of the
segment-mean contrast vector, control included.  The epsilon floor is an
exact iterative floor-clip — inert when no raw frequency is below
``epsilon / K``.
"""

from __future__ import annotations

from collections.abc import Sequence

import numpy as np

from pytyche.analysis._contrasts import (
    AnyBCFResult,
    best_arms,
    concat_visitors,
    contrast_samples,
    require_observed,
    segment_member_mask,
)
from pytyche.contracts import DiscoveredSegment


def thompson_allocation(
    posterior: AnyBCFResult,
    segments: Sequence[DiscoveredSegment],
    epsilon: float = 0.02,
) -> dict[int, dict[str, float]]:
    """Split each segment's traffic by posterior best-arm probability.

    This is Thompson sampling at segment granularity.  Every posterior draw
    casts one vote per segment: the draw's treatment-vs-control contrasts
    are averaged over the segment's members, and the vote goes to the
    treatment with the largest contrast, or to control when no contrast is
    positive.  An arm's weight is the fraction of draws it wins, so traffic
    concentrates on a confident winner and stays spread across contenders
    where the decision is still open.

    Args:
        posterior: One of the three posterior result types, carrying
            observed data (raises otherwise).
        segments: Segments to allocate over, typically
            ``fit_policy_tree(...).segments``.  Only ``id`` and ``rule``
            are consumed.  Membership is each segment's rule applied to the
            concatenated visitor rows (all of ``variants[0]``'s rows, then
            ``variants[1]``'s, and so on: the same row order every
            per-visitor sample array uses).
        epsilon: Safety-net exploration floor.  Arms whose win frequency
            falls below ``epsilon / K`` are raised to exactly
            ``epsilon / K`` and the rest rescaled to keep the total at 1,
            iterating until stable, so no arm's traffic is starved to zero.
            Inert when every arm is already above the floor; ``0.0``
            returns the raw win frequencies verbatim.  This is NOT the dial
            for how much traffic stays on control; that is
            ``min_control_weight`` / ``min_explore_weight`` on
            ``pt.sequential_experiment``.  In the canonical Control +
            Explore + Optimized cell structure this floor is mostly
            redundant and rarely worth overriding.

    Returns:
        ``{segment.id: {variant_name: weight}}``.  Inner dicts are in
        variant order (control first), include control, and sum to 1.

    Raises:
        ValueError: When ``posterior.observed`` is ``None``; when *epsilon*
            is outside ``[0, 1]`` (the floor-clip is only well-defined on
            that range, since ``epsilon > 1`` would demand per-arm floors
            that cannot sum to 1); or when a segment's rule matches zero
            visitors (an empty mean over members would silently produce
            NaN weights).
        TypeError: When *posterior* is not an accepted result type.
    """
    # Written as a negated chained comparison so that NaN is rejected too.
    if not (0.0 <= epsilon <= 1.0):
        raise ValueError(
            f"thompson_allocation: epsilon must be in [0, 1], got {epsilon}"
        )

    observed = require_observed(posterior)
    draws = contrast_samples(posterior)  # (n, S, K-1)
    arm_names = (observed.control_name, *observed.treatment_names)
    n_arms = len(arm_names)
    # Visitor rows in the pinned extraction order: variant-list concat.
    rows = concat_visitors(observed)

    result: dict[int, dict[str, float]] = {}
    for segment in segments:
        in_segment = segment_member_mask(rows, segment)
        segment_means = draws[in_segment].mean(axis=0)  # (S, K-1)
        win_freq = arm_win_frequencies(segment_means, n_arms)
        floored = iterative_floor_clip(win_freq, epsilon)
        # strict=True: a name/weight length mismatch must fail loudly.
        result[segment.id] = dict(
            zip(arm_names, map(float, floored), strict=True)
        )
    return result
def arm_win_frequencies(mean_contrasts: np.ndarray, n_arms: int) -> np.ndarray:
    """Raw per-arm win frequencies over draws under the shared best-arm rule.

    Each row of *mean_contrasts* (shape ``(S, K - 1)``) is one posterior
    draw's segment-mean contrast vector; the draw's winner is
    :func:`pytyche.analysis._contrasts.best_arms` (control included).

    The shared internal behind both ``thompson_allocation`` and
    ``fit_policy_tree``'s per-leaf ``arm_best_probabilities``.  The two
    surfaces MUST agree draw-for-draw, so they share this code path.

    Args:
        mean_contrasts: Per-draw contrast vectors, shape ``(S, K - 1)``.
        n_arms: Total arm count K (control plus treatments).

    Returns:
        Float array of shape ``(n_arms,)`` summing to 1: arm ``a``'s win
        frequency over the S draws.
    """
    winners = best_arms(mean_contrasts)  # (S,)
    # minlength pads arms that never win, so they still get a 0.0 entry.
    counts = np.bincount(winners, minlength=n_arms)
    return counts / counts.sum()


def iterative_floor_clip(raw: np.ndarray, epsilon: float) -> np.ndarray:
    """Exact iterative floor-clip of *raw* (sums to 1) at ``epsilon / K``.

    Arms below the floor are raised to exactly ``epsilon / K`` and fixed;
    the remaining arms are rescaled so the total stays 1; this repeats
    until no unfixed arm is below the floor.  Each pass fixes at least one
    more arm, so the loop ends within K passes for ``epsilon <= 1``.

    INERT when nothing is below the floor: *raw* itself is returned
    unchanged (no mixing toward uniform; exploration belongs to the
    experiment's Explore cell, not this safety net).

    Args:
        raw: 1-D array of K per-arm weights, expected to sum to 1.
        epsilon: Total exploration floor; each arm's floor is
            ``epsilon / K``.

    Returns:
        *raw* itself when no entry is below the floor; otherwise a new
        float array with floored entries at exactly ``epsilon / K`` and
        the others rescaled.
    """
    k = raw.shape[0]
    floor = epsilon / k
    if not np.any(raw < floor):
        return raw

    weights = raw.astype(float).copy()
    fixed = np.zeros(k, dtype=bool)
    while True:
        below = ~fixed & (weights < floor)
        if not below.any():
            return weights
        weights[below] = floor
        fixed |= below
        if fixed.all():
            # Reachable at the epsilon == 1 boundary, where float rounding
            # can leave the last free arm a hair under the floor (e.g.
            # 1.0 - 0.1 * 9 < 0.1).  Every arm now sits on the floor, so
            # there is nothing left to rescale; returning here avoids a
            # 0.0 / 0.0 division and the RuntimeWarning it would raise.
            return weights
        free = ~fixed
        # Mass left for the free arms once the fixed ones hold the floor.
        free_target = 1.0 - floor * int(fixed.sum())
        weights[free] *= free_target / weights[free].sum()