# Source code for pytyche.analysis._thompson

"""Thompson allocation over discovered segments.

Implementation behind ``posterior.thompson_allocation(...)`` on the three
posterior result types.  Per segment, each arm's allocation weight is the
posterior probability that it is the segment's best arm — the win
frequency of the shared best-arm rule
(:func:`pytyche.analysis._contrasts.best_arms`) over draws of the
segment-mean contrast vector, control included.  The epsilon floor is an
exact iterative floor-clip — inert when no raw frequency is below
``epsilon / K``.
"""

from __future__ import annotations

from collections.abc import Sequence

import numpy as np

from pytyche.analysis._contrasts import (
    AnyBCFResult,
    best_arms,
    concat_visitors,
    contrast_samples,
    require_observed,
    segment_member_mask,
)
from pytyche.contracts import DiscoveredSegment



# [docs]
def thompson_allocation(
    posterior: AnyBCFResult,
    segments: Sequence[DiscoveredSegment],
    epsilon: float = 0.02,
) -> dict[int, dict[str, float]]:
    """Split traffic within each segment by posterior best-arm probability.

    Segment-level Thompson sampling: for every posterior draw, the
    treatment-vs-control contrasts are averaged over the segment's member
    rows and the draw "votes" for the treatment with the largest mean
    contrast, or for control when no contrast is positive.  An arm's weight
    is the share of draws it wins, so traffic piles onto a clear winner and
    stays spread out while the arms are still hard to tell apart.

    Args:
        posterior: One of the three posterior result types; it must carry
            observed data.
        segments: Segments to allocate over, typically
            ``fit_policy_tree(...).segments``.  Only ``id`` and ``rule`` are
            consumed.  Membership is the segment's rule evaluated on the
            concatenated visitor rows (``variants[0]``'s rows first, then
            ``variants[1]``'s, ...), which is the row order every
            per-visitor sample array uses.
        epsilon: Safety-net exploration floor.  Arms whose win frequency is
            below ``epsilon / K`` are lifted to exactly ``epsilon / K`` and
            the remaining arms rescaled so the weights still sum to 1,
            repeated until stable.  It does nothing when every arm is
            already at or above the floor, and ``0.0`` yields the raw win
            frequencies verbatim.  This is NOT the control-traffic dial;
            that is ``min_control_weight`` / ``min_explore_weight`` on
            ``pt.sequential_experiment``.  With the canonical
            Control + Explore + Optimized cell structure the floor is
            mostly redundant and rarely worth overriding.

    Returns:
        ``{segment.id: {variant_name: weight}}``.  Each inner dict lists
        the variants in order (control first), includes control, and sums
        to 1.

    Raises:
        ValueError: When ``posterior.observed`` is ``None``; when *epsilon*
            lies outside ``[0, 1]`` (``epsilon > 1`` would require per-arm
            floors that cannot sum to 1); or when a segment's rule matches
            zero visitors (the mean over an empty member set would
            otherwise turn into NaN weights).
        TypeError: When *posterior* is not an accepted result type.
    """
    epsilon_in_range = 0.0 <= epsilon <= 1.0
    if not epsilon_in_range:
        raise ValueError(
            f"thompson_allocation: epsilon must be in [0, 1], got {epsilon}"
        )

    observed = require_observed(posterior)
    draws = contrast_samples(posterior)  # (n, S, K-1)
    arm_names = (observed.control_name, *observed.treatment_names)
    n_arms = len(arm_names)

    # Rows follow the pinned extraction order: the variant lists concatenated.
    rows = concat_visitors(observed)

    result: dict[int, dict[str, float]] = {}
    for segment in segments:
        in_segment = segment_member_mask(rows, segment)
        # Average the members' contrasts within each draw -> (S, K-1).
        segment_draws = draws[in_segment].mean(axis=0)
        win_freq = arm_win_frequencies(segment_draws, n_arms)
        floored = iterative_floor_clip(win_freq, epsilon)
        # strict=True: the weight vector must have exactly one entry per arm.
        result[segment.id] = dict(
            zip(arm_names, map(float, floored), strict=True)
        )
    return result



def arm_win_frequencies(mean_contrasts: np.ndarray, n_arms: int) -> np.ndarray:
    """Fraction of posterior draws won by each arm under the shared rule.

    Every row of *mean_contrasts* is one draw's segment-mean contrast
    vector.  :func:`pytyche.analysis._contrasts.best_arms` names the winning
    arm of each draw (control is a candidate), and the wins are tallied per
    arm.  ``thompson_allocation`` and ``fit_policy_tree``'s per-leaf
    ``arm_best_probabilities`` both go through this function because the
    two surfaces MUST agree draw-for-draw.

    Args:
        mean_contrasts: Per-draw contrast vectors, shape ``(S, K - 1)``.
        n_arms: Total number of arms K (control plus treatments).

    Returns:
        Float array of shape ``(n_arms,)`` that sums to 1; entry ``a`` is
        arm ``a``'s share of the S draws.
    """
    per_draw_winner = best_arms(mean_contrasts)  # (S,)
    # minlength keeps a slot for arms that never win a draw.
    tally = np.bincount(per_draw_winner, minlength=n_arms)
    n_votes = tally.sum()
    return tally / n_votes


def iterative_floor_clip(raw: np.ndarray, epsilon: float) -> np.ndarray:
    """Exact iterative floor-clip of *raw* (sums to 1) at ``epsilon / K``.

    Arms below the floor are raised to exactly ``epsilon / K`` and pinned;
    the remaining (free) arms are rescaled so the total stays 1.  Rescaling
    can push a free arm below the floor, so the step repeats until no free
    arm is below it.  Each pass pins at least one more arm, so the loop
    ends after at most K passes.

    INERT when nothing is below the floor: *raw* is returned unchanged (no
    mixing toward uniform; exploration belongs to the experiment's Explore
    cell, not this safety net).

    Args:
        raw: 1-D array of per-arm weights, expected to sum to 1.
        epsilon: Total floor mass; each arm's floor is ``epsilon / K`` with
            ``K = raw.shape[0]``.

    Returns:
        The floored weights.  On the inert path this is the *raw* array
        object itself, not a copy; otherwise it is a new float array.
    """
    k = raw.shape[0]
    floor = epsilon / k
    if not np.any(raw < floor):
        return raw
    weights = raw.astype(float).copy()
    fixed = np.zeros(k, dtype=bool)
    while True:
        below = ~fixed & (weights < floor)
        if not below.any():
            return weights
        weights[below] = floor
        fixed |= below
        free = ~fixed
        if not free.any():
            # Every arm is pinned at the floor, so nothing is left to
            # rescale.  Returning here avoids dividing by the sum of an
            # empty selection, which would emit a RuntimeWarning.
            return weights
        # Mass left for the free arms once every pinned arm holds its floor.
        free_target = 1.0 - floor * int(fixed.sum())
        weights[free] *= free_target / weights[free].sum()