{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Leave-one-model-out (LOMO) replicate tables\n",
    "\n",
    "Mirrors what `bootstrap_ci.py --leave-one-out --seed 42 --n-resamples 1000` would produce, but in rendered table form for paper Appendix E.\n",
    "Reads `../results/results.jsonl.zst`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json, math, pathlib\n",
    "from collections import defaultdict\n",
    "import numpy as np\n",
    "import zstandard\n",
    "\n",
    "RESULTS = pathlib.Path('../results/results.jsonl.zst')\n",
    "\n",
    "def load_jsonl(path):\n",
    "    p = pathlib.Path(path)\n",
    "    if p.suffix == '.zst':\n",
    "        with p.open('rb') as f:\n",
    "            raw = zstandard.ZstdDecompressor().decompress(f.read(), max_output_size=2 << 30)\n",
    "        for line in raw.decode('utf-8').splitlines():\n",
    "            if line.strip():\n",
    "                yield json.loads(line)\n",
    "    else:\n",
    "        with p.open() as f:\n",
    "            for line in f:\n",
    "                if line.strip():\n",
    "                    yield json.loads(line)\n",
    "\n",
    "def geomean(xs):\n",
    "    if not xs: return float('nan')\n",
    "    return math.exp(sum(math.log(x) for x in xs if x > 0) / len(xs))\n",
    "\n",
    "def model_of(row):\n",
    "    return (row.get('source_model') or row.get('model')\n",
    "            or row.get('path','').split('/')[-1].split('__')[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect (model, ratio) per (iter, kind, method)\n",
    "buckets = defaultdict(list)\n",
    "for r in load_jsonl(RESULTS):\n",
    "    iter_tag = r.get('_iter','?'); kind = r.get('_kind','?')\n",
    "    if 'methods' in r:\n",
    "        for m_name, m_data in r['methods'].items():\n",
    "            if not isinstance(m_data, dict): continue\n",
    "            ratio = m_data.get('ratio'); ok = m_data.get('verified')\n",
    "            if ratio is None or not (ok is True or 'trusted' in str(ok)): continue\n",
    "            buckets[(iter_tag, kind, m_name)].append((model_of(r), ratio))\n",
    "print(f'methods aggregated: {len(buckets)}')\n",
    "\n",
    "# LOMO table for bf16_split + qb_k4 + decomp_perstream_zstd19_bgscale\n",
    "import pandas as pd\n",
    "def lomo_table(method_filter):\n",
    "    rows = []\n",
    "    for (it, kind, m), pairs in buckets.items():\n",
    "        if not method_filter(m): continue\n",
    "        by_model = defaultdict(list)\n",
    "        for mdl, ratio in pairs:\n",
    "            by_model[mdl].append(ratio)\n",
    "        all_models = sorted(by_model)\n",
    "        if len(all_models) < 2: continue\n",
    "        for hold in all_models:\n",
    "            rest = [r for m, rs in by_model.items() if m != hold for r in rs]\n",
    "            rows.append({\n",
    "                'iter': it, 'kind': kind, 'method': m, 'held_out': hold,\n",
    "                'n_models_in_rest': len(all_models)-1,\n",
    "                'n_tensors_in_rest': len(rest), 'geomean_rest': round(geomean(rest), 5),\n",
    "                'n_tensors_held': len(by_model[hold]), 'geomean_held': round(geomean(by_model[hold]), 5),\n",
    "            })\n",
    "    return pd.DataFrame(rows)\n",
    "\n",
    "print('--- bf16_split LOMO ---')\n",
    "print(lomo_table(lambda m: m == 'bf16_split').to_markdown(index=False))\n",
    "print()\n",
    "print('--- qb_k4 LOMO (Q4_K) ---')\n",
    "print(lomo_table(lambda m: m == 'qb_k4').to_markdown(index=False))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
  "language_info": {"name": "python", "version": "3.11"}
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
