From 53681a5a1f644d33a63cd001961bdf5ea543b353 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 03:19:36 +0000 Subject: [PATCH 1/8] Round 1: fix LaTeX-in-emphasis/blockquote rendering at the root The card pipeline ran marked before KaTeX, so markdown emphasis rules consumed any formula containing * or _ (e.g. Q^*(s,a)) and split the $...$ span, leaving KaTeX nothing clean to render. Add docs/js/math-katex.js: marked extensions that tokenize $...$ and $$...$$ before emphasis runs and render each span straight to KaTeX HTML. Math now survives emphasis, blockquotes, tables and list items. - tools/test_math_render.cjs: deterministic Node regression test (6 cases) - tools/audit_card_links.py + .link_baseline.json: ratcheting check for inert internal cross-links (current baseline 91 dead / 12 orphan cards) so the count can only go down; wired both into validate.yml - broaden validate.yml path filter to docs/** so JS changes are covered --- .github/workflows/validate.yml | 8 +- docs/js/atlas-cards.js | 5 ++ docs/js/math-katex.js | 85 ++++++++++++++++++ tools/.link_baseline.json | 4 + tools/audit_card_links.py | 153 +++++++++++++++++++++++++++++++++ tools/test_math_render.cjs | 86 ++++++++++++++++++ 6 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 docs/js/math-katex.js create mode 100644 tools/.link_baseline.json create mode 100644 tools/audit_card_links.py create mode 100644 tools/test_math_render.cjs diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 1ec8126..335f704 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -3,14 +3,14 @@ name: Validate graph & links on: pull_request: paths: - - 'docs/data/**' + - 'docs/**' - 'concepts.md' - 'tools/**' - '.github/workflows/validate.yml' push: branches: [main] paths: - - 'docs/data/**' + - 'docs/**' - 'concepts.md' - 'tools/**' @@ -23,6 +23,10 @@ jobs: with: { python-version: '3.11' } - name: Validate graph.json run: python 
tools/validate_graph.py + - name: Check internal card cross-links + run: python tools/audit_card_links.py + - name: Render math regression test (marked + KaTeX) + run: node tools/test_math_render.cjs - name: Check external deep links run: python tools/check_links.py --max-failures 5 continue-on-error: true diff --git a/docs/js/atlas-cards.js b/docs/js/atlas-cards.js index 4434f9e..c063c73 100644 --- a/docs/js/atlas-cards.js +++ b/docs/js/atlas-cards.js @@ -11,6 +11,8 @@ // problem / insight / validation / paradigm), the merged graph carries // a `summary_zh` plus typed adjacency. We synthesize a card from that. +import { installMathExtension } from "./math-katex.js"; + const FRONT = /^---[\r\n]+([\s\S]*?)[\r\n]+---/; const cardCache = new Map(); const RAW_BASE = "data/cards"; @@ -405,6 +407,9 @@ export class CardRenderer { } _mdToHtml(body) { + // Tokenize math BEFORE markdown emphasis can split a `$…$` span (formulae + // with `*` or `_` such as `Q^*` were being mangled into broken KaTeX). + if (window.marked && window.katex) installMathExtension(window.marked, window.katex); // strip CR characters and let marked.parse handle the rest const rendered = window.marked ? window.marked.parse(body) : `
${this._escape(body)}
`; const sanitised = window.DOMPurify ? window.DOMPurify.sanitize(rendered, { ADD_ATTR: ["data-jump", "target", "rel"] }) : rendered; diff --git a/docs/js/math-katex.js b/docs/js/math-katex.js new file mode 100644 index 0000000..fe1350a --- /dev/null +++ b/docs/js/math-katex.js @@ -0,0 +1,85 @@ +// Math rendering for `marked`. +// +// Problem this solves: the previous pipeline ran `marked` first and KaTeX's +// `renderMathInElement` afterwards. Markdown emphasis rules run during the +// first pass, so any formula containing `*` or `_` — `Q^*(s,a)`, a stray +// `a_i ... a_j` pair, multiplication `w * x` — gets eaten as `<em>`/`<strong>` +// and the `$…$` span is split across an element boundary. KaTeX then cannot +// find a clean delimiter pair and the formula renders broken. This is the +// "formula inside emphasis / inside a blockquote" failure. +// +// Fix: register `marked` extensions that tokenize `$…$` and `$$…$$` BEFORE any +// emphasis rule can touch them, and render each span straight to KaTeX HTML. +// Math therefore survives emphasis, blockquotes, list items and table cells. +// +// The module is deterministic and side-effect free apart from the one-time +// `marked.use(...)` registration, guarded by a flag so repeated calls are safe. + +export function installMathExtension(marked, katex) { + if (!marked || !katex || marked.__mathInstalled) return; + + const render = (tex, display) => { + try { + return katex.renderToString(tex, { + displayMode: display, + throwOnError: false, + output: "htmlAndMathml", + strict: "ignore", + trust: false, + }); + } catch (_e) { + // Degrade to an escaped, readable fallback rather than throwing. + const safe = String(tex).replace(/[<>&]/g, c => ({ "<": "&lt;", ">": "&gt;", "&": "&amp;" }[c])); + return display + ? `
${safe}
` + : `${safe}`; + } + }; + + // Block-level display math sitting on its own line(s): `$$ … $$`. + const blockMath = { + name: "blockMath", + level: "block", + start(src) { + const i = src.indexOf("$$"); + return i < 0 ? undefined : i; + }, + tokenizer(src) { + const m = /^\$\$([\s\S]+?)\$\$/.exec(src); + if (m) return { type: "blockMath", raw: m[0], text: m[1].trim() }; + return undefined; + }, + renderer(token) { + return `
${render(token.text, true)}
`; + }, + }; + + // Inline math. Handles `$$…$$` that appears mid-paragraph as display math, + // and ordinary `$…$` as inline math. The negative lookaheads keep the two + // delimiter widths from colliding. + const inlineMath = { + name: "inlineMath", + level: "inline", + start(src) { + const i = src.indexOf("$"); + return i < 0 ? undefined : i; + }, + tokenizer(src) { + let m = /^\$\$(?!\$)([\s\S]+?)\$\$/.exec(src); + if (m) return { type: "inlineMath", raw: m[0], text: m[1].trim(), display: true }; + // Single-dollar inline math. Allow escaped `\$` inside; forbid newlines + // and bare `$` so we never run away to the end of the document. + m = /^\$(?!\$)((?:\\\$|[^\n$])+?)\$(?!\$)/.exec(src); + if (m) return { type: "inlineMath", raw: m[0], text: m[1], display: false }; + return undefined; + }, + renderer(token) { + return token.display + ? `${render(token.text, true)}` + : render(token.text, false); + }, + }; + + marked.use({ extensions: [blockMath, inlineMath] }); + marked.__mathInstalled = true; +} diff --git a/tools/.link_baseline.json b/tools/.link_baseline.json new file mode 100644 index 0000000..4c45c83 --- /dev/null +++ b/tools/.link_baseline.json @@ -0,0 +1,4 @@ +{ + "max_inert": 91, + "max_orphans": 12 +} diff --git a/tools/audit_card_links.py b/tools/audit_card_links.py new file mode 100644 index 0000000..5d55305 --- /dev/null +++ b/tools/audit_card_links.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +"""Audit internal cross-links inside the extended cards. + +A card link like `[CQL](paper_cql.md)` only becomes a live jump in the UI if it +resolves to a node that exists in docs/data/graph_extended.json. The front-end +(docs/js/atlas-cards.js `_rewriteCardLinks`) resolves a link in two ways: + + * a bare node id `paper:cql` + * a card filename `paper_cql.md` -> `paper:cql`, falling back to the + leading id chunk `paper:cql` (text before first `_`) + +If neither resolves, the link is rendered inert (a dead end for the reader). 
+This script mirrors that resolution exactly, counts the inert links, and fails +when the count exceeds a recorded baseline so the number can only ratchet down. + +It also reports card files under cards/extended/ that have no matching node +(orphan cards the graph can never surface). + +Usage: + python tools/audit_card_links.py # gate against the baseline + python tools/audit_card_links.py --report # print every offender, never fail + python tools/audit_card_links.py --update-baseline # record current count +""" +from __future__ import annotations + +import json +import re +import sys +from collections import Counter, defaultdict +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +GRAPH = ROOT / "docs" / "data" / "graph_extended.json" +CARDS = ROOT / "docs" / "data" / "cards" +BASELINE = ROOT / "tools" / ".link_baseline.json" + +KIND_PREFIXES = { + "paper", "paradigm", "insight", "validation", "problem", "move", + "concept", "essay", "course", "channel", "lab", +} +LINK_RE = re.compile(r"\]\(([^)]+)\)") +BARE_RE = re.compile(r"^([a-z]+):([A-Za-z0-9._-]+)$") +FILE_RE = re.compile(r"(?:^|/)([a-z]+)[_:]([A-Za-z0-9._-]+?)(?:\.md)?$") + + +def load_ids() -> set[str]: + g = json.loads(GRAPH.read_text(encoding="utf-8")) + return {n["id"] for n in g["nodes"]} + + +def resolve(href: str, ids: set[str]): + """Return ('node', resolved_id_or_None) for internal node links, else ('skip', None).""" + href = href.split()[0].strip() if href.strip() else href + if re.match(r"^(https?:|mailto:|tel:|#)", href, re.I): + return ("skip", None) + if "concepts.md" in href: + return ("skip", None) + if re.search(r"\.ipynb($|[?#])", href, re.I): + return ("skip", None) + m = BARE_RE.match(href) + if m and m.group(1) in KIND_PREFIXES: + cid = f"{m.group(1)}:{m.group(2)}" + return ("node", cid if cid in ids else None) + m = FILE_RE.search(href) + if m and m.group(1) in KIND_PREFIXES and href.endswith(".md"): + kind, rest = m.group(1), m.group(2) + full = f"{kind}:{rest}" + 
if full in ids: + return ("node", full) + if "_" in rest: + head = f"{kind}:{rest.split('_')[0]}" + if head in ids: + return ("node", head) + return ("node", None) + return ("skip", None) + + +def main() -> int: + report = "--report" in sys.argv + update = "--update-baseline" in sys.argv + ids = load_ids() + + total = 0 + inert = 0 + missing: Counter[str] = Counter() + referrers: dict[str, list[str]] = defaultdict(list) + + for path in sorted(CARDS.rglob("*.md")): + text = path.read_text(encoding="utf-8") + for m in LINK_RE.finditer(text): + kind, resolved = resolve(m.group(1), ids) + if kind != "node": + continue + total += 1 + if resolved is None: + inert += 1 + fm = FILE_RE.search(m.group(1).split()[0]) + key = f"{fm.group(1)}:{fm.group(2)}" if fm else m.group(1) + missing[key] += 1 + if len(referrers[key]) < 3: + referrers[key].append(path.name) + + # Orphan cards: a card file under cards/extended whose node is absent. + orphans = [] + for path in sorted((CARDS / "extended").glob("*.md")): + stem = path.stem + m = re.match(r"^([a-z]+)_(.+)$", stem) + if not m: + continue + kind = m.group(1) + if kind not in KIND_PREFIXES: + continue + cid = f"{kind}:{m.group(2)}" + if cid not in ids and not any(i.endswith(":" + m.group(2)) for i in ids): + orphans.append(path.name) + + print(f"internal node-links: {total} INERT(dead): {inert} unique dead ids: {len(missing)}") + if report or inert: + print("\ntop dead node ids (count, sample referrers):") + for key, cnt in missing.most_common(60): + print(f" {cnt:3d} {key:55s} e.g. 
{referrers[key]}") + if orphans: + print(f"\norphan cards (file present, no node) [{len(orphans)}]:") + for o in orphans[:40]: + print(f" {o}") + + base = {"max_inert": inert, "max_orphans": len(orphans)} + if BASELINE.exists(): + base = json.loads(BASELINE.read_text(encoding="utf-8")) + + if update: + BASELINE.write_text(json.dumps({"max_inert": inert, "max_orphans": len(orphans)}, indent=2) + "\n", encoding="utf-8") + print(f"\nbaseline updated -> max_inert={inert} max_orphans={len(orphans)}") + return 0 + + if report: + return 0 + + failed = False + if inert > base.get("max_inert", inert): + print(f"\nFAIL: inert links {inert} exceed baseline {base['max_inert']} (new dead links introduced).") + failed = True + if len(orphans) > base.get("max_orphans", len(orphans)): + print(f"\nFAIL: orphan cards {len(orphans)} exceed baseline {base['max_orphans']}.") + failed = True + if failed: + return 1 + print(f"OK within baseline (max_inert={base.get('max_inert')}, max_orphans={base.get('max_orphans')}).") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_math_render.cjs b/tools/test_math_render.cjs new file mode 100644 index 0000000..897433d --- /dev/null +++ b/tools/test_math_render.cjs @@ -0,0 +1,86 @@ +#!/usr/bin/env node +/* + * Regression test for the marked + KaTeX math pipeline. + * + * Guards the bug where a formula containing `*` or `_` (e.g. `Q^*(s,a)`) was + * consumed by markdown emphasis before KaTeX ran, splitting the math span and + * rendering broken. After installing docs/js/math-katex.js, every `$…$` and + * `$$…$$` must render to KaTeX HTML with no stray / inside. + * + * Pure Node, no browser, no network. Exit 0 on pass, 1 on failure. 
+ */ +const fs = require("fs"); +const vm = require("vm"); +const path = require("path"); + +const ROOT = path.resolve(__dirname, ".."); + +function loadUmd(file) { + const code = fs.readFileSync(path.join(ROOT, file), "utf8"); + const sandbox = { module: { exports: {} }, console, window: {}, self: {} }; + sandbox.exports = sandbox.module.exports; + sandbox.self = sandbox.window; + vm.createContext(sandbox); + vm.runInContext(code, sandbox); + return sandbox.module.exports && Object.keys(sandbox.module.exports).length + ? sandbox.module.exports + : (sandbox.window && (sandbox.window.marked || sandbox.window.katex)); +} + +function loadEsmFn(file, fnName, scope) { + let code = fs.readFileSync(path.join(ROOT, file), "utf8"); + code = code.replace(/export\s+function\s+/g, "function ").replace(/export\s*\{[^}]*\}\s*;?/g, ""); + const sandbox = Object.assign({ module: { exports: {} }, console }, scope); + vm.createContext(sandbox); + vm.runInContext(code + `\nmodule.exports.${fnName} = ${fnName};`, sandbox); + return sandbox.module.exports[fnName]; +} + +const marked = loadUmd("docs/vendor/marked/marked.umd.js"); +const katex = loadUmd("docs/vendor/katex/katex.min.js"); +const installMathExtension = loadEsmFn("docs/js/math-katex.js", "installMathExtension", {}); +installMathExtension(marked, katex); + +const cases = [ + { name: "inline subscript sequence", md: "动作序列 $\\tau = (a_0, a_1, \\ldots, a_{H-1})$ 是输出。" }, + { name: "emphasis around math with star", md: "*强调里有公式 $\\max_{a'} Q(s',a')$ 会怎样*" }, + { name: "blockquote with Q^* (asterisks)", md: "> 引用块里 $Q^*(s,a)=\\mathbb{E}[r+\\gamma \\max_{a'}Q^*(s',a')]$ 的下标。" }, + { name: "two subscripts then multiplication", md: "行内 $\\pi_\\beta$ 与 $\\pi_\\theta$ 连用 $w_1 \\cdot w_2$。" }, + { name: "display block on own line", md: "前文。\n\n$$Q^*(s,a)=\\mathbb{E}\\big[r+\\gamma \\max_{a'}Q^*(s',a')\\big]$$\n\n后文。" }, + { name: "math inside table cell", md: "| 项 | 公式 |\n|---|---|\n| 损失 | $L=\\sum_i w_i x_i^*$ |" }, +]; + +// Strip 
KaTeX's own output, then check no `$` survives. If marked failed to +// tokenize a formula it would leave the literal `$…$` text behind (the old +// bug); a clean run consumes every delimiter. We also confirm KaTeX actually +// rendered. (A legitimate author-intended `*emphasis*` wrapping a formula is +// fine — the math just renders as an intact .katex span inside the <em>.) +let failures = 0; +for (const c of cases) { + const out = marked.parse(c.md); + const hasKatex = /class="katex/.test(out); + const withoutKatexText = out.replace(/<annotation[^>]*>[\s\S]*?<\/annotation>/g, ""); + const leftoverDollar = /\$/.test(withoutKatexText); + const ok = hasKatex && !leftoverDollar; + if (!ok) { + failures++; + console.log(`FAIL ${c.name}`); + console.log(` katex=${hasKatex} leftover-$=${leftoverDollar}`); + console.log(` ${out.replace(/\n/g, " ").slice(0, 240)}`); + } else { + console.log(`ok ${c.name}`); + } +} + +// Structural checks: blockquote case still wraps in <blockquote>
; table case +// still produces a <table>. +const bq = marked.parse(cases[2].md); +if (!/<blockquote>
/.test(bq)) { failures++; console.log("FAIL blockquote wrapper lost"); } +const tbl = marked.parse(cases[5].md); +if (!/<table>
/.test(tbl)) { failures++; console.log("FAIL table wrapper lost"); } + +if (failures) { + console.log(`\nMATH RENDER TEST FAILED: ${failures} case(s).`); + process.exit(1); +} +console.log(`\nMATH RENDER TEST OK: ${cases.length} cases, emphasis no longer eats math.`); From 9204431a0d042faba86a4553e75c6b48d5b79411 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 03:25:59 +0000 Subject: [PATCH 2/8] =?UTF-8?q?Round=202:=20graph-integrity=20pass=20?= =?UTF-8?q?=E2=80=94=20adopt=20orphan=20cards,=20repoint=20aliases?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recovers broken navigation that the audit surfaced: - Add graph nodes for 8 fully-written but unreachable orphan cards (CARLA Leaderboard, Apollo/Autoware, and six methodology-primitive 'move' cards), each wired to a real referrer so it is not isolated. The prose was already there; only the node was missing, so every link to it rendered inert. - Repoint confirmed alias links across 12 cards (simclr_moco -> simclr_mocov3, diffusion_policy/3ddiff_policy -> diffusion_policy_chi2023, 2309.17080_gaia1 -> gaia1, llama2 -> llama, gaia2 -> gaia1, zhao_shiyu_rl -> zhao_rl, vad -> vadv2). - Rebuild graph_extended.json via merge + repair (507 nodes / 1448 edges). Dead internal cross-links: 91 -> 45; orphan cards: 12 -> 4. Baseline ratcheted. 
--- docs/data/cards/channel_ez_encoder_academy.md | 2 +- docs/data/cards/course_cs285_levine.md | 4 +- ...tion_features_transfer_without_finetune.md | 2 +- docs/data/cards/extended/paper_bench2drive.md | 4 +- .../cards/extended/paper_carla_leaderboard.md | 2 +- .../paper_diffusion_policy_chi2023.md | 2 +- docs/data/cards/extended/paper_gpt4.md | 2 +- ...digm_scaling_data_with_self_supervision.md | 2 +- ...vior_cloning_compounds_errors_over_time.md | 2 +- ...tion_trace_counterfactual_vla_replanner.md | 2 +- docs/data/cards/paper_mnih2015_dqn.md | 2 +- docs/data/cards/paper_sutton_barto.md | 4 +- docs/data/generated/round2_integrity.json | 95 ++++++++++++ docs/data/graph_extended.json | 144 +++++++++++++++++- docs/data/graph_extended.stats.json | 35 +++-- tools/.link_baseline.json | 4 +- tools/round2_integrity.py | 107 +++++++++++++ 17 files changed, 374 insertions(+), 41 deletions(-) create mode 100644 docs/data/generated/round2_integrity.json create mode 100644 tools/round2_integrity.py diff --git a/docs/data/cards/channel_ez_encoder_academy.md b/docs/data/cards/channel_ez_encoder_academy.md index 0c1ae5f..269e1c1 100644 --- a/docs/data/cards/channel_ez_encoder_academy.md +++ b/docs/data/cards/channel_ez_encoder_academy.md @@ -39,7 +39,7 @@ bibtex: | 3. 
**把"研究范式之争"的对话搬到中文 timeline 上**——这件事 [Bitter Lesson](essay_bitter_lesson.md) 类宏观议题的中文圈传播尤其重要。 ## 数学锚点 / Math anchor -不适用。本频道不做数学推导,**做的是 *叙事 + 视角***。读者应把它当"播客"听,把数学推导留给 [Zhao 课程](course_zhao_shiyu_rl.md) / [CS285](course_cs285_levine.md) / [3b1b](channel_3blue1brown.md) / [Mu Li](channel_mu_li_bilibili.md)。 +不适用。本频道不做数学推导,**做的是 *叙事 + 视角***。读者应把它当"播客"听,把数学推导留给 [Zhao 课程](course_zhao_rl.md) / [CS285](course_cs285_levine.md) / [3b1b](channel_3blue1brown.md) / [Mu Li](channel_mu_li_bilibili.md)。 ## 架构 / Architectural intuition 频道的核心创作模型,借作者自述:"*deep learning 里的 encoder 把高维信息压成稠密表示——我的频道做同样的事,把 AI 世界压成你能吃下的小包*"。这正是本图谱使用它作为 spine 节点的原因:在"信息洪流"维度,它充当了**人类 encoder**,与本图谱作为**自动化 encoder** 的角色互补。 diff --git a/docs/data/cards/course_cs285_levine.md b/docs/data/cards/course_cs285_levine.md index 25fd513..d132f1d 100644 --- a/docs/data/cards/course_cs285_levine.md +++ b/docs/data/cards/course_cs285_levine.md @@ -34,7 +34,7 @@ bibtex: | 全球公认的**深度 RL** 黄金标准课程;授课人 Sergey Levine 是 deep RL 与机器人学的奠基者之一。开篇先讲 *imitation / behavioral cloning 与协变量偏移*,然后把 *PG / Actor-Critic / DQN / SAC / Model-based RL / IRL / Offline RL* 一网打尽。读完它+赵世钰课,你具备读 [DiLu](paper_2309.16292_dilu.md)、[Agent-Driver](paper_2311.10813_agent_driver.md)、[CF-VLA](paper_2512.24426_cfvla.md) 的全部 RL 背景。 ## 位置 / Why it matters -- 它把 [Zhao](course_zhao_shiyu_rl.md) 的纯公式 RL **嵌进神经网络**:参数化策略、参数化 Q、replay buffer、target network、$\epsilon$-greedy 实现细节、稳定性 trick…… +- 它把 [Zhao](course_zhao_rl.md) 的纯公式 RL **嵌进神经网络**:参数化策略、参数化 Q、replay buffer、target network、$\epsilon$-greedy 实现细节、稳定性 trick…… - 它是 [Bitter Lesson](essay_bitter_lesson.md) 在 RL 教学里**唯一被讲师直接朗读**的开篇短文(Lec 1); - 它的 Lec 2 *imitation learning* 直接对应自动驾驶 BC / E2E 范式的"奠基性失败模式"——*compounding error*——而这正是 [PlanT](paper_2210.14222_plant.md)、[UniAD](paper_2212.10156_uniad.md) 类工作必须面对的问题。 @@ -82,7 +82,7 @@ $$ *Levine 在课程开篇明确放出 Bitter Lesson 作为 deep RL 哲学根;他自己后续工作(offline RL、foundation policy)也基本贯彻"general method + scale"。这门课就是 Bitter Lesson 在 deep RL 子领域的**操作化指南**。* ## 后续节点 / Suggested 
next nodes -- → [Zhao Mathematical Foundations of RL](course_zhao_shiyu_rl.md) +- → [Zhao Mathematical Foundations of RL](course_zhao_rl.md) - → [Bitter Lesson](essay_bitter_lesson.md) - → [DAgger](paper_ross2011_dagger.md) · [DQN](paper_mnih2015_dqn.md) · [PPO](paper_schulman2017_ppo.md) · [RLHF/DPO](paper_rlhf_dpo.md) - → [DiLu (2309.16292)](paper_2309.16292_dilu.md) · [Agent-Driver](paper_2311.10813_agent_driver.md) diff --git a/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md b/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md index 141f0ef..9d118b1 100644 --- a/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md +++ b/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md @@ -48,4 +48,4 @@ $$\hat{y} = W_\mathcal{T}\, \phi_\theta(\mathbf{x}) + b_\mathcal{T}$$ ## 推演链路 -[BERT / GPT-3](paper_gpt3.md) 在语言上首次观察到"linear probing 即可接近 finetune" → [SimCLR / MoCo](paper_simclr_moco.md) 在视觉对比学习上复现 → [DINOv2](paper_dinov2.md) 把这一性质做到 SOTA → [DINOv3](paper_2508.10104_dinov3.md) 进一步扩展到 patch 级别 → [BLIP-2](paper_blip2.md) 把"冻结基座"做成多模态的标准实践 → [SAM](paper_sam.md) 把"image encoder 一次性算完,prompt 反复用"做成工程默认 → 投影到自动驾驶:冻结主干 + 多任务 head 成为感知栈的事实标准。 +[BERT / GPT-3](paper_gpt3.md) 在语言上首次观察到"linear probing 即可接近 finetune" → [SimCLR / MoCo](paper_simclr_mocov3.md) 在视觉对比学习上复现 → [DINOv2](paper_dinov2.md) 把这一性质做到 SOTA → [DINOv3](paper_2508.10104_dinov3.md) 进一步扩展到 patch 级别 → [BLIP-2](paper_blip2.md) 把"冻结基座"做成多模态的标准实践 → [SAM](paper_sam.md) 把"image encoder 一次性算完,prompt 反复用"做成工程默认 → 投影到自动驾驶:冻结主干 + 多任务 head 成为感知栈的事实标准。 diff --git a/docs/data/cards/extended/paper_bench2drive.md b/docs/data/cards/extended/paper_bench2drive.md index 80a8035..3898711 100644 --- a/docs/data/cards/extended/paper_bench2drive.md +++ b/docs/data/cards/extended/paper_bench2drive.md @@ -25,7 +25,7 @@ $$ $\mathcal{S}$ 是 44 类预设场景集合,每类多次实例化;$\mathrm{DS}$ 沿用 CARLA 的违规惩罚乘子;$\mathrm{success}$ 是 binary 
完成;$\mathrm{Smoothness}$ 量化加减速 jerk。最终按场景"能力维度"分组报告,避免单一总分掩盖弱点。 ## 它在图谱里的位置 -Bench2Drive 把 [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) 的协议变成可重复的端到端 protocol,与 [nuPlan](paper_nuplan.md) 形成"合成 + 反应式 vs 真实 + 半反应式"对照;下游被 [UniAD](../paper_2212.10156_uniad.md) / [VAD](../paper_vad.md) 等端到端模型当作主战场。它是 [closed loop data engine centric development paradigm](paradigm_closed_loop_data_engine_centric_development.md) 的关键节点,体现 [closed loop evaluation is the only ground truth for planners](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) 与 [offline metrics co evolve with methods so must be re audited](insight_offline_metrics_co_evolve_with_methods_so_must_be_re_audited.md) 两条洞见。 +Bench2Drive 把 [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) 的协议变成可重复的端到端 protocol,与 [nuPlan](paper_nuplan.md) 形成"合成 + 反应式 vs 真实 + 半反应式"对照;下游被 [UniAD](../paper_2212.10156_uniad.md) / [VAD](../paper_vadv2.md) 等端到端模型当作主战场。它是 [closed loop data engine centric development paradigm](paradigm_closed_loop_data_engine_centric_development.md) 的关键节点,体现 [closed loop evaluation is the only ground truth for planners](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) 与 [offline metrics co evolve with methods so must be re audited](insight_offline_metrics_co_evolve_with_methods_so_must_be_re_audited.md) 两条洞见。 ## 架构 / 方法直觉 Bench2Drive 不是模型而是 protocol + dataset + baseline 套件。Dataset 部分用 CARLA Leaderboard 2.0 在多张地图上脚本化采集,标注包含语义分割、3D box、占用、轨迹意图、行为目标,覆盖 44 类能力(merge、yield、overtake、急刹、施工 …)。Evaluation 部分把 CARLA 闭环跑通的过程标准化:固定 sensor 配置、固定 NPC 行为种子、固定 routes,每个能力维度单独打分。Baseline 部分把 UniAD、VAD、TCP、ThinkTwice 等公开端到端方法重新在统一 codebase 实现,让数字真正可比。这种"benchmark + reference implementation"组合极大降低了入场门槛。 @@ -43,7 +43,7 @@ Bench2Drive 体现了一个 bitter lesson 副定律:评测协议必须随方 - [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) — 直接前置基础设施 - [nuPlan](paper_nuplan.md) — 真实日志回放对照 - [UniAD](../paper_2212.10156_uniad.md) — 主要参赛端到端模型 -- [VAD](../paper_vad.md) — 矢量化端到端对照 +- 
[VAD](../paper_vadv2.md) — 矢量化端到端对照 - [InterFuser](paper_interfuser.md) — CARLA 上的另一条主线 - [closed loop data engine centric development paradigm](paradigm_closed_loop_data_engine_centric_development.md) - [insight: closed loop evaluation is the only ground truth for planners](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) diff --git a/docs/data/cards/extended/paper_carla_leaderboard.md b/docs/data/cards/extended/paper_carla_leaderboard.md index 5c552ce..10514e3 100644 --- a/docs/data/cards/extended/paper_carla_leaderboard.md +++ b/docs/data/cards/extended/paper_carla_leaderboard.md @@ -38,7 +38,7 @@ CARLA 仿真器是 Unreal Engine 4 渲染、PhysX 物理、自带 NPC 行为树 - Sensor 套件可自定 (摄像头 + LiDAR + IMU),但 Leaderboard 限制总带宽避免参赛者偷传 GT。 ## Bitter-Lesson 视角 -CARLA 把"评估什么是好驾驶"从研究员主观判断升级为可执行的闭环 + 数值化指标,是 bitter lesson 在评测端的胜利。但具体场景库、违规权重 $p_i$、NPC 行为树仍是人工写的——这给后来 [Bench2Drive](paper_bench2drive.md) 与 [Trajeglish](paper_trajeglish.md) 一类"用数据生成 NPC 与场景"的路线留下空间。仿真器自身的保真度仍是上界,[GAIA-1](../paper_2309.17080_gaia1.md) 与 [Cosmos](paper_cosmos.md) 试图把这层也机器学习化。 +CARLA 把"评估什么是好驾驶"从研究员主观判断升级为可执行的闭环 + 数值化指标,是 bitter lesson 在评测端的胜利。但具体场景库、违规权重 $p_i$、NPC 行为树仍是人工写的——这给后来 [Bench2Drive](paper_bench2drive.md) 与 [Trajeglish](paper_trajeglish.md) 一类"用数据生成 NPC 与场景"的路线留下空间。仿真器自身的保真度仍是上界,[GAIA-1](../paper_gaia1.md) 与 [Cosmos](paper_cosmos.md) 试图把这层也机器学习化。 ## 接下来读什么 - [Bench2Drive](paper_bench2drive.md) — CARLA 上的标准化端到端 protocol diff --git a/docs/data/cards/extended/paper_diffusion_policy_chi2023.md b/docs/data/cards/extended/paper_diffusion_policy_chi2023.md index 698f05c..c20ed9c 100644 --- a/docs/data/cards/extended/paper_diffusion_policy_chi2023.md +++ b/docs/data/cards/extended/paper_diffusion_policy_chi2023.md @@ -25,7 +25,7 @@ $$ $a^{(k)}\in\mathbb{R}^{H\times d_a}$ 是第 $k$ 步去噪迭代里的动作序列($H$ 步 horizon),$\epsilon_\theta$ 是噪声预测网络,$o$ 是观察条件。从高斯噪声出发反复迭代得到清晰的动作序列。 ## 它在图谱里的位置 -Diffusion Policy 与 [Diffuser](../paper_diffuser.md) 平行(一个把策略当扩散,一个把规划当扩散);上游借鉴 [DDPM](paper_ddpm.md) 的扩散框架;下游 motivate 
了 [3D Diffusion Policy](paper_3ddiff_policy.md) 等扩展。它落在 [imitation learning paradigm](paradigm_imitation_learning.md) 上,体现 [human demonstrations compress implicit reward function](insight_human_demonstrations_compress_implicit_reward_function.md) 这条洞见。 +Diffusion Policy 与 [Diffuser](../paper_diffuser.md) 平行(一个把策略当扩散,一个把规划当扩散);上游借鉴 [DDPM](paper_ddpm.md) 的扩散框架;下游 motivate 了 [3D Diffusion Policy](paper_diffusion_policy_chi2023.md) 等扩展。它落在 [imitation learning paradigm](paradigm_imitation_learning.md) 上,体现 [human demonstrations compress implicit reward function](insight_human_demonstrations_compress_implicit_reward_function.md) 这条洞见。 ## 架构 / 方法直觉 观察 encoder 是 CNN(图像)或 MLP(状态),把最近 2-4 帧观察压成条件 embedding。Score network 是 1D 卷积 U-Net 或 transformer,输入是当前噪声动作序列 + 观察条件 + 扩散时间 step,输出是预测噪声。训练时给真实演示动作加各级噪声、让模型学习去噪。推理时从纯噪声开始 DDIM 或 DDPM 迭代约 100 步(实际部署用 10 步 DDIM 也够)。多解性的处理来自扩散过程本身——同样的观察可以生成两种不同模式的动作,因为采样轨迹由随机性决定。 diff --git a/docs/data/cards/extended/paper_gpt4.md b/docs/data/cards/extended/paper_gpt4.md index ef562ed..9ecd4b8 100644 --- a/docs/data/cards/extended/paper_gpt4.md +++ b/docs/data/cards/extended/paper_gpt4.md @@ -31,7 +31,7 @@ GPT-4 延续 [GPT-3](../paper_gpt3.md) 的 scaling 路线,由 [InstructGPT](pa 公开信息有限,但社区共识:约 1.8T 参数的混合专家 transformer,token 上下文从 8k 扩到 32k 再到 128k;视觉端通过 cross-attention 接 ViT encoder。训练分三段:海量网络文本预训练得到 base model,监督微调注入指令响应格式,最后用 RLHF 调到对齐目标。推理用 KV cache 摊销长上下文成本,部署用蒸馏小模型应付高 QPS 场景。 ## 工程上真正要注意什么 -- 权重不开放,所有访问通过 API;驾驶应用要么花钱调用,要么换 [Claude](paper_claude.md) / [Gemini](paper_gemini.md) / 开源 [Llama](paper_llama2.md)。 +- 权重不开放,所有访问通过 API;驾驶应用要么花钱调用,要么换 [Claude](paper_claude.md) / [Gemini](paper_gemini.md) / 开源 [Llama](paper_llama.md)。 - 评测可重复性堪忧:API 行为随版本悄悄变化,DiLu 之类工作的复现需要锁定 checkpoint。 - 推理延迟和成本是驾驶 in-car 部署的硬约束;常见做法是只在慢系统调用,快系统留给小模型。 - 训练数据截止日期会让模型不知道近期事件,对自动驾驶政策类问答需额外检索增强。 diff --git a/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md b/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md index 5959f74..6e9e2d6 
100644 --- a/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md +++ b/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md @@ -13,7 +13,7 @@ | 工作 | 提供的零件 | |---|---| -| [SimCLR / MoCo v3](paper_simclr_moco.md) | 对比学习的稳定配方 | +| [SimCLR / MoCo v3](paper_simclr_mocov3.md) | 对比学习的稳定配方 | | [BYOL / SimSiam](paper_byol.md) | 非对比的 siamese 自监督 | | [DINO / DINOv2 / DINOv3](paper_2508.10104_dinov3.md) | 自蒸馏 + 多视图,得到强大零样本特征 | | [MAE / BEiT](paper_mae.md) | 掩码图像建模 | diff --git a/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md b/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md index 5dce43f..3330cc6 100644 --- a/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md +++ b/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md @@ -63,4 +63,4 @@ Ross & Bagnell 在 2011 年用一个简洁的 bound 把现象写清楚:若单 ## 与之相关的研究路径 -它和 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 互为镜像,是这一洞察的现象层证据。它和 [`insight:human_demonstrations_compress_implicit_reward_function`](insight_human_demonstrations_compress_implicit_reward_function.md) 形成张力——模仿能压缩奖励、却不能压缩纠错,因此需要 RL / 对齐补足。它的工程出口指向 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。论文谱系上 [Ross 2011 DAgger](paper_ross2011_dagger.md)、[Roach](paper_roach.md)、[TransFuser](paper_transfuser.md)、[Diffusion Policy](paper_diffusion_policy.md) 都把这一问题列作主要动机;动手验证可见 [`../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb`](../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb)。 +它和 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 互为镜像,是这一洞察的现象层证据。它和 
[`insight:human_demonstrations_compress_implicit_reward_function`](insight_human_demonstrations_compress_implicit_reward_function.md) 形成张力——模仿能压缩奖励、却不能压缩纠错,因此需要 RL / 对齐补足。它的工程出口指向 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。论文谱系上 [Ross 2011 DAgger](paper_ross2011_dagger.md)、[Roach](paper_roach.md)、[TransFuser](paper_transfuser.md)、[Diffusion Policy](paper_diffusion_policy_chi2023.md) 都把这一问题列作主要动机;动手验证可见 [`../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb`](../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb)。 diff --git a/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md b/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md index a3fea53..c87b503 100644 --- a/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md +++ b/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md @@ -77,7 +77,7 @@ CF-VLA 是数条范式的交汇点,因此其延伸方向众多: ## 一条值得注意的反例 -CF-VLA 推演链解释了"反事实搜索能提升长尾决策",但**没能预言生成器质量的天花板**。当世界模型对罕见物理交互(如冰面摩擦、紧急避让动力学)的生成不准时,反事实分支本身就是错误样本,反而误导 VLA。这一痛点直接驱动 [Cosmos](paper_cosmos.md) 与 [GAIA-2](paper_gaia2.md) 一类更高保真度世界模型的研究。另一个推演缺口是偏好数据的稀缺:罕见场景下的专家偏好标注既稀少又昂贵,难以支撑 DPO 训练。 +CF-VLA 推演链解释了"反事实搜索能提升长尾决策",但**没能预言生成器质量的天花板**。当世界模型对罕见物理交互(如冰面摩擦、紧急避让动力学)的生成不准时,反事实分支本身就是错误样本,反而误导 VLA。这一痛点直接驱动 [Cosmos](paper_cosmos.md) 与 [GAIA-2](paper_gaia1.md) 一类更高保真度世界模型的研究。另一个推演缺口是偏好数据的稀缺:罕见场景下的专家偏好标注既稀少又昂贵,难以支撑 DPO 训练。 ## 推荐起步 diff --git a/docs/data/cards/paper_mnih2015_dqn.md b/docs/data/cards/paper_mnih2015_dqn.md index 6d6bec8..f8ca4b7 100644 --- a/docs/data/cards/paper_mnih2015_dqn.md +++ b/docs/data/cards/paper_mnih2015_dqn.md @@ -34,7 +34,7 @@ bibtex: | 把 Q-learning 与卷积网络结合,端到端从原始像素学打 Atari,**首次以单一算法达到人类水平**。两个稳定性 trick:*经验回放 (replay buffer)* + *目标网络 (target Q)*。Deep RL 的开端。 ## 位置 / Why it matters -- 是 [赵世钰课程](course_zhao_shiyu_rl.md) Lec 7 
的"应用焦点"; +- 是 [赵世钰课程](course_zhao_rl.md) Lec 7 的"应用焦点"; - 是 [CS285](course_cs285_levine.md) Lec 6–8 的核心算法; - 也是 [DiLu](paper_2309.16292_dilu.md) 用来对比"知识驱动 vs 数据驱动"的 baseline。 diff --git a/docs/data/cards/paper_sutton_barto.md b/docs/data/cards/paper_sutton_barto.md index 3cf5f03..20bb61f 100644 --- a/docs/data/cards/paper_sutton_barto.md +++ b/docs/data/cards/paper_sutton_barto.md @@ -28,11 +28,11 @@ bibtex: | --- ## TL;DR -RL 领域的"圣经"。和 [赵世钰课程](course_zhao_shiyu_rl.md) 完全互补:Sutton & Barto 是英文教材标准,覆盖更广(含 bandit、average reward、Dyna 等),但深度 RL 部分较少。 +RL 领域的"圣经"。和 [赵世钰课程](course_zhao_rl.md) 完全互补:Sutton & Barto 是英文教材标准,覆盖更广(含 bandit、average reward、Dyna 等),但深度 RL 部分较少。 ## 与 spine 的交集 - 是 [Bitter Lesson](essay_bitter_lesson.md) 作者本人的教材; - 是 [CS285](course_cs285_levine.md) 的官方推荐参考书。 ## 后续 -- → [Zhao 课程](course_zhao_shiyu_rl.md) · [CS285](course_cs285_levine.md) · [Bitter Lesson](essay_bitter_lesson.md) +- → [Zhao 课程](course_zhao_rl.md) · [CS285](course_cs285_levine.md) · [Bitter Lesson](essay_bitter_lesson.md) diff --git a/docs/data/generated/round2_integrity.json b/docs/data/generated/round2_integrity.json new file mode 100644 index 0000000..9c3b7a7 --- /dev/null +++ b/docs/data/generated/round2_integrity.json @@ -0,0 +1,95 @@ +{ + "$comment": "Round 2 integrity: adopt orphan cards as nodes + one anchoring edge each.", + "nodes": [ + { + "id": "paper:carla_leaderboard", + "kind": "paper", + "topic": "evaluation_benchmark", + "tier": "A", + "year": 2021, + "label": "CARLA Leaderboard", + "label_zh": "CARLA Leaderboard(闭环驾驶排行榜)" + }, + { + "id": "paper:apollo_autoware", + "kind": "paper", + "topic": "e2e_ad", + "tier": "B", + "year": 2018, + "label": "Apollo / Autoware", + "label_zh": "Apollo / Autoware(开源自动驾驶软件栈)" + }, + { + "id": "move:apply_dual_lagrangian_to_safety_constraint", + "kind": "move", + "topic": "safety" + }, + { + "id": "move:treat_planner_as_policy_optimisation_with_constraints", + "kind": "move", + "topic": "planning" + }, + { + "id": 
"move:carry_recurrent_hidden_state_across_long_videos", + "kind": "move", + "topic": "world_models" + }, + { + "id": "move:joint_attention_over_multi_view_3d_queries", + "kind": "move", + "topic": "scene_understanding" + }, + { + "id": "move:gather_diverse_pretraining_data_then_filter_by_quality", + "kind": "move", + "topic": "data_engineering" + }, + { + "id": "move:add_noise_then_denoise_for_score_based_generation", + "kind": "move", + "topic": "methodology" + } + ], + "edges": [ + { + "source": "paper:bench2drive", + "target": "paper:carla_leaderboard", + "rel": "parallel" + }, + { + "source": "paradigm:modular_perception_to_planning_pipeline", + "target": "paper:apollo_autoware", + "rel": "manifests" + }, + { + "source": "move:apply_dual_lagrangian_to_safety_constraint", + "target": "paper:lagrangian_safe_rl", + "rel": "composes" + }, + { + "source": "move:treat_planner_as_policy_optimisation_with_constraints", + "target": "paper:cpo_safe_rl", + "rel": "composes" + }, + { + "source": "move:carry_recurrent_hidden_state_across_long_videos", + "target": "paper:dreamer_v3", + "rel": "composes" + }, + { + "source": "move:joint_attention_over_multi_view_3d_queries", + "target": "paper:li2022bevformer", + "rel": "composes" + }, + { + "source": "move:gather_diverse_pretraining_data_then_filter_by_quality", + "target": "paper:llama", + "rel": "composes" + }, + { + "source": "move:add_noise_then_denoise_for_score_based_generation", + "target": "paper:ddpm", + "rel": "composes" + } + ] +} diff --git a/docs/data/graph_extended.json b/docs/data/graph_extended.json index 7c20277..1855286 100644 --- a/docs/data/graph_extended.json +++ b/docs/data/graph_extended.json @@ -320,7 +320,7 @@ "phase": "prereq", "year": 2022, "card": "paper_li2022bevformer.md", - "degree": 17 + "degree": 18 }, { "id": "paper:vadv2", @@ -968,7 +968,7 @@ "year": 2023, "summary_zh": "DreamerV3 通过对回报、价值和奖励做对称对数变换以及一系列规范化技巧,让同一套超参数无需调参就能跨越雅达利、ProcGen、DMLab、Minecraft 
等数十个不同动力学的任务取得领先成绩。它把世界模型方法从精细调参的研究原型变成了一个真正可以照搬使用的通用基线。", "label": "DreamerV3", - "degree": 7 + "degree": 8 }, { "id": "paper:iris_world_model", @@ -1268,7 +1268,7 @@ "year": 2017, "summary_zh": "CPO 把 TRPO 的信赖域思想推广到带约束的马尔可夫决策过程,在每一步策略更新里同时保证新的策略在期望回报上有改进,并且不会让某些预期约束代价超过预算。它是第一个在大规模深度强化学习里直接对安全约束做硬性保证的算法,奠定了后续安全 RL 的范式。", "label": "Constrained Policy Optimization", - "degree": 8 + "degree": 9 }, { "id": "paper:lagrangian_safe_rl", @@ -1280,7 +1280,7 @@ "year": 2019, "summary_zh": "Lagrangian 风格的安全强化学习把每个约束写成一个不等式,引入对偶变量并和策略参数一起做交替优化。这种方法实现简单,可以套在 PPO、SAC 等任意策略梯度算法上,是安全 RL 的工业基线,但对超参数和奖励 / 约束尺度比较敏感。", "label": "Lagrangian Safe RL", - "degree": 9 + "degree": 10 }, { "id": "paper:shielded_rl", @@ -2108,7 +2108,7 @@ "year": 2023, "summary_zh": "LLaMA 是 Meta 发布的开源大语言模型权重族,第一次以学术许可让研究者获得百亿到千亿参数规模的强基线。它直接催生了 Alpaca、Vicuna、Llama-2-Chat 等指令微调分支,使 VLM 与 VLA 研究不必再依赖封闭 API 就能复现训练流程。", "label": "LLaMA family", - "degree": 7 + "degree": 8 }, { "id": "paper:mistral", @@ -4243,7 +4243,7 @@ "insight:set_prediction_eliminates_postprocessing_heuristics" ], "label": "Paradigm: Modular Perception-to-Planning Pipeline", - "degree": 6 + "degree": 7 }, { "id": "paradigm:differentiable_end_to_end_imitation", @@ -4456,7 +4456,7 @@ "year": 2024, "card": "paper_bench2drive.md", "summary_zh": "Bench2Drive 在 CARLA Leaderboard 2.0 之上提供 44 个能力分桶与统一训练协议,使 UniAD、VAD 等端到端模型可在同一闭环环境下被公平比较,揭示了离线 L2 与闭环成功率之间的弱相关性。", - "degree": 7 + "degree": 8 }, { "id": "paper:carla_lb2", @@ -6385,6 +6385,94 @@ "label": "Neural scene reconstruction as the simulation engine", "degree": 5 }, + { + "id": "paper:carla_leaderboard", + "kind": "paper", + "topic": "evaluation_benchmark", + "tier": "A", + "year": 2021, + "label": "CARLA Leaderboard", + "label_zh": "CARLA Leaderboard(闭环驾驶排行榜)", + "phase": "core", + "degree": 1 + }, + { + "id": "paper:apollo_autoware", + "kind": "paper", + "topic": "e2e_ad", + "tier": "B", + "year": 2018, + "label": "Apollo / Autoware", + "label_zh": "Apollo / Autoware(开源自动驾驶软件栈)", + "phase": 
"core", + "degree": 1 + }, + { + "id": "move:apply_dual_lagrangian_to_safety_constraint", + "kind": "move", + "topic": "safety", + "label": "apply_dual_lagrangian_to_safety_constraint", + "label_zh": "apply_dual_lagrangian_to_safety_constraint", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:treat_planner_as_policy_optimisation_with_constraints", + "kind": "move", + "topic": "planning", + "label": "treat_planner_as_policy_optimisation_with_constraints", + "label_zh": "treat_planner_as_policy_optimisation_with_constraints", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:carry_recurrent_hidden_state_across_long_videos", + "kind": "move", + "topic": "world_models", + "label": "carry_recurrent_hidden_state_across_long_videos", + "label_zh": "carry_recurrent_hidden_state_across_long_videos", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:joint_attention_over_multi_view_3d_queries", + "kind": "move", + "topic": "scene_understanding", + "label": "joint_attention_over_multi_view_3d_queries", + "label_zh": "joint_attention_over_multi_view_3d_queries", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:gather_diverse_pretraining_data_then_filter_by_quality", + "kind": "move", + "topic": "data_engineering", + "label": "gather_diverse_pretraining_data_then_filter_by_quality", + "label_zh": "gather_diverse_pretraining_data_then_filter_by_quality", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:add_noise_then_denoise_for_score_based_generation", + "kind": "move", + "topic": "methodology", + "label": "add_noise_then_denoise_for_score_based_generation", + "label_zh": "add_noise_then_denoise_for_score_based_generation", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, { "id": "paper:chinchilla", "label": "Chinchilla", @@ -6431,7 +6519,7 @@ "phase": 
"core", "year": 2020, "summary_zh": "DDPM 把生成模型重新表述为逐步加噪后再学习反向去噪的过程,把图像生成的可训练目标压成预测每一步的噪声。它的简洁性与稳定性让扩散模型在两年内取代 GAN 成为图像、视频、动作生成的默认范式,也奠定了 Diffusion Policy、世界模型视频生成的方法学基础。", - "degree": 3 + "degree": 4 }, { "id": "paper:lora", @@ -12837,6 +12925,46 @@ "target": "paper:clip", "rel": "covers" }, + { + "source": "paper:bench2drive", + "target": "paper:carla_leaderboard", + "rel": "parallel" + }, + { + "source": "paradigm:modular_perception_to_planning_pipeline", + "target": "paper:apollo_autoware", + "rel": "manifests" + }, + { + "source": "move:apply_dual_lagrangian_to_safety_constraint", + "target": "paper:lagrangian_safe_rl", + "rel": "composes" + }, + { + "source": "move:treat_planner_as_policy_optimisation_with_constraints", + "target": "paper:cpo_safe_rl", + "rel": "composes" + }, + { + "source": "move:carry_recurrent_hidden_state_across_long_videos", + "target": "paper:dreamer_v3", + "rel": "composes" + }, + { + "source": "move:joint_attention_over_multi_view_3d_queries", + "target": "paper:li2022bevformer", + "rel": "composes" + }, + { + "source": "move:gather_diverse_pretraining_data_then_filter_by_quality", + "target": "paper:llama", + "rel": "composes" + }, + { + "source": "move:add_noise_then_denoise_for_score_based_generation", + "target": "paper:ddpm", + "rel": "composes" + }, { "source": "paper:gpt3", "target": "paper:chinchilla", diff --git a/docs/data/graph_extended.stats.json b/docs/data/graph_extended.stats.json index 982a258..20e293d 100644 --- a/docs/data/graph_extended.stats.json +++ b/docs/data/graph_extended.stats.json @@ -1,14 +1,14 @@ { - "node_count": 499, - "edge_count": 1440, + "node_count": 507, + "edge_count": 1448, "by_kind": { - "paper": 184, + "paper": 186, "channel": 3, "course": 2, "essay": 1, "concept": 25, "lab": 11, - "move": 129, + "move": 135, "problem": 41, "insight": 54, "paradigm": 25, @@ -17,18 +17,18 @@ "by_tier": { "spine": 14, "S": 39, - "A": 74, - "B": 63, + "A": 75, + "B": 64, "concept": 25, "lab": 11, - "move": 129, + 
"move": 135, "problem": 41, "insight": 54, "paradigm": 25, "validation": 24 }, "by_topic": { - "e2e_ad": 23, + "e2e_ad": 24, "vlm_vla": 71, "brain_inspired": 7, "ssl_vision": 37, @@ -37,15 +37,15 @@ "rl_foundations": 9, "deep_rl": 80, "meta_philosophy": 3, - "world_models": 32, - "planning": 15, + "world_models": 33, + "planning": 16, "control": 7, - "safety": 15, + "safety": 16, "foundation_models": 18, "alignment": 4, "llm_agent": 14, "reasoning": 8, - "evaluation_benchmark": 17, + "evaluation_benchmark": 18, "simulator": 8, "dataset": 6, "efficient_computing": 14, @@ -54,19 +54,21 @@ "safety_standard": 14, "geometry_3d": 27, "sensor_fusion": 3, - "scene_understanding": 26 + "scene_understanding": 27, + "data_engineering": 1, + "methodology": 1 }, "by_rel": { "prereq": 89, "covers": 283, - "parallel": 110, + "parallel": 111, "contrasts": 23, "extends": 37, "feeds": 88, "implements": 14, - "manifests": 168, + "manifests": 169, "enables": 58, - "composes": 355, + "composes": 361, "motivates": 144, "validates": 64, "unsolved_by": 7 @@ -78,6 +80,7 @@ "insights_and_validations": 69, "methodology_axis": 84, "perception_axis": 75, + "round2_integrity": 8, "wave_e_stubs": 10 } } diff --git a/tools/.link_baseline.json b/tools/.link_baseline.json index 4c45c83..c29bb4b 100644 --- a/tools/.link_baseline.json +++ b/tools/.link_baseline.json @@ -1,4 +1,4 @@ { - "max_inert": 91, - "max_orphans": 12 + "max_inert": 45, + "max_orphans": 4 } diff --git a/tools/round2_integrity.py b/tools/round2_integrity.py new file mode 100644 index 0000000..5f52596 --- /dev/null +++ b/tools/round2_integrity.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +"""Round 2 graph-integrity pass. + +Two safe, mechanical fixes that recover broken navigation: + + 1. Adopt orphan cards. Eight cards under cards/extended/ were fully written + but never given a graph node, so every link to them rendered inert and the + prose was unreachable. 
Add the nodes (the renderer auto-resolves the card), + each wired to a real referrer so it is not isolated. + + 2. Repoint confirmed aliases. Several cards link to a filename whose node + lives under a slightly different id (e.g. simclr_moco -> simclr_mocov3). + Rewrite those link targets to the canonical id. + +Writes docs/data/generated/round2_integrity.json and edits card text in place, +then the caller re-runs tools/merge_graph.py. +""" +from __future__ import annotations + +import json +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +CARDS = ROOT / "docs" / "data" / "cards" +GEN = ROOT / "docs" / "data" / "generated" / "round2_integrity.json" + +# id -> (kind, topic, one edge as (source, target, rel)) +NEW_NODES = { + "paper:carla_leaderboard": dict( + kind="paper", topic="evaluation_benchmark", tier="A", year=2021, + label="CARLA Leaderboard", label_zh="CARLA Leaderboard(闭环驾驶排行榜)", + edge=("paper:bench2drive", "paper:carla_leaderboard", "parallel")), + "paper:apollo_autoware": dict( + kind="paper", topic="e2e_ad", tier="B", year=2018, + label="Apollo / Autoware", label_zh="Apollo / Autoware(开源自动驾驶软件栈)", + edge=("paradigm:modular_perception_to_planning_pipeline", "paper:apollo_autoware", "manifests")), + "move:apply_dual_lagrangian_to_safety_constraint": dict( + kind="move", topic="safety", + edge=("move:apply_dual_lagrangian_to_safety_constraint", "paper:lagrangian_safe_rl", "composes")), + "move:treat_planner_as_policy_optimisation_with_constraints": dict( + kind="move", topic="planning", + edge=("move:treat_planner_as_policy_optimisation_with_constraints", "paper:cpo_safe_rl", "composes")), + "move:carry_recurrent_hidden_state_across_long_videos": dict( + kind="move", topic="world_models", + edge=("move:carry_recurrent_hidden_state_across_long_videos", "paper:dreamer_v3", "composes")), + "move:joint_attention_over_multi_view_3d_queries": dict( + kind="move", topic="scene_understanding", + 
edge=("move:joint_attention_over_multi_view_3d_queries", "paper:li2022bevformer", "composes")), + "move:gather_diverse_pretraining_data_then_filter_by_quality": dict( + kind="move", topic="data_engineering", + edge=("move:gather_diverse_pretraining_data_then_filter_by_quality", "paper:llama", "composes")), + "move:add_noise_then_denoise_for_score_based_generation": dict( + kind="move", topic="methodology", + edge=("move:add_noise_then_denoise_for_score_based_generation", "paper:ddpm", "composes")), +} + +# filename-stem rewrites: old -> new (applied to ](OLD.md) link targets) +REPOINTS = { + "simclr_moco": "simclr_mocov3", + "diffusion_policy": "diffusion_policy_chi2023", + "3ddiff_policy": "diffusion_policy_chi2023", + "2309.17080_gaia1": "gaia1", + "llama2": "llama", + "gaia2": "gaia1", + "zhao_shiyu_rl": "zhao_rl", + "vad": "vadv2", +} + + +def main() -> int: + g = json.loads((ROOT / "docs" / "data" / "graph_extended.json").read_text(encoding="utf-8")) + ids = {n["id"] for n in g["nodes"]} + + nodes, edges = [], [] + for nid, spec in NEW_NODES.items(): + node = {"id": nid, "kind": spec["kind"], "topic": spec["topic"]} + for k in ("tier", "year", "label", "label_zh"): + if k in spec: + node[k] = spec[k] + nodes.append(node) + s, t, r = spec["edge"] + edges.append({"source": s, "target": t, "rel": r}) + + GEN.write_text(json.dumps({ + "$comment": "Round 2 integrity: adopt orphan cards as nodes + one anchoring edge each.", + "nodes": nodes, "edges": edges, + }, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + print(f"wrote {GEN.name}: +{len(nodes)} nodes, +{len(edges)} edges") + + # Repoint alias links in every card. + changed = 0 + for path in CARDS.rglob("*.md"): + txt = path.read_text(encoding="utf-8") + orig = txt + for old, new in REPOINTS.items(): + # match ](OLD.md...) 
keeping any kind_ prefix and ./ ../ paths + txt = re.sub(rf'(\]\([^)]*?){re.escape(old)}\.md', rf'\g<1>{new}.md', txt) + if txt != orig: + path.write_text(txt, encoding="utf-8") + changed += 1 + print(f"repointed alias links in {changed} card(s)") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 110efd2f5f3db371357306d9cdb87086b0e1d074 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 03:38:30 +0000 Subject: [PATCH 3/8] Round 3: replace mechanical 'dynamic insight' with authored research lens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bottom-of-card panel restated the card summary and dressed edge counts up as 'why this matters' — neither dynamic nor insightful. Replace it with: - An authored, per-node Research lens (docs/data/research_lens.json, built by tools/build_research_lens.py): four columns of real reasoning — load-bearing assumption, failure boundary, a falsifiable next experiment, and the cross-domain isomorphism — rendered through the same markdown+KaTeX pipeline so formulae and intra-atlas links work. Ships 16 flagship lenses (offline RL, imitation/DAgger, safety-Lagrangian, world models, UniAD, PPO, Transformer, DQN, AlphaZero, CQL, conditional-generation planning, four paradigms). - Honest navigation for every node: downstream next-reads, upstream provenance as links (not a count), and the open problems it answers. Nodes without a lens show only this, instead of a synthetic 'insight'. All 16 lenses' internal links verified to resolve; 24 KaTeX spans render with 0 errors on the offline-RL lens; nav-only fallback confirmed on non-lens nodes. 
--- docs/atlas3d.css | 52 ++++++++++++- docs/data/research_lens.json | 98 +++++++++++++++++++++++ docs/js/atlas-cards.js | 145 ++++++++++++++++------------------ tools/build_research_lens.py | 146 +++++++++++++++++++++++++++++++++++ 4 files changed, 363 insertions(+), 78 deletions(-) create mode 100644 docs/data/research_lens.json create mode 100644 tools/build_research_lens.py diff --git a/docs/atlas3d.css b/docs/atlas3d.css index 53f7b30..484e1f5 100644 --- a/docs/atlas3d.css +++ b/docs/atlas3d.css @@ -241,10 +241,59 @@ input#yearSlider { width: 100%; accent-color: var(--accent); } color: var(--ink); margin: 0 0 10px; } -.insight-grid { +.insight-hint { + font-size: 12px; + line-height: 1.55; + color: var(--ink-mute); + margin: 0 0 10px; +} +/* ---------- research lens ---------- */ +.lens-title { + margin-top: 0; + margin-bottom: 6px; + font-size: 14px; + color: var(--accent-warm); + letter-spacing: 0.04em; +} +.lens-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px 16px; + margin: 8px 0 20px; +} +.lens-cell { + border-left: 2px solid rgba(108, 177, 255, 0.4); + padding: 2px 0 2px 12px; +} +.lens-cell h4 { + font-size: 12px; + color: var(--accent); + letter-spacing: 0.04em; + margin: 0 0 5px; +} +.lens-body { + font-size: 12.8px; + line-height: 1.62; + color: var(--ink); +} +.lens-body p { margin: 0 0 6px; } +.lens-body p:last-child { margin-bottom: 0; } +.lens-body code { font-size: 0.92em; } +.lens-body .katex { font-size: 1em; } +.reused-list { + padding-left: 18px; + margin: 0; + list-style: disc; +} +.reused-list li { + font-size: 12.5px; + line-height: 1.55; + margin-bottom: 3px; +} +.insight-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); + gap: 12px 16px; margin-top: 8px; } .insight-cell h4 { @@ -285,6 +334,7 @@ input#yearSlider { width: 100%; accent-color: var(--accent); } } @media (max-width: 920px) { .insight-grid { grid-template-columns: 1fr; } + .lens-grid { grid-template-columns: 1fr; } } /* 
---------- mermaid diagrams ---------- */ diff --git a/docs/data/research_lens.json b/docs/data/research_lens.json new file mode 100644 index 0000000..5194a8e --- /dev/null +++ b/docs/data/research_lens.json @@ -0,0 +1,98 @@ +{ + "insight:offline_rl_is_actually_constrained_dynamic_programming": { + "assumption": "数据集 $\\mathcal{D}$ 的状态-动作支撑 $\\mathrm{supp}(\\pi_\\beta)$ 已经覆盖了通向高回报所必需的关键动作;惩罚项只压制支撑外的乐观,而不伤害支撑内的价值传播。换句话说,约束动态规划能达到的上界,被数据覆盖范围而非算法本身钉死。", + "failure": "当最优动作根本不在支撑内(日志里从未出现\"紧急左打方向\"),没有任何离线算法能学出它——这是信息缺失,不是优化失败。另一端,惩罚强度 $\\alpha$ 过大时连支撑内的 Bellman 备份也被压平,策略退化为行为克隆;$\\alpha$ 的可用区间通常很窄,需要在留出集上扫调。", + "experiment": "固定一份驾驶日志,按动作类型系统性\"挖洞\",剔除 5% / 10% / 20% 的支撑,测 [CQL](paper_cql.md) 与 [IQL](paper_iql.md) 的闭环成功率随支撑覆盖率的衰减曲线,再与数据总量做对照回归。可证伪的预测:闭环性能由覆盖率单调决定、与总量近似无关;若总量能补偿覆盖缺失,则本洞察被推翻。", + "isomorphism": "同一结构出现在监督学习的\"经验风险 $\\le$ 真实风险 + 复杂度项\"、离策略评估的 importance-sampling 权重截断、模型预测控制的可行域约束 $u\\in\\mathcal{U}$、推荐系统的 propensity clipping——都是\"在可信区域内最优化、对区域外保持悲观\"。" + }, + "insight:imitation_learning_alone_cannot_recover_from_compounding_errors": { + "assumption": "训练分布与部署分布一致,即策略真正访问到的状态仍落在专家演示覆盖的区域内。监督式行为克隆把每一帧当作独立同分布样本,默认单步误差不会被时间放大。", + "failure": "一旦策略犯下专家从未犯过的小错,就进入演示未覆盖的状态,误差以 $\\mathcal{O}(\\varepsilon T^2)$ 的速率随时间步 $T$ 复利累积([DAgger](paper_ross2011_dagger.md) 的核心界)。驾驶里典型表现是\"压线后越偏越远\",因为纠偏状态恰恰不在专家轨迹上。", + "experiment": "在闭环模拟器里测\"恢复率\":人为把车摆到偏离车道中心 $d$ 米处,统计纯 BC 与 [DAgger](paper_ross2011_dagger.md) / 加噪重标注策略把车拉回的成功率随 $d$ 的曲线。可证伪点:若纯 BC 的恢复率不随 $d$ 急剧下降,则\"复合误差主导失效\"这一论断不成立。", + "isomorphism": "等价于控制论里开环 vs 闭环的稳定性差别、语言模型自回归生成的暴露偏差(exposure bias)、以及数值积分中误差随步数累积——根因都是\"在自己生成的、与训练不同的分布上继续做预测\"。" + }, + "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping": { + "assumption": "安全可以写成可量化的约束 $J_C(\\pi)=\\mathbb{E}\\big[\\sum_t c_t\\big]\\le d$,且存在一个对偶乘子 $\\lambda^\\star$ 使带约束问题与无约束的拉格朗日问题 $\\max_\\pi\\min_{\\lambda\\ge 0} J(\\pi)-\\lambda\\,(J_C(\\pi)-d)$ 同解(强对偶/Slater 条件成立)。", + "failure": "当约束非凸、或可行域为空(任务本身与安全冲突)时强对偶失效,乘子 $\\lambda$ 
来回振荡而不收敛。把安全塞进奖励加权 $r-\\beta c$ 看似等价,却把\"绝不可越界\"软化成\"越界够便宜就做\"——固定权重无法表达硬约束,这正是奖励塑形的根本缺陷。", + "experiment": "同一安全驾驶任务下,对比固定惩罚权重与 [PID-Lagrangian](paper_pid_lagrangian.md) 自适应乘子:扫描权重 $\\beta$,画出\"违规率-回报\"帕累托前沿。可证伪预测:拉格朗日法的前沿严格支配任意固定权重;若某个 $\\beta$ 能同时匹配低违规与高回报,则约束法的优势被推翻。", + "isomorphism": "与经济学的影子价格、最优控制的庞特里亚金协态变量、SVM 的 KKT 乘子是同一对偶结构;[离线 RL 的支撑约束](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 也是它的同型实例,只是约束的物理含义从\"安全包络\"换成\"数据支撑\"。" + }, + "insight:world_models_let_planning_be_done_in_imagination": { + "assumption": "存在一个学得的转移模型 $p_\\theta(z_{t+1}\\mid z_t,a_t)$,其多步展开的误差增长足够慢,使得在想象中累积的回报估计与真实环境足够相关——即模型在策略实际访问的分布上是可信的。", + "failure": "长程展开时单步误差随步数复合,策略会发现并利用模型的漏洞(model exploitation),在想象里拿到现实中不存在的高回报。稀疏或长尾事件(行人鬼探头)在训练数据里太少,世界模型直接对其失真,规划因此对最关键场景视而不见。", + "experiment": "测\"想象-现实回报相关性随展开步数 $H$ 的衰减\":用 [Dreamer V3](paper_dreamer_v3.md) 在 CartPole 像素观测上展开 $H=1\\ldots 50$ 步,画想象回报与真实回报的相关系数曲线,并定位相关性跌破 0.5 的临界 $H^\\star$。可证伪点:若 $H^\\star$ 与策略性能无关,则\"想象规划受模型展开误差主导\"不成立。", + "isomorphism": "等同于人脑的前瞻模拟、围棋的 [MCTS](paper_silver2017_alphazero.md) 在状态模型上做树搜索、以及模型预测控制(MPC)在动力学模型上滚动优化——都是\"用一个可微/可查询的环境替身,把昂贵的真实试错搬进廉价的内部推演\"。" + }, + "paradigm:differentiable_end_to_end_imitation": { + "assumption": "从传感器到轨迹的整条管线可微,且监督信号(专家轨迹)足够强、足够多,梯度能穿过感知-预测-规划把误差正确分配到上游。相信联合优化的全局最优优于各模块各自最优的拼装。", + "failure": "当信号弱(罕见场景演示极少)或存在因果混淆(专家看后视镜才变道,模型只学到\"变道前总会减速\"的相关而非因果),端到端会学到捷径而非策略。可微也不等于可解释:一个不可分解的黑箱在认证与事故归因上代价高昂。", + "experiment": "在 [UniAD](paper_2212.10156_uniad.md) 上做模块消融:逐一切断 BEV→检测、检测→预测、预测→规划之间的梯度(停止反传),测闭环分数下降幅度,定位\"联合可微\"真正贡献最大的那一环。可证伪点:若切断所有跨模块梯度后分数几乎不变,则\"端到端联合优化\"的收益被推翻。", + "isomorphism": "与深度学习取代手工特征工程是同一支\"特征学习 > 手工设计\"的脉络(见 [Bitter Lesson](essay_bitter_lesson.md));也对应可微渲染把图形学反问题变成端到端优化、可微物理把控制问题接进梯度下降。" + }, + "paradigm:offline_rl": { + "assumption": "已有海量交互日志,但再与真实环境交互昂贵或危险;且日志隐含的行为策略 $\\pi_\\beta$ 的支撑足够宽,使\"在支撑内做约束价值迭代\"能拼出优于演示者的策略。保守是合理的归纳偏置。", + "failure": "数据近优时离线 RL 退化为模仿(无缝合空间);数据严重次优且覆盖窄时,约束又把它锁死在低水平。离线到在线的切换最棘手:离线学到的保守策略一上线遇到分布偏移就崩,[Cal-QL](paper_cql.md) 
正是为此而生。缺少在线指标时,模型选择本身就是未解难题。", + "experiment": "在同一份次优日志上对比 [CQL](paper_cql.md) / [IQL](paper_iql.md) / [Decision Transformer](paper_decision_transformer.md),并用拟合 Q 评估(FQE)预测闭环排名,再与真实闭环排名比对。可证伪点:若离线指标与闭环排名相关性高,则\"离线模型选择无解\"被部分推翻——这本身就是一篇有价值的负结果。", + "isomorphism": "是带数据支撑约束的动态规划(见 [统一洞察](insight_offline_rl_is_actually_constrained_dynamic_programming.md));与计量经济学的反事实估计、批量老虎机(batched bandits)、以及\"只能看历史不能做实验\"的流行病学因果推断共享同一困境。" + }, + "paradigm:world_model_paradigm": { + "assumption": "环境动力学可被一个生成模型以足够保真度学到,且在该模型内做规划/学习的收益超过模型误差带来的损失。相信\"先学世界,再在世界里学策略\"比无模型直接试错样本效率高一个量级。", + "failure": "模型在数据稀疏区失真,策略会专门往这些区域钻以套取虚高回报;视频级世界模型还面临长程一致性与算力墙。保真度提升的边际成本陡增,何时\"够用\"没有理论判据,往往只能靠下游闭环回测。", + "experiment": "控制变量地把世界模型的预测保真度(用 FVD / 多步重建误差度量)按档位降级,测下游策略闭环性能随保真度的响应曲线,找到\"保真度饱和点\"。可证伪预测:存在一个保真度阈值,越过后下游性能不再提升——若性能随保真度线性无饱和,则\"realism 是训练价值下界\"需要修正。", + "isomorphism": "对应基于模型的 [强化学习](paradigm_model_based_rl.md)、卡尔曼滤波的内部状态预测、以及科学里\"先建可计算模型再做数值实验\"的方法论;[GAIA-1](paper_gaia1.md)、[Dreamer](paper_dreamer_v3.md) 分别是视频级与隐空间级的两种实现。" + }, + "paradigm:vla_paradigm": { + "assumption": "语言/视觉预训练得到的语义先验,能迁移成对物理动作有用的表征;把感知、推理、动作统一成一个 token 序列后,大模型的涌现能力可以外推到驾驶决策。相信\"通才基座 + 少量对齐\"胜过从零训练的专才。", + "failure": "语言 token 与连续物理世界存在 grounding 鸿沟:模型能说出\"该减速\"却给不出可执行的 $a_t$;在安全攸关回路里还会产生幻觉动作。大模型的延迟与算力也常超出 30Hz 实时控制预算,迫使快慢双系统折中。", + "experiment": "分离\"会说\"与\"会做\":让 VLA 同时输出自然语言决策与低层控制,统计二者一致性,并测当强制只用语言意图驱动一个独立低层控制器时闭环分数的变化。可证伪点:若语言意图与控制输出高度一致且可互替,则 grounding 鸿沟不存在——这会颠覆\"语言不足以落地\"的判断。", + "isomorphism": "与机器人领域的 [RT-2](paper_rt2.md) / [OpenVLA](paper_openvla.md)、把规划当语言建模的 [tokenized 轨迹](move_tokenize_modalities.md) 思路同构;本质是\"用一个序列模型吞掉模态边界\",与多模态大模型把图文统一成 token 是同一招。" + }, + "paper:2212.10156": { + "assumption": "把检测、跟踪、建图、运动预测、占据预测全部用共享的 query 串成一条以\"规划为最终目标\"的可微管线,相信各子任务的中间表征若都服务于规划,则联合训练优于各自为政。query 充当模块间无损的信息总线。", + "failure": "规划导向的联合训练让梯度被最终规划损失主导,可能牺牲上游子任务在其自身指标上的精度;query 总线一旦在某一环退化,误差会沿链路传播。开环 nuScenes 指标与真实闭环安全的相关性也受质疑,漂亮的位移误差未必转化为闭环成功率。", + "experiment": "复刻 [lab03 的 query 
直觉](../../labs/lab03_uniad_query_intuition.ipynb),逐个冻结上游任务头,测规划 L2 与碰撞率的变化,量化每个子任务对规划的边际贡献。可证伪点:若冻结运动预测头后规划几乎不退化,则\"预测必须进规划闭环\"的设计前提需重估。", + "isomorphism": "query 总线与 [DETR](paper_carion2020.md) 的 object query、感知里把任务统一到一组可学习查询是同一招;\"以下游目标统领上游表征\"则与端到端语音识别取代手工对齐、可微渲染统一图形学子任务同构。" + }, + "paper:ross2011_dagger": { + "assumption": "训练时可以反复 query 专家:在策略自己访问到的状态上索取专家标注,从而把训练分布拉回到部署分布。相信\"在自己会犯错的地方问专家怎么做\"能把误差界从 $\\mathcal{O}(\\varepsilon T^2)$ 压到 $\\mathcal{O}(\\varepsilon T)$。", + "failure": "现实中专家未必可在线 query(人类驾驶员无法对模型遇到的每一帧实时打方向),且专家在 OOD 状态上的标注本身可能不可靠。聚合数据集不断变大、分布持续漂移,也带来非平稳训练与遗忘问题。", + "experiment": "对比 BC、DAgger、以及\"加噪声扰动状态再用专家重标注\"的廉价替代,在固定 query 预算下画\"恢复成功率-标注次数\"曲线。可证伪点:若加噪重标注在同等预算下追平 DAgger,则在线 query 专家的必要性被削弱——对工程落地是重要结论。", + "isomorphism": "与主动学习\"在不确定处查询标签\"、在线学习的 no-regret 框架、以及对抗训练\"在模型薄弱处补样本\"同构;核心都是\"让训练分布追着测试分布跑\"。" + }, + "paper:schulman2017_ppo": { + "assumption": "用裁剪的重要性比率把每步策略更新限制在信赖域内,可在不显式求二阶约束的情况下近似单调改进。相信\"小步多走 + 限制偏移\"比一步到位更稳,且裁剪是 TRPO 硬约束的廉价一阶替身。", + "failure": "裁剪只是启发式,并不真正保证 [TRPO](paper_schulman2015_trpo.md) 的单调改进界;对裁剪阈值 $\\epsilon$、广义优势估计(GAE)的 $\\lambda$、minibatch 轮数等超参敏感。奖励尺度漂移或优势估计偏差会让\"信赖域\"形同虚设。", + "experiment": "在同一任务上扫描裁剪阈值 $\\epsilon\\in[0.05,0.4]$,记录每次更新的实际 KL 偏移分布,检验\"裁剪是否真的把 KL 控制在目标附近\"。可证伪点:若实测 KL 经常远超 $\\epsilon$ 对应的隐含上界而性能照常,则\"裁剪≈信赖域\"的叙事需要修正。", + "isomorphism": "信赖域思想与拟牛顿法的步长控制、邻近点算法(proximal methods)、以及监督学习里的梯度裁剪同源;\"用一阶廉价代理替换二阶昂贵约束\"则是优化工程里反复出现的母题。" + }, + "paper:vaswani2017": { + "assumption": "序列内的依赖可以完全由成对注意力 $\\mathrm{softmax}(QK^\\top/\\sqrt{d})V$ 捕获,无需循环或卷积的归纳偏置;相信\"全局可并行的关联\"加上足够数据与算力,胜过 RNN 的顺序归纳偏置。", + "failure": "自注意力对序列长度 $n$ 是 $\\mathcal{O}(n^2)$ 计算与显存,长序列(高分辨率 BEV、长视频)撞算力墙;缺乏局部性/平移等先验,在小数据上不如带归纳偏置的模型,要靠规模与预训练补偿。", + "experiment": "在固定算力预算下,对比标准 softmax 注意力与 [线性注意力](paper_linear_attention.md) 在长序列驾驶轨迹上的精度-延迟前沿,定位序列长度阈值 $n^\\star$,越过它线性注意力反超。可证伪点:若任意长度下 softmax 都不被反超,则\"长序列必须放弃二次注意力\"的判断不成立。", + "isomorphism": "注意力本质是按内容寻址的软查表,与数据库 join、图神经网络的消息传递、核方法的相似度加权同构;\"把成对关系当一等公民\"也正是本图谱用带类型边组织知识的同一思路。" + }, + 
"paper:cql": { + "assumption": "通过在 Bellman 误差上叠加\"压低数据外动作 $Q$ 值\"的正则,可让学到的 $Q$ 成为真实价值的下界,从而避免 $\\max_{a'}$ 把 OOD 高估自举放大。相信保守下界足以支撑安全的策略改进。", + "failure": "正则强度 $\\alpha$ 过大时支撑内价值也被压平、退化为 BC;过小则 OOD 高估仍发散,可用区间窄。下界保证依赖若干分布假设,实践中并不严格成立;面对极次优或覆盖极窄的数据,CQL 也无能为力。", + "experiment": "可视化 $Q$ 值随 $\\alpha$ 的\"过估计-保守\"相变:在固定数据集上扫 $\\alpha$,画支撑内/支撑外 $Q$ 的均值差与闭环回报,定位最优 $\\alpha^\\star$ 的窄窗。可证伪点:若闭环回报对 $\\alpha$ 不敏感,则保守正则的精调价值被高估。", + "isomorphism": "是 [离线 RL = 约束 DP](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 中 Penalty 项的一种具体实现,与 [IQL](paper_iql.md) 的 expectile、[BCQ](paper_bear.md) 的行为先验互为变体;\"学一个悲观下界再据此决策\"也见于鲁棒优化与最坏情形风险控制。" + }, + "move:treat_planning_as_conditional_generation": { + "assumption": "专家轨迹的经验分布里\"好轨迹的形状\"可被生成模型 $p_\\theta(\\tau\\mid s_0,g,\\mathcal{C})$ 直接学到,从而以采样替代显式非凸优化。相信整条分布的输出比单点最优更有用——尤其在多模态决策处。", + "failure": "模仿专家会把专家偶发的危险样本一并学进,需 reward-aware 训练或后置安全过滤;扩散采样几十步在车端跑不动,要靠一致性模型或蒸馏压步数;遇到训练未见的条件组合(罕见路口拓扑),模型会强行生成熟悉模式造成无声失效。", + "experiment": "在分叉路口构造双峰示教(直行/让行各半),对比高斯策略与 [扩散策略](paper_diffusion_policy_chi2023.md) 复现双峰的能力:测生成轨迹的模态覆盖率与\"取均值塌缩到道路中央\"的发生率。可证伪点:若高斯策略也能避免模态塌缩,则生成式规划在多模态上的优势被推翻。", + "isomorphism": "与把检测当 [集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md)、把决策当 [序列建模](paradigm_sequence_modeling_for_decision.md) 是同一次\"从判别/优化转向生成\"的范式迁移;也对应文本生成、分子设计里\"从分布采样可行解\"的思路。" + }, + "paper:mnih2015_dqn": { + "assumption": "用经验回放打破样本相关性、用滞后的目标网络稳定自举目标,就能让 $Q$-learning 与深度网络稳定结合。相信\"存储-重采样 + 缓慢移动的目标\"足以驯服致命三要素(函数逼近 × 自举 × 离策略)。", + "failure": "$\\max_{a'}Q$ 带来系统性过估计偏差([Double-Q](move_double_q_to_reduce_overestimation.md) 才缓解);回放是均匀采样时对稀有关键经验利用不足;目标网络更新频率、回放容量等超参敏感。连续动作空间下 $\\max$ 不可解,需另起炉灶([SAC](paper_sac.md))。", + "experiment": "做去稳定消融:分别移除目标网络、缩小回放容量,画训练曲线发散程度,量化每个稳定化部件的边际贡献(本仓库 lab 已含 target-net 发散消融)。可证伪点:若移除目标网络后仍稳定收敛,则其必要性在该任务上被推翻。", + "isomorphism": "目标网络=自举估计里\"冻结一个慢变参照系\",与时间差分学习的半梯度、批归一化里冻结统计量同源;经验回放则与离线数据复用、课程采样共享\"复用历史经验\"的母题。" + }, + "paper:silver2017_alphazero": { + "assumption": "在已知且可完美模拟的环境里,用 MCTS 
做策略提升、用自博弈生成无尽数据、让网络蒸馏搜索结果,三者闭环可不依赖任何人类知识逼近最优。相信\"搜索 + 自博弈\"能把测试时算力转化为训练信号。", + "failure": "强依赖一个完美、廉价、可回溯的环境模型——这正是驾驶不具备的([MuZero](paper_muzero.md) 改为学模型来松绑)。自博弈要求对称零和或可自我对弈的结构;搜索成本随分支因子爆炸,实时控制预算下难以照搬。", + "experiment": "复刻 AlphaZero 自博弈循环(本仓库 [validation trace](validation_trace_alpha_zero_self_play_with_mcts_guided_policy.md)),系统削减每步 MCTS 模拟次数,测棋力随搜索预算的衰减,定位\"网络先验单独能达到的下界\"。可证伪点:若零搜索的纯网络先验已接近满搜索棋力,则\"测试时搜索是棋力主因\"被推翻。", + "isomorphism": "是 [测试时算力替代训练时算力](insight_test_time_compute_substitutes_train_time_via_search.md) 的奠基范例,与大模型推理时的多次采样投票、规划里的 MPC 滚动优化同构;自蒸馏搜索结果也对应\"用慢而准的过程教快而糙的模型\"。" + } +} diff --git a/docs/js/atlas-cards.js b/docs/js/atlas-cards.js index c063c73..f80fcf4 100644 --- a/docs/js/atlas-cards.js +++ b/docs/js/atlas-cards.js @@ -52,6 +52,21 @@ export class CardRenderer { this.graph = graph; this.byId = new Map(graph.nodes.map(n => [n.id, n])); this.adj = this._buildAdj(graph.edges); + this.lensData = null; // node id -> authored research-lens object + this._lensPromise = null; + } + + // Load the authored per-node research lenses once. Missing file is fine — + // nodes without a lens fall back to structural navigation only. + _ensureLens() { + if (this.lensData) return Promise.resolve(this.lensData); + if (!this._lensPromise) { + this._lensPromise = fetch("data/research_lens.json") + .then(r => (r.ok ? r.json() : {})) + .catch(() => ({})) + .then(obj => { this.lensData = new Map(Object.entries(obj || {})); return this.lensData; }); + } + return this._lensPromise; } _buildAdj(edges) { @@ -72,6 +87,7 @@ export class CardRenderer { el.innerHTML = `

没有找到这个节点。

`; return; } + await this._ensureLens(); const cardPath = this._cardPath(node); let body = ""; if (cardPath) { @@ -564,24 +580,27 @@ export class CardRenderer { if (node.kind === "lab" || node.kind === "channel") return ""; const adj = this.adj.get(node.id) || { in: [], out: [] }; - const summary = this._insightSummary(node, adj); - const importance = this._insightImportance(node, adj); + const lens = this.lensData ? this.lensData.get(node.id) : null; const suggestions = this._insightNextSuggestions(node, adj); + const upstream = this._insightUpstream(node, adj); const problems = this._insightOpenProblems(node, adj); const html = []; html.push(`
`); - html.push(`

动态洞察 / Dynamic insight

`); - html.push(`

${summary}

`); + // The authored research lens carries the actual reasoning. When absent we + // show only honest, graph-derived navigation rather than a synthetic + // "insight" that merely restates the card or counts edges. + if (lens) html.push(this._renderLens(lens)); + html.push(`

继续探索 / Where to go next

`); + html.push(`

这里只放由关系网络直接给出的线索,不复述正文。

`); html.push(`
`); - html.push(`

这条节点为什么重要

${importance}

`); html.push(`

下一步可读

${suggestions}
`); - html.push(`

开放问题

${problems}
`); + html.push(`

它由哪些组件支撑

${upstream}
`); + html.push(`

它正面回答的开放问题

${problems}
`); html.push(`
`); html.push(`
`); - // Internal guard — ensure no banned meta-language slipped into our - // synthesised text. We never display this warning to the user; it's a - // belt-and-braces check meant to surface in dev consoles only. + // Internal guard — ensure no banned meta-language slipped into our text. + // Never shown to the user; surfaces in dev consoles only. const joined = html.join(""); if (this._hasMetaLeakage(joined)) { try { (window.console && console.warn && console.warn("[insight-report] meta-language leakage detected for", node.id)); } catch (_e) { /* noop */ } @@ -589,78 +608,50 @@ export class CardRenderer { return joined; } - // 1) Summary — prefer authored `summary_zh`, fall back to `summary`, - // else synthesise from kind + label + most-frequent edge relation. - _insightSummary(node, adj) { - const authored = (node.summary_zh && String(node.summary_zh).trim()) - || (node.summary && String(node.summary).trim()); - if (authored) { - // Clip to ~2 sentences worth — defensive against extremely long YAML - // summaries that would dominate the panel. - const clipped = this._clipSentences(authored, 2, 220); - return this._escape(clipped); - } - // Synthesise. - const kindLabel = this._kindLabel(node.kind); - const label = node.label_zh || node.label || node.id; - const freq = new Map(); - for (const { rel } of adj.out) freq.set(rel, (freq.get(rel) || 0) + 1); - for (const { rel } of adj.in) freq.set(rel, (freq.get(rel) || 0) + 1); - let topRel = null; let topCount = 0; - // Iteration order on Map is insertion order — deterministic. 
- for (const [r, c] of freq) { - if (c > topCount) { topCount = c; topRel = r; } - } - if (topRel) { - return `${this._escape(label)} 是一条 ${this._escape(kindLabel)} 节点,邻接里最常出现的是 ${this._escape(this._relLabel(topRel))}(${topCount} 次),可由此切入理解它在图谱里的位置。`; - } - return `${this._escape(label)} 是一条 ${this._escape(kindLabel)} 节点,目前邻接较稀疏,建议先从同主题节点回看。`; - } - - // 2) Importance — count incoming composes/validates/manifests/motivates, - // cite specific node names. - _insightImportance(node, adj) { - const incoming = adj.in.filter(e => ["composes", "validates", "manifests", "motivates"].includes(e.rel)); - // Bucket by relation, deterministic order - const order = ["composes", "validates", "manifests", "motivates"]; - const byRel = new Map(order.map(r => [r, []])); - for (const e of incoming) { - const list = byRel.get(e.rel); - if (list) list.push(e.other); - } - const total = incoming.length; - if (!total) { - // Try a softer fallback — any incoming edges at all. - const anyIn = adj.in.length; - if (!anyIn) { - return `当前图谱里没有指向这条节点的关系,可能是新接入或处于研究范式的源头。`; - } - return `当前没有结构性引用(composes / validates / manifests / motivates),但仍被 ${anyIn} 条其它关系连接,更多线索见 锚点链路 标签页。`; - } - // Pick up to 3 most-cited representative nodes (across all four buckets) - const ranked = []; - for (const r of order) { - for (const id of byRel.get(r)) { - const o = this.byId.get(id); - if (!o) continue; - ranked.push({ id, o, rel: r, deg: typeof o.degree === "number" ? o.degree : (this.adj.get(id)?.in?.length || 0) }); - } - } - ranked.sort((a, b) => (b.deg - a.deg) || a.id.localeCompare(b.id)); - const exemplars = ranked.slice(0, 3); - // Compose Chinese counts: "被 N 条 paradigm/validation/manifests 关系引用" + // Authored research lens: four columns of genuine reasoning, rendered + // through the same markdown + KaTeX pipeline as the card body so formulae + // and intra-atlas links work. Any subset of the four fields may be present. 
+ _renderLens(lens) { + const cells = [ + ["assumption", "承重假设 · 它成立依赖什么"], + ["failure", "失效边界 · 它在什么条件下崩"], + ["experiment", "可证伪的下一步 · 怎样推进或推翻"], + ["isomorphism", "同构迁移 · 别处出现过的同型结构"], + ]; const parts = []; + parts.push(`

研究透镜 / Research lens

`); + parts.push(`

拆开这项工作的内在逻辑:它靠什么前提成立、在哪里失效、可以如何被证伪地推进、以及它与别的领域共享的同一套结构。

`); + parts.push(`
`); + for (const [key, title] of cells) { + const val = lens[key]; + if (!val) continue; + parts.push(`

${title}

${this._mdToHtml(String(val))}
`); + } + parts.push(`
`); + return parts.join(""); + } + + // Upstream provenance — the structural relations pointing INTO this node, + // shown as jump links (the components it was assembled from), not as a count. + _insightUpstream(node, adj) { + const order = ["composes", "manifests", "validates", "motivates", "extends", "prereq"]; + const seen = new Set(); + const items = []; for (const r of order) { - const c = byRel.get(r).length; - if (c) parts.push(`${c} 条 ${this._relLabel(r)}`); + for (const { rel, other } of adj.in) { + if (rel !== r) continue; + const o = this.byId.get(other); + if (!o || seen.has(o.id)) continue; + seen.add(o.id); + items.push(o); + if (items.length >= 6) break; + } + if (items.length >= 6) break; } - const countText = parts.join(" + "); - let examplesHtml = ""; - if (exemplars.length) { - const links = exemplars.map(({ o }) => this._anchorTag(o)).join("、"); - examplesHtml = `,例如 ${links}`; + if (!items.length) { + return `

没有指向它的结构性关系,它更像研究脉络的一个源头。

`; } - return `被 ${countText} 关系共 ${total} 次引用${examplesHtml}。`; + return `
    ${items.map(o => `
  • ${this._anchorTag(o)}
  • `).join("")}
`; } // 3) Next suggestions — outbound neighbours, prefer enables/extends/motivates, diff --git a/tools/build_research_lens.py b/tools/build_research_lens.py new file mode 100644 index 0000000..fe99497 --- /dev/null +++ b/tools/build_research_lens.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +r"""Build docs/data/research_lens.json from authored, per-node research lenses. + +Each lens has up to four fields, each a short markdown string (KaTeX `$…$` and +intra-atlas `[label](paper_xxx.md)` links are supported by the renderer): + + assumption 承重假设 — the premise that must hold for the work to work + failure 失效边界 — the regime + mechanism where it breaks + experiment 可证伪的下一步 — a concrete, runnable test that could advance or refute it + isomorphism 同构迁移 — the same structure seen in another field + +Raw strings (r"...") keep LaTeX backslashes intact; json.dump escapes them. +Run: python tools/build_research_lens.py +""" +from __future__ import annotations + +import json +from pathlib import Path + +OUT = Path(__file__).resolve().parents[1] / "docs" / "data" / "research_lens.json" + +LENS: dict[str, dict[str, str]] = { + +"insight:offline_rl_is_actually_constrained_dynamic_programming": { + "assumption": r"""数据集 $\mathcal{D}$ 的状态-动作支撑 $\mathrm{supp}(\pi_\beta)$ 已经覆盖了通向高回报所必需的关键动作;惩罚项只压制支撑外的乐观,而不伤害支撑内的价值传播。换句话说,约束动态规划能达到的上界,被数据覆盖范围而非算法本身钉死。""", + "failure": r"""当最优动作根本不在支撑内(日志里从未出现"紧急左打方向"),没有任何离线算法能学出它——这是信息缺失,不是优化失败。另一端,惩罚强度 $\alpha$ 过大时连支撑内的 Bellman 备份也被压平,策略退化为行为克隆;$\alpha$ 的可用区间通常很窄,需要在留出集上扫调。""", + "experiment": r"""固定一份驾驶日志,按动作类型系统性"挖洞",剔除 5% / 10% / 20% 的支撑,测 [CQL](paper_cql.md) 与 [IQL](paper_iql.md) 的闭环成功率随支撑覆盖率的衰减曲线,再与数据总量做对照回归。可证伪的预测:闭环性能由覆盖率单调决定、与总量近似无关;若总量能补偿覆盖缺失,则本洞察被推翻。""", + "isomorphism": r"""同一结构出现在监督学习的"经验风险 $\le$ 真实风险 + 复杂度项"、离策略评估的 importance-sampling 权重截断、模型预测控制的可行域约束 $u\in\mathcal{U}$、推荐系统的 propensity clipping——都是"在可信区域内最优化、对区域外保持悲观"。""", +}, + +"insight:imitation_learning_alone_cannot_recover_from_compounding_errors": { + "assumption": 
r"""训练分布与部署分布一致,即策略真正访问到的状态仍落在专家演示覆盖的区域内。监督式行为克隆把每一帧当作独立同分布样本,默认单步误差不会被时间放大。""", + "failure": r"""一旦策略犯下专家从未犯过的小错,就进入演示未覆盖的状态,误差以 $\mathcal{O}(\varepsilon T^2)$ 的速率随时间步 $T$ 复利累积([DAgger](paper_ross2011_dagger.md) 的核心界)。驾驶里典型表现是"压线后越偏越远",因为纠偏状态恰恰不在专家轨迹上。""", + "experiment": r"""在闭环模拟器里测"恢复率":人为把车摆到偏离车道中心 $d$ 米处,统计纯 BC 与 [DAgger](paper_ross2011_dagger.md) / 加噪重标注策略把车拉回的成功率随 $d$ 的曲线。可证伪点:若纯 BC 的恢复率不随 $d$ 急剧下降,则"复合误差主导失效"这一论断不成立。""", + "isomorphism": r"""等价于控制论里开环 vs 闭环的稳定性差别、语言模型自回归生成的暴露偏差(exposure bias)、以及数值积分中误差随步数累积——根因都是"在自己生成的、与训练不同的分布上继续做预测"。""", +}, + +"insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping": { + "assumption": r"""安全可以写成可量化的约束 $J_C(\pi)=\mathbb{E}\big[\sum_t c_t\big]\le d$,且存在一个对偶乘子 $\lambda^\star$ 使带约束问题与无约束的拉格朗日问题 $\max_\pi\min_{\lambda\ge 0} J(\pi)-\lambda\,(J_C(\pi)-d)$ 同解(强对偶/Slater 条件成立)。""", + "failure": r"""当约束非凸、或可行域为空(任务本身与安全冲突)时强对偶失效,乘子 $\lambda$ 来回振荡而不收敛。把安全塞进奖励加权 $r-\beta c$ 看似等价,却把"绝不可越界"软化成"越界够便宜就做"——固定权重无法表达硬约束,这正是奖励塑形的根本缺陷。""", + "experiment": r"""同一安全驾驶任务下,对比固定惩罚权重与 [PID-Lagrangian](paper_pid_lagrangian.md) 自适应乘子:扫描权重 $\beta$,画出"违规率-回报"帕累托前沿。可证伪预测:拉格朗日法的前沿严格支配任意固定权重;若某个 $\beta$ 能同时匹配低违规与高回报,则约束法的优势被推翻。""", + "isomorphism": r"""与经济学的影子价格、最优控制的庞特里亚金协态变量、SVM 的 KKT 乘子是同一对偶结构;[离线 RL 的支撑约束](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 也是它的同型实例,只是约束的物理含义从"安全包络"换成"数据支撑"。""", +}, + +"insight:world_models_let_planning_be_done_in_imagination": { + "assumption": r"""存在一个学得的转移模型 $p_\theta(z_{t+1}\mid z_t,a_t)$,其多步展开的误差增长足够慢,使得在想象中累积的回报估计与真实环境足够相关——即模型在策略实际访问的分布上是可信的。""", + "failure": r"""长程展开时单步误差随步数复合,策略会发现并利用模型的漏洞(model exploitation),在想象里拿到现实中不存在的高回报。稀疏或长尾事件(行人鬼探头)在训练数据里太少,世界模型直接对其失真,规划因此对最关键场景视而不见。""", + "experiment": r"""测"想象-现实回报相关性随展开步数 $H$ 的衰减":用 [Dreamer V3](paper_dreamer_v3.md) 在 CartPole 像素观测上展开 $H=1\ldots 50$ 步,画想象回报与真实回报的相关系数曲线,并定位相关性跌破 0.5 的临界 $H^\star$。可证伪点:若 $H^\star$ 与策略性能无关,则"想象规划受模型展开误差主导"不成立。""", + "isomorphism": r"""等同于人脑的前瞻模拟、围棋的 [MCTS](paper_silver2017_alphazero.md) 
在状态模型上做树搜索、以及模型预测控制(MPC)在动力学模型上滚动优化——都是"用一个可微/可查询的环境替身,把昂贵的真实试错搬进廉价的内部推演"。""", +}, + +"paradigm:differentiable_end_to_end_imitation": { + "assumption": r"""从传感器到轨迹的整条管线可微,且监督信号(专家轨迹)足够强、足够多,梯度能穿过感知-预测-规划把误差正确分配到上游。相信联合优化的全局最优优于各模块各自最优的拼装。""", + "failure": r"""当信号弱(罕见场景演示极少)或存在因果混淆(专家看后视镜才变道,模型只学到"变道前总会减速"的相关而非因果),端到端会学到捷径而非策略。可微也不等于可解释:一个不可分解的黑箱在认证与事故归因上代价高昂。""", + "experiment": r"""在 [UniAD](paper_2212.10156_uniad.md) 上做模块消融:逐一切断 BEV→检测、检测→预测、预测→规划之间的梯度(停止反传),测闭环分数下降幅度,定位"联合可微"真正贡献最大的那一环。可证伪点:若切断所有跨模块梯度后分数几乎不变,则"端到端联合优化"的收益被推翻。""", + "isomorphism": r"""与深度学习取代手工特征工程是同一支"特征学习 > 手工设计"的脉络(见 [Bitter Lesson](essay_bitter_lesson.md));也对应可微渲染把图形学反问题变成端到端优化、可微物理把控制问题接进梯度下降。""", +}, + +"paradigm:offline_rl": { + "assumption": r"""已有海量交互日志,但再与真实环境交互昂贵或危险;且日志隐含的行为策略 $\pi_\beta$ 的支撑足够宽,使"在支撑内做约束价值迭代"能拼出优于演示者的策略。保守是合理的归纳偏置。""", + "failure": r"""数据近优时离线 RL 退化为模仿(无缝合空间);数据严重次优且覆盖窄时,约束又把它锁死在低水平。离线到在线的切换最棘手:离线学到的保守策略一上线遇到分布偏移就崩,[Cal-QL](paper_cql.md) 正是为此而生。缺少在线指标时,模型选择本身就是未解难题。""", + "experiment": r"""在同一份次优日志上对比 [CQL](paper_cql.md) / [IQL](paper_iql.md) / [Decision Transformer](paper_decision_transformer.md),并用拟合 Q 评估(FQE)预测闭环排名,再与真实闭环排名比对。可证伪点:若离线指标与闭环排名相关性高,则"离线模型选择无解"被部分推翻——这本身就是一篇有价值的负结果。""", + "isomorphism": r"""是带数据支撑约束的动态规划(见 [统一洞察](insight_offline_rl_is_actually_constrained_dynamic_programming.md));与计量经济学的反事实估计、批量老虎机(batched bandits)、以及"只能看历史不能做实验"的流行病学因果推断共享同一困境。""", +}, + +"paradigm:world_model_paradigm": { + "assumption": r"""环境动力学可被一个生成模型以足够保真度学到,且在该模型内做规划/学习的收益超过模型误差带来的损失。相信"先学世界,再在世界里学策略"比无模型直接试错样本效率高一个量级。""", + "failure": r"""模型在数据稀疏区失真,策略会专门往这些区域钻以套取虚高回报;视频级世界模型还面临长程一致性与算力墙。保真度提升的边际成本陡增,何时"够用"没有理论判据,往往只能靠下游闭环回测。""", + "experiment": r"""控制变量地把世界模型的预测保真度(用 FVD / 多步重建误差度量)按档位降级,测下游策略闭环性能随保真度的响应曲线,找到"保真度饱和点"。可证伪预测:存在一个保真度阈值,越过后下游性能不再提升——若性能随保真度线性无饱和,则"realism 是训练价值下界"需要修正。""", + "isomorphism": r"""对应基于模型的 [强化学习](paradigm_model_based_rl.md)、卡尔曼滤波的内部状态预测、以及科学里"先建可计算模型再做数值实验"的方法论;[GAIA-1](paper_gaia1.md)、[Dreamer](paper_dreamer_v3.md) 分别是视频级与隐空间级的两种实现。""", +}, + 
+"paradigm:vla_paradigm": { + "assumption": r"""语言/视觉预训练得到的语义先验,能迁移成对物理动作有用的表征;把感知、推理、动作统一成一个 token 序列后,大模型的涌现能力可以外推到驾驶决策。相信"通才基座 + 少量对齐"胜过从零训练的专才。""", + "failure": r"""语言 token 与连续物理世界存在 grounding 鸿沟:模型能说出"该减速"却给不出可执行的 $a_t$;在安全攸关回路里还会产生幻觉动作。大模型的延迟与算力也常超出 30Hz 实时控制预算,迫使快慢双系统折中。""", + "experiment": r"""分离"会说"与"会做":让 VLA 同时输出自然语言决策与低层控制,统计二者一致性,并测当强制只用语言意图驱动一个独立低层控制器时闭环分数的变化。可证伪点:若语言意图与控制输出高度一致且可互替,则 grounding 鸿沟不存在——这会颠覆"语言不足以落地"的判断。""", + "isomorphism": r"""与机器人领域的 [RT-2](paper_rt2.md) / [OpenVLA](paper_openvla.md)、把规划当语言建模的 [tokenized 轨迹](move_tokenize_modalities.md) 思路同构;本质是"用一个序列模型吞掉模态边界",与多模态大模型把图文统一成 token 是同一招。""", +}, + +"paper:2212.10156": { + "assumption": r"""把检测、跟踪、建图、运动预测、占据预测全部用共享的 query 串成一条以"规划为最终目标"的可微管线,相信各子任务的中间表征若都服务于规划,则联合训练优于各自为政。query 充当模块间无损的信息总线。""", + "failure": r"""规划导向的联合训练让梯度被最终规划损失主导,可能牺牲上游子任务在其自身指标上的精度;query 总线一旦在某一环退化,误差会沿链路传播。开环 nuScenes 指标与真实闭环安全的相关性也受质疑,漂亮的位移误差未必转化为闭环成功率。""", + "experiment": r"""复刻 [lab03 的 query 直觉](../../labs/lab03_uniad_query_intuition.ipynb),逐个冻结上游任务头,测规划 L2 与碰撞率的变化,量化每个子任务对规划的边际贡献。可证伪点:若冻结运动预测头后规划几乎不退化,则"预测必须进规划闭环"的设计前提需重估。""", + "isomorphism": r"""query 总线与 [DETR](paper_carion2020.md) 的 object query、感知里把任务统一到一组可学习查询是同一招;"以下游目标统领上游表征"则与端到端语音识别取代手工对齐、可微渲染统一图形学子任务同构。""", +}, + +"paper:ross2011_dagger": { + "assumption": r"""训练时可以反复 query 专家:在策略自己访问到的状态上索取专家标注,从而把训练分布拉回到部署分布。相信"在自己会犯错的地方问专家怎么做"能把误差界从 $\mathcal{O}(\varepsilon T^2)$ 压到 $\mathcal{O}(\varepsilon T)$。""", + "failure": r"""现实中专家未必可在线 query(人类驾驶员无法对模型遇到的每一帧实时打方向),且专家在 OOD 状态上的标注本身可能不可靠。聚合数据集不断变大、分布持续漂移,也带来非平稳训练与遗忘问题。""", + "experiment": r"""对比 BC、DAgger、以及"加噪声扰动状态再用专家重标注"的廉价替代,在固定 query 预算下画"恢复成功率-标注次数"曲线。可证伪点:若加噪重标注在同等预算下追平 DAgger,则在线 query 专家的必要性被削弱——对工程落地是重要结论。""", + "isomorphism": r"""与主动学习"在不确定处查询标签"、在线学习的 no-regret 框架、以及对抗训练"在模型薄弱处补样本"同构;核心都是"让训练分布追着测试分布跑"。""", +}, + +"paper:schulman2017_ppo": { + "assumption": r"""用裁剪的重要性比率把每步策略更新限制在信赖域内,可在不显式求二阶约束的情况下近似单调改进。相信"小步多走 + 限制偏移"比一步到位更稳,且裁剪是 TRPO 硬约束的廉价一阶替身。""", + "failure": r"""裁剪只是启发式,并不真正保证 
[TRPO](paper_schulman2015_trpo.md) 的单调改进界;对裁剪阈值 $\epsilon$、广义优势估计(GAE)的 $\lambda$、minibatch 轮数等超参敏感。奖励尺度漂移或优势估计偏差会让"信赖域"形同虚设。""", + "experiment": r"""在同一任务上扫描裁剪阈值 $\epsilon\in[0.05,0.4]$,记录每次更新的实际 KL 偏移分布,检验"裁剪是否真的把 KL 控制在目标附近"。可证伪点:若实测 KL 经常远超 $\epsilon$ 对应的隐含上界而性能照常,则"裁剪≈信赖域"的叙事需要修正。""", + "isomorphism": r"""信赖域思想与拟牛顿法的步长控制、邻近点算法(proximal methods)、以及监督学习里的梯度裁剪同源;"用一阶廉价代理替换二阶昂贵约束"则是优化工程里反复出现的母题。""", +}, + +"paper:vaswani2017": { + "assumption": r"""序列内的依赖可以完全由成对注意力 $\mathrm{softmax}(QK^\top/\sqrt{d})V$ 捕获,无需循环或卷积的归纳偏置;相信"全局可并行的关联"加上足够数据与算力,胜过 RNN 的顺序归纳偏置。""", + "failure": r"""自注意力对序列长度 $n$ 是 $\mathcal{O}(n^2)$ 计算与显存,长序列(高分辨率 BEV、长视频)撞算力墙;缺乏局部性/平移等先验,在小数据上不如带归纳偏置的模型,要靠规模与预训练补偿。""", + "experiment": r"""在固定算力预算下,对比标准 softmax 注意力与 [线性注意力](paper_linear_attention.md) 在长序列驾驶轨迹上的精度-延迟前沿,定位序列长度阈值 $n^\star$,越过它线性注意力反超。可证伪点:若任意长度下 softmax 都不被反超,则"长序列必须放弃二次注意力"的判断不成立。""", + "isomorphism": r"""注意力本质是按内容寻址的软查表,与数据库 join、图神经网络的消息传递、核方法的相似度加权同构;"把成对关系当一等公民"也正是本图谱用带类型边组织知识的同一思路。""", +}, + +"paper:cql": { + "assumption": r"""通过在 Bellman 误差上叠加"压低数据外动作 $Q$ 值"的正则,可让学到的 $Q$ 成为真实价值的下界,从而避免 $\max_{a'}$ 把 OOD 高估自举放大。相信保守下界足以支撑安全的策略改进。""", + "failure": r"""正则强度 $\alpha$ 过大时支撑内价值也被压平、退化为 BC;过小则 OOD 高估仍发散,可用区间窄。下界保证依赖若干分布假设,实践中并不严格成立;面对极次优或覆盖极窄的数据,CQL 也无能为力。""", + "experiment": r"""可视化 $Q$ 值随 $\alpha$ 的"过估计-保守"相变:在固定数据集上扫 $\alpha$,画支撑内/支撑外 $Q$ 的均值差与闭环回报,定位最优 $\alpha^\star$ 的窄窗。可证伪点:若闭环回报对 $\alpha$ 不敏感,则保守正则的精调价值被高估。""", + "isomorphism": r"""是 [离线 RL = 约束 DP](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 中 Penalty 项的一种具体实现,与 [IQL](paper_iql.md) 的 expectile、[BCQ](paper_bear.md) 的行为先验互为变体;"学一个悲观下界再据此决策"也见于鲁棒优化与最坏情形风险控制。""", +}, + +"move:treat_planning_as_conditional_generation": { + "assumption": r"""专家轨迹的经验分布里"好轨迹的形状"可被生成模型 $p_\theta(\tau\mid s_0,g,\mathcal{C})$ 直接学到,从而以采样替代显式非凸优化。相信整条分布的输出比单点最优更有用——尤其在多模态决策处。""", + "failure": r"""模仿专家会把专家偶发的危险样本一并学进,需 reward-aware 训练或后置安全过滤;扩散采样几十步在车端跑不动,要靠一致性模型或蒸馏压步数;遇到训练未见的条件组合(罕见路口拓扑),模型会强行生成熟悉模式造成无声失效。""", + "experiment": 
r"""在分叉路口构造双峰示教(直行/让行各半),对比高斯策略与 [扩散策略](paper_diffusion_policy_chi2023.md) 复现双峰的能力:测生成轨迹的模态覆盖率与"取均值塌缩到道路中央"的发生率。可证伪点:若高斯策略也能避免模态塌缩,则生成式规划在多模态上的优势被推翻。""", + "isomorphism": r"""与把检测当 [集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md)、把决策当 [序列建模](paradigm_sequence_modeling_for_decision.md) 是同一次"从判别/优化转向生成"的范式迁移;也对应文本生成、分子设计里"从分布采样可行解"的思路。""", +}, + +"paper:mnih2015_dqn": { + "assumption": r"""用经验回放打破样本相关性、用滞后的目标网络稳定自举目标,就能让 $Q$-learning 与深度网络稳定结合。相信"存储-重采样 + 缓慢移动的目标"足以驯服致命三要素(函数逼近 × 自举 × 离策略)。""", + "failure": r"""$\max_{a'}Q$ 带来系统性过估计偏差([Double-Q](move_double_q_to_reduce_overestimation.md) 才缓解);回放是均匀采样时对稀有关键经验利用不足;目标网络更新频率、回放容量等超参敏感。连续动作空间下 $\max$ 不可解,需另起炉灶([SAC](paper_sac.md))。""", + "experiment": r"""做去稳定消融:分别移除目标网络、缩小回放容量,画训练曲线发散程度,量化每个稳定化部件的边际贡献(本仓库 lab 已含 target-net 发散消融)。可证伪点:若移除目标网络后仍稳定收敛,则其必要性在该任务上被推翻。""", + "isomorphism": r"""目标网络=自举估计里"冻结一个慢变参照系",与时间差分学习的半梯度、批归一化里冻结统计量同源;经验回放则与离线数据复用、课程采样共享"复用历史经验"的母题。""", +}, + +"paper:silver2017_alphazero": { + "assumption": r"""在已知且可完美模拟的环境里,用 MCTS 做策略提升、用自博弈生成无尽数据、让网络蒸馏搜索结果,三者闭环可不依赖任何人类知识逼近最优。相信"搜索 + 自博弈"能把测试时算力转化为训练信号。""", + "failure": r"""强依赖一个完美、廉价、可回溯的环境模型——这正是驾驶不具备的([MuZero](paper_muzero.md) 改为学模型来松绑)。自博弈要求对称零和或可自我对弈的结构;搜索成本随分支因子爆炸,实时控制预算下难以照搬。""", + "experiment": r"""复刻 AlphaZero 自博弈循环(本仓库 [validation trace](validation_trace_alpha_zero_self_play_with_mcts_guided_policy.md)),系统削减每步 MCTS 模拟次数,测棋力随搜索预算的衰减,定位"网络先验单独能达到的下界"。可证伪点:若零搜索的纯网络先验已接近满搜索棋力,则"测试时搜索是棋力主因"被推翻。""", + "isomorphism": r"""是 [测试时算力替代训练时算力](insight_test_time_compute_substitutes_train_time_via_search.md) 的奠基范例,与大模型推理时的多次采样投票、规划里的 MPC 滚动优化同构;自蒸馏搜索结果也对应"用慢而准的过程教快而糙的模型"。""", +}, + +} + + +def main() -> int: + OUT.write_text(json.dumps(LENS, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + print(f"wrote {OUT.relative_to(OUT.parents[2])}: {len(LENS)} research lenses") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From a47556e7201cde25c36b4eedd0fae5bb259a1a4b Mon Sep 17 00:00:00 2001 From: Claude Date: 
Fri, 29 May 2026 03:47:55 +0000 Subject: [PATCH 4/8] Round 3.1: keep wide display equations inside the panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single long $$…$$ (e.g. CQL's logsumexp objective) was 533px wide in a 430px side panel and pushed the entire card body into horizontal overflow, breaking the layout. Give display-math containers (.katex-display, .math-block, .math-display) overflow-x:auto + max-width:100% so a wide equation scrolls within its own box instead of widening the panel. Verified: card-body horizontal overflow gone (533->430), equation still fully readable by scroll. --- docs/atlas3d.css | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/atlas3d.css b/docs/atlas3d.css index 484e1f5..020bdef 100644 --- a/docs/atlas3d.css +++ b/docs/atlas3d.css @@ -212,6 +212,15 @@ input#yearSlider { width: 100%; accent-color: var(--accent); } .card-body table { width: 100%; border-collapse: collapse; margin: 10px 0; font-size: 12.5px; } .card-body th, .card-body td { border: 1px solid var(--line); padding: 6px 8px; text-align: left; } .card-body .katex { font-size: 1em; } +/* Long display equations must scroll within their own box, never widen the + panel. Without this a single wide $$…$$ (e.g. a logsumexp) pushes the whole + card into horizontal overflow and breaks the layout. 
*/ +.card-body .katex-display, +.lens-body .katex-display { overflow-x: auto; overflow-y: hidden; max-width: 100%; padding: 2px 0 4px; } +.card-body .math-block, +.lens-body .math-block { overflow-x: auto; max-width: 100%; } +.card-body .math-display, +.lens-body .math-display { display: inline-block; max-width: 100%; overflow-x: auto; vertical-align: middle; } .trace-block { background: rgba(108,177,255,0.08); border-left: 3px solid var(--accent); padding: 10px 12px; margin: 10px 0; border-radius: 4px; } .trace-block h4 { margin: 0 0 6px; font-size: 12.5px; color: var(--accent); letter-spacing: 0.04em; text-transform: uppercase; } From 279ffc26c06ea3d75c8789a50a9e95b996bf2a8d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 03:58:17 +0000 Subject: [PATCH 5/8] Round 4: fill the missing canon + expand research lenses (subagent cluster) A four-way domain cluster authored full depth cards for the canonical papers the graph cited but never defined, eliminating the last dead links and the hollow gaps: - Imitation/IRL/PG theory: GAIL, AIRL, Ng-Russell IRL, MaxEnt IRL, the IRL framing, Ross-Bagnell (the O(eps*T^2) bound), + insights on distribution-shift bounds and max-entropy duality. - Perception/SSL/foundation: PointPillars, VoxelNet, CenterPoint, AlexNet, BERT, BYOL, VICReg, Bahdanau attention. - RL infra / world models: GAE, D4RL, PlaNet, the offline-RL tutorial, Spinning Up, preference learning, + the optimistic-bias insight. - Driving E2E / LLM-agent: ALVINN, Learning-by-Cheating, TCP, GameFormer, autoregressive driving policies, Codex, SayCan, + the multimodal-behavior insight. 31 new nodes / ~117 edges, each card ~3k-5.6k chars with real equations, no fabricated benchmark numbers, no meta-language. Also adopted 3 remaining orphan move cards as nodes. Research lenses expanded to 35 (added ViT, DETR, BEVFormer, SAC, MuZero, BEV-intermediate, foundation-model zero-shot driving, + 12 from the cluster). Graph: 538->541 nodes, 1568 edges. 
Full QC: dead internal links 45->0, orphan cards 4->0, 0 meta-language findings, 0 lint findings across 231 extended cards, math regression test green, every research-lens link resolves. Baseline 0/0. --- ...max_entropy_closes_policy_value_duality.md | 62 + ...behavior_is_intrinsic_to_traffic_scenes.md | 47 + ...provement_bounded_by_distribution_shift.md | 55 + ...q_learning_max_is_optimistically_biased.md | 46 + docs/data/cards/extended/paper_airl.md | 49 + docs/data/cards/extended/paper_alvinn.md | 48 + .../extended/paper_bahdanau2014_attention.md | 48 + docs/data/cards/extended/paper_bert.md | 55 + docs/data/cards/extended/paper_byol.md | 57 + docs/data/cards/extended/paper_centerpoint.md | 48 + docs/data/cards/extended/paper_codex.md | 48 + docs/data/cards/extended/paper_d4rl.md | 51 + docs/data/cards/extended/paper_drivegpt.md | 49 + docs/data/cards/extended/paper_gail.md | 49 + docs/data/cards/extended/paper_gameformer.md | 48 + docs/data/cards/extended/paper_irl.md | 50 + .../cards/extended/paper_krizhevsky2012.md | 48 + docs/data/cards/extended/paper_lbc.md | 51 + .../paper_levine_offline_rl_tutorial.md | 53 + .../extended/paper_ng_russell_2000_irl.md | 49 + docs/data/cards/extended/paper_planet.md | 52 + .../data/cards/extended/paper_pointpillars.md | 2 +- .../extended/paper_preference_learning.md | 45 + .../cards/extended/paper_ross_bagnell_2010.md | 52 + docs/data/cards/extended/paper_saycan.md | 49 + .../cards/extended/paper_schulman2016_gae.md | 48 + docs/data/cards/extended/paper_spinning_up.md | 46 + docs/data/cards/extended/paper_tcp_carla.md | 49 + docs/data/cards/extended/paper_vicreg.md | 55 + docs/data/cards/extended/paper_voxelnet.md | 47 + .../extended/paper_ziebart_max_ent_irl.md | 50 + docs/data/generated/round4_driving.json | 127 + docs/data/generated/round4_irl.json | 45 + docs/data/generated/round4_orphans.json | 13 + docs/data/generated/round4_perception.json | 38 + docs/data/generated/round4_rlinfra.json | 51 + 
docs/data/graph_extended.json | 8207 +++++++++-------- docs/data/graph_extended.stats.json | 69 +- docs/data/lens_fragments/driving.json | 20 + docs/data/lens_fragments/irl.json | 20 + docs/data/lens_fragments/perception.json | 20 + docs/data/lens_fragments/rlinfra.json | 20 + docs/data/research_lens.json | 114 + tools/.link_baseline.json | 4 +- tools/build_research_lens.py | 72 +- 45 files changed, 6688 insertions(+), 3638 deletions(-) create mode 100644 docs/data/cards/extended/insight_max_entropy_closes_policy_value_duality.md create mode 100644 docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md create mode 100644 docs/data/cards/extended/insight_policy_improvement_bounded_by_distribution_shift.md create mode 100644 docs/data/cards/extended/insight_q_learning_max_is_optimistically_biased.md create mode 100644 docs/data/cards/extended/paper_airl.md create mode 100644 docs/data/cards/extended/paper_alvinn.md create mode 100644 docs/data/cards/extended/paper_bahdanau2014_attention.md create mode 100644 docs/data/cards/extended/paper_bert.md create mode 100644 docs/data/cards/extended/paper_byol.md create mode 100644 docs/data/cards/extended/paper_centerpoint.md create mode 100644 docs/data/cards/extended/paper_codex.md create mode 100644 docs/data/cards/extended/paper_d4rl.md create mode 100644 docs/data/cards/extended/paper_drivegpt.md create mode 100644 docs/data/cards/extended/paper_gail.md create mode 100644 docs/data/cards/extended/paper_gameformer.md create mode 100644 docs/data/cards/extended/paper_irl.md create mode 100644 docs/data/cards/extended/paper_krizhevsky2012.md create mode 100644 docs/data/cards/extended/paper_lbc.md create mode 100644 docs/data/cards/extended/paper_levine_offline_rl_tutorial.md create mode 100644 docs/data/cards/extended/paper_ng_russell_2000_irl.md create mode 100644 docs/data/cards/extended/paper_planet.md create mode 100644 docs/data/cards/extended/paper_preference_learning.md create mode 
100644 docs/data/cards/extended/paper_ross_bagnell_2010.md create mode 100644 docs/data/cards/extended/paper_saycan.md create mode 100644 docs/data/cards/extended/paper_schulman2016_gae.md create mode 100644 docs/data/cards/extended/paper_spinning_up.md create mode 100644 docs/data/cards/extended/paper_tcp_carla.md create mode 100644 docs/data/cards/extended/paper_vicreg.md create mode 100644 docs/data/cards/extended/paper_voxelnet.md create mode 100644 docs/data/cards/extended/paper_ziebart_max_ent_irl.md create mode 100644 docs/data/generated/round4_driving.json create mode 100644 docs/data/generated/round4_irl.json create mode 100644 docs/data/generated/round4_orphans.json create mode 100644 docs/data/generated/round4_perception.json create mode 100644 docs/data/generated/round4_rlinfra.json create mode 100644 docs/data/lens_fragments/driving.json create mode 100644 docs/data/lens_fragments/irl.json create mode 100644 docs/data/lens_fragments/perception.json create mode 100644 docs/data/lens_fragments/rlinfra.json diff --git a/docs/data/cards/extended/insight_max_entropy_closes_policy_value_duality.md b/docs/data/cards/extended/insight_max_entropy_closes_policy_value_duality.md new file mode 100644 index 0000000..92d2f36 --- /dev/null +++ b/docs/data/cards/extended/insight_max_entropy_closes_policy_value_duality.md @@ -0,0 +1,62 @@ +# 跨学科洞察 · 最大熵把策略与价值缝成一对偶 + +> 一旦给 RL 目标加上策略熵,"最优策略"就不再是 argmax 的硬选择,而变成价值函数的 softmax;reward、价值、策略三者被一个配分函数严丝合缝地绑定。这个软化既让逆向(从行为反推 reward)变得良定,又让正向(从 reward 求策略)有了闭式的策略形式——IRL 与 RL 因此成了同一对偶的两面。 + +## 抽象内核 + +最大熵 RL 的目标是带熵正则的回报: + +$$ +\pi^*=\arg\max_\pi\ \mathbb{E}_\pi\Big[\textstyle\sum_t r(s_t,a_t)+\alpha\,\mathcal{H}\big(\pi(\cdot\mid s_t)\big)\Big] +$$ + +它的解不是确定性 argmax,而是 Boltzmann 形式: + +$$ +\pi^*(a\mid s)=\frac{\exp\big(Q_{\text{soft}}(s,a)/\alpha\big)}{\sum_{a'}\exp\big(Q_{\text{soft}}(s,a')/\alpha\big)} +=\exp\!\Big(\tfrac{1}{\alpha}\big(Q_{\text{soft}}(s,a)-V_{\text{soft}}(s)\big)\Big) +$$ + +其中 
$V_{\text{soft}}(s)=\alpha\log\sum_a \exp(Q_{\text{soft}}(s,a)/\alpha)$ 是配分函数的对数(softmax 取代了 hard max)。这条等式就是对偶的扣环: + +- **正向(RL)**:给定 $r$,soft Bellman 备份解出 $Q_{\text{soft}}$,策略立刻由 softmax 读出——[SAC](paper_sac.md)、soft Q-learning 就是这条。 +- **逆向(IRL)**:给定专家轨迹,假设它由上式生成,对 $r$ 做极大似然,梯度恰是 feature matching——[Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) 就是这条。 + +同一个配分函数 $V_{\text{soft}}$(正向当价值、逆向当似然归一)把两个方向焊在一起。 + +## 在不同领域的具现 + +| 领域 | 配分函数/softmax 扮演什么 | 对偶的两端 | +|---|---|---| +| 最大熵 RL | $V_{\text{soft}}=\alpha\log\sum_a e^{Q/\alpha}$ | reward → soft 策略 | +| 最大熵 IRL | 轨迹分布 $P(\tau)\propto e^{\theta^\top \mathbf{f}_\tau}$ 的归一项 | 专家轨迹 → reward | +| 统计物理 | 自由能 = $-T\log Z$ | 能量 ↔ 平衡态分布 | +| 概率图模型 | 对数配分函数 $\log Z(\theta)$ | 势函数 ↔ 边缘分布 | +| 凸优化 | log-sum-exp 是 max 的光滑对偶 | 原问题 ↔ 对偶问题 | +| 语言模型 | 温度采样的 softmax | logits ↔ token 分布 | +| 对抗模仿 | 判别器估计的密度比 | reward ↔ 占用度量([GAIL](paper_gail.md))| + +底层是同一数学对象:log-sum-exp 作为 max 的光滑、可微、有唯一对偶的替身。 + +## 它对自动驾驶的意义 + +软化带来的"多模态"恰好是驾驶最需要的。硬 argmax 策略在岔路口会把"左转"和"右转"平均成"直冲护栏";最大熵策略保留两个峰,给出概率化的多种合理动作——这与 [扩散策略](paper_diffusion_policy_chi2023.md) 想达到的多模态是同一诉求的两种实现。在他车/行人的行为预测里更直接:把"目的地"当 reward,最大熵给出可达轨迹的概率分布,天然输出"多种可能未来"而非单一点估计。 + +对偶性还给出工程上的双向通道。若你已有一个会开车的策略,可以反推出它隐含在优化什么 reward(用于诊断或迁移);若你已有 reward,可以正向求软策略。温度 $\alpha$ 是统一旋钮:$\alpha\to 0$ 退回确定性最优(果断但脆),$\alpha$ 大则探索充分、行为多样但不够锐利——这正是 [给策略加熵奖励鼓励探索](move_add_entropy_bonus_to_encourage_exploration.md) 在调的那个量。 + +## 什么时候被误用 + +- **以为熵越大越好**:$\alpha$ 过大时策略过度随机,在安全攸关的驾驶里等于乱开;熵是探索/多样性的工具,不是目标本身,部署期通常要把 $\alpha$ 退火到接近确定性。 +- **把 softmax 当真实分布**:最大熵假设专家行为服从 Boltzmann 形式,但人类示教未必如此;模型设定错了,反推的 reward 也错。 +- **忽略配分函数难算**:连续/高维动作下 $V_{\text{soft}}$ 的 $\log\sum e^{Q/\alpha}$ 要采样近似,估计噪声会同时污染正向与逆向——这是 SAC 与深度 maxent IRL 共同的工程痛点。 + +## 与其它节点的关系 + +- 它是 [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md)(逆向)与 [SAC](paper_sac.md)(正向)共享的同一数学结构,把二者点明为一对偶的两端。 +- 它为 [GAIL](paper_gail.md) 与 [AIRL](paper_airl.md) 的对抗目标提供概率底座——判别器估计的正是这个能量模型的密度比。 +- 它与 
[人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) 互为表里:对偶给出"如何把轨迹解压成 reward"的具体机制。 +- 它落到 [给策略加熵奖励鼓励探索](move_add_entropy_bonus_to_encourage_exploration.md) 这一具体动作上。 + +## 推演链路 + +最大熵原理(Jaynes 1957)→ [Ng & Russell 2000](paper_ng_russell_2000_irl.md) 暴露 IRL 不可辨识 → [Ziebart 等 2008 最大熵 IRL](paper_ziebart_max_ent_irl.md) 用熵消解歧义 → soft Q-learning / [SAC](paper_sac.md) 把同一软化搬回正向 RL → [GAIL](paper_gail.md)/[AIRL](paper_airl.md) 把能量模型对抗化 → 驾驶里的多模态行为预测与多模态策略。 diff --git a/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md b/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md new file mode 100644 index 0000000..83adb4b --- /dev/null +++ b/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md @@ -0,0 +1,47 @@ +# 跨学科洞察 · 交通场景的多模态性是内在的 + +> 在路口、汇流、无保护左转这些地方,未来不是"有噪声的一个答案",而是**几个都对的答案**:可以等也可以走,可以让也可以抢。一个把所有合理未来回归到均值的模型,会输出一条**两个有效机动的平均**——而那条平均轨迹本身往往落在低概率、甚至违规或会撞的区域。多模态不是建模的瑕疵,是世界的属性。 + +## 抽象内核 + +设某场景下未来轨迹的真实分布是双峰的,两条等概率的合理机动 $y_A$(直行通过)与 $y_B$(停车等待): + +$$p(y\mid s)=\tfrac{1}{2}\,\mathcal{N}(y_A,\sigma^2)+\tfrac{1}{2}\,\mathcal{N}(y_B,\sigma^2)$$ + +一个用 L2/最大似然训练的**单峰回归器**最小化期望平方误差,其最优解是条件均值: + +$$\hat y=\mathbb{E}[y\mid s]=\tfrac{1}{2}(y_A+y_B)$$ + +当两个模态分得够开($\|y_A-y_B\|\gg\sigma$)时,均值点 $\hat y$ 落在两峰之间的**低密度谷**里:$p(\hat y\mid s)\approx 0$。物理上这条"平均轨迹"可能是"以一半速度开进路口正中"——既不是通过也不是等待,是任何一个真实驾驶员都不会选、且常常违规或致碰的动作。 + +形式化代价:对方差固定为 $\sigma^2$ 的单峰高斯(即 L2 回归隐含的输出模型),其对双峰目标的最优拟合 $q^\star=\mathcal{N}(\hat y,\sigma^2)$ 的 KL 散度有不可消除的下界,在两峰充分分离时约为 $\mathrm{KL}\big(p\,\|\,q^\star\big)\approx \tfrac{1}{8}\|y_A-y_B\|^2/\sigma^2-\ln 2$(随模态间距平方增长)——模态越分离,单峰损失越大,且这部分损失**靠加数据、加参数都压不下去**,因为它源于假设类与真实分布的结构错配,而非估计误差。补救只有两条:要么让输出分布本身多模态(GMM / 扩散 / 自回归 token 采样),要么在训练中用熵/多样性奖励阻止模态塌缩([`move:add_entropy_bonus_to_encourage_exploration`](move_add_entropy_bonus_to_encourage_exploration.md))。 + +## 在不同领域的具现 + +| 领域 | 多模态的来源 | 单峰平均的恶果 | 治法 | +|---|---|---|---| +| 自动驾驶规划 | 路口走/停、左/右绕行 | 平均成"开进路口正中"的无效/危险轨迹 | GMM head、扩散规划器、自回归轨迹采样 | +| 运动预测 | 他车意图多义 | ADE
漂亮但落在无人会走的中线 | winner-take-all 多模态损失、anchor 轨迹 | +| 机器人抓取 | 多个等效抓取姿态 | 平均成抓空的中间姿态 | Diffusion Policy、隐式策略 | +| 图像生成 | 同一文本多种合理图像 | 回归 MSE 出模糊均值图 | 扩散 / 对抗 / 自回归 | +| 语言生成 | 同义多种续写 | 平均化退化成乏味高频词 | 采样 + 温度、束搜索多样化 | +| 控制 / 博弈 | 多个纳什均衡 | 取均值落入非均衡的不稳定点 | 显式枚举均衡([GameFormer](paper_gameformer.md) 层级博弈) | + +## 它对自动驾驶的意义 + +- **架构层面**:规划/预测的输出头必须能表达多峰——高斯混合、扩散、或把轨迹 token 化后自回归采样([`insight:tokenized_trajectories_let_planning_borrow_from_language_modeling`](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md))。纯 L2 回归在交互密集场景天然受限。 +- **指标层面**:单模态 ADE/FDE 会奖励"贴近均值",反而鼓励模态塌缩;要用 minADE/minFDE、miss-rate 这类多模态指标,并配 [闭环评测](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) 才能暴露"平均轨迹会撞"的问题。 +- **训练层面**:模态塌缩是多模态 head 的常见病——所有混合分量收敛到主模态。需要 winner-take-all 匹配、熵正则或 anchor 化来维持分支多样性。 +- **交互层面**:在 [`problem:multi_agent_interaction_modeling_in_dense_traffic`](problem_multi_agent_interaction_modeling_in_dense_traffic.md) 里,多模态还来自"他车也在多模态地反应我",需要 [GameFormer](paper_gameformer.md) 式的博弈结构而不只是边际多峰。 +- **它解释为什么 [ALVINN](paper_alvinn.md)/纯 BC 在路口脆弱**:单峰回归把岔口的合理分歧平均成无效动作,叠加复合误差就更危险。 + +## 与其它节点的关系 + +- 它是 [`insight:tokenized_trajectories_let_planning_borrow_from_language_modeling`](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md) 的强动机:自回归 token 采样天然多模态,一次解码不同 rollout 即不同合理未来。 +- 它与 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 互补:前者讲"在岔口该表达分歧而非平均",后者讲"偏离后该会纠回"——都是单峰 BC 的两类原则性失效。 +- 它直接 motivate 了 [`move:add_entropy_bonus_to_encourage_exploration`](move_add_entropy_bonus_to_encourage_exploration.md):熵奖励阻止策略/分布塌缩到单模态。 +- 它是 [GameFormer](paper_gameformer.md) 用 GMM + 层级博弈、以及扩散规划器存在的根本理由。 + +## 推演链路 + +[ALVINN](paper_alvinn.md)(单峰回归在岔口暴露问题)→ 运动预测里的多模态 anchor/GMM head → [Decision Transformer](paper_decision_transformer.md) 与 [DriveGPT](paper_drivegpt.md)(自回归采样天然多峰)→ 扩散规划器(用去噪采样表达分布)→ 
[GameFormer](paper_gameformer.md)(把多模态从"边际多峰"升级为"交互博弈下的多均衡")→ [闭环评测](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) 验证"平均轨迹会撞"。 diff --git a/docs/data/cards/extended/insight_policy_improvement_bounded_by_distribution_shift.md b/docs/data/cards/extended/insight_policy_improvement_bounded_by_distribution_shift.md new file mode 100644 index 0000000..eeea4e2 --- /dev/null +++ b/docs/data/cards/extended/insight_policy_improvement_bounded_by_distribution_shift.md @@ -0,0 +1,55 @@ +# 跨学科洞察 · 策略改进的上限被分布偏移锁死 + +> 无论是模仿、信赖域策略梯度还是离线 RL,"用旧策略/旧数据去评估新策略"这件事的可靠度,都正比于新旧分布的接近程度。一旦新策略把你带到旧分布没覆盖的地方,所有评估失效,改进的保证随之崩塌——分布偏移是策略改进的硬天花板。 + +## 抽象内核 + +把"在分布 $p$ 下学到的策略 $\hat\pi$,部署/评估在分布 $q$ 下"的性能差距统一写成: + +$$ +\big|\,\mathbb{E}_{q}[f]-\mathbb{E}_{p}[f]\,\big|\ \le\ 2\,f_{\max}\cdot D_{\mathrm{TV}}(p,q) +$$ + +其中 $f$ 是损失或优势函数。所有"安全改进"算法都在控制右边这一项 $D_{\mathrm{TV}}(p,q)$: + +- **模仿学习**:$p=d_{\pi^*}$(专家访问分布),$q=d_{\hat\pi}$(学到策略实际访问分布)。两者每步偏离一点,$D_{\mathrm{TV}}$ 随时间累积,得到 [Ross & Bagnell](paper_ross_bagnell_2010.md) 的 $O(\epsilon T^2)$ 界。 +- **信赖域策略梯度**:$p=d_{\pi_{\text{old}}}$,$q=d_{\pi_{\text{new}}}$。TRPO 显式把 $D_{\mathrm{KL}}(\pi_{\text{old}}\,\|\,\pi_{\text{new}})$ 约束在小球内,从而保证单调改进。 +- **离线 RL**:$p=$ 数据集行为分布,$q=$ 学到策略分布。CQL/IQL 用惩罚或回避把 $q$ 拉回 $p$ 的支撑内。 + +三者用的是同一条不等式,只是把哪个分布对当成"旧→新"而已。 + +## 在不同领域的具现 + +| 领域 | $p$(旧/已知分布) | $q$(新/目标分布) | 控制 $D(p,q)$ 的手段 | +|---|---|---|---| +| 模仿学习 | 专家访问分布 $d_{\pi^*}$ | 学到策略分布 $d_{\hat\pi}$ | DAgger 数据聚合 / forward training | +| 策略梯度 | 旧策略 $\pi_{\text{old}}$ | 新策略 $\pi_{\text{new}}$ | TRPO 信赖域 / PPO clip | +| 离线 RL | 数据集行为策略 $\pi_\beta$ | 目标策略 $\pi$ | CQL 保守惩罚 / BCQ 行为先验 | +| 重要性采样估计 | 提议分布 | 目标分布 | 权重截断、有效样本数监控 | +| 监督学习泛化 | 训练分布 | 测试分布 | 域适应、协变量偏移校正 | +| 模型基 RL | 真实动力学 | 学到的模型 rollout | 短 horizon、模型不确定性惩罚 | + +## 它对自动驾驶的意义 + +这条洞见把三件看似无关的工程难题统一成同一个量的不同侧面:BC 闭环开不动、PPO 步长不敢调大、离线 RL 上线就崩——全都是 $D(p,q)$ 失控。它给出一个统一的设计原则:任何"用历史评估未来"的环节,都必须显式监控并约束分布偏移,而不是寄望模型在没见过的状态上凭空泛化。 + +它也精确解释了 open-loop 与 closed-loop 指标的鸿沟。open-loop 评估默认 
$q=p$(喂的是专家轨迹里的状态),所以测不出分布偏移;closed-loop 让车在自己造成的 $q$ 里行驶,$D(p,q)>0$ 的代价才暴露。这就是为什么离线 ADE/FDE 漂亮的模型一进 [闭环仿真](paradigm_imitation_learning.md) 就拉胯——见 [纯模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md)。 + +实践推论:扩展数据多样性(增大 $p$ 的支撑)、限制每次策略更新幅度(缩小 $q$ 相对 $p$ 的偏移)、以及在分布外触发保守回退,是三条互补的杠杆。哪条最有效,取决于你的偏移是来自训练覆盖不足还是更新步子太大。 + +## 什么时候被误用 + +- **以为加数据总能治**:若新数据仍来自 $p$,它压的是误差幅度,不缩 $D(p,q)$;BC 堆专家数据治不了复合误差正是此理。 +- **把信赖域当万能锁**:约束 $D(p,q)$ 小能保证"不变差",但也意味着"改不快"——步子太保守时策略改进近乎停滞,这是 PPO 调 clip 系数时的真实张力。 +- **忽略支撑而只看散度**:两个分布 KL 不大却支撑不重叠时,重要性权重仍会爆炸;离线 RL 里"支撑覆盖"比"分布接近"更要命。 + +## 与其它节点的关系 + +- 它是 [Ross & Bagnell 2010](paper_ross_bagnell_2010.md) 的 $O(\epsilon T^2)$ 界与 [信赖域单调改进](move_trust_region_step_for_monotonic_improvement.md) 共享的同一内核——前者让偏移随时间累积,后者主动把它约束住。 +- 它与 [纯模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 是因果两端:分布偏移是因,复合误差是果。 +- 它与 [离线 RL 本质上是带约束的动态规划](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 同源:离线 RL 的"约束在数据支撑内"就是把 $D(p,q)$ 压到零的极端版本。 +- 它通过 [DAgger 数据聚合验证](validation_trace_dataset_aggregation_for_imitation.md) 与 [TRPO](paper_schulman2015_trpo.md)、[PPO](../paper_schulman2017_ppo.md) 落到具体算法。 + +## 推演链路 + +[Ross & Bagnell 2010 高效归约](paper_ross_bagnell_2010.md)(量化模仿里的偏移)→ [Ross 等 2011 DAgger](../paper_ross2011_dagger.md)(用聚合压偏移)→ Kakade & Langford 2002 保守策略迭代 → [Schulman 2015 TRPO](paper_schulman2015_trpo.md)(把偏移约束成信赖域)→ [PPO](../paper_schulman2017_ppo.md)(clip 近似信赖域)→ 离线 RL 的 [CQL](paper_cql.md)/[IQL](paper_iql.md)(把偏移约束到数据支撑)→ 驾驶里闭环训练与分布外保守回退。 diff --git a/docs/data/cards/extended/insight_q_learning_max_is_optimistically_biased.md b/docs/data/cards/extended/insight_q_learning_max_is_optimistically_biased.md new file mode 100644 index 0000000..6d356e9 --- /dev/null +++ b/docs/data/cards/extended/insight_q_learning_max_is_optimistically_biased.md @@ -0,0 +1,46 @@ +# 跨学科洞察 · Q-learning 的 max 是系统性乐观偏差 + +> 只要 $Q$ 的估计带噪声,对动作取 $\max$ 
这个动作本身就会系统性地高估真实价值——不是偶尔运气好,而是期望意义上一定偏高。根因是一个纯数学事实:$\mathbb{E}[\max] \ge \max \mathbb{E}$(Jensen 不等式应用于凸的 max 算子)。这条偏差在自举(bootstrapping)里被反复放大,是 DQN 早期不稳定的元凶,也是 Double-Q / Double-DQN 这一族修正的直接动机。 + +## 抽象内核 + +设每个动作的真实价值为 $q(a)$,但我们只有带噪声的估计 $Q(a) = q(a) + \varepsilon_a$,其中噪声 $\mathbb{E}[\varepsilon_a]=0$。Q-learning 的 Bellman 目标里有一项 $\max_a Q(a)$。因为 $\max$ 是凸函数,由 Jensen 不等式: + +$$ +\mathbb{E}\big[\max_a Q(a)\big] \;\ge\; \max_a \mathbb{E}\big[Q(a)\big] \;=\; \max_a q(a) +$$ + +即"对噪声估计取最大"的期望,永远不小于"真实最大值"。直觉上:$\max$ 算子会专门挑出那个噪声恰好为正、被运气抬高的动作;动作越多、噪声越大,这个被高估的概率越大,偏差(即两边之差)也越大。更糟的是,Q-learning 用 $\arg\max$ 选动作、又用同一组 $Q$ 值评估这个动作——选择与评估耦合,让乐观偏差无法在期望上抵消。 + +修正的核心是解耦"选哪个动作"与"该动作值多少"。Double-Q 用两套独立估计 $Q_A, Q_B$:用 $Q_A$ 选 $a^\star=\arg\max_a Q_A(a)$,再用 $Q_B(a^\star)$ 评估它。由于 $a^\star$ 的选择与 $Q_B$ 的噪声独立,$\mathbb{E}[Q_B(a^\star)]$ 不再被系统性抬高: + +$$ +\mathbb{E}\big[Q_B(\arg\max_a Q_A(a))\big] \;\le\; \mathbb{E}\big[\max_a Q_A(a)\big] +$$ + +这就是 [Double-Q 减少高估](move_double_q_to_reduce_overestimation.md) 这一动作的全部数学根据。 + +## 在不同领域的具现 + +| 领域 | "max over noisy estimates" 的形态 | 后果 / 修正 | +|---|---|---| +| 深度 RL | DQN 的 $\max_{a'} Q(s',a')$ 自举目标 | $Q$ 值系统性膨胀、训练发散;Double-DQN 用在线网选动作、目标网评估 | +| 连续控制 | actor-critic 里 critic 对策略动作的高估 | TD3 取双 critic 的最小值(clipped double-Q)做悲观目标 | +| 离线 RL | Bellman 备份查询 OOD 动作时的外推高估 | 见 [离线 RL 范式](paradigm_offline_rl.md):用保守正则把 OOD 的 $Q$ 压回 | +| 统计学 | 多重比较中"最大样本均值"作为总体均值的估计 | winner's curse;需做后选择校正 | +| 拍卖 / 经济学 | 共同价值拍卖中赢家恰是估值最高(最乐观)者 | the winner's curse,理性者应往下修正出价 | +| 优化 / 调参 | 在验证集上选"最佳超参"的那个分数 | 该分数对真实泛化是乐观估计;需独立测试集 | +| 投资组合 | 选历史收益最高的策略作为未来预期 | 回测过拟合,样本外收益系统性低于回测 | + +同一内核反复出现:**只要先用噪声估计做选择、再用相关的估计做评估,结果就乐观偏置。** 解药永远是同一招——把选择和评估的随机性解耦,或对最大化的结果主动施加悲观修正。 + +## 它对自动驾驶的意义 + +- **价值高估直接威胁安全**:若用 Q-learning / actor-critic 学驾驶策略,对"激进变道""压速通过"这类动作的价值若被系统性高估,策略会偏向危险但被误判为高回报的行为。悲观化(double-Q / 最小值 critic / 保守正则)不是可选的稳定性 trick,而是安全前提。 +- **离线驾驶日志放大问题**:自动驾驶大量依赖离线日志训练,而日志几乎不含危险动作的样本,Bellman 备份对这些 OOD 动作的高估无从被数据纠正——这正是 [离线 RL 范式](paradigm_offline_rl.md) 必须保守的根本原因。 +- 
**它是不确定性校准的特例**:能量化估计噪声,就能定向地往下修正乐观偏差,这与 [不确定性校准是安全委托的前提](insight_uncertainty_calibration_is_prerequisite_for_safe_delegation.md) 同源——知道自己哪里不确定,才能避免在不确定处盲目乐观。 +- **世界模型同样中招**:在学得的世界模型里规划时,策略会专挑模型高估回报的区域(model exploitation),本质是 $\max$ 偏差搬到了想象空间。 + +## 与其它节点的关系 + +它直接孕育 [Double-Q 减少高估](move_double_q_to_reduce_overestimation.md),并与 [目标网络稳定 off-policy 学习](move_bootstrap_target_network_to_stabilize_off_policy_learning.md) 互补:目标网络对付的是自举的反馈回路不稳定,本洞察对付的是自举里 $\max$ 的偏差方向,二者共同解释了为什么 [SAC](paper_sac.md)、TD3 这类现代算法默认携带双 critic + 目标网络。它是 [离线 RL 范式](paradigm_offline_rl.md) 保守性设计的理论前传——离线 RL 的所有悲观正则,本质都是在防同一个 $\max$ 把 OOD 动作抬上天。它与 [不确定性校准是安全委托的前提](insight_uncertainty_calibration_is_prerequisite_for_safe_delegation.md) 是兄弟洞察:前者说明"为什么会乐观",后者说明"量化不确定性才能修正乐观"。 diff --git a/docs/data/cards/extended/paper_airl.md b/docs/data/cards/extended/paper_airl.md new file mode 100644 index 0000000..5be3b0d --- /dev/null +++ b/docs/data/cards/extended/paper_airl.md @@ -0,0 +1,49 @@ +--- +id: paper:airl +title: "AIRL — Learning Robust Rewards with Adversarial Inverse RL" +title_zh: "AIRL(对抗逆强化学习)" +kind: paper +tier: A +authors: [Fu, J., Luo, K., Levine, S.] 
+venue: "ICLR 2018" +year: 2018 +topic: deep_rl +phase: core +deep_links: + - {label: "arXiv 1710.11248", url: "https://arxiv.org/abs/1710.11248"} +--- + +# AIRL(对抗逆强化学习) + +> GAIL 学到的是一个"能模仿"的策略,但判别器里没有可拿走、可迁移的 reward。AIRL 的转折是给判别器一个特殊结构,使其在最优时分解出一个对动力学变化鲁棒的奖励函数——从而恢复"为什么这么开",而不只是"怎么开"。 + +## 一个最小公式 / Math anchor +$$ +D_{\theta}(s,a,s')=\frac{\exp\big(f_{\theta}(s,a,s')\big)}{\exp\big(f_{\theta}(s,a,s')\big)+\pi(a\mid s)},\qquad +f_{\theta}(s,a,s')=g_{\theta}(s,a)+\gamma h_{\phi}(s')-h_{\phi}(s) +$$ +判别器不直接输出概率,而是用 $f_\theta$ 与当前策略密度 $\pi(a\mid s)$ 拼成。关键在 $f_\theta$ 的 reward-shaping 结构:$g_\theta$ 是要恢复的奖励,$\gamma h_\phi(s')-h_\phi(s)$ 是一个势函数 shaping 项。由于 [Ng 等关于 reward shaping 不变性的结论](paper_ng_russell_2000_irl.md),最优解下 $g_\theta(s)$ 收敛到真实 reward 加一个常数,而 shaping 项吸收掉与动力学耦合的部分——这正是"可迁移 reward"的来源。 + +## 它在图谱里的位置 +AIRL 是 [GAIL](paper_gail.md) 的直系后继:同样是对抗框架,但把判别器从"黑箱二分类器"改成"reward + shaping"的可解释结构,回答 GAIL 故意回避的问题——把 reward 真正拿回来。它因此重新接回 [Inverse RL](paper_irl.md) 的本来目标(恢复 reward 而非仅复制行为),并以 [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) 的能量模型为概率底座。它践行 [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md):恢复出的 $g_\theta$ 就是那个被压缩的隐式 reward。 + +## 架构 / 方法直觉 +训练流程与 [GAIL](paper_gail.md) 几乎一样(交替更新 $D$ 与策略,策略用 reward $\log D-\log(1-D)$ 做策略梯度),区别全在判别器的参数化。把判别器写成"以 $\pi$ 为基准的 importance-weighted 形式",等价于在做最大熵 IRL 的对抗近似。状态-only 的 $g_\theta(s)$ 配上势函数 shaping 是 disentangle 的核心:若让 $g_\theta$ 依赖动作或下一状态,恢复的 reward 会把动力学信息编进去,换个环境就失效。AIRL 的卖点正是——在训练动力学下学到的 reward,搬到测试动力学(比如换了摩擦系数)仍能驱动重新训练出好策略。 + +## 工程上真正要注意什么 +- 需要可计算或可估计的策略密度 $\pi(a\mid s)$:离散动作直接读,连续动作要用可求密度的策略(如高斯或 normalizing flow),这限制了能用的策略类。 +- disentangle 只在状态覆盖充分时成立:若专家轨迹没访问到的状态上 $g_\theta$ 没有约束,迁移时这些区域的 reward 是任意的。 +- 与 GAIL 共享全部对抗不稳定性,外加判别器结构更复杂、调参面更大。 +- 驾驶中"reward 可迁移"听起来诱人(在城市 A 学的开车偏好搬到城市 B),但前提是两地的状态语义对齐;若特征里混入地图特有信息,disentangle 假设破裂。 + +## Bitter-Lesson 视角 +AIRL 比 GAIL 更"重先验"——它用 reward-shaping 不变性这条人工设计的结构去强行 disentangle reward 与动力学。这在数据/算力不足、且确实需要跨环境迁移 reward 
时是聪明的归纳偏置。但若目标只是开好车而不在意"恢复 reward 本身",规模化的 BC 或离线 RL 往往更省事。AIRL 的价值在于它锚定了一个 bitter lesson 暂时还吃不下的需求:可解释、可迁移的意图表示。 + +## 接下来读什么 +- [GAIL](paper_gail.md) — AIRL 去掉结构、只求模仿的前身 +- [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) — AIRL 对抗化的那个概率模型 +- [Ng & Russell 经典 IRL](paper_ng_russell_2000_irl.md) — reward shaping 不变性的源头 +- [Inverse RL 总览](paper_irl.md) — 恢复 reward 这一问题本身 +- [偏好学习](paper_preference_learning.md) — 另一条从人类信号恢复 reward 的路线 +- [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) +- [最大熵把策略与价值缝成对偶](insight_max_entropy_closes_policy_value_duality.md) diff --git a/docs/data/cards/extended/paper_alvinn.md b/docs/data/cards/extended/paper_alvinn.md new file mode 100644 index 0000000..fd195e2 --- /dev/null +++ b/docs/data/cards/extended/paper_alvinn.md @@ -0,0 +1,48 @@ +--- +id: paper:alvinn +title: "ALVINN — An Autonomous Land Vehicle in a Neural Network" +title_zh: "ALVINN(最早的神经网络端到端驾驶)" +kind: paper +tier: S +authors: [Pomerleau, D. A.] +venue: "NeurIPS 1989" +year: 1989 +topic: e2e_ad +phase: prereq +deep_links: + - {label: "NeurIPS 1988 论文页", url: "https://papers.nips.cc/paper/1988/hash/812b4ba287f5ee0bc9d43bbf5bbe87fb-Abstract.html"} +--- + +# ALVINN(最早的神经网络端到端驾驶) + +> 1989 年,Pomerleau 用一个三层全连接网络,直接把 30×32 的相机图像(外加一路激光测距)映射到方向盘转角,让卡车在真实道路上自主行驶。这是端到端驾驶范式的起点:不分模块、不写规则,让一个可训练函数吞掉从像素到控制的全部映射。 + +## 一个最小公式 / Math anchor +$$ +\hat a \;=\; f_\theta(I), \qquad +\theta^\star \;=\; \arg\min_\theta\ \mathbb{E}_{(I,a)\sim\mathcal{D}_\text{expert}}\big[\,\|f_\theta(I)-a\|^2\,\big] +$$ +$I\in\mathbb{R}^{30\times 32}$ 是降采样后的灰度图像,$a$ 是离散化为 45 个方向单元的转向输出,$f_\theta$ 是单隐层(29 个隐单元)的全连接网络。训练就是在人类驾驶演示上做行为克隆——一个最朴素的监督回归。这正是 [`paradigm:imitation_learning`](paradigm_imitation_learning.md) 的最小可执行形式。 + +## 它在图谱里的位置 +ALVINN 是整条端到端驾驶谱系的源头:它 manifest 了 [`paradigm:imitation_learning`](paradigm_imitation_learning.md),并直接 motivate 了三十年后的 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md)。它与 
[模块化感知-规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 形成根本对照:后者把驾驶切成感知/预测/规划逐段手工设计,而 ALVINN 在 1989 年就赌"让一个网络端到端学完"。下游的 [Learning by Cheating](paper_lbc.md)、[TCP](paper_tcp_carla.md)、[TransFuser](../paper_transfuser.md) 都是这条赌注在算力与数据成熟后的兑现。 + +## 架构 / 方法直觉 +backbone 是 960 像素输入 → 29 隐 → 45 输出的全连接网络,外加一块 8×32 的激光测距输入,以及一个"道路强度反馈"回路。最关键的工程贡献不是网络本身,而是 Pomerleau 意识到纯演示数据的覆盖太窄——人开车几乎不会偏离车道,于是模型从没见过"如何从偏离中纠回"。他的解法是用图像变换(geometric transformation)人工合成大量"车处于偏移位姿"的样本及对应的纠偏转角,把单条专家轨迹扩成一个覆盖偏移状态的训练分布。这正是后来 [DAgger](paper_ross2011_dagger.md) 与影子模式数据飞轮要解决的同一问题的早期手工版。 + +## 工程上真正要注意什么 +- 1989 年的算力下,整个网络在 Sun-3/160 上以约 2 Hz 推理,车速被压到步行级——端到端不是"快",而是"可行性证明"。 +- 合成偏移样本是成败关键:去掉它,模型一旦轻微偏离就发散,复合误差迅速放大(见 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md))。 +- 输入分辨率 30×32 不是性能瓶颈而是算力妥协,提示"端到端"早期受限于硬件而非思想。 +- 单一道路类型上训练的网络无法泛化到新路况,泛化问题从第一天起就存在。 + +## Bitter-Lesson 视角 +ALVINN 是 Bitter Lesson 在驾驶上提前三十年的预演:它放弃车道线检测、放弃显式几何,用一个端到端学习的函数取代手工管线。它在 1989 年"过早正确"——思想对了,但缺数据、缺算力、缺仿真闭环。后来主流转向模块化感知,直到深度学习与大规模数据回归,[NVIDIA PilotNet](../paper_transfuser.md) 一类工作才让 ALVINN 的赌注真正兑现。教训是:方法的对错与时代的算力供给耦合,"对的太早"和"错"在结果上长期难以区分。 + +## 接下来读什么 +- [Learning by Cheating](paper_lbc.md) — 端到端在仿真里的现代复兴,用特权教师解决数据覆盖 +- [DAgger](paper_ross2011_dagger.md) — 把 ALVINN 的"合成偏移样本"理论化为数据集聚合 +- [模仿学习范式](paradigm_imitation_learning.md) — ALVINN 所属的母范式 +- [可微端到端模仿](paradigm_differentiable_end_to_end_imitation.md) — 三十年后的兑现形态 +- [insight: 单一模仿学习无法从复合误差中恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) +- [insight: 交通场景的多模态性是内在的](insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md) diff --git a/docs/data/cards/extended/paper_bahdanau2014_attention.md b/docs/data/cards/extended/paper_bahdanau2014_attention.md new file mode 100644 index 0000000..eaffc87 --- /dev/null +++ b/docs/data/cards/extended/paper_bahdanau2014_attention.md @@ -0,0 +1,48 @@ +--- +id: paper:bahdanau2014_attention +title: "Neural
Machine Translation by Jointly Learning to Align and Translate" +title_zh: "Bahdanau 注意力(对齐即翻译:注意力机制的起点)" +kind: paper +tier: S +authors: [Bahdanau, D., Cho, K., Bengio, Y.] +venue: "ICLR 2015" +year: 2015 +topic: foundation_models +phase: prereq +deep_links: + - {label: "arXiv 1409.0473", url: "https://arxiv.org/abs/1409.0473"} +--- + +# Bahdanau 注意力(对齐即翻译:注意力机制的起点) + +> 这篇论文解决了 encoder-decoder 机器翻译的一个致命瓶颈:把整句源文压进一个定长向量,长句必然丢信息。它让 decoder 在生成每个目标词时回头"看"源句的所有隐状态,并用一个可学习的对齐打分动态加权——注意力机制由此诞生。后来的 Transformer 只是把这个想法推到极致。 + +## 一个最小公式 / Math anchor +$$ +e_{tj}=v_a^\top\tanh(W_a s_{t-1}+U_a h_j),\quad +\alpha_{tj}=\frac{\exp(e_{tj})}{\sum_k \exp(e_{tk})},\quad +c_t=\sum_{j}\alpha_{tj}\,h_j +$$ +生成第 $t$ 个目标词时,用上一解码状态 $s_{t-1}$ 与每个源词隐状态 $h_j$ 算一个对齐分数 $e_{tj}$(这里是加性/MLP 形式),softmax 归一成权重 $\alpha_{tj}$,再加权求和得到该步专属的上下文向量 $c_t$。decoder 不再受困于单一定长向量,而是每一步都"重新聚焦"源句的相关片段——$\alpha_{tj}$ 还顺带给出可读的软对齐矩阵。 + +## 它在图谱里的位置 +这是注意力的源头,[Transformer](../paper_vaswani2017.md) 的"Attention is All You Need"正是把它的加性对齐换成可并行的缩放点积、并堆成自注意力。它直接奠基了 [attention is typed entity communication](insight_attention_is_typed_entity_communication.md) 这条洞察,并经 Transformer 间接支撑 [BERT](paper_bert.md)、[DETR3D](paper_detr3d.md)、[BEVFormer](../paper_li2022bevformer.md) 等所有以注意力为核心的下游工作。相对状态空间序列模型 [Mamba](../paper_mamba.md),它代表"显式全局软检索"这一与之对照的范式。 + +## 架构 / 方法直觉 +基线是 RNN encoder-decoder:双向 RNN 把源句编成一串隐状态 $\{h_j\}$(每个 $h_j$ 兼含前后文),decoder 是另一个 RNN 逐词生成。关键改动只在 decoder:每生成一步,先用当前状态对所有 $h_j$ 打分、归一化、加权求和成 $c_t$,再把 $c_t$ 喂进解码。这相当于给 decoder 一个"软检索"接口——不必把整句记进一个向量,而是按需从源端取信息。副产品是注意力权重矩阵恰好对应词与词的软对齐,可视化后与人类的翻译对齐高度吻合,给了"模型在做什么"难得的可解释窗口。 + +## 工程上真正要注意什么 +- 这是 $O(T_{\text{src}}\times T_{\text{tgt}})$ 的全连接对齐,长序列上是计算与显存瓶颈——这正是后来稀疏/线性注意力以及 [Mamba](../paper_mamba.md) 等要优化的对象。 +- 加性注意力(MLP 打分)比点积更稳但更慢;Transformer 改用缩放点积并除以 $\sqrt{d}$ 来防止大维度下 softmax 饱和。 +- 注意力权重可视化诱人,但"高权重 = 因果解释"是常见误读;它只反映软对齐,不等于模型的真实推理依据。 +- 当年仍嵌在 RNN 里,本质是顺序计算;注意力真正释放并行红利要等到 Transformer 完全去掉循环结构。 + +## Bitter-Lesson 视角 +Bahdanau 
注意力用一个可学习、可微的检索机制替代了"必须把信息塞进固定容器"的人工约束,让模型自己决定每一步该看哪里。它的深远意义不在翻译质量本身,而在提供了一个极其通用、随数据与算力线性受益的信息路由原语——Transformer 把它从"decoder 看 encoder"扩展为"序列内所有位置互相看",进而成为支撑大模型 scaling 的核心组件。这是 bitter lesson 的典型剧本:一个通用机制取代专用结构,并随规模不断兑现红利。 + +## 接下来读什么 +- [Transformer](../paper_vaswani2017.md) — 把注意力推到极致、去掉循环 +- [insight: attention is typed entity communication](insight_attention_is_typed_entity_communication.md) — 注意力的统一解读 +- [BERT](paper_bert.md) — 注意力骨架上的掩码预训练 +- [Mamba](../paper_mamba.md) — 以状态空间替代全局注意力的对照路线 +- [DETR3D](paper_detr3d.md) — 注意力进入 3D 检测的实例 diff --git a/docs/data/cards/extended/paper_bert.md b/docs/data/cards/extended/paper_bert.md new file mode 100644 index 0000000..4183085 --- /dev/null +++ b/docs/data/cards/extended/paper_bert.md @@ -0,0 +1,55 @@ +--- +id: paper:bert +title: "BERT — Pre-training of Deep Bidirectional Transformers for Language Understanding" +title_zh: "BERT(掩码语言建模与双向预训练)" +kind: paper +tier: S +authors: [Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.] +venue: "NAACL 2019" +year: 2019 +topic: foundation_models +phase: prereq +deep_links: + - {label: "arXiv 1810.04805", url: "https://arxiv.org/abs/1810.04805"} +--- + +# BERT(掩码语言建模与双向预训练) + +> BERT 用"随机遮住一部分词、让模型根据双向上下文把它们填回来"这一极简自监督目标,在海量无标注文本上预训练出通用语言表示,然后只需在小数据上微调就能横扫一众下游任务。它把掩码预测确立为可规模化的自监督信号,这个思想随后被 MAE 等工作整体搬到视觉。 + +## 一个最小公式 / Math anchor +$$ +\mathcal{L}_{\text{MLM}}=-\sum_{i\in\mathcal{M}}\log P\big(x_i \mid x_{\setminus\mathcal{M}}\big),\qquad +P(x_i\mid \cdot)=\mathrm{softmax}\big(W\,h_i\big) +$$ +$\mathcal{M}$ 是被遮的 token 位置集合(约 15%),模型用未遮上下文 $x_{\setminus\mathcal{M}}$ 同时利用左右两侧信息预测每个被遮 token。$h_i$ 是 Transformer 编码器在位置 $i$ 的输出表示,过线性层 + softmax 得词表分布。关键在"双向":与自回归只看左侧不同,MLM 一次性条件于完整双向上下文,因此学到的表示对理解类任务更充分。 + +## 它在图谱里的位置 +BERT 的骨架是 [Transformer](../paper_vaswani2017.md) 的编码器;它把预训练目标从机器翻译换成掩码填空,是 [masked prediction yields self-supervised signal](insight_masked_prediction_yields_self_supervised_signal.md) 这条洞察的奠基实例,也是 [scaling data with 
self-supervision](paradigm_scaling_data_with_self_supervision.md) 的语言侧支柱。它与生成式自回归路线(GPT 系)形成"双向理解 vs 单向生成"的对照,并直接启发视觉侧的掩码建模(MAE/BEiT)。 + +## 架构 / 方法直觉 +纯 Transformer 编码器堆叠(base 12 层 / large 24 层)。输入由 token + 段落 + 位置三种 embedding 相加,句首插一个 `[CLS]` 汇聚整句表示。两个预训练任务:(1) MLM——把选中 15% 的 token 中 80% 换成 `[MASK]`、10% 换随机词、10% 不变(缓解预训练-微调间 `[MASK]` 不出现的分布失配);(2) NSP——判断两句是否相邻(后续 RoBERTa 证明 NSP 可去掉、把 MLM 训得更久更重要)。微调时几乎不动结构,只在顶部加一个任务头联合更新。核心直觉:双向上下文 + 大语料 + 自监督 = 通用语义表示。 + +## 对照:自回归 vs 掩码两种自监督 +| 维度 | 自回归 LM(GPT 系) | 掩码 LM(BERT) | +|---|---|---| +| 条件信息 | 仅左侧上下文 | 双向上下文 | +| 训练信号密度 | 每个位置都预测 | 仅约 15% 位置 | +| 天然能力 | 直接生成 | 理解/抽取,生成需改造 | +| 预训练-推理一致性 | 高(无 `[MASK]` 失配) | 需 80/10/10 技巧弥补 | + +## 工程上真正要注意什么 +- `[MASK]` 只在预训练出现、微调/推理不出现,是天然分布失配;80/10/10 混合是必要补丁,不能省。 +- WordPiece 子词切分让词表可控并处理未登录词;中文常退化为按字切分,分词策略直接影响效果。 +- NSP 信号弱,RoBERTa 去掉它、加大 batch 与训练步数反而更好——说明"任务设计 < 数据规模与训练充分度"。 +- 微调对学习率与 warmup 敏感,小数据上极易过拟合;预训练表示是真正的价值所在,下游头很轻。 + +## Bitter-Lesson 视角 +BERT 的力量不来自任务工程的精巧,而来自一个极通用、能吃下任意规模无标注文本的自监督目标。掩码填空不需要人工标签、不需要领域知识,纯靠数据与算力放大——这正是 bitter lesson 偏爱的形态。后来 RoBERTa 把性能进一步提升靠的不是更聪明的目标,而是更多数据、更长训练、更大 batch,再次印证"可规模化的简单目标"胜过"精巧但难扩展的设计"。 + +## 接下来读什么 +- [Transformer](../paper_vaswani2017.md) — BERT 的编码器骨架 +- [insight: masked prediction yields self-supervised signal](insight_masked_prediction_yields_self_supervised_signal.md) — 它奠基的核心洞察 +- [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) — 它所属的范式 +- [BYOL](paper_byol.md) — 视觉自监督的另一条(非掩码)路线 +- [Bahdanau attention](paper_bahdanau2014_attention.md) — 注意力机制的源头 diff --git a/docs/data/cards/extended/paper_byol.md b/docs/data/cards/extended/paper_byol.md new file mode 100644 index 0000000..883445e --- /dev/null +++ b/docs/data/cards/extended/paper_byol.md @@ -0,0 +1,57 @@ +--- +id: paper:byol +title: "BYOL — Bootstrap Your Own Latent: Self-Supervised Learning without Negatives" +title_zh: "BYOL(无负样本的自监督表示学习)" +kind: paper +tier: A +authors: [Grill, J.-B., et al.] 
+venue: "NeurIPS 2020" +year: 2020 +topic: ssl_vision +phase: core +deep_links: + - {label: "arXiv 2006.07733", url: "https://arxiv.org/abs/2006.07733"} +--- + +# BYOL(无负样本的自监督表示学习) + +> BYOL 打破了"对比学习必须有负样本才不会坍缩"的共识:它只用同一张图的两个增广视图,让一个 online 网络去预测一个缓慢更新的 target 网络的表示。没有负样本、没有显式的"推开不同样本"项,靠预测头的不对称性 + 动量目标,居然就学不出平凡解。 + +## 一个最小公式 / Math anchor +$$ +\mathcal{L}=\Big\lVert \overline{q_\theta(z_\theta)}-\overline{z'_\xi}\Big\rVert_2^2 +=2-2\cdot\frac{\langle q_\theta(z_\theta),\,z'_\xi\rangle}{\lVert q_\theta(z_\theta)\rVert_2\,\lVert z'_\xi\rVert_2}, +\qquad +\xi \leftarrow \tau\,\xi+(1-\tau)\,\theta +$$ +$z_\theta$ 是 online 网络对视图 1 的投影,$q_\theta$ 是只属于 online 侧的预测头;$z'_\xi$ 是 target 网络对视图 2 的投影。loss 是二者 L2 归一化后的均方误差(等价于负余弦相似度)。target 参数 $\xi$ 不接收梯度,只用动量 $\tau$ 缓慢跟随 online 参数。停梯度 + 预测头 + 动量目标三者共同阻止了"两边都输出常数"的坍缩。 + +## 它在图谱里的位置 +BYOL 是非对比(non-contrastive)自监督的代表,与依赖负样本的对比学习路线形成核心对照;它和 [VICReg](paper_vicreg.md) 同属"如何在无负样本下防坍缩"的不同解答(架构技巧 vs 显式正则)。它正面例证 [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md),思想上与 [DINOv2](../paper_dinov2.md) 的自蒸馏 + 动量教师一脉相承,是 [label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md) 所需"少标签预训练"的视觉范式来源。 + +## 架构 / 方法直觉 +两条孪生分支:online 分支 = encoder → projector → predictor,target 分支 = encoder → projector(无 predictor)。同一图像经两种随机增广分别送入两条分支,online 预测 target 的投影,loss 对称地算两次(交换视图角色)。只有 online 分支反传梯度,target 由 online 的指数滑动平均得到。为什么不坍缩是它最反直觉处:predictor 的存在打破两分支对称,使"恒定输出"不是稳定不动点;动量目标提供一个移动得足够慢、足够稳定的回归靶。后续分析(如 SimSiam)进一步指出停梯度是防坍缩的关键,动量并非绝对必需但能提稳。 + +## 对照:对比 vs 非对比自监督 +| 维度 | 对比(SimCLR/MoCo) | BYOL(非对比) | VICReg(非对比) | +|---|---|---|---| +| 防坍缩机制 | 负样本显式推开 | 停梯度 + predictor + 动量 | 方差/协方差正则 | +| 对 batch 大小 | 敏感(需大负样本池) | 不敏感 | 不敏感 | +| 关键脆弱点 | 负样本采样质量 | BatchNorm/超参微妙 | 三项权重平衡 | +| 是否需动量目标 | MoCo 需,SimCLR 不需 | 需(或停梯度替代) | 不需 | + +## 工程上真正要注意什么 +- 早期复现争议:有工作指出 projector 里的 BatchNorm 隐式引入了"批内对比"信息,移除后性能下降,提示坍缩防护可能比论文叙述更微妙——务必保留并对齐 BN 配置。 +- 对增广策略高度敏感:颜色抖动、随机裁剪的强度决定学到的不变性;增广太弱会学到捷径(如颜色直方图)。 +- 动量系数 $\tau$ 
通常取很接近 1(如 0.996 并随训练上调);过小 target 抖动、过大更新太慢。 +- 没有负样本意味着无法直接监控"表示是否在坍缩",需额外跟踪表示的奇异值谱/有效秩作为健康指标。 + +## Bitter-Lesson 视角 +BYOL 表面是个巧妙的架构技巧(predictor + 动量),但它服务的目标极其通用:从无标签图像中学可迁移表示。它移除了对比学习对大负样本池、对 batch 规模的工程负担,让自监督更易随数据扩展。沿 bitter-lesson 视角看,真正持久的不是"用不用负样本"这个具体设计,而是"用一个不需要人工标签、能吃下海量数据的目标来学表示"——BYOL 是这条主线上降低工程门槛的一步。 + +## 接下来读什么 +- [VICReg](paper_vicreg.md) — 用显式正则替代架构技巧的非对比方案 +- [DINOv2](../paper_dinov2.md) — 自蒸馏 + 动量教师的大规模延续 +- [BERT](paper_bert.md) — 语言侧的另一类自监督(掩码预测) +- [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) — 它所属的范式 +- [problem: label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md) — 少标签预训练的下游动机 diff --git a/docs/data/cards/extended/paper_centerpoint.md b/docs/data/cards/extended/paper_centerpoint.md new file mode 100644 index 0000000..a146e92 --- /dev/null +++ b/docs/data/cards/extended/paper_centerpoint.md @@ -0,0 +1,48 @@ +--- +id: paper:centerpoint +title: "CenterPoint — Center-based 3D Object Detection and Tracking" +title_zh: "CenterPoint(以中心点为基元的 3D 检测与跟踪)" +kind: paper +tier: A +authors: [Yin, T., Zhou, X., Krähenbühl, P.] 
+venue: "CVPR 2021" +year: 2021 +topic: scene_understanding +phase: core +deep_links: + - {label: "arXiv 2006.11275", url: "https://arxiv.org/abs/2006.11275"} + - {label: "tianweiy/CenterPoint", url: "https://github.com/tianweiy/CenterPoint"} +--- + +# CenterPoint(以中心点为基元的 3D 检测与跟踪) + +> CenterPoint 把 2D 的 CenterNet 思路搬到点云 BEV:不再用预设的 anchor 框,而是在 BEV 热力图上预测每个目标的中心点,再从中心回归尺寸、朝向、高度和速度。anchor-free 让它天然处理任意朝向的目标,速度回归又让它顺手做出近乎免费的多目标跟踪。 + +## 一个最小公式 / Math anchor +$$ +Y_{xyc}=\exp\!\Big(-\frac{(x-\tilde{x}_c)^2+(y-\tilde{y}_c)^2}{2\sigma_c^2}\Big),\qquad +\mathcal{L}_{hm}=-\frac{1}{N}\sum_{xyc}\!\begin{cases}(1-\hat{Y})^\alpha\log\hat{Y}, & Y{=}1\\[2pt] (1-Y)^\beta\hat{Y}^\alpha\log(1-\hat{Y}), & \text{else}\end{cases} +$$ +对每个类别 $c$ 在 BEV 网格上画一张高斯热力图 $Y$ 作为监督目标($\hat{Y}$ 是网络预测的热力图):目标中心处为峰、向外按 $\sigma_c$ 衰减。训练用 focal-style 的 penalty-reduced loss($Y=1$ 为正样本峰,邻域负样本按 $(1-Y)^\beta$ 降权)。推理时取热力图局部极大即检测中心,再在该位置查回归头拿框参数——彻底绕开 anchor 匹配与 NMS。 + +## 它在图谱里的位置 +CenterPoint 通常以 [VoxelNet](paper_voxelnet.md)(稀疏卷积变体)或 [PointPillars](paper_pointpillars.md) 作为 BEV backbone,只替换检测头,因此是这两者的自然演进。它与 anchor-based 的 [PointPillars](paper_pointpillars.md) 检测头形成 "anchor vs center" 的对照,思想上与图像域的 [DETR3D](paper_detr3d.md) 同属"摆脱手工 anchor/NMS 后处理"的潮流。它处于 [modular perception → planning pipeline](paradigm_modular_perception_to_planning_pipeline.md) 的检测+跟踪环节,其 BEV 输出可被 [BEVFusion](paper_bevfusion.md) 直接复用。 + +## 架构 / 方法直觉 +两阶段:第一阶段在 BEV 特征上预测每类中心热力图,并在每个中心位置回归亚像素偏移、$z$ 高度、$(w,l,h)$ 尺寸、朝向 $(\sin\theta,\cos\theta)$ 以及相邻两帧的速度 $(v_x,v_y)$。第二阶段(可选)从一阶段框的各面中心提取点特征做置信度与框精修。跟踪几乎免费:用预测速度把上一帧中心外推到当前帧,再做最近邻贪婪匹配即可——不需要单独的运动模型或学习式关联。anchor-free 的核心好处是朝向不受 anchor 角度档位约束,对行人、骑车人这类小而旋转多变的目标更友好。 + +## 工程上真正要注意什么 +- 高斯半径 $\sigma_c$ 要按目标尺寸自适应;半径过大相邻目标的峰会粘连,过小正样本太稀导致召回低。 +- 速度回归依赖相邻帧标注一致性;时间戳/自车运动补偿没对齐会让速度系统性偏移,进而拖垮基于速度外推的跟踪。 +- 中心可能落在没有任何点的空体素(目标被遮挡时),热力图回归对此比 anchor 更鲁棒,但回归头仍需足够感受野。 +- 多类共享 backbone、分头预测时,类间样本不均衡(车 vs 骑车人)需调每类 loss 权重,否则稀有类被淹没。 + +## Bitter-Lesson 视角 +CenterPoint 删掉了 3D 检测里两块最重的手工组件:anchor 先验(尺寸/角度的离散枚举)与 NMS 后处理。它把检测重述成"在连续 BEV
场上找峰值并回归属性",让网络直接学习目标的存在与几何,而不是去匹配人类预设的框模板。这与去 anchor、去 NMS 在 2D 视觉中的整体趋势同源——减少手工归纳偏置、让可学习的稠密预测接管,正是 bitter lesson 的体现。 + +## 接下来读什么 +- [VoxelNet](paper_voxelnet.md) — 它常用的体素 backbone 源头 +- [PointPillars](paper_pointpillars.md) — anchor-based 检测头的对照 +- [DETR3D](paper_detr3d.md) — 图像域的去 anchor/去 NMS 同潮流 +- [BEVFusion](paper_bevfusion.md) — 复用其 BEV 输出做多模态融合 +- [modular perception → planning pipeline](paradigm_modular_perception_to_planning_pipeline.md) — 检测+跟踪所在环节 diff --git a/docs/data/cards/extended/paper_codex.md b/docs/data/cards/extended/paper_codex.md new file mode 100644 index 0000000..297cb7a --- /dev/null +++ b/docs/data/cards/extended/paper_codex.md @@ -0,0 +1,48 @@ +--- +id: paper:codex +title: "Codex — Evaluating Large Language Models Trained on Code" +title_zh: "Codex(代码大模型:code-as-action 驾驶 agent 的底座)" +kind: paper +tier: B +authors: [Chen, M., Tworek, J., Jun, H., Yuan, Q., et al.] +venue: "arXiv 2021" +year: 2021 +topic: llm_agent +phase: core +deep_links: + - {label: "arXiv 2107.03374", url: "https://arxiv.org/abs/2107.03374"} + - {label: "HumanEval 数据集", url: "https://github.com/openai/human-eval"} +--- + +# Codex(代码大模型) + +> 在 GPT 的基础上用海量公开代码继续训练,得到一个能从自然语言描述生成可执行程序的模型,并配一个可验证的评测协议 HumanEval——直接拿单元测试判定"对不对"。它的真正意义不止于写代码:它把"程序"确立为一种**可被 LLM 流畅生成、又能被执行器精确求值的中间动作表征**,为 code-as-action 智能体奠基。 + +## 一个最小公式 / Math anchor +$$ +\text{pass@}k \;=\; \mathbb{E}_{\text{problems}}\left[\,1-\frac{\binom{n-c}{k}}{\binom{n}{k}}\,\right] +$$ +对每题采样 $n$ 个程序、其中 $c$ 个通过全部单元测试,则任取 $k$ 个里至少有一个正确的概率即 pass@$k$。这个指标的精髓在于**用执行结果而非文本相似度判分**——生成是概率的、模糊的,验证是离散的、确定的。这条"生成-执行-验证"回路正是 code-as-action agent 的内核:LLM 提出动作(代码),环境(解释器/工具)给出可信反馈。pass@$k$ 随 $k$ 上升说明"多采样 + 可验证选择"能把弱生成器抬成强求解器,是 [`move:add_entropy_bonus_to_encourage_exploration`](move_add_entropy_bonus_to_encourage_exploration.md) 在生成空间的对应直觉。 + +## 它在图谱里的位置 +Codex extends [GPT-3](../paper_gpt3.md) 到代码域,是 [`paradigm:llm_agent_paradigm`](paradigm_llm_agent_paradigm.md) 的关键基石。它使能 
[`insight:tool_use_extends_language_model_into_environment_grounded_actor`](insight_tool_use_extends_language_model_into_environment_grounded_actor.md)——程序是最通用的工具调用形式。在驾驶域,它是 [`paradigm:knowledge_driven_reflective_agent`](paradigm_knowledge_driven_reflective_agent.md) 把 LLM 决策落成可执行代码/API 调用的语言能力来源,下游连到 [DiLu](paper_2309.16292_dilu.md)、[Agent-Driver](paper_2311.10813_agent_driver.md) 这类 LLM 驾驶认知 agent,并与 [SayCan](paper_saycan.md) 形成对照(一个用代码、一个用价值函数把语言落到动作)。 + +## 架构 / 方法直觉 +架构上 Codex 不新:仍是 decoder-only GPT,只是在代码语料上继续预训练(外加面向 docstring→function 的有监督微调变体)。真正有方法论价值的是**评测范式的转变**:从困惑度/BLEU 转向"能不能跑通"。这把 LLM 从"看起来对"推向"做得对",并揭示一个反复出现的工程模式——让模型生成结构化、可执行、可验证的中间表征(程序、API 序列、约束),再交给确定性引擎执行与校验,从而把神经网络的灵活性与符号系统的精确性焊接起来。code-as-action 由此成为 grounding 的一条主路:动作不再是不可解释的连续向量,而是可读、可测、可回滚的代码。 + +## 工程上真正要注意什么 +- **执行沙箱是必需品**:让 LLM 生成并运行代码必须隔离,安全攸关系统里更要约束可调用的 API 白名单。 +- **pass@$k$ 靠多采样**:单次生成可靠性有限,工程上常配多采样 + 自验证/自一致投票挑选。 +- **幻觉 API**:模型会调用不存在的函数;在驾驶栈里要把可用工具(地图查询、轨迹优化器、约束检查器)显式约束在 prompt/语法里。 +- **延迟与实时性**:代码生成 + 执行不在 30Hz 控制环里,它属于慢系统/离线策略合成,需快慢分层。 +- **训练数据偏置**:代码语料的分布决定能力边界,冷门库/领域 DSL 表现差,驾驶 DSL 需专门适配。 + +## Bitter-Lesson 视角 +Codex 表面是"让模型用人类发明的符号系统(编程语言)",似乎在加结构。但它其实顺应 Bitter Lesson 的更深一层:**与其手写求解器,不如学一个能生成求解器的通用模型**。代码作为动作的好处不是引入领域规则,而是提供一个**可执行、可验证、可组合**的通用动作空间——验证由廉价的解释器免费提供,正契合"用搜索/计算换性能"。对驾驶而言,它提示一条路:LLM 不必内化所有数值与几何能力,可以通过工具调用 / function calling 调用专用组件,把精确性外包给确定性引擎,把灵活性留给学习。 + +## 接下来读什么 +- [GPT-3](../paper_gpt3.md) — Codex 的母模型与自回归底座 +- [SayCan](paper_saycan.md) — 另一条把语言落到动作的路(价值函数 grounding) +- [DiLu](paper_2309.16292_dilu.md) — 知识驱动的 LLM 驾驶决策 agent +- [Agent-Driver](paper_2311.10813_agent_driver.md) — LLM + 工具的驾驶认知栈 +- [insight: 工具调用把语言模型延展为环境接地的行动者](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) diff --git a/docs/data/cards/extended/paper_d4rl.md b/docs/data/cards/extended/paper_d4rl.md new file mode 100644 index 0000000..b7549bf --- /dev/null +++ b/docs/data/cards/extended/paper_d4rl.md @@ -0,0 +1,51 @@ +--- +id: paper:d4rl +title: "D4RL — 
Datasets for Deep Data-Driven Reinforcement Learning" +title_zh: "D4RL(离线强化学习基准数据集套件)" +kind: paper +tier: A +authors: [Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.] +venue: "arXiv 2020" +year: 2020 +topic: evaluation_benchmark +phase: core +deep_links: + - {label: "arXiv 2004.07219", url: "https://arxiv.org/abs/2004.07219"} + - {label: "rail-berkeley/d4rl", url: "https://github.com/rail-berkeley/d4rl"} +--- + +# D4RL(离线强化学习基准数据集套件) + +> 离线 RL 之所以一度难以横向比较,是因为每篇论文都自造数据集。D4RL 把"数据集 + 环境 + 归一化打分"标准化成一套公共基准,并刻意构造了一批专门暴露离线 RL 弱点的数据分布(次优、窄覆盖、多策略混合、人为制造的 stitching 需求),让"算法是否真的能从次优数据里缝出好策略"变成可度量的事。 + +## 一个最小公式 / Math anchor +$$ +\text{score}_{\text{norm}} \;=\; 100 \times \frac{J(\pi) - J_{\text{random}}}{J_{\text{expert}} - J_{\text{random}}} +$$ +D4RL 的归一化分数把每个任务的策略回报 $J(\pi)$ 线性映射到 $[0,100]$:随机策略约 0 分,专家策略约 100 分。这一步看似平凡却是关键——不同任务回报量级相差几个数量级,不归一化就无法在一张表里聚合比较。它把"绝对回报"这个不可比的量,转成"你离专家还有多远"这个可跨任务平均的量。 + +## 它在图谱里的位置 +D4RL 是 [离线 RL 范式](paradigm_offline_rl.md) 的实验地基:[CQL](paper_cql.md)、[IQL](paper_iql.md)、[Decision Transformer](paper_decision_transformer.md) 等几乎所有离线 RL 方法都在它上面报数。它把 [洞察:离线 RL 本质是带约束的动态规划](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 变成可证伪的——通过 medium-replay、medium-expert 这类混合数据集,直接考验算法能否做轨迹缝合(stitching)。它与 [Levine 离线 RL 综述](paper_levine_offline_rl_tutorial.md) 同期、同作者团队,一个给理论框架,一个给度量标准。 + +## 架构 / 方法直觉 +D4RL 不是模型而是数据集集合。核心设计是用"数据生成策略的质量谱"来制造难度梯度: +- **random / medium / expert**:分别由随机策略、半训练策略、收敛策略采集,考验算法在不同数据质量下的表现。 +- **medium-replay**:训练到中等水平时整个 replay buffer,覆盖广但充满次优轨迹——这是检验 stitching 能力的关键集。 +- **medium-expert**:把次优与专家数据混合,逼算法分辨"该模仿谁"。 +- 任务覆盖 MuJoCo 运动控制、AntMaze 长程导航(专门考验稀疏奖励下的轨迹缝合)、Adroit 灵巧手、Kitchen 多阶段操作、以及含真实人类示教与窄分布的 CARLA/Flow 等。AntMaze 因奖励极稀疏 + 强依赖缝合,成为区分"真离线 RL"与"伪装的 BC"的试金石。 + +## 工程上真正要注意什么 +- 归一化分常被误读:100 不是上限,缝合得当的算法在 medium-replay 上可超过采集数据的专家水平。 +- D4RL v0 与后续版本的数据集存在差异,跨论文比较务必核对版本与 commit,否则数字不可比。 +- AntMaze 的奖励与 done 信号处理(稀疏 0/1、是否减 1 偏移)对结果影响极大,是复现踩坑高发区。 +- 原始仓库依赖较老的 MuJoCo / mujoco-py,现代复现多迁移到 Minari / d4rl 
重打包版本;启动前先确认数据加载链路。 + +## Bitter-Lesson 视角 +D4RL 的长期价值不在某个具体数据集,而在它把"离线 RL 进展"钉成一个可累积、可对比的标量。基准的存在让社区能用统一尺子衡量谁真的有进步——这本身就是 bitter lesson 的前提:你得先能可靠测量,才能让"更多数据 + 更多算力"这条主线压过精巧的手工技巧。但它也有反噬:当一个固定基准被刷到接近饱和,算法会过拟合到基准的特定分布(尤其 stitching 任务的人为结构),与真实世界长尾分布脱节——这正是驾驶领域转向闭环、数据引擎式评测的动因。 + +## 接下来读什么 +- [离线 RL 范式](paradigm_offline_rl.md) — D4RL 服务的问题域 +- [CQL](paper_cql.md) / [IQL](paper_iql.md) — 在 D4RL 上确立 SOTA 的两条主线 +- [Decision Transformer](paper_decision_transformer.md) — 序列建模派在 D4RL 上的对照 +- [Levine 离线 RL 综述](paper_levine_offline_rl_tutorial.md) — 配套的理论视角 +- [洞察:离线 RL 本质是带约束的动态规划](insight_offline_rl_is_actually_constrained_dynamic_programming.md) diff --git a/docs/data/cards/extended/paper_drivegpt.md b/docs/data/cards/extended/paper_drivegpt.md new file mode 100644 index 0000000..f87558a --- /dev/null +++ b/docs/data/cards/extended/paper_drivegpt.md @@ -0,0 +1,49 @@ +--- +id: paper:drivegpt +title: "DriveGPT — Autoregressive Driving Policy as a Foundation Model" +title_zh: "DriveGPT(GPT 式自回归驾驶策略 / 自回归驾驶基座范式)" +kind: paper +tier: B +authors: [多家工作(范式综合)] +venue: "2023– 自回归驾驶基座线" +year: 2023 +topic: e2e_ad +phase: frontier +deep_links: + - {label: "GPT-3 — 自回归生成母版", url: "https://arxiv.org/abs/2005.14165"} + - {label: "Decision Transformer — RL 即序列建模", url: "https://arxiv.org/abs/2106.01345"} +--- + +# DriveGPT(自回归驾驶基座范式) + +> 把驾驶行为当语言来生成:将自车与他车的未来运动**离散化成 token**,用一个 GPT 式因果 transformer 在海量驾驶日志上做下一 token 预测,于是规划变成"续写"。这条线没有单一规范论文,它是一个范式——把 LLM 的训练配方(大规模自监督、自回归生成、采样得到多样性)整体搬到驾驶轨迹/行为生成上,以期复现 scaling 带来的涌现与零样本泛化。 + +## 一个最小公式 / Math anchor +$$ +p_\theta\big(\tau\big)=\prod_{t=1}^{T}p_\theta\big(z_t\mid z_{<t}\big) +$$ +> 传统模仿路线要么 BC(在动作上做监督,复合误差),要么 IRL(先恢复 reward 再 RL,外层套内层极其昂贵)。GAIL 的转折是:用占用度量(occupancy measure)把"匹配专家的状态-动作分布"写成一个 GAN 目标,绕开显式 reward,直接训出一个会闭环行驶的策略。 + +## 一个最小公式 / Math anchor +$$ +\min_{\pi}\ \max_{D\in(0,1)^{S\times A}}\ \mathbb{E}_{\pi}\big[\log D(s,a)\big]+\mathbb{E}_{\pi_E}\big[\log(1-D(s,a))\big]-\lambda H(\pi) +$$ +判别器 $D$ 学着把策略 $\pi$ 产生的 $(s,a)$ 判成"假"、专家 $\pi_E$
的判成"真";策略 $\pi$ 则用 $-\log D(s,a)$ 当 reward 去骗过 $D$。当二者达到纳什均衡时,$\pi$ 的占用度量 $\rho_\pi$ 与专家 $\rho_{\pi_E}$ 在 Jensen-Shannon 散度意义下重合——即分布匹配,而不是逐动作匹配。$-\lambda H(\pi)$ 是最大熵正则,保证策略不过度确定化。 + +## 它在图谱里的位置 +GAIL 是 [Inverse RL](paper_irl.md) 路线的"跳过 reward"分支:Ho & Ermon 在论文里证明 IRL 是占用匹配的对偶问题,而 GAIL 把内层 RL 与外层 IRL 折叠成一个对抗博弈。它直接缓解 [BC 复合误差问题](problem_behavior_cloning_compounds_errors_over_time.md),因为策略是在自己访问的状态分布上被打分的(on-policy),而非只见专家状态。它体现 [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md),并以 [最大熵把策略与价值缝成对偶](insight_max_entropy_closes_policy_value_duality.md) 为理论底座。下游 [AIRL](paper_airl.md) 进一步把判别器结构化以恢复可迁移 reward。 + +## 架构 / 方法直觉 +两个网络交替更新:判别器 $D$ 是普通二分类器(交叉熵损失),策略 $\pi$ 用 [TRPO](paper_schulman2015_trpo.md) 或 [PPO](../paper_schulman2017_ppo.md) 这类 on-policy 策略梯度更新,reward 信号取 $-\log D(s,a)$。关键设计是把模仿目标转写成"分布匹配":BC 在 $d_{\pi_E}$ 上做监督,GAIL 在 $d_\pi$ 上采样并要求其分布逼近 $d_{\pi_E}$,所以策略被迫在自己会犯错的状态里也学会回到专家分布——这正是 BC 缺失的"纠偏"信号。熵正则来自 [给策略加熵奖励鼓励探索](move_add_entropy_bonus_to_encourage_exploration.md)。 + +## 工程上真正要注意什么 +- 对抗训练不稳定:$D$ 太强会让 reward 信号饱和(按上式的符号约定,策略样本上 $D\to 1$、$-\log D\to 0$,奖励失去区分度),策略梯度的信噪比急剧恶化;通常要给 $D$ 限速(更小学习率、梯度惩罚或 spectral norm)。 +- reward 非平稳:$-\log D$ 随 $D$ 更新而漂移,给 critic 的价值估计带来移动靶问题,GAE 的 $\lambda$ 要调得更保守。 +- 样本效率低:每轮策略更新都要新的 on-policy rollout,环境交互成本高;这在真车上几乎不可行,只能在仿真器里跑。 +- 驾驶场景下,$(s,a)$ 的状态表示极敏感——若 $s$ 含有可被 $D$ 用作"作弊特征"的量(如时间戳、车辆 id),$D$ 会学到捷径而非真正的行为差异。 + +## Bitter-Lesson 视角 +GAIL 站在"少人工先验"一侧:它不要求工程师手写 reward 特征(这是经典 IRL 的核心负担),把先验交给一个可学习的判别器。但它仍依赖 on-policy RL 的算力换样本,且对抗优化本身是一种结构性脆弱。比起后来纯靠规模的 BC(大数据 + 多模态损失),GAIL 更像"用算力把分布匹配做对"的中间形态——当仿真交互便宜时它划算,当只有离线日志时它让位给离线 RL。 + +## 接下来读什么 +- [AIRL](paper_airl.md) — 把判别器结构化以恢复可迁移 reward +- [Inverse RL 总览](paper_irl.md) — GAIL 跳过的那个 reward 恢复问题 +- [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) — 熵正则的理论源头 +- [Ng & Russell 经典 IRL](paper_ng_russell_2000_irl.md) — reward 不可辨识性的原始论证 +- [DAgger](../paper_ross2011_dagger.md) — 用数据聚合而非对抗来治复合误差的对照 +- [TRPO](paper_schulman2015_trpo.md) — GAIL 默认的内层策略优化器 +-
[人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) +- [最大熵把策略与价值缝成对偶](insight_max_entropy_closes_policy_value_duality.md) diff --git a/docs/data/cards/extended/paper_gameformer.md b/docs/data/cards/extended/paper_gameformer.md new file mode 100644 index 0000000..2545843 --- /dev/null +++ b/docs/data/cards/extended/paper_gameformer.md @@ -0,0 +1,48 @@ +--- +id: paper:gameformer +title: "GameFormer — Game-theoretic Modeling and Learning of Transformer-based Interactive Prediction and Planning" +title_zh: "GameFormer(用层级 transformer 做博弈式交互预测与规划)" +kind: paper +tier: A +authors: [Huang, Z., Liu, H., Lv, C.] +venue: "ICCV 2023" +year: 2023 +topic: planning +phase: core +deep_links: + - {label: "arXiv 2303.05760", url: "https://arxiv.org/abs/2303.05760"} + - {label: "MCZhi/GameFormer 代码", url: "https://github.com/MCZhi/GameFormer"} +--- + +# GameFormer(用层级 transformer 做博弈式交互) + +> 密集交通里他车的未来不是独立给定的背景,而是**对自车决策做出反应的对手**:你变道,旁车可能让也可能逼。GameFormer 把这件事形式化为一个**层级博弈**——用一个 transformer 解码器迭代地让每个 agent 在"上一层所有其他 agent 的预测"条件下更新自己的预测,逼近博弈的层级理性(level-$k$)均衡,从而把交互预测与自车规划统一在同一可微网络里。 + +## 一个最小公式 / Math anchor +$$ +\hat Y_i^{(k)}=\mathcal{D}\Big(q_i,\ \mathcal{E}(\text{scene}),\ \big\{\hat Y_j^{(k-1)}\big\}_{j\neq i}\Big),\qquad k=1,\dots,K +$$ +第 $i$ 个 agent 在第 $k$ 层的预测 $\hat Y_i^{(k)}$,是在**所有其他 agent 上一层预测** $\{\hat Y_j^{(k-1)}\}_{j\neq i}$ 的条件下生成的——这正是博弈论里 level-$k$ 推理的可微化:level-0 是无交互的边际预测,每加一层就让每个 agent 多想一步"别人会怎么回应我"。训练损失叠加每一层的多模态轨迹回归(高斯混合)与分类,自车规划是把 ego 当作其中一个被约束 agent 从最终层读出。$K$ 控制推理深度,$K{\to}\infty$ 在理想下收敛到不动点(近似纳什/层级均衡)。 + +## 它在图谱里的位置 +GameFormer 直面 [`problem:multi_agent_interaction_modeling_in_dense_traffic`](problem_multi_agent_interaction_modeling_in_dense_traffic.md):在密集交互里"预测他车"和"规划自车"不可分离。它属于 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md) 的交互增强版,与 [TCP](paper_tcp_carla.md)、[Learning by Cheating](paper_lbc.md) 这类把他车当背景的端到端方法形成对照。其层级迭代解码 manifest 了 
[`insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes`](insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md)——每个 agent 每层都输出多模态高斯混合,拒绝把两种合理反应平均掉。它在 nuPlan/WOMD 这类交互密集 benchmark 上展示优势。 + +## 架构 / 方法直觉 +编码器把地图 polyline、各 agent 历史用 transformer 编成场景上下文。解码器是核心:维护一组 agent query,在 $K$ 层里反复做"自更新 + 跨 agent 注意"——第 $k$ 层每个 query 既看场景,又 cross-attend 到第 $k{-}1$ 层其他所有 agent 的预测轨迹。这把"我预测你、你预测我"的循环依赖展开成有限层迭代,避免了显式求解博弈均衡的不可微与组合爆炸。自车(ego)作为一个特殊 agent,其最终层输出即规划;可再叠加一个细化模块把可行性/代价约束注入。多模态用 GMM 表达,每个 agent 同时保留若干意图分支。 + +## 工程上真正要注意什么 +- **层数 $K$ 是计算-理性权衡**:$K$ 太小退化成无交互边际预测,太大显存与延迟陡增、且收益递减;典型只取个位数层。 +- **多模态退化(mode collapse)**:GMM 分支容易塌到单一主模态,需要 winner-take-all/最近 mode 匹配的训练技巧维持多样性,否则博弈层级也只在一个模态上推理。 +- **ego 约束注入**:把规划当成"被约束的 agent"读出,需要额外保证可行性(动力学、碰撞),纯预测损失不保证可执行。 +- **闭环 vs 开环 gap 仍在**:交互建模改善的是预测/开环交互指标,闭环里他车策略与训练分布不一致时(reactive sim)优势会缩水。 +- **数据需要真交互**:在以巡航为主、交互稀疏的日志上训练,层级博弈学不到东西,得专门挑路口/汇流/博弈场景。 + +## Bitter-Lesson 视角 +GameFormer 引入了显式的博弈结构(level-$k$ 迭代),看似与"少加先验"相悖。但它加的是**计算结构而非领域规则**——它没有手写"路口该让谁",只是给网络一个"反复让 agent 互相条件化"的可微算子,让交互理性从数据里学出来。这是 Bitter Lesson 容许的那类结构:把博弈这一普适计算模式编码成架构归纳偏置,而把具体均衡留给学习。相比之下,把每条交规写成硬约束才是会被苦涩教训冲走的做法。长期看,随着大规模轨迹基座模型出现,这种层级交互或许会作为涌现行为被吸收进统一序列模型。 + +## 接下来读什么 +- [TCP](paper_tcp_carla.md) — 把他车当背景的端到端规划对照 +- [Learning by Cheating](paper_lbc.md) — 特权信息下的交互(教师可见他车真值) +- [problem: 密集交通中的多智能体交互建模](problem_multi_agent_interaction_modeling_in_dense_traffic.md) +- [insight: 交通场景的多模态性是内在的](insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md) +- [insight: 把轨迹 token 化让规划借用语言模型工具](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md) diff --git a/docs/data/cards/extended/paper_irl.md b/docs/data/cards/extended/paper_irl.md new file mode 100644 index 0000000..d82b92f --- /dev/null +++ b/docs/data/cards/extended/paper_irl.md @@ -0,0 +1,50 @@ +--- +id: paper:irl +title: "Inverse Reinforcement Learning — The Problem" +title_zh: "逆强化学习问题(总览)" +kind: paper +tier: B +authors: [—] +venue: "—" +year: 2000 +topic: 
rl_foundations +phase: core +deep_links: + - {label: "Ng & Russell 2000", url: "https://ai.stanford.edu/~ang/papers/icml00-irl.pdf"} +--- + +# 逆强化学习问题(总览) + +> 强化学习是"给定 reward,求最优行为";逆强化学习把箭头反过来——"给定专家行为,反推它在优化什么 reward"。这看似只是换个方向,实则触及一个更深的诉求:当 reward 难以手写(什么叫"开得好"?)时,从示教里把意图本身学出来。 + +## 一个最小公式 / Math anchor +$$ +\text{RL:}\quad \pi^*=\arg\max_\pi\ \mathbb{E}_\pi\Big[\textstyle\sum_t \gamma^t R(s_t,a_t)\Big] +\qquad\Longleftrightarrow\qquad +\text{IRL:}\quad \text{find } R \ \text{s.t. } \pi_E\in\arg\max_\pi\ \mathbb{E}_\pi\Big[\textstyle\sum_t \gamma^t R(s_t,a_t)\Big] +$$ +左边是 forward RL:$R$ 已知,求 $\pi^*$。右边是 IRL:观测到专家 $\pi_E$(或其轨迹),反求一个让 $\pi_E$ 成为最优的 $R$。这个反问题天生不适定——$R\equiv c$ 这种平凡解、以及无穷多 reward 都能解释同一行为,所以全部 IRL 算法的核心都在于"用什么额外原则从这族解里挑一个"。 + +## 它在图谱里的位置 +这是一张把整条线串起来的总览卡。问题由 [Ng & Russell 2000](paper_ng_russell_2000_irl.md) 形式化并暴露不可辨识;[Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) 用最大熵原则消解歧义;[GAIL](paper_gail.md) 把"恢复 reward"绕成"匹配占用度量"的对抗博弈;[AIRL](paper_airl.md) 又把 reward 从判别器里结构化地拿回来。它与纯 [模仿学习](paradigm_imitation_learning.md) 形成对照:模仿复制行为,IRL 恢复意图,因而原则上能跨动力学迁移。它正是 [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) 这条洞见想做的事。 + +## 架构 / 方法直觉 +所有 IRL 方法共享一个双层结构:外层在 reward 空间搜索,内层(部分或近似地)解一个 forward RL/规划来评估候选 reward 下专家是否最优。它们的分野只在两点——(1) 用什么准则在不可辨识的解族里挑 reward(最大间隔、最大熵、对抗匹配);(2) reward 用什么表示(手写线性特征 vs 神经网络)。相对于直接 BC,IRL 多绕一圈但买到一个可解释、可重用的 reward:换了车辆动力学或地图,只要 reward 仍成立,重新跑一遍 forward RL 即可,而 BC 学到的策略则要从头再学。 + +## 工程上真正要注意什么 +- 双层优化昂贵:每评估一个候选 reward 都要(近似)解一次 RL,这是 IRL 长期难以规模化的根因。 +- 专家次优性:真实示教带噪、非严格最优,硬性"专家最优"假设会被违背,故现代方法几乎都走最大熵这类软化路线。 +- reward 不可辨识意味着评估困难——学到的 reward "对不对"没有 ground truth,通常只能间接用"重训出的策略好不好"来衡量。 +- 驾驶里 IRL 的真正用武之地往往不是控车,而是行为预测:把他车/行人的目的地当 reward,反推其可能轨迹分布。 + +## Bitter-Lesson 视角 +IRL 的整段历史就是 bitter lesson 的微缩剧:早期靠手写特征 + 强假设,逐步被概率原则与可学习判别器取代。但 IRL 也守住了一块规模化难以直接吃下的高地——当目标不是"模仿好"而是"理解为什么"、需要可迁移的意图表示时,恢复 reward 仍有不可替代的价值。算力增长改变的是怎么解,而非这个反问题本身值不值得问。 + +## 接下来读什么 +- [Ng & Russell 经典 IRL](paper_ng_russell_2000_irl.md) 
— 问题的形式化与不可辨识性 +- [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) — 用最大熵挑唯一解 +- [GAIL](paper_gail.md) — 跳过显式 reward 的对抗模仿 +- [AIRL](paper_airl.md) — 恢复可迁移 reward +- [模仿学习范式](paradigm_imitation_learning.md) — 复制行为 vs 恢复意图的对照 +- [偏好学习](paper_preference_learning.md) — 用比较信号恢复 reward +- [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) diff --git a/docs/data/cards/extended/paper_krizhevsky2012.md b/docs/data/cards/extended/paper_krizhevsky2012.md new file mode 100644 index 0000000..8458cc4 --- /dev/null +++ b/docs/data/cards/extended/paper_krizhevsky2012.md @@ -0,0 +1,48 @@ +--- +id: paper:krizhevsky2012 +title: "AlexNet — ImageNet Classification with Deep Convolutional Neural Networks" +title_zh: "AlexNet(ImageNet 时刻:深度卷积网络的起点)" +kind: paper +tier: S +authors: [Krizhevsky, A., Sutskever, I., Hinton, G. E.] +venue: "NeurIPS 2012" +year: 2012 +topic: ssl_vision +phase: prereq +deep_links: + - {label: "NeurIPS 2012 paper", url: "https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html"} +--- + +# AlexNet(ImageNet 时刻:深度卷积网络的起点) + +> AlexNet 在 2012 年的 ImageNet 大赛上以远超第二名的优势夺冠,第一次让"大数据 + GPU + 深度卷积网络"这条组合在公开基准上压倒性击败手工特征流水线。它本身的技术细节会很快过时,但它点燃的范式转移——表示学习取代特征工程——是后续整个深度学习浪潮的起点。 + +## 一个最小公式 / Math anchor +$$ +\mathrm{ReLU}(x)=\max(0,x),\qquad +b^i_{x,y}=a^i_{x,y}\Big/\Big(k+\alpha\!\!\sum_{j=\max(0,i-n/2)}^{\min(N-1,i+n/2)}\!\!(a^j_{x,y})^2\Big)^{\beta} +$$ +左边是 ReLU——用非饱和激活替代 sigmoid/tanh,让梯度在深层不再指数衰减,是当年能训起 8 层网络的关键之一。右边是局部响应归一化(LRN),在相邻通道间做侧向抑制。今天 ReLU 仍是主力,LRN 已被 BatchNorm 取代——这恰好说明:哪些设计是本质的、哪些是时代权宜,要靠后续历史来分辨。 + +## 它在图谱里的位置 +AlexNet 是整条视觉表示学习脉络的源头:[ResNet](paper_he2015_resnet.md) 用残差连接把"更深更好"推到极致,[ViT](../paper_vit.md) 则换掉卷积归纳偏置改用注意力,[DINOv2](../paper_dinov2.md)/[DINOv3](../paper_2508.10104_dinov3.md) 把它的"监督预训练 + 迁移"升级为大规模自监督。它正面例证了 [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) 的前半句(数据+算力 scaling),是 [foundation pretraining decouples data from 
task](insight_foundation_pretraining_decouples_data_from_task.md) 这条洞察的历史奠基。 + +## 架构 / 方法直觉 +五个卷积层加三个全连接层,因单卡显存不足而把网络拆到两块 GTX 580 GPU 上并行(早期的模型并行)。三个真正可复用的工程要素:(1) ReLU——加速收敛、缓解梯度消失;(2) Dropout——在全连接层随机置零,抑制过拟合;(3) 数据增广——随机裁剪、水平翻转、PCA 颜色抖动,廉价地扩充等效样本量。它的胜利不在于某个巧妙模块,而在于把"足够大的网络 + 足够多的数据 + 足够强的算力 + 防过拟合手段"凑齐,让端到端学到的层级特征全面碾压 SIFT/HOG + SVM 这类手工管线。 + +## 工程上真正要注意什么 +- 历史教训而非现役实践:LRN 已废弃,Dropout 在卷积层也基本被 BatchNorm/数据增广取代;不要照搬其具体超参。 +- 它揭示的真实瓶颈是数据与算力,不是架构巧思——这一判断在十年后被 scaling laws 反复印证。 +- 双 GPU 拆分是当年显存所迫的工程妥协,今天已无必要,但"模型放不下单卡就切分"的思路在大模型时代以张量/流水线并行的形式重生。 +- 监督预训练 + 下游微调的迁移范式由它带火,但其对大量人工标签的依赖正是后来自监督([BYOL](paper_byol.md)、[VICReg](paper_vicreg.md))要摆脱的东西。 + +## Bitter-Lesson 视角 +AlexNet 是 bitter lesson 的标志性证据:在它之前,计算机视觉的主流是设计越来越精巧的手工特征;它用一个让网络自己从原始像素学特征的通用方法,加上当时刚够用的 GPU 算力,一举终结了那条路线。Sutton 后来总结的"通用的、能随算力扩展的方法终将胜出",在这里得到第一个大规模、公开、不容辩驳的实证。它不是终点而是开关——打开后,整个领域转向"如何让方法更通用、更可扩展"。 + +## 接下来读什么 +- [ResNet](paper_he2015_resnet.md) — 把"更深"做到可训练的残差革命 +- [ViT](../paper_vit.md) — 用注意力替代卷积归纳偏置 +- [BYOL](paper_byol.md) — 摆脱标签的自监督表示学习 +- [DINOv2](../paper_dinov2.md) — 大规模自监督视觉基座 +- [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) — 它开启的 scaling 范式 +- [insight: foundation pretraining decouples data from task](insight_foundation_pretraining_decouples_data_from_task.md) diff --git a/docs/data/cards/extended/paper_lbc.md b/docs/data/cards/extended/paper_lbc.md new file mode 100644 index 0000000..be323b5 --- /dev/null +++ b/docs/data/cards/extended/paper_lbc.md @@ -0,0 +1,51 @@ +--- +id: paper:lbc +title: "Learning by Cheating" +title_zh: "Learning by Cheating(先作弊再蒸馏的端到端驾驶)" +kind: paper +tier: A +authors: [Chen, D., Zhou, B., Koltun, V., Krähenbühl, P.] 
+venue: "CoRL 2019" +year: 2019 +topic: e2e_ad +phase: core +deep_links: + - {label: "arXiv 1912.12294", url: "https://arxiv.org/abs/1912.12294"} + - {label: "dotchen/LearningByCheating 代码", url: "https://github.com/dotchen/LearningByCheating"} +--- + +# Learning by Cheating(先作弊再蒸馏) + +> 把"从像素学开车"这一难问题拆成两步:先训练一个能直接看到仿真器真值地图与他车状态的**特权教师**(privileged agent),它不必解决感知,因此能学到近乎完美的驾驶策略;再让一个只吃相机像素的**学生**(sensorimotor agent)去蒸馏教师的逐状态动作。在 CARLA 上,这种"先作弊、再蒸馏"显著优于直接端到端 BC。 + +## 一个最小公式 / Math anchor +$$ +\underbrace{\pi^{\text{priv}}=\arg\min_{\pi}\ \mathbb{E}_{s\sim\mathcal{D}}\big[\ell(\pi(\phi(s)),a^\star(s))\big]}_{\text{阶段一:特权教师}} +\quad\Longrightarrow\quad +\underbrace{\pi^{\text{sens}}=\arg\min_{\pi}\ \mathbb{E}_{s\sim d_{\pi^{\text{sens}}}}\big[\ \mathrm{KL}\big(\pi^{\text{priv}}(\phi(s))\,\|\,\pi(I(s))\big)\big]}_{\text{阶段二:感知学生}} +$$ +$\phi(s)$ 是仿真器内的特权状态(鸟瞰真值布局、他车位姿),$I(s)$ 是相机像素。关键在阶段二的期望取在**学生自己访问的状态分布** $d_{\pi^{\text{sens}}}$ 上:学生在任意状态都能向"在场"的教师索取监督(教师可在 off-policy 状态上即时给出动作),这等价于一次取之不尽的在线 [DAgger](paper_ross2011_dagger.md),从而把协变量偏移压住。教师还输出**所有高层指令(直行/左转/右转/跟随)下的动作**,学生因此在一帧里就拿到多分支监督。 + +## 它在图谱里的位置 +LBC 是 [`paradigm:imitation_learning`](paradigm_imitation_learning.md) 在仿真闭环里的现代复兴,也是 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md) 的一个里程碑实现。它把 [ALVINN](paper_alvinn.md) 三十年前手工合成偏移样本要解决的"覆盖与纠偏"问题,换成"特权教师任意状态可标注"的优雅工程解。它直接 validate 了 [`paper:carla_leaderboard`](paper_carla_leaderboard.md) 作为闭环评测台,并与 [TransFuser](../paper_transfuser.md)、[TCP](paper_tcp_carla.md) 同属 CARLA 强 baseline 谱系。蒸馏思想上它与 [GameFormer](paper_gameformer.md) 的层级师生回路遥相呼应。 + +## 架构 / 方法直觉 +两个 agent 共享一套高层指令接口。**教师**输入鸟瞰真值栅格(道路、车道、他车、红绿灯),输出未来若干 waypoint,因为没有感知噪声,它能学得极稳。**学生**是相机 CNN,输出同样的 waypoint,再交给一个简单的 PID/横纵向控制器跟踪。蒸馏不是只对最终选定动作做监督,而是对教师在**每个可能指令分支**上的输出都做监督——这把"白盒教师"当成一个可在任意状态、任意条件下查询的标注器,信息密度远高于一条 on-policy 专家轨迹。学生因此既见到了正常行驶,也见到了自己偏离后教师给出的纠偏动作。 + +## 工程上真正要注意什么 +- **教师必须真的"作弊"得彻底**:特权输入要是仿真器真值,任何感知缺陷漏进教师都会成为蒸馏的天花板。 +- **on-policy 蒸馏 vs 
off-policy**:在学生自己 rollout 的状态上向教师索标注,是性能的关键来源;退化成纯离线模仿就丢掉了 DAgger 红利。 +- **控制器解耦**:waypoint→控制交给经典控制器,学生只学几何意图,避免网络去拟合执行器动力学(这也是与 [TCP](paper_tcp_carla.md) 直接融合控制分支的设计分歧点)。 +- **sim-only 的边界**:方法依赖"教师能看到真值",真实世界没有这种特权信息源,迁移到路测需用离线日志 + 高精地图近似教师,效果会打折。 +- **红绿灯/罕见交规**长尾仍靠教师覆盖;教师没见过的场景,学生也无从学起。 + +## Bitter-Lesson 视角 +LBC 表面像"加结构"(拆两阶段、引特权信息),但它加的不是手写驾驶规则,而是**更好的监督信号来源**——它承认"感知"和"决策"在数据效率上是两个难度截然不同的问题,先用免费真值把决策这块学透,再把负担只留给感知。这与 Bitter Lesson 不冲突:它没有把人类驾驶知识编码进策略,只是重排了学习课程。代价是依赖一个能提供特权真值的仿真器,离不开 [`paradigm:modular_perception_to_planning_pipeline`](paradigm_modular_perception_to_planning_pipeline.md) 之外的"上帝视角"基础设施。 + +## 接下来读什么 +- [ALVINN](paper_alvinn.md) — 端到端模仿的源头,手工版的覆盖问题 +- [DAgger](paper_ross2011_dagger.md) — LBC 的"在场教师"本质上是无限 DAgger +- [TCP](paper_tcp_carla.md) — 同台 CARLA、把轨迹与控制双分支融合的后继 +- [TransFuser](../paper_transfuser.md) — 传感器融合路线的强 baseline 对照 +- [insight: 单一模仿学习无法从复合误差中恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) +- [insight: 交通场景的多模态性是内在的](insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md) diff --git a/docs/data/cards/extended/paper_levine_offline_rl_tutorial.md b/docs/data/cards/extended/paper_levine_offline_rl_tutorial.md new file mode 100644 index 0000000..492114f --- /dev/null +++ b/docs/data/cards/extended/paper_levine_offline_rl_tutorial.md @@ -0,0 +1,53 @@ +--- +id: paper:levine_offline_rl_tutorial +title: "Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems" +title_zh: "离线强化学习综述(Levine 等)" +kind: paper +tier: B +authors: [Levine, S., Kumar, A., Tucker, G., Fu, J.] 
+venue: "arXiv 2020" +year: 2020 +topic: deep_rl +phase: core +deep_links: + - {label: "arXiv 2005.01643", url: "https://arxiv.org/abs/2005.01643"} +--- + +# 离线强化学习综述(Levine 等) + +> 这篇长综述把"只能用一份固定日志、不能再与环境交互"这个设定的全部困难,归结到一句话:分布偏移(distributional shift)。它系统梳理了为什么朴素的 off-policy 算法在离线设定下会崩、有哪几类应对思路(策略约束、价值正则、不确定性惩罚、模型基方法、序列建模),并诚实地列出尚未解决的开放问题——尤其是离线模型选择与离线到在线的过渡。 + +## 一个最小公式 / Math anchor +$$ +Q(s,a) \leftarrow r(s,a) + \gamma\,\mathbb{E}_{s'}\big[\max_{a'} Q(s',a')\big], +\qquad a' \notin \mathrm{supp}(\pi_\beta(\cdot\mid s')) +$$ +离线 RL 的病根全在这个 $\max_{a'}$ 上:Bellman 备份要对所有动作取最大,但日志只覆盖行为策略 $\pi_\beta$ 的支撑。一旦 $\arg\max$ 落在支撑外(out-of-distribution 动作),$Q$ 的估计纯属外推、无任何样本纠正,而最大化算子又专挑这些被高估的值,误差在自举中正反馈式放大。综述把"所有离线 RL 算法的设计动机"统一解释为:用各种方式阻止备份去查询 $\mathrm{supp}(\pi_\beta)$ 之外的动作。 + +## 它在图谱里的位置 +这篇综述是 [离线 RL 范式](paradigm_offline_rl.md) 的纲领性文献,与同期同团队的 [D4RL](paper_d4rl.md) 互补——一个给理论框架,一个给度量标准。它把后续具体算法摆进同一坐标系:[CQL](paper_cql.md) 是"价值正则"分支,[IQL](paper_iql.md) 是"避开 OOD 动作"分支,[Decision Transformer](paper_decision_transformer.md) 是"序列建模绕过 Bellman"分支。它从理论上支撑了 [离线 RL 本质是带约束的动态规划](insight_offline_rl_is_actually_constrained_dynamic_programming.md) 这条洞察。 + +## 架构 / 方法直觉 +综述不提新算法,而提供一张分类地图,把已有方法按"如何抑制分布偏移"分类: +- **策略约束(policy constraint)**:直接要求学到的策略 $\pi$ 不远离 $\pi_\beta$(KL、MMD、或显式行为克隆正则)。 +- **价值正则(value regularization)**:在价值层面对 OOD 动作做悲观惩罚,CQL 是代表。 +- **不确定性量化**:用集成或贝叶斯估计 $Q$ 的方差,对高方差区域打折。 +- **模型基离线 RL**:学环境模型,在模型 rollout 上做规划,但对模型不确定的区域施加惩罚(MOPO/MOReL)。 +- **重要性采样 / 序列建模**:要么纠偏 off-policy 评估,要么干脆把 RL 改写成监督式序列预测。 +核心直觉是:离线 RL 能达到的上界被数据覆盖范围钉死,算法的工作只是"在可信区域内尽量缝出好策略,对区域外保持悲观"。 + +## 工程上真正要注意什么 +- **离线模型选择是真正的痛点**:没有在线环境就无法靠回报选超参,拟合 Q 评估(FQE)等离线代理指标本身也不可靠,综述把它列为头号开放问题。 +- 策略约束法对 $\pi_\beta$ 的估计质量敏感;当日志由多个策略混采时,"贴近行为策略"会贴向一个不存在的平均策略。 +- 离线到在线过渡时,过度保守的初始化一上线遇到分布偏移就崩,需要专门设计(如校准)。 +- 不要把离线 RL 当成行为克隆的替代:数据近优时它退化为 BC,真正增益来自次优数据上的轨迹缝合。 + +## Bitter-Lesson 视角 +这篇综述本质上是在记录一个"数据/算力尚不足以蛮力解决"的阶段:因为不能在线收集更多数据,社区只能往算法里塞各种保守性归纳偏置(约束、惩罚、悲观)。bitter lesson 的潜台词是——当海量多样的日志能覆盖足够宽的支撑时,"惩罚 OOD"这件精巧的事会被"数据本身就覆盖了 
OOD"所稀释。综述自己也指出,真正可扩展的方向可能是把离线学习与可控的在线/仿真数据收集闭环起来,而非无止境地打磨保守正则。这正是自动驾驶数据引擎的思路。 + +## 接下来读什么 +- [离线 RL 范式](paradigm_offline_rl.md) — 本综述服务的问题域总览 +- [D4RL](paper_d4rl.md) — 配套的基准与度量 +- [CQL](paper_cql.md) / [IQL](paper_iql.md) — 价值正则 vs 避开 OOD 两条主线 +- [Decision Transformer](paper_decision_transformer.md) — 序列建模派 +- [离线 RL 本质是带约束的动态规划](insight_offline_rl_is_actually_constrained_dynamic_programming.md) diff --git a/docs/data/cards/extended/paper_ng_russell_2000_irl.md b/docs/data/cards/extended/paper_ng_russell_2000_irl.md new file mode 100644 index 0000000..ce7a8c6 --- /dev/null +++ b/docs/data/cards/extended/paper_ng_russell_2000_irl.md @@ -0,0 +1,49 @@ +--- +id: paper:ng_russell_2000_irl +title: "Algorithms for Inverse Reinforcement Learning" +title_zh: "逆强化学习的算法(Ng & Russell 2000)" +kind: paper +tier: A +authors: [Ng, A. Y., Russell, S.] +venue: "ICML 2000" +year: 2000 +topic: rl_foundations +phase: core +deep_links: + - {label: "ICML 2000 PDF", url: "https://ai.stanford.edu/~ang/papers/icml00-irl.pdf"} +--- + +# 逆强化学习的算法(Ng & Russell 2000) + +> 这篇奠基论文把一个看似哲学的问题——"给定一个最优行为,能否反推它在优化什么"——形式化成可解的优化问题,并同时点破了它的致命病灶:reward 不可辨识(ill-posed)。后续整条 IRL 线,都是在和这个不可辨识性较劲。 + +## 一个最小公式 / Math anchor +$$ +\mathbb{E}\big[V^{\pi^*}(s)\big]\ \ge\ \mathbb{E}\big[V^{\pi}(s)\big]\quad\forall\,\pi +\;\;\Longleftrightarrow\;\; +\big(\mathbf{P}_{a^*}-\mathbf{P}_{a}\big)\big(\mathbf{I}-\gamma\mathbf{P}_{a^*}\big)^{-1}\mathbf{R}\ \ge\ 0 +$$ +要让观测到的策略 $\pi^*$ 在某个 reward $\mathbf{R}$ 下最优,等价于上面这组对所有动作 $a$ 成立的线性不等式。问题立刻暴露:$\mathbf{R}=0$ 永远满足(任何策略都最优),无数个 $\mathbf{R}$ 也满足。Ng & Russell 的对策是加目标函数——最大化"最优动作与次优动作的价值间隔",并用 $\ell_1$ 惩罚逼出稀疏 reward,把不适定问题正则成一个线性规划。 + +## 它在图谱里的位置 +这是 [Inverse RL](paper_irl.md) 这一整问题的开山之作,确立了它的标准提法与核心难点。它直接催生了 [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md)(用最大熵原理给"选哪个 reward"一个原则性答案),并经由最大熵这一步通向 [GAIL](paper_gail.md) 与 [AIRL](paper_airl.md) 的对抗化实现。它论证的 reward shaping 不变性,正是 [AIRL](paper_airl.md) 用来 disentangle reward 与动力学的理论杠杆。它从根上支撑 
[人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) 这条洞见。 + +## 架构 / 方法直觉 +论文给出三种设定下的算法:有限状态已知 $\pi^*$(解线性规划)、大/连续状态空间下用线性 reward 特征基函数逼近、以及只观测到采样轨迹而非完整策略时的版本。共同思路是:把"$\pi^*$ 最优"翻译成对 reward 的线性约束,再叠加一个偏好次优间隔最大化 + 稀疏正则的目标。一个深刻的副产物是 reward shaping 定理:给 reward 加任意势函数差 $\gamma\Phi(s')-\Phi(s)$ 不改变最优策略——这说明 reward 在策略意义下天然存在一整族等价类,不可辨识不是算法缺陷而是问题的内在性质。 + +## 工程上真正要注意什么 +- 线性 reward 特征基的选择决定一切:表达力不足则学不出真实意图,过强则更不可辨识。这与"手写特征"的负担直接挂钩。 +- 需要假设观测策略确实最优;真实人类示教是近优且带噪的,原始 LP 对噪声脆弱(后续最大熵版本正是为此而生)。 +- 大状态空间需要解内层 MDP(求 $V^\pi$),外层再优化 reward——双层循环,计算昂贵。 +- 在驾驶里直接用 2000 版几乎不可行:状态高维、专家非最优、特征难手写;它的价值是概念框架而非可部署算法。 + +## Bitter-Lesson 视角 +这篇是 IRL 谱系里最"重人工先验"的一端:它依赖手写 reward 特征 + 线性结构 + 最优性假设。在算力与数据稀缺的 2000 年这是必要的。整条 IRL 演化史可读作一部"逐步把人工先验换成可学习结构"的历史——从手写特征到最大熵概率模型([Ziebart](paper_ziebart_max_ent_irl.md)),再到判别器吃掉特征工程([GAIL](paper_gail.md)/[AIRL](paper_airl.md))。Ng & Russell 的贡献是把问题钉死,让后人知道要替换的究竟是什么。 + +## 接下来读什么 +- [Ziebart 最大熵 IRL](paper_ziebart_max_ent_irl.md) — 用最大熵原理消解不可辨识性 +- [Inverse RL 总览](paper_irl.md) — 这篇定义的那个问题 +- [GAIL](paper_gail.md) — 用判别器替换手写特征的对抗实现 +- [AIRL](paper_airl.md) — 把 shaping 不变性用作迁移工具 +- [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md) +- [偏好学习](paper_preference_learning.md) — 用比较而非示教恢复 reward 的另一路 diff --git a/docs/data/cards/extended/paper_planet.md b/docs/data/cards/extended/paper_planet.md new file mode 100644 index 0000000..5cbf432 --- /dev/null +++ b/docs/data/cards/extended/paper_planet.md @@ -0,0 +1,52 @@ +--- +id: paper:planet +title: "PlaNet — Learning Latent Dynamics for Planning from Pixels" +title_zh: "PlaNet(从像素学习潜空间动力学并规划)" +kind: paper +tier: A +authors: [Hafner, D., Lillicrap, T., Fischer, I., Villegas, R., Ha, D., Lee, H., Davidson, J.] 
+venue: "ICML 2019" +year: 2019 +topic: world_models +phase: core +deep_links: + - {label: "arXiv 1811.04551", url: "https://arxiv.org/abs/1811.04551"} + - {label: "google-research/planet", url: "https://github.com/google-research/planet"} +--- + +# PlaNet(从像素学习潜空间动力学并规划) + +> PlaNet 证明了一件事:可以纯从像素学一个紧凑的潜空间环境模型,然后完全在潜空间里做规划,不靠无模型 RL 也能解决连续控制任务,且样本效率比当时的无模型方法高一个数量级。它的核心贡献是 RSSM(Recurrent State-Space Model)——一个把确定性记忆与随机隐变量结合的潜动力学模型,以及"在潜空间里跑 CEM 规划"的范式。它是 [Dreamer](paper_dreamer_v3.md) 系列的直接前身。 + +## 一个最小公式 / Math anchor +$$ +s_t = f(s_{t-1}, a_{t-1}),\qquad +z_t \sim p(z_t \mid s_t),\qquad +\hat o_t \sim p(o_t \mid s_t, z_t) +$$ +$$ +\mathcal{L} = \mathbb{E}\Big[\underbrace{\log p(o_t\mid s_t,z_t)}_{\text{重建}} - \underbrace{D_{\text{KL}}\big(q(z_t\mid o_{\le t},a_{<t})\,\|\,p(z_t\mid s_t)\big)}_{\text{KL 正则}}\Big] +$$ + +> 很多任务的奖励函数根本写不出来("做个后空翻"该怎么打分?)。这篇工作放弃手写奖励,改让人类对两段智能体行为片段做"哪个更好"的成对比较,从这些比较里拟合一个奖励模型,再用标准 RL 去最大化它。关键发现是:只需对极小一部分交互做人类标注(远少于直接 reward shaping 的工作量),就能学会人类难以言说的目标。这是后来 RLHF 整条技术路线的种子。 + +## 一个最小公式 / Math anchor +$$ +P\big[\sigma_1 \succ \sigma_2\big] \;=\; \frac{\exp\sum_t \hat r(s_t^1,a_t^1)}{\exp\sum_t \hat r(s_t^1,a_t^1) + \exp\sum_t \hat r(s_t^2,a_t^2)} +$$ +这是 Bradley–Terry 偏好模型:两段轨迹片段 $\sigma_1,\sigma_2$ 被人类偏好的概率,由它们累积奖励之差经 softmax 决定。奖励模型 $\hat r$ 的训练目标就是让这个预测概率匹配人类标注(交叉熵)。它把"偏序比较"这种弱、相对、无量纲的监督信号,反演成一个绝对的标量奖励函数——一旦有了 $\hat r$,下游就回到熟悉的 RL 最大化问题。注意奖励的绝对尺度不可辨识(同时加常数偏好概率不变),所以实践中要做归一化。 + +## 它在图谱里的位置 +这篇是对齐(alignment)方向的奠基工作之一,把"奖励从哪来"这个问题从"人工设计"转向"从人类判断中学习"。它向下游孕育了 [DPO](paper_rlhf_dpo.md) 这类把偏好直接转成策略损失、跳过显式奖励模型的方法。它与模仿学习里的逆强化学习([GAIL](paper_gail.md) / [AIRL](paper_airl.md))是姊妹关系——都在"反推奖励/目标",只是监督信号不同:IRL 用专家演示,本工作用成对偏好。它体现了 [人类示教压缩了隐式奖励函数](insight_human_demonstrations_compress_implicit_reward_function.md) 这条洞察的偏好版本。 + +## 架构 / 方法直觉 +系统是三个组件的异步闭环:(1) RL 策略与环境交互产生轨迹;(2) 从轨迹中采样片段对,交给人类标注偏好,存入比较数据集;(3) 用比较数据集持续训练奖励模型 $\hat r$,策略再去最大化最新的 $\hat r$。三者并行滚动是关键——奖励模型和策略共同进化,标注预算被花在策略当前真正访问的状态上(主动学习式的查询),从而极大压缩所需人类反馈量。这与"先一次性标完再训练"的离线流程形成对比。 + +## 工程上真正要注意什么 +- **奖励黑客(reward 
hacking)**:策略会专挑奖励模型估计虚高、但人类其实不认可的行为。论文里就出现过智能体学会"看起来像抓取却没真抓"的投机解。奖励模型必须随策略持续更新,否则被钻空子。 +- 片段对的采样策略影响标注效率:优先标注奖励模型不确定的对(disagreement-based)比随机采样省标注。 +- 人类标注有噪声且非传递,Bradley–Terry 假设的传递性只是近似。 +- 奖励尺度不可辨识,需归一化;否则 RL 的熵正则、学习率都会随尺度漂移。 + +## Bitter-Lesson 视角 +这项工作的深远之处在于:它没有让人去设计奖励函数(强先验、易出错),而是把人类放在一个可扩展的位置上——只做二选一的比较,剩下交给学习与算力。这正契合 bitter lesson:"让人提供尽量弱、尽量易获取的信号,把结构交给数据和优化去补。"成对比较恰是人类能稳定、廉价提供的最弱监督之一,因此这条路线后来能扩展到 LLM 规模(RLHF),把"对齐人类意图"变成一个可被算力放大的学习问题。 + +## 接下来读什么 +- [DPO](paper_rlhf_dpo.md) — 跳过显式奖励模型,直接从偏好优化策略 +- [GAIL](paper_gail.md) / [AIRL](paper_airl.md) — 从演示反推目标的姊妹路线 +- [人类示教压缩了隐式奖励函数](insight_human_demonstrations_compress_implicit_reward_function.md) +- [PPO](paper_schulman2017_ppo.md) — RLHF 中最大化学到奖励的标准优化器 diff --git a/docs/data/cards/extended/paper_ross_bagnell_2010.md b/docs/data/cards/extended/paper_ross_bagnell_2010.md new file mode 100644 index 0000000..730a7c4 --- /dev/null +++ b/docs/data/cards/extended/paper_ross_bagnell_2010.md @@ -0,0 +1,52 @@ +--- +id: paper:ross_bagnell_2010 +title: "Efficient Reductions for Imitation Learning" +title_zh: "模仿学习的高效归约(Ross & Bagnell 2010)" +kind: paper +tier: A +authors: [Ross, S., Bagnell, J. A.] 
+venue: "AISTATS 2010" +year: 2010 +topic: rl_foundations +phase: core +deep_links: + - {label: "AISTATS 2010 PDF", url: "https://www.ri.cmu.edu/pub_files/2010/5/Ross-AIStats10-paper.pdf"} +--- + +# 模仿学习的高效归约(Ross & Bagnell 2010) + +> 这篇用一行界把"行为克隆为什么会在闭环里崩"讲透了:把监督学习当模仿用,单步误差 $\epsilon$ 会沿 $T$ 步以 $O(\epsilon T^2)$ 复合。它同时给出第一个不二次复合的归约(forward training),为一年后的 DAgger 铺好了理论地基。 + +## 一个最小公式 / Math anchor +$$ +J(\hat\pi)\ \le\ J(\pi^*)+O\big(\epsilon\,T^2\big) +\qquad\text{vs.}\qquad +J(\hat\pi_{\text{forward}})\ \le\ J(\pi^*)+O\big(\epsilon\,T\big) +$$ +左式是朴素 BC 的代价界:训练时学到的策略在专家分布 $d_{\pi^*}$ 上单步犯错率为 $\epsilon$,但闭环执行时一旦偏离,后续状态服从 $d_{\hat\pi}$,模型在那里没见过、错得更狠,误差逐步复合成 $T^2$ 级。右式是 forward training 的界:通过让每一步在它自己会到达的状态分布上训练,把复合压回线性 $T$。$T^2$ 与 $T$ 之差,就是"闭环灾难"的精确量化。 + +## 一个最小公式背后的机制 +二次项的根来自分布偏移:BC 最小化的是 $\mathbb{E}_{s\sim d_{\pi^*}}[\ell(s)]$,但部署时承受的是 $\mathbb{E}_{s\sim d_{\hat\pi}}[\ell(s)]$。两个分布的总变差距离本身随时间增长(每步偏一点),误差与距离相乘,得到二次。这正是 [纯模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 的数学内核。 + +## 它在图谱里的位置 +这是 [问题:行为克隆误差随时间复合](problem_behavior_cloning_compounds_errors_over_time.md) 的奠基性证明,直接动机了 [DAgger](../paper_ross2011_dagger.md)——DAgger 是把这里的 forward training 换成更实用的 no-regret 在线学习版本,同样达到 $O(\epsilon T)$。它支撑 [策略改进被分布偏移上界锁死](insight_policy_improvement_bounded_by_distribution_shift.md) 与 [纯模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 两条洞见,并解释了为何 [模仿学习范式](paradigm_imitation_learning.md) 必须配数据聚合或价值函数才能闭环可用。 + +## 架构 / 方法直觉 +论文不是提一个网络,而是提"归约"——把模仿学习归约成一串监督学习子问题,并按归约方式给出不同的界。朴素 BC 是把整段轨迹当一个 i.i.d. 
监督集,所以承受 $T^2$。Forward training 反其道而行:为每个时间步 $t$ 训一个单独的策略 $\pi_t$,且训练数据来自前 $t-1$ 步执行已学策略后实际到达的状态分布——让每一步都在"自己会遇到的状态"上学,分布偏移被逐步消化而非累积。代价是要存 $T$ 个策略且需要可重置的交互式专家。 + +## 工程上真正要注意什么 +- forward training 需要为每个 horizon step 单独训练,$T$ 大时不实用——这正是 [DAgger](../paper_ross2011_dagger.md) 用单一策略 + 数据聚合取而代之的动机。 +- 界依赖"可查询的交互式专家":能在任意状态上问专家"你会怎么做"。真实驾驶里这意味着要么有人类陪驾随时接管标注,要么有特权专家(如带全局信息的规划器)。 +- $\epsilon$ 与 $T^2$ 的乘积结构解释了为什么"开环 ADE/FDE 很好但闭环开不动":开环指标只估 $\epsilon$,闭环安全才暴露 $T^2$。 +- 它是理论卡片:实践中你跑的是 DAgger 或其变体,但要靠这篇理解"为什么不能只堆 BC 数据"。 + +## Bitter-Lesson 视角 +这篇是难得的"先验=理论洞察"而非"先验=手工特征"的例子。它没有引入任何领域知识,只是精确刻画了一个结构性陷阱,反而指明了正确的算力投放方向:与其把算力砸在更多专家数据(只压 $\epsilon$,不动 $T^2$),不如把交互预算用在数据聚合上(改变复合阶数)。这条洞见在规模时代依然成立——它告诉你哪种"加数据"有用、哪种是徒劳。 + +## 接下来读什么 +- [DAgger](../paper_ross2011_dagger.md) — 把 forward training 换成实用的在线 no-regret 版本 +- [策略改进被分布偏移上界锁死](insight_policy_improvement_bounded_by_distribution_shift.md) — 这篇上界的可迁移内核 +- [纯模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) — 复合误差的洞见版 +- [模仿学习范式](paradigm_imitation_learning.md) — BC 为何需要补救 +- [问题:行为克隆误差随时间复合](problem_behavior_cloning_compounds_errors_over_time.md) — 它证明的那个问题 +- [GAIL](paper_gail.md) — 用分布匹配而非数据聚合来治复合误差的另一路 diff --git a/docs/data/cards/extended/paper_saycan.md b/docs/data/cards/extended/paper_saycan.md new file mode 100644 index 0000000..31030c0 --- /dev/null +++ b/docs/data/cards/extended/paper_saycan.md @@ -0,0 +1,49 @@ +--- +id: paper:saycan +title: "SayCan — Do As I Can, Not As I Say" +title_zh: "SayCan(用可供性把 LLM 计划接到现实:会说不等于能做)" +kind: paper +tier: A +authors: [Ahn, M., Brohan, A., Brown, N., et al. 
(Google)] +venue: "CoRL 2022" +year: 2022 +topic: llm_agent +phase: core +deep_links: + - {label: "arXiv 2204.01691", url: "https://arxiv.org/abs/2204.01691"} + - {label: "项目页 say-can.github.io", url: "https://say-can.github.io/"} +--- + +# SayCan(用可供性接地 LLM 计划) + +> LLM 知道"打扫洒出的可乐"大概要"拿海绵、擦干净、扔罐子",但它不知道**此刻这个机器人在这个房间里能不能做到**。SayCan 的核心:让 LLM 给出"哪个技能在语义上更有用"(say),让每个技能的价值函数给出"在当前状态下做这个技能能成功的概率"(can),两者**相乘**来选下一步原语。语言提供任务知识,可供性提供物理可行性,缺一不可。 + +## 一个最小公式 / Math anchor +$$ +a^\star=\arg\max_{a\in\mathcal{A}}\ \underbrace{p_{\text{LLM}}\big(\ell_a\mid i,\,h\big)}_{\text{Say:语义有用性}}\ \cdot\ \underbrace{V_a^{\pi}(s)}_{\text{Can:可供性/成功概率}} +$$ +$\mathcal{A}$ 是一组预训练低层技能("拿起海绵""走到桌前"…),$\ell_a$ 是技能的语言描述,$i$ 是用户指令、$h$ 是历史。第一项是 LLM 对"这个技能与目标的相关性"打分;第二项 $V_a^\pi(s)$ 是该技能在当前状态成功概率的价值函数(由 RL/affordance 模型给出)。乘积的妙处:**LLM 想做但做不到的(高 say、低 can)被价值压低;能做但无意义的(高 can、低 say)被语言压低**——只有"既相关又可行"的技能胜出。逐步贪心选取、执行、再选,直到 LLM 输出"done"。 + +## 它在图谱里的位置 +SayCan 是 [`paradigm:llm_agent_paradigm`](paradigm_llm_agent_paradigm.md) 的奠基工作之一,直面 [`problem:grounding_language_token_to_continuous_physical_world`](problem_grounding_language_token_to_continuous_physical_world.md)——它给出 grounding 的一个清晰范式:用价值函数把抽象语言计划锚到当前物理可行集。它 manifest 了 [`insight:tool_use_extends_language_model_into_environment_grounded_actor`](insight_tool_use_extends_language_model_into_environment_grounded_actor.md)(技能即可调用动作),与 [Codex](paper_codex.md) 的 code-as-action 形成对照(一个用可执行代码、一个用可供性价值落地语言)。它是 [`paradigm:vla_paradigm`](paradigm_vla_paradigm.md)([RT-2](paper_rt2.md)、[OpenVLA](paper_openvla.md))之前"规划与执行解耦"的代表,并影响了驾驶域 [`paradigm:knowledge_driven_reflective_agent`](paradigm_knowledge_driven_reflective_agent.md) 的设计。 + +## 架构 / 方法直觉 +两套系统协作。**高层**:一个冻结的 LLM 把长指令分解,并对技能库里每个原语打"语义相关"分。**低层**:一组各自训练好的技能策略,每个都配一个价值/affordance 函数估计"从当前状态执行能成功的概率"。仲裁是逐步的乘积最大化:选一个原语→执行→更新状态与历史→再选。这种"LLM 出意图、价值函数把关可行性"的分工,本质是把不可靠的语言规划**约束在机器人真正能做的事情的流形上**,从而避免 LLM 那种"听起来完美但物理上荒谬"的计划。它不微调 LLM,靠提示工程 + 外挂价值函数实现 grounding,工程上极轻。 + +## 工程上真正要注意什么 +- 
**技能库的覆盖即能力上限**:SayCan 只能组合已有原语,库里没有的动作再聪明的 LLM 也唤不出来。 +- **价值函数质量是安全关键**:$V_a^\pi(s)$ 高估会让系统选到其实做不到的危险动作;驾驶域里这等于"以为来得及变道"。 +- **贪心分解的短视**:逐步乘积是贪心,缺乏对长程后果的回溯,复杂任务里会走进死胡同,需要重规划。 +- **语言-技能描述对齐**:$\ell_a$ 写得好坏直接影响 LLM 打分,描述工程不可忽视。 +- **驾驶迁移的差异**:机器人技能离散且低频,驾驶是高频连续控制;SayCan 的直接对应是"离散高层机动(变道/让行/跟车)+ 各自可行性评估",而非底层方向盘控制。 + +## Bitter-Lesson 视角 +SayCan 看似很"手工"——预定义技能库、外挂价值函数、提示工程。但它给出的真正洞见经得起 Bitter Lesson 的检验:**纯靠 scaling 语言模型不会自动获得物理 grounding,因为可行性信息根本不在文本里**。say 与 can 的乘积是对"语言知识与环境反馈是两类不同信号、必须各自学习再融合"这一事实的承认。后续 VLA([RT-2](paper_rt2.md))试图把两者压进单一端到端模型,让 grounding 从大规模交互数据里涌现而非手工拼接——那是更 Bitter-Lesson 的方向,但 SayCan 的"可行性必须被显式估计"这一约束在安全攸关系统里仍长期有效。 + +## 接下来读什么 +- [Codex](paper_codex.md) — 另一种 grounding:用可执行代码做动作 +- [RT-2](paper_rt2.md) — 把 say 与 can 压进单一端到端 VLA +- [DiLu](paper_2309.16292_dilu.md) — 知识驱动的 LLM 驾驶决策 +- [Agent-Driver](paper_2311.10813_agent_driver.md) — LLM + 工具 + 价值校验的驾驶栈 +- [problem: 把语言 token 接地到连续物理世界](problem_grounding_language_token_to_continuous_physical_world.md) +- [insight: 工具调用把语言模型延展为环境接地的行动者](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) diff --git a/docs/data/cards/extended/paper_schulman2016_gae.md b/docs/data/cards/extended/paper_schulman2016_gae.md new file mode 100644 index 0000000..bce4420 --- /dev/null +++ b/docs/data/cards/extended/paper_schulman2016_gae.md @@ -0,0 +1,48 @@ +--- +id: paper:schulman2016_gae +title: "GAE — High-Dimensional Continuous Control Using Generalized Advantage Estimation" +title_zh: "GAE(广义优势估计)" +kind: paper +tier: A +authors: [Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.] 
+venue: "ICLR 2016" +year: 2016 +topic: deep_rl +phase: core +deep_links: + - {label: "arXiv 1506.02438", url: "https://arxiv.org/abs/1506.02438"} +--- + +# GAE(广义优势估计) + +> 策略梯度的方差几乎全部来自"优势估计"这一项。GAE 用一个标量 $\lambda \in [0,1]$ 把"单步 TD(低方差、有偏)"和"蒙特卡洛回报(无偏、高方差)"连续地插值成一条谱,让你用一个旋钮调节 bias/variance 折中。它是 TRPO、PPO 等所有现代 on-policy 算法默认的优势计算方式。 + +## 一个最小公式 / Math anchor +$$ +\hat{A}^{\text{GAE}(\gamma,\lambda)}_t \;=\; \sum_{l=0}^{\infty} (\gamma\lambda)^{l}\,\delta_{t+l}, +\qquad +\delta_t \;=\; r_t + \gamma V(s_{t+1}) - V(s_t) +$$ +$\delta_t$ 是单步 TD 残差(用学到的价值函数 $V$ 做 baseline)。GAE 把未来一系列 TD 残差按 $(\gamma\lambda)^l$ 指数加权累加。两个极端值揭示它的本质:$\lambda=0$ 时 $\hat{A}_t=\delta_t$,退化为单步 TD,偏差最大、方差最小;$\lambda=1$ 时 $\hat{A}_t=\sum_l \gamma^l r_{t+l}-V(s_t)$,退化为蒙特卡洛优势,无偏但方差随 horizon 爆炸。中间的 $\lambda$(实践中常取 0.95–0.98)在这条谱上取一个甜点。 + +## 它在图谱里的位置 +GAE 是 on-policy 策略梯度的"优势估计标准件",直接喂给 [TRPO](paper_schulman2015_trpo.md) 与 [PPO](paper_schulman2017_ppo.md)——这两篇论文报告的实验全部默认开启 GAE。它与 [DQN](paper_mnih2015_dqn.md) 那条 value-based / off-policy 路线形成对照:GAE 假设数据来自当前策略(on-policy),而 DQN 用 replay buffer 复用旧数据。它也是任何"actor-critic + 自举价值函数"框架的母版,[SAC](paper_sac.md) 虽走 off-policy 路线但同样依赖"用 critic 当 baseline 降方差"这一核心思想。 + +## 架构 / 方法直觉 +GAE 不是一个新网络,而是一个估计量。它把两个独立的近似误差解耦:价值函数 $V$ 的偏差,与采样回报的方差。$\gamma$ 是 MDP 自带的折扣因子(决定"看多远"),$\lambda$ 是 GAE 额外引入的"信用分配长度"旋钮(决定"信任 bootstrap 多深")。关键洞察是:如果 $V$ 已经相当准,就该多信任它(小 $\lambda$,少累加真实回报);如果 $V$ 很糙,就该多用真实回报纠偏(大 $\lambda$)。实现上它等价于对 TD($\lambda$) 的 eligibility-trace 做反向递推,一行 `adv = delta + gamma*lam*adv` 从轨迹末尾往前扫即可,几乎零额外开销。 + +## 工程上真正要注意什么 +- $\lambda$ 不要当成无关紧要的超参。它对最终回报的影响常常超过学习率;$\lambda=0.95$ 是稳妥起点,长 horizon 任务可往 0.97–0.99 调。 +- 优势必须做 batch 内标准化(减均值除标准差),否则梯度尺度随回报量级漂移,PPO 的 clip 阈值会失效。 +- 轨迹被时间截断(truncation,非真正终止)时,末状态要用 $V(s_T)$ bootstrap 接回;漏掉这一步会在每个 rollout 边界注入系统性偏差。 +- GAE 严格 on-policy:每次更新后旧 rollout 立即作废,不能塞进 replay buffer 复用,这是它样本效率不如 off-policy 方法的根因。 + +## Bitter-Lesson 视角 +GAE 是"用一点结构换大量稳定性"的范例:它没有引入新的可学习参数,只是把一个已知的 bias/variance 数学事实显式参数化成一个旋钮。这与 bitter lesson 
并不冲突——它降低的是优化器的方差,从而让更大规模、更通用的策略训练得以收敛。值得注意的是更现代的实践趋势是把 $\lambda$ 也调成相对不敏感(靠大 batch 压方差),让算力而非精调旋钮来吸收方差,这正是 bitter lesson 的渐进体现。 + +## 接下来读什么 +- [TRPO](paper_schulman2015_trpo.md) — GAE 的首发搭档,信赖域约束的策略更新 +- [PPO](paper_schulman2017_ppo.md) — GAE 最广泛的使用场景,clip 替代信赖域 +- [SAC](paper_sac.md) — off-policy 一侧的 actor-critic 对照 +- [DQN](paper_mnih2015_dqn.md) — value-based / off-policy 路线对照 +- [Spinning Up](paper_spinning_up.md) — GAE 推导与可读实现的最佳入口 diff --git a/docs/data/cards/extended/paper_spinning_up.md b/docs/data/cards/extended/paper_spinning_up.md new file mode 100644 index 0000000..00105a8 --- /dev/null +++ b/docs/data/cards/extended/paper_spinning_up.md @@ -0,0 +1,46 @@ +--- +id: paper:spinning_up +title: "Spinning Up in Deep RL" +title_zh: "Spinning Up in Deep RL(深度强化学习教学资源)" +kind: paper +tier: B +authors: [Achiam, J., OpenAI] +venue: "OpenAI educational resource" +year: 2018 +topic: deep_rl +phase: prereq +deep_links: + - {label: "spinningup.openai.com", url: "https://spinningup.openai.com/"} + - {label: "openai/spinningup", url: "https://github.com/openai/spinningup"} +--- + +# Spinning Up in Deep RL(深度强化学习教学资源) + +> 这不是一篇论文,而是 OpenAI 出品的深度 RL 教学资源:一套把策略梯度、信赖域、actor-critic 等核心算法从公式推导到极简可读实现一气贯通的材料。它的价值在于"短而正确"——每个算法都配一份去掉工程噪声、保留算法骨架的参考实现(VPG / TRPO / PPO / DDPG / TD3 / SAC),是把数学直觉与可运行代码对齐的最快入口。 + +## 一个最小公式 / Math anchor +$$ +\nabla_\theta J(\theta) \;=\; \mathbb{E}_{\tau\sim\pi_\theta}\!\Big[\sum_{t} \nabla_\theta \log \pi_\theta(a_t\mid s_t)\,\hat{A}_t\Big] +$$ +这是 Spinning Up 反复推导的策略梯度核心式:增大那些优势 $\hat{A}_t>0$ 的动作的对数概率,压低 $\hat{A}_t<0$ 的。资源里清楚地讲明,从这一条式子出发,换不同的 $\hat{A}_t$ 估计(蒙特卡洛回报、加 baseline、[GAE](paper_schulman2016_gae.md))、换不同的约束方式(信赖域、clip),就分别长成 VPG、TRPO、PPO——把一族算法统一在一个梯度表达式下,是它最有教学价值的视角。 + +## 它在图谱里的位置 +Spinning Up 是 [GAE](paper_schulman2016_gae.md)、[TRPO](paper_schulman2015_trpo.md)、[PPO](paper_schulman2017_ppo.md)、[SAC](paper_sac.md) 这一串 on/off-policy 策略梯度算法的统一阅读入口与参考实现来源。它把这些论文里散落的推导收敛成一致记号,因此在图谱里它指向(feeds)几乎所有深度 RL 算法节点,是从"读公式"到"跑代码"之间的桥。 + +## 架构 / 
方法直觉 +它的组织逻辑是一棵决策树:先按 model-free / model-based 分叉,再在 model-free 内按 policy optimization(on-policy,直接优化策略)与 Q-learning(off-policy,学价值再导出策略)分叉,最后指出 DDPG/TD3/SAC 这类 actor-critic 是两者的混合。每个算法页都遵循同一模板:关键方程 → 伪代码 → 实现要点 → 常见坑。参考实现刻意"扁平"——不做过度抽象,让读者能逐行对照伪代码,这种"可读性优先于工程完备"的取舍正是它区别于 RLlib/SB3 等生产库的地方。 + +## 工程上真正要注意什么 +- 它的实现是教学版,不要直接搬去做大规模训练:缺少向量化环境、分布式采样、混合精度等生产优化。 +- 算法选择树是有用的心智模型,但边界已被后来者模糊(如离线 RL、序列建模派不在这棵树里)。 +- 配套的"Key Papers"清单是按主题组织的高质量文献索引,比单看某篇论文更适合建立全局观。 +- 由于发布于 2018,前沿部分(offline RL、世界模型、RLHF)需要另找资料补齐。 + +## Bitter-Lesson 视角 +Spinning Up 把一族算法压缩成"同一个梯度式 + 不同的方差控制/约束方式",这本身就是一种"找最小不变核"的思路——与 bitter lesson 强调的"少结构、可扩展"暗合。它隐含的教益是:真正经得起规模考验的,往往是那个最简单、最通用的算法骨架(PPO 的广泛使用就是例证),而不是堆满 trick 的复杂变体。把算法理解到能用几十行写出来的程度,是判断"哪些是本质、哪些是工程修补"的前提。 + +## 接下来读什么 +- [GAE](paper_schulman2016_gae.md) — 优势估计的 bias/variance 旋钮 +- [TRPO](paper_schulman2015_trpo.md) / [PPO](paper_schulman2017_ppo.md) — on-policy 信赖域两代 +- [SAC](paper_sac.md) — off-policy 最大熵 actor-critic +- [DQN](paper_mnih2015_dqn.md) — value-based 路线起点 diff --git a/docs/data/cards/extended/paper_tcp_carla.md b/docs/data/cards/extended/paper_tcp_carla.md new file mode 100644 index 0000000..bfbe44d --- /dev/null +++ b/docs/data/cards/extended/paper_tcp_carla.md @@ -0,0 +1,49 @@ +--- +id: paper:tcp_carla +title: "TCP — Trajectory-guided Control Prediction" +title_zh: "TCP(轨迹引导的控制预测:轨迹分支 + 控制分支融合)" +kind: paper +tier: A +authors: [Wu, P., Jia, X., Chen, L., Yan, J., Li, H., Qiao, Y.] 
+venue: "NeurIPS 2022" +year: 2022 +topic: e2e_ad +phase: core +deep_links: + - {label: "arXiv 2206.08129", url: "https://arxiv.org/abs/2206.08129"} + - {label: "OpenDriveLab/TCP 代码", url: "https://github.com/OpenDriveLab/TCP"} +--- + +# TCP(轨迹引导的控制预测) + +> 端到端驾驶长期在两种输出表征间二选一:预测一条**未来轨迹**再交给下游控制器跟踪,或者直接回归**底层控制量**(油门/刹车/转向)。TCP 的观察是这两种表征各有所长——轨迹分支看得远、对长程意图稳健,控制分支反应快、对瞬时动态敏感——于是把它们**做成同一网络的两个分支并在推理时按场景融合**,在 CARLA 上以单相机就拿到了当年很强的闭环成绩。 + +## 一个最小公式 / Math anchor +$$ +a_t=\alpha(s)\,\underbrace{\pi^{\text{ctrl}}(s)}_{\text{控制分支直接回归}}+\big(1-\alpha(s)\big)\,\underbrace{\mathcal{C}\!\big(\pi^{\text{traj}}(s)\big)}_{\text{轨迹分支经控制器映射}} +$$ +$\pi^{\text{traj}}$ 输出未来 waypoint,经一个跟踪控制器 $\mathcal{C}(\cdot)$(横纵向 PID/前馈)转成可执行控制;$\pi^{\text{ctrl}}$ 直接输出 $(\text{steer},\text{throttle},\text{brake})$。融合权重 $\alpha(s)$ 依速度等场景特征自适应——直觉是**高速/巡航时信任前瞻的轨迹分支,低速/起步/急停时信任反应快的控制分支**。两分支共享同一视觉 backbone,控制分支还以多步 GRU 自回归展开,让它具备类似轨迹分支的"看未来"能力。 + +## 它在图谱里的位置 +TCP 属于 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md),与 [Learning by Cheating](paper_lbc.md)、[TransFuser](../paper_transfuser.md) 同为 [`paper:carla_leaderboard`](paper_carla_leaderboard.md) 上的强 baseline,常被 [Bench2Drive](paper_bench2drive.md) 收入对比集。它的核心张力——"轨迹表征 vs 控制表征"——正是 [`insight:tokenized_trajectories_let_planning_borrow_from_language_modeling`](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md) 与经典控制之间的接缝。它 manifest 了 [`paradigm:imitation_learning`](paradigm_imitation_learning.md),并把 LBC 中"waypoint→外置控制器"的解耦设计往前推了一步:让网络自己也学一条端到端控制通路。 + +## 架构 / 方法直觉 +共享视觉编码器(ResNet 系)+ 测量编码(速度、目标点、高层指令)→ 两个 head。轨迹分支用 GRU 自回归吐 waypoint;控制分支同样用 GRU 多步展开控制量,并通过**注意力把轨迹分支预测的未来位置作为引导信息**喂给控制分支("trajectory-guided"由此得名)——控制分支因此不是盲目反应,而是被前瞻意图约束。两分支都用模仿损失训练;融合发生在推理时的逐步 $\alpha(s)$ 加权。这种"两条解码路径互相借力 + 场景自适应仲裁"是它优于单一表征的来源。 + +## 工程上真正要注意什么 +- **融合权重 $\alpha(s)$ 的设计是成败点**:固定权重退化为简单平均,反而抵消两分支优势;论文用基于速度的切换近似。 +- **控制分支易过拟合执行器**:直接回归控制量会把仿真器特定的动力学吃进网络,迁移到不同车辆参数时要谨慎。 +- **单相机输入**:TCP 证明在 
CARLA 上无需 LiDAR 也能取得强成绩,但这部分得益于仿真器感知相对干净;真实长尾下传感冗余仍重要。 +- **轨迹分支需要好的下游控制器**:$\mathcal{C}(\cdot)$ 的整定质量直接决定轨迹分支落地效果,别把跟踪误差算到网络头上。 +- **闭环优于开环**:TCP 的优势主要体现在闭环 driving score,开环 L2 未必领先——再次印证开环指标的乐观性。 + +## Bitter-Lesson 视角 +TCP 加的"结构"是表征层面的——它没有写驾驶规则,而是承认**输出表征的选择本身是一个有信息量的设计变量**,并让模型在两种学习得到的表征间自适应。这与 Bitter Lesson 相容:双分支与融合权重都是可学/可调的,没有手写"何时该刹车"的逻辑。长期看,随着大轨迹模型与统一 token 化([`insight:tokenized_trajectories_let_planning_borrow_from_language_modeling`](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md))成熟,这种手工双分支可能被单一生成式策略吸收;但 TCP 揭示的"轨迹 vs 控制各有 horizon 优势"的物理直觉会长期有效。 + +## 接下来读什么 +- [Learning by Cheating](paper_lbc.md) — waypoint + 外置控制器路线的前驱 +- [TransFuser](../paper_transfuser.md) — 多传感器融合的对照路线 +- [Bench2Drive](paper_bench2drive.md) — 标准化闭环评测里的 TCP 位置 +- [GameFormer](paper_gameformer.md) — 把交互建模进规划的后续方向 +- [insight: 把轨迹 token 化让规划借用语言模型工具](insight_tokenized_trajectories_let_planning_borrow_from_language_modeling.md) +- [insight: 闭环评测才是规划器唯一的真值](insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md) diff --git a/docs/data/cards/extended/paper_vicreg.md b/docs/data/cards/extended/paper_vicreg.md new file mode 100644 index 0000000..1c18388 --- /dev/null +++ b/docs/data/cards/extended/paper_vicreg.md @@ -0,0 +1,55 @@ +--- +id: paper:vicreg +title: "VICReg — Variance-Invariance-Covariance Regularization for SSL" +title_zh: "VICReg(方差-不变性-协方差正则的自监督)" +kind: paper +tier: A +authors: [Bardes, A., Ponce, J., LeCun, Y.] 
+venue: "ICLR 2022" +year: 2022 +topic: ssl_vision +phase: core +deep_links: + - {label: "arXiv 2105.04906", url: "https://arxiv.org/abs/2105.04906"} +--- + +# VICReg(方差-不变性-协方差正则的自监督) + +> VICReg 把自监督防坍缩从"靠负样本"或"靠停梯度/动量等架构技巧"改成三条显式正则项:不变性(两视图表示要一致)、方差(每维要保持足够散度)、协方差(不同维度要去相关)。它不需要负样本、不需要动量目标、不需要预测头,两条分支既可以完全对称,也可以采用不同架构、不共享权重。 + +## 一个最小公式 / Math anchor +$$ +\mathcal{L}=\underbrace{\lambda\,\lVert Z-Z'\rVert^2_F/N}_{\text{invariance}} ++\underbrace{\mu\sum_{j}\max\!\big(0,\,\gamma-\sqrt{\mathrm{Var}(Z_{:j})+\epsilon}\big)}_{\text{variance}} ++\underbrace{\nu\sum_{i\neq j} \big[C(Z)\big]^2_{ij}}_{\text{covariance}} +$$ +$Z,Z'$ 是两视图的一批投影表示。不变性项拉近配对表示;方差项用 hinge 强制每个维度在批内的标准差不低于阈值 $\gamma$(防止维度坍缩成常数);协方差项把表示协方差矩阵 $C(Z)$ 的非对角元压向 0(防止信息挤进少数相关维度,即维度间冗余)。三项各管一件事,组合起来既保信息又防坍缩。 + +## 它在图谱里的位置 +VICReg 与 [BYOL](paper_byol.md) 同属非对比自监督,但走"显式正则"而非"架构不对称"这条路,二者构成"为何不坍缩"的两种机制对照。它把防坍缩从隐晦的工程依赖(BatchNorm、停梯度)抬升为可分析的目标函数,思想上对 [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) 更"白盒"。它服务于 [label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md):稳定、易调的预训练让少标签下游更可靠。 + +## 架构 / 方法直觉 +两条孪生分支各接一个 expander(把 backbone 输出升到高维,使方差/协方差正则有足够维度施展)。同图两增广分别过两分支得 $Z,Z'$,直接套上面的三项损失,两边对称、都反传梯度。它最干净的地方是把"表示要满足什么"显式写进 loss:信息要被均匀地铺在各维度上(方差),各维度别重复编码同一信息(协方差),配对样本要一致(不变性)。因为不依赖批内负样本对,VICReg 对 batch 大小不敏感;因为两分支不必共享权重,它天然适合多模态/异构编码器(如图像-文本各用一套 encoder 对齐)。 + +## 对照:三种非对比/对比方案的防坍缩内核 +| 方案 | 防坍缩内核 | 需负样本 | 需动量目标 | 需 predictor | +|---|---|---|---|---| +| SimCLR | 负样本排斥 | 是 | 否 | 否 | +| BYOL | 停梯度 + 动量 + predictor | 否 | 是 | 是 | +| VICReg | 方差 hinge + 协方差去相关 | 否 | 否 | 否 | + +## 工程上真正要注意什么 +- 三项权重 $(\lambda,\mu,\nu)$ 的平衡是核心调参:方差项太弱会坍缩、太强会过度膨胀表示尺度;协方差项太强会损害有用相关。 +- expander 维度要足够大(常远高于 backbone 输出维),否则协方差去相关无处施展,正则形同虚设。 +- 方差用的是 batch 内标准差,batch 太小时统计噪声大、hinge 失稳;虽不像对比那样需要超大 batch,但也别太小。 +- 两分支可异构,这是相对 BYOL 的实用优势:在跨模态对齐里很方便,但要注意两侧表示尺度匹配。 + +## Bitter-Lesson 视角 +VICReg 没有引入新的人工标签或领域先验,只是把"一个好表示该有的统计性质"写成正则——这仍是一个通用、可随数据扩展的自监督目标。相对
BYOL,它的贡献是把防坍缩这件原本依赖隐晦架构技巧的事变得显式、可分析、易调,从而降低了把自监督 scale 上去时的工程不确定性。bitter-lesson 视角下,重要的不是哪一项正则更"聪明",而是它让"无标签学表示"这条主线更稳健、更易复现地放大。 + +## 接下来读什么 +- [BYOL](paper_byol.md) — 架构技巧防坍缩的对照路线 +- [DINOv2](../paper_dinov2.md) — 大规模自监督视觉基座 +- [BERT](paper_bert.md) — 语言侧的自监督(掩码预测)对照 +- [scaling data with self-supervision](paradigm_scaling_data_with_self_supervision.md) — 它所属的范式 +- [problem: label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md) — 下游少标签动机 diff --git a/docs/data/cards/extended/paper_voxelnet.md b/docs/data/cards/extended/paper_voxelnet.md new file mode 100644 index 0000000..ed3375c --- /dev/null +++ b/docs/data/cards/extended/paper_voxelnet.md @@ -0,0 +1,47 @@ +--- +id: paper:voxelnet +title: "VoxelNet — End-to-End Learning for Point Cloud Based 3D Detection" +title_zh: "VoxelNet(体素特征端到端学习的 3D 检测开端)" +kind: paper +tier: A +authors: [Zhou, Y., Tuzel, O.] +venue: "CVPR 2018" +year: 2018 +topic: scene_understanding +phase: prereq +deep_links: + - {label: "arXiv 1711.06396", url: "https://arxiv.org/abs/1711.06396"} +--- + +# VoxelNet(体素特征端到端学习的 3D 检测开端) + +> VoxelNet 第一次证明:不需要手工设计点云特征,把空间体素化后用一个可学习的 Voxel Feature Encoding(VFE)层直接从原始点学特征,再接 3D 卷积和 RPN,就能端到端做 3D 检测。它确立了"体素 + 可学习编码"这条主线,PointPillars、SECOND、CenterPoint 都是它的后裔。 + +## 一个最小公式 / Math anchor +$$ +\mathbf{f}_i = \mathrm{FCN}\big([\,\mathbf{p}_i\,;\ \mathbf{p}_i-\mathbf{v}\,]\big),\qquad +\mathbf{f}_i^{\text{out}} = \big[\,\mathbf{f}_i\,;\ \max_{j\in V}\mathbf{f}_j\,\big] +$$ +体素 $V$ 内每个点 $\mathbf{p}_i$ 先增广上"相对体素质心 $\mathbf{v}$ 的偏移",过全连接得到逐点特征 $\mathbf{f}_i$;再把体素内逐点 max-pool 出的全局特征拼回每个点(point-wise concat element-wise max)。多层 VFE 堆叠后取体素级聚合,得到稀疏 4D 张量交给 3D 卷积。这种"逐点 + 局部聚合再回灌"的设计正是 PointNet 思想在体素内的实例化。 + +## 它在图谱里的位置 +VoxelNet 是 [PointPillars](paper_pointpillars.md) 的直接前身——后者把它的 3D 卷积换成"单柱 + 2D 卷积"来提速;也是 [CenterPoint](paper_centerpoint.md) 默认 backbone(VoxelNet/稀疏卷积变体)的源头。它处于 [modular perception → planning pipeline](paradigm_modular_perception_to_planning_pipeline.md) 的 LiDAR 检测环节,回应 
[label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md):可学习编码替代手工特征,使标注的几何信号被更充分利用。 + +## 架构 / 方法直觉 +三段式:(1) 体素分区 + VFE——把点云划成规则体素,每体素随机采样固定点数,堆叠 VFE 层学体素特征,产出稀疏 4D 张量;(2) 中间 3D 卷积——沿高度方向聚合并下采样,把稀疏体素特征压成 BEV 特征图;(3) RPN——在 BEV 上做 anchor-based 的二维卷积检测,回归 7-DoF 框。核心贡献是 VFE:在它之前点云检测普遍依赖手工占用栅格、强度直方图等特征,VoxelNet 让"从原始点到体素特征"完全可微、端到端训练。 + +## 工程上真正要注意什么 +- 原版用稠密 3D 卷积,显存与速度是硬伤;真正让这条路线落地的是后续 SECOND 引入的稀疏 3D 卷积(只在非空体素上算),二者常被并提。 +- 体素内固定采样点数 $T$(如 35)会丢点,高密度近处目标信息损失大;空体素占绝大多数,必须用稀疏数据结构存储。 +- anchor 角度只设 0°/90° 两档,斜向目标靠回归补偿;角度回归易出现"翻转 180°"的方向歧义,需加方向分类辅助 loss(SECOND 的改进点)。 +- 体素分辨率与检测范围的乘积决定 3D 张量大小,是显存爆炸的首要原因。 + +## Bitter-Lesson 视角 +VoxelNet 是这条脉络里最"bitter-lesson 正确"的一步:它把点云检测中长期依赖的手工特征工程整体替换为可学习编码,押注"让网络自己从数据学特征"。后续的演进(PointPillars 提速、SECOND 稀疏化、CenterPoint 换头)都是在它确立的可学习表示之上做工程优化,而非回退到手工特征。这正是特征学习战胜手工设计的典型轨迹。 + +## 接下来读什么 +- [PointPillars](paper_pointpillars.md) — 把 3D 卷积压成 2D 的提速版 +- [CenterPoint](paper_centerpoint.md) — 以体素 backbone 为基础换 center-based 头 +- [BEVFusion](paper_bevfusion.md) — 体素 BEV 与相机 BEV 的融合 +- [modular perception → planning pipeline](paradigm_modular_perception_to_planning_pipeline.md) — 它所处的管线 +- [problem: label efficiency for 3D annotation](problem_label_efficiency_for_3d_annotation.md) — 可学习编码的动机 diff --git a/docs/data/cards/extended/paper_ziebart_max_ent_irl.md b/docs/data/cards/extended/paper_ziebart_max_ent_irl.md new file mode 100644 index 0000000..50c8eb0 --- /dev/null +++ b/docs/data/cards/extended/paper_ziebart_max_ent_irl.md @@ -0,0 +1,50 @@ +--- +id: paper:ziebart_max_ent_irl +title: "Maximum Entropy Inverse Reinforcement Learning" +title_zh: "最大熵逆强化学习(Ziebart 等 2008)" +kind: paper +tier: A +authors: [Ziebart, B. D., Maas, A., Bagnell, J. A., Dey, A. K.] 
+venue: "AAAI 2008" +year: 2008 +topic: rl_foundations +phase: core +deep_links: + - {label: "AAAI 2008 PDF", url: "https://www.aaai.org/Papers/AAAI/2008/AAAI08-227.pdf"} +--- + +# 最大熵逆强化学习(Ziebart 等 2008) + +> [Ng & Russell](paper_ng_russell_2000_irl.md) 点破 IRL 不可辨识——无数 reward 都能解释同一行为。Ziebart 的转折是请出最大熵原理:在所有匹配专家特征期望的轨迹分布里,选熵最大的那个。这一刀既消解了歧义,又顺手处理了人类示教带噪、近优而非严格最优的现实。 + +## 一个最小公式 / Math anchor +$$ +P(\tau)=\frac{1}{Z(\theta)}\exp\big(\theta^{\top}\mathbf{f}_\tau\big),\qquad +\mathbf{f}_\tau=\sum_{t}\mathbf{f}(s_t,a_t),\qquad +\nabla_\theta\mathcal{L}=\tilde{\mathbf{f}}-\sum_{\tau}P(\tau)\,\mathbf{f}_\tau +$$ +轨迹 $\tau$ 的概率正比于其累计特征 $\mathbf{f}_\tau$ 在 reward 权重 $\theta$ 下的指数——reward 越高的轨迹越可能被选,但不是一定(这就是"近优带噪"的概率化)。极大似然的梯度漂亮地等于"专家经验特征期望 $\tilde{\mathbf{f}}$ 减去模型期望特征":当二者相等时拟合完成。这就是经典的 feature-matching 条件,最大熵保证在满足它的无穷多分布里挑唯一一个。 + +## 它在图谱里的位置 +Ziebart 把 [Ng & Russell](paper_ng_russell_2000_irl.md) 的不适定 IRL 升级成一个良定的概率模型,是 [Inverse RL](paper_irl.md) 现代谱系的真正起点。它的能量模型 $P(\tau)\propto e^{\theta^\top \mathbf{f}_\tau}$ 正是 [GAIL](paper_gail.md) 与 [AIRL](paper_airl.md) 对抗化所近似的对象——判别器本质在估计这个配分函数的梯度。它是 [最大熵把策略与价值缝成对偶](insight_max_entropy_closes_policy_value_duality.md) 这条洞见最干净的实例,也支撑 [人类示教其实把隐式奖励压缩在轨迹里](insight_human_demonstrations_compress_implicit_reward_function.md)。 + +## 架构 / 方法直觉 +核心计算是配分函数 $Z(\theta)$ 与状态访问频率(state visitation frequency)。论文用一个前向-后向的动态规划(类似 HMM 的 forward-backward)在确定性 MDP 上精确求出每个状态被最优-软策略访问的期望次数,从而得到模型特征期望,再做梯度上升更新 $\theta$。关键直觉是把"硬最优"换成"软最优":高 reward 轨迹概率高但不独占,这天然容纳了人类示教的次优与多样性。这一软化后来被 soft Q-learning / [SAC](paper_sac.md) 继承——最大熵 RL 与最大熵 IRL 是同一枚硬币的正反面。 + +## 工程上真正要注意什么 +- 精确版本要求枚举或 DP 求状态访问频率,只在小型/确定性 MDP 可行;大空间需用采样近似(后来的 guided cost learning、深度 maxent IRL)。 +- 仍依赖手写线性特征 $\mathbf{f}(s,a)$:reward 表达力被特征基锁死,这是它与 GAIL/AIRL 的分水岭。 +- 随机动力学下配分函数计算更棘手,原始 forward-backward 假设确定性转移。 +- 在驾驶里它是行人/车辆轨迹预测的经典工具——把"目的地"当 reward,最大熵给出多模态可达路径分布,这恰好契合预测任务要的"多种合理未来"。 + +## Bitter-Lesson 视角 +最大熵 IRL 是一次漂亮的"用原理替代任意选择":它不再像 Ng & Russell 那样靠工程师挑 margin 
目标,而是用一条信息论原则(最大熵)唯一确定分布。但它仍把特征工程留给人。真正把特征也交给数据的是后来的对抗与深度版本。Ziebart 的位置因此是承上启下——它给了一个有原则、可微、能算梯度的目标,让"用算力学 reward"第一次成为可能。 + +## 接下来读什么 +- [Ng & Russell 经典 IRL](paper_ng_russell_2000_irl.md) — 它要修复的那个不可辨识问题 +- [GAIL](paper_gail.md) — 把这个能量模型对抗化、扔掉手写特征 +- [AIRL](paper_airl.md) — 对抗化且恢复可迁移 reward +- [Inverse RL 总览](paper_irl.md) — 它升级的那个问题框架 +- [SAC](paper_sac.md) — 把最大熵从 IRL 搬回 forward RL +- [最大熵把策略与价值缝成对偶](insight_max_entropy_closes_policy_value_duality.md) +- [给策略加熵奖励鼓励探索](move_add_entropy_bonus_to_encourage_exploration.md) diff --git a/docs/data/generated/round4_driving.json b/docs/data/generated/round4_driving.json new file mode 100644 index 0000000..ac448a1 --- /dev/null +++ b/docs/data/generated/round4_driving.json @@ -0,0 +1,127 @@ +{ + "$comment": "Round 4 driving-E2E/LLM-agent batch: end-to-end driving lineage (ALVINN -> LBC -> TCP -> GameFormer -> DriveGPT) and LLM-agent grounding (Codex, SayCan), plus the multi-modal-traffic insight. Nodes + at least one anchoring edge each to existing nodes.", + "nodes": [ + { + "id": "paper:alvinn", + "kind": "paper", + "tier": "S", + "topic": "e2e_ad", + "year": 1989, + "label": "ALVINN", + "label_zh": "ALVINN(最早的神经网络端到端驾驶)", + "summary_zh": "Pomerleau 1989 用单隐层全连接网络把相机图像直接映射到转向,是端到端驾驶与行为克隆的源头;并用合成偏移样本预演了协变量偏移的工程解。" + }, + { + "id": "paper:lbc", + "kind": "paper", + "tier": "A", + "topic": "e2e_ad", + "year": 2019, + "label": "Learning by Cheating", + "label_zh": "Learning by Cheating(先作弊再蒸馏的端到端驾驶)", + "summary_zh": "先训练能看仿真器真值的特权教师把决策学透,再让只吃像素的学生在自己访问的状态上蒸馏教师,相当于无限 DAgger,是 CARLA 上的强 baseline。" + }, + { + "id": "paper:tcp_carla", + "kind": "paper", + "tier": "A", + "topic": "e2e_ad", + "year": 2022, + "label": "TCP", + "label_zh": "TCP(轨迹引导的控制预测:轨迹分支 + 控制分支融合)", + "summary_zh": "把端到端驾驶的轨迹表征与底层控制表征做成同一网络的两分支并按场景自适应融合,单相机即取得当年很强的 CARLA 闭环成绩。" + }, + { + "id": "paper:gameformer", + "kind": "paper", + "tier": "A", + "topic": "planning", + "year": 2023, + "label": "GameFormer", + "label_zh": "GameFormer(用层级 transformer 
做博弈式交互预测与规划)", + "summary_zh": "用层级 transformer 解码器迭代地让每个 agent 在他者上一层预测条件下更新自己,把 level-k 博弈推理可微化,统一交互预测与自车规划。" + }, + { + "id": "paper:drivegpt", + "kind": "paper", + "tier": "B", + "topic": "e2e_ad", + "year": 2023, + "label": "DriveGPT", + "label_zh": "DriveGPT(GPT 式自回归驾驶策略 / 自回归驾驶基座范式)", + "summary_zh": "把驾驶行为 token 化后用 GPT 式自回归在海量日志上做下一 token 预测,使规划变成续写;代表把 LLM 训练配方整体搬到驾驶的基座范式。" + }, + { + "id": "paper:codex", + "kind": "paper", + "tier": "B", + "topic": "llm_agent", + "year": 2021, + "label": "Codex", + "label_zh": "Codex(代码大模型:code-as-action 驾驶 agent 的底座)", + "summary_zh": "在 GPT 上用海量代码继续训练并以单元测试(pass@k)评测,把程序确立为可生成又可精确求值的动作表征,是 code-as-action 智能体的语言底座。" + }, + { + "id": "paper:saycan", + "kind": "paper", + "tier": "A", + "topic": "llm_agent", + "year": 2022, + "label": "SayCan", + "label_zh": "SayCan(用可供性把 LLM 计划接到现实:会说不等于能做)", + "summary_zh": "用 LLM 给技能打语义有用性分(say)、用价值函数给当前状态成功概率打分(can),相乘选动作,把抽象语言计划锚到物理可行集。" + }, + { + "id": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "kind": "insight", + "tier": "insight", + "topic": "planning", + "year": 2023, + "label": "Multi-modal behavior is intrinsic to traffic scenes", + "label_zh": "交通场景的多模态性是内在的", + "summary_zh": "路口的未来是几个都对的答案;单峰回归把两种合理机动平均成落在低密度/违规区的无效轨迹,因此必须用分布式/生成式规划器与熵奖励。" + } + ], + "edges": [ + {"source": "paper:alvinn", "target": "paradigm:imitation_learning", "rel": "manifests"}, + {"source": "paper:alvinn", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "motivates"}, + {"source": "paper:alvinn", "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "contrasts"}, + {"source": "paper:alvinn", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "motivates"}, + + {"source": "paper:lbc", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "manifests"}, + {"source": "paper:lbc", "target": "paper:alvinn", "rel": "extends"}, + {"source": "paper:lbc", "target": "paper:carla_leaderboard", "rel": "validates"}, + 
{"source": "paper:lbc", "target": "paradigm:imitation_learning", "rel": "manifests"}, + + {"source": "paper:tcp_carla", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "manifests"}, + {"source": "paper:tcp_carla", "target": "paper:lbc", "rel": "extends"}, + {"source": "paper:tcp_carla", "target": "paper:carla_leaderboard", "rel": "validates"}, + {"source": "paper:tcp_carla", "target": "paper:bench2drive", "rel": "feeds"}, + + {"source": "paper:gameformer", "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", "rel": "covers"}, + {"source": "paper:gameformer", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "manifests"}, + {"source": "paper:gameformer", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "extends"}, + {"source": "paper:gameformer", "target": "paper:tcp_carla", "rel": "contrasts"}, + + {"source": "paper:drivegpt", "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "manifests"}, + {"source": "paper:drivegpt", "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "rel": "manifests"}, + {"source": "paper:drivegpt", "target": "paper:gpt3", "rel": "extends"}, + {"source": "paper:drivegpt", "target": "paradigm:imitation_learning", "rel": "composes"}, + {"source": "paper:drivegpt", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "manifests"}, + + {"source": "paper:codex", "target": "paper:gpt3", "rel": "extends"}, + {"source": "paper:codex", "target": "paradigm:llm_agent_paradigm", "rel": "enables"}, + {"source": "paper:codex", "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", "rel": "manifests"}, + {"source": "paper:codex", "target": "paradigm:knowledge_driven_reflective_agent", "rel": "enables"}, + + {"source": "paper:saycan", "target": "paradigm:llm_agent_paradigm", "rel": "manifests"}, + {"source": "paper:saycan", "target": 
"problem:grounding_language_token_to_continuous_physical_world", "rel": "covers"}, + {"source": "paper:saycan", "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", "rel": "manifests"}, + {"source": "paper:saycan", "target": "paper:rt2", "rel": "parallel"}, + {"source": "paper:saycan", "target": "paper:codex", "rel": "contrasts"}, + + {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "move:add_entropy_bonus_to_encourage_exploration", "rel": "motivates"}, + {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "rel": "feeds"}, + {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", "rel": "feeds"} + ] +} diff --git a/docs/data/generated/round4_irl.json b/docs/data/generated/round4_irl.json new file mode 100644 index 0000000..e9b0b53 --- /dev/null +++ b/docs/data/generated/round4_irl.json @@ -0,0 +1,45 @@ +{ + "$comment": "Round 4 IRL/imitation batch", + "nodes": [ + {"id":"paper:gail","kind":"paper","tier":"A","topic":"deep_rl","year":2016,"label":"GAIL","label_zh":"GAIL(生成对抗模仿学习)","summary_zh":"把模仿写成占用度量匹配的 GAN 博弈,跳过显式 reward 直接训出闭环策略;用判别器吃掉特征工程,缓解 BC 的复合误差。"}, + {"id":"paper:airl","kind":"paper","tier":"A","topic":"deep_rl","year":2018,"label":"AIRL","label_zh":"AIRL(对抗逆强化学习)","summary_zh":"给 GAIL 的判别器一个 reward+势函数 shaping 结构,最优时分解出对动力学变化鲁棒、可迁移的奖励函数。"}, + {"id":"paper:ng_russell_2000_irl","kind":"paper","tier":"A","topic":"rl_foundations","year":2000,"label":"Ng & Russell IRL","label_zh":"逆强化学习的算法(Ng & Russell 2000)","summary_zh":"IRL 奠基作:把'从行为反推 reward'形式化为线性规划,并点破 reward 不可辨识这一内在病灶与 reward shaping 不变性。"}, + {"id":"paper:ziebart_max_ent_irl","kind":"paper","tier":"A","topic":"rl_foundations","year":2008,"label":"MaxEnt IRL","label_zh":"最大熵逆强化学习(Ziebart 2008)","summary_zh":"用最大熵原理在无数等价 reward 中挑唯一解,把 IRL 
升级为良定的能量模型,并天然容纳人类示教的次优与带噪。"}, + {"id":"paper:irl","kind":"paper","tier":"B","topic":"rl_foundations","year":2000,"label":"Inverse RL","label_zh":"逆强化学习问题(总览)","summary_zh":"把 RL 的箭头反过来——给定专家行为反推其优化的 reward;问题天生不适定,全部算法的核心在于用什么原则从解族里挑一个。"}, + {"id":"paper:ross_bagnell_2010","kind":"paper","tier":"A","topic":"rl_foundations","year":2010,"label":"Efficient Reductions for IL","label_zh":"模仿学习的高效归约(Ross & Bagnell 2010)","summary_zh":"证明朴素 BC 的闭环误差以 O(εT²) 复合,并给出首个线性 O(εT) 的 forward training 归约,为 DAgger 奠定理论地基。"}, + {"id":"insight:policy_improvement_bounded_by_distribution_shift","kind":"insight","topic":"rl_foundations","year":2010,"label":"改进受分布偏移上界锁死","label_zh":"策略改进的上限被分布偏移锁死","summary_zh":"模仿、信赖域策略梯度、离线 RL 共享同一不等式:用旧分布评估新策略的可靠度正比于新旧分布接近度,分布偏移是改进的硬天花板。"}, + {"id":"insight:max_entropy_closes_policy_value_duality","kind":"insight","topic":"rl_foundations","year":2008,"label":"最大熵缝合策略价值对偶","label_zh":"最大熵把策略与价值缝成一对偶","summary_zh":"加熵正则后最优策略变成价值的 softmax,配分函数把 reward、价值、策略绑定,使 RL(正向)与 IRL(逆向)成为同一对偶的两面。"} + ], + "edges": [ + {"source":"paper:gail","target":"problem:behavior_cloning_compounds_errors_over_time","rel":"motivates"}, + {"source":"paper:gail","target":"paradigm:imitation_learning","rel":"extends"}, + {"source":"paper:gail","target":"insight:human_demonstrations_compress_implicit_reward_function","rel":"manifests"}, + {"source":"paper:gail","target":"move:add_entropy_bonus_to_encourage_exploration","rel":"composes"}, + {"source":"paper:gail","target":"insight:max_entropy_closes_policy_value_duality","rel":"manifests"}, + {"source":"paper:airl","target":"paper:gail","rel":"extends"}, + {"source":"paper:airl","target":"paper:ziebart_max_ent_irl","rel":"extends"}, + {"source":"paper:airl","target":"insight:human_demonstrations_compress_implicit_reward_function","rel":"manifests"}, + {"source":"paper:airl","target":"paper:irl","rel":"feeds"}, + {"source":"paper:ng_russell_2000_irl","target":"paper:irl","rel":"covers"}, + 
{"source":"paper:ng_russell_2000_irl","target":"insight:human_demonstrations_compress_implicit_reward_function","rel":"manifests"}, + {"source":"paper:ziebart_max_ent_irl","target":"paper:ng_russell_2000_irl","rel":"extends"}, + {"source":"paper:ziebart_max_ent_irl","target":"insight:max_entropy_closes_policy_value_duality","rel":"manifests"}, + {"source":"paper:ziebart_max_ent_irl","target":"paper:sac","rel":"parallel"}, + {"source":"paper:ziebart_max_ent_irl","target":"paper:irl","rel":"feeds"}, + {"source":"paper:irl","target":"paradigm:imitation_learning","rel":"contrasts"}, + {"source":"paper:irl","target":"insight:human_demonstrations_compress_implicit_reward_function","rel":"manifests"}, + {"source":"paper:ross_bagnell_2010","target":"problem:behavior_cloning_compounds_errors_over_time","rel":"validates"}, + {"source":"paper:ross_bagnell_2010","target":"paper:ross2011_dagger","rel":"motivates"}, + {"source":"paper:ross_bagnell_2010","target":"insight:imitation_learning_alone_cannot_recover_from_compounding_errors","rel":"manifests"}, + {"source":"paper:ross_bagnell_2010","target":"insight:policy_improvement_bounded_by_distribution_shift","rel":"feeds"}, + {"source":"insight:policy_improvement_bounded_by_distribution_shift","target":"paper:ross_bagnell_2010","rel":"manifests"}, + {"source":"insight:policy_improvement_bounded_by_distribution_shift","target":"move:trust_region_step_for_monotonic_improvement","rel":"manifests"}, + {"source":"insight:policy_improvement_bounded_by_distribution_shift","target":"paradigm:offline_rl","rel":"manifests"}, + {"source":"insight:policy_improvement_bounded_by_distribution_shift","target":"insight:imitation_learning_alone_cannot_recover_from_compounding_errors","rel":"feeds"}, + {"source":"insight:policy_improvement_bounded_by_distribution_shift","target":"validation:trace_dataset_aggregation_for_imitation","rel":"manifests"}, + 
{"source":"insight:max_entropy_closes_policy_value_duality","target":"paper:ziebart_max_ent_irl","rel":"manifests"}, + {"source":"insight:max_entropy_closes_policy_value_duality","target":"paper:sac","rel":"manifests"}, + {"source":"insight:max_entropy_closes_policy_value_duality","target":"move:add_entropy_bonus_to_encourage_exploration","rel":"manifests"}, + {"source":"insight:max_entropy_closes_policy_value_duality","target":"insight:human_demonstrations_compress_implicit_reward_function","rel":"composes"} + ] +} diff --git a/docs/data/generated/round4_orphans.json b/docs/data/generated/round4_orphans.json new file mode 100644 index 0000000..715dfcc --- /dev/null +++ b/docs/data/generated/round4_orphans.json @@ -0,0 +1,13 @@ +{ + "$comment": "Adopt 3 remaining orphan move cards as nodes so their prose is reachable.", + "nodes": [ + {"id": "move:learn_set_predictor_with_hungarian_matching", "kind": "move", "topic": "scene_understanding"}, + {"id": "move:share_object_query_across_tasks_for_e2e_planning", "kind": "move", "topic": "e2e_ad"}, + {"id": "move:learn_open_vocabulary_classifier_via_language_anchor", "kind": "move", "topic": "foundation_models"} + ], + "edges": [ + {"source": "move:learn_set_predictor_with_hungarian_matching", "target": "paper:carion2020", "rel": "composes"}, + {"source": "move:share_object_query_across_tasks_for_e2e_planning", "target": "paper:2212.10156", "rel": "composes"}, + {"source": "move:learn_open_vocabulary_classifier_via_language_anchor", "target": "paper:sam", "rel": "composes"} + ] +} diff --git a/docs/data/generated/round4_perception.json b/docs/data/generated/round4_perception.json new file mode 100644 index 0000000..e84612a --- /dev/null +++ b/docs/data/generated/round4_perception.json @@ -0,0 +1,38 @@ +{ + "$comment": "Round 4 perception/SSL batch — 3D detection backbones (VoxelNet→PointPillars→CenterPoint), foundation vision/SSL (AlexNet, BYOL, VICReg), language/attention origins (BERT, Bahdanau attention). 
Nodes anchor into existing detection, BEV, SSL and attention nodes.", + "nodes": [ + {"id": "paper:pointpillars", "kind": "paper", "tier": "A", "topic": "scene_understanding", "year": 2019, "label": "PointPillars", "label_zh": "PointPillars(点云柱状编码)", "summary_zh": "把点云在 z 轴不切分、只在 BEV 平面划成柱,每柱用微型 PointNet 编码成固定向量并散射成伪图像,之后全走 2D 卷积。用放弃高度体素化换来纯 2D 卷积的速度,长期是工业界实时 LiDAR 检测的事实基线。"}, + {"id": "paper:voxelnet", "kind": "paper", "tier": "A", "topic": "scene_understanding", "year": 2018, "label": "VoxelNet", "label_zh": "VoxelNet(体素特征端到端学习)", "summary_zh": "第一次证明不需手工点云特征:体素化后用可学习的 Voxel Feature Encoding 直接从原始点学特征,再接 3D 卷积与 RPN 端到端检测。确立了体素 + 可学习编码这条主线,PointPillars、SECOND、CenterPoint 皆为其后裔。"}, + {"id": "paper:centerpoint", "kind": "paper", "tier": "A", "topic": "scene_understanding", "year": 2021, "label": "CenterPoint", "label_zh": "CenterPoint(中心点 3D 检测与跟踪)", "summary_zh": "把 CenterNet 的中心点范式搬到点云 BEV:在热力图上预测目标中心,再回归尺寸、朝向、高度与速度,彻底去掉 anchor 与 NMS。anchor-free 天然处理任意朝向,速度回归又让多目标跟踪近乎免费。"}, + {"id": "paper:krizhevsky2012", "kind": "paper", "tier": "S", "topic": "ssl_vision", "year": 2012, "label": "AlexNet", "label_zh": "AlexNet(ImageNet 时刻)", "summary_zh": "2012 年以巨大优势夺得 ImageNet 冠军,第一次让大数据 + GPU + 深度卷积网络在公开基准上压倒手工特征流水线。其具体技术会过时,但它点燃的范式转移——表示学习取代特征工程——是整个深度学习浪潮的起点。"}, + {"id": "paper:bert", "kind": "paper", "tier": "S", "topic": "foundation_models", "year": 2019, "label": "BERT", "label_zh": "BERT(掩码语言建模)", "summary_zh": "用随机遮词、靠双向上下文填回的极简自监督目标在海量无标注文本上预训练通用语言表示,微调即横扫下游任务。把掩码预测确立为可规模化的自监督信号,此思想随后被 MAE 等整体搬到视觉。"}, + {"id": "paper:byol", "kind": "paper", "tier": "A", "topic": "ssl_vision", "year": 2020, "label": "BYOL", "label_zh": "BYOL(无负样本自监督)", "summary_zh": "打破对比学习必须有负样本的共识:用同图两增广视图,让 online 网络预测缓慢更新的 target 网络表示。靠预测头不对称 + 动量目标 + 停梯度防止坍缩,无需任何负样本即可学出可迁移表示。"}, + {"id": "paper:vicreg", "kind": "paper", "tier": "A", "topic": "ssl_vision", "year": 2022, "label": "VICReg", "label_zh": "VICReg(方差-不变性-协方差正则)", "summary_zh": 
"把防坍缩从负样本或架构技巧改为三条显式正则:不变性拉近配对表示、方差保证每维散度、协方差去除维度间冗余。无需负样本、动量与预测头,两分支还可异构,把防坍缩从隐晦工程抬升为可分析的目标函数。"}, + {"id": "paper:bahdanau2014_attention", "kind": "paper", "tier": "S", "topic": "foundation_models", "year": 2015, "label": "Bahdanau Attention", "label_zh": "Bahdanau 注意力(对齐即翻译)", "summary_zh": "解决 encoder-decoder 翻译把整句压进定长向量的瓶颈:decoder 生成每词时用可学习对齐打分动态加权所有源端隐状态,得到该步专属上下文向量。注意力机制由此诞生,Transformer 只是把它推到极致。"} + ], + "edges": [ + {"source": "paper:pointpillars", "target": "paper:voxelnet", "rel": "extends"}, + {"source": "paper:pointpillars", "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "composes"}, + {"source": "paper:pointpillars", "target": "paper:bevfusion", "rel": "feeds"}, + {"source": "paper:voxelnet", "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "composes"}, + {"source": "paper:voxelnet", "target": "problem:label_efficiency_for_3d_annotation", "rel": "motivates"}, + {"source": "paper:centerpoint", "target": "paper:voxelnet", "rel": "extends"}, + {"source": "paper:centerpoint", "target": "paper:pointpillars", "rel": "extends"}, + {"source": "paper:centerpoint", "target": "paper:detr3d", "rel": "parallel"}, + {"source": "paper:centerpoint", "target": "paper:bevfusion", "rel": "feeds"}, + {"source": "paper:krizhevsky2012", "target": "paper:he2015_resnet", "rel": "prereq"}, + {"source": "paper:krizhevsky2012", "target": "paper:vit", "rel": "prereq"}, + {"source": "paper:krizhevsky2012", "target": "paradigm:scaling_data_with_self_supervision", "rel": "motivates"}, + {"source": "paper:bert", "target": "paper:vaswani2017", "rel": "extends"}, + {"source": "paper:bert", "target": "insight:masked_prediction_yields_self_supervised_signal", "rel": "manifests"}, + {"source": "paper:bert", "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes"}, + {"source": "paper:byol", "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes"}, + {"source": "paper:byol", "target": "paper:dinov2", "rel": 
"feeds"}, + {"source": "paper:byol", "target": "problem:label_efficiency_for_3d_annotation", "rel": "motivates"}, + {"source": "paper:vicreg", "target": "paper:byol", "rel": "contrasts"}, + {"source": "paper:vicreg", "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes"}, + {"source": "paper:bahdanau2014_attention", "target": "paper:vaswani2017", "rel": "prereq"}, + {"source": "paper:bahdanau2014_attention", "target": "insight:attention_is_typed_entity_communication", "rel": "motivates"}, + {"source": "paper:bahdanau2014_attention", "target": "paper:mamba", "rel": "contrasts"} + ] +} diff --git a/docs/data/generated/round4_rlinfra.json b/docs/data/generated/round4_rlinfra.json new file mode 100644 index 0000000..5025108 --- /dev/null +++ b/docs/data/generated/round4_rlinfra.json @@ -0,0 +1,51 @@ +{ + "$comment": "Round 4 RL-infra/world-model batch: GAE, D4RL, Levine offline-RL tutorial, Spinning Up, preference learning (RLHF seed), PlaNet, and the Jensen-gap overestimation insight.", + "nodes": [ + {"id": "paper:schulman2016_gae", "kind": "paper", "tier": "A", "topic": "deep_rl", "year": 2016, "label": "GAE", "label_zh": "GAE(广义优势估计)", "summary_zh": "用一个标量 λ 把单步 TD(低方差有偏)和蒙特卡洛回报(无偏高方差)连续插值,给优势估计一个 bias/variance 旋钮,是 TRPO/PPO 默认的优势计算方式。"}, + {"id": "paper:d4rl", "kind": "paper", "tier": "A", "topic": "evaluation_benchmark", "year": 2020, "label": "D4RL", "label_zh": "D4RL(离线 RL 基准)", "summary_zh": "把离线 RL 的数据集、环境与归一化打分标准化成一套公共基准,并刻意构造次优、窄覆盖、需缝合的数据分布来暴露算法弱点,让进展可横向比较。"}, + {"id": "paper:levine_offline_rl_tutorial", "kind": "paper", "tier": "B", "topic": "deep_rl", "year": 2020, "label": "Offline RL Tutorial", "label_zh": "离线 RL 综述(Levine 等)", "summary_zh": "把离线 RL 的全部困难归结为分布偏移,系统梳理策略约束、价值正则、不确定性惩罚、模型基与序列建模五类应对,并诚实列出离线模型选择等开放问题。"}, + {"id": "paper:spinning_up", "kind": "paper", "tier": "B", "topic": "deep_rl", "year": 2018, "label": "Spinning Up", "label_zh": "Spinning Up in Deep RL", "summary_zh": "OpenAI 的深度 RL 教学资源,把策略梯度、信赖域、actor-critic 
一族算法统一在一个梯度式下并配极简可读实现,是从读公式到跑代码的最快入口。"}, + {"id": "paper:preference_learning", "kind": "paper", "tier": "A", "topic": "alignment", "year": 2017, "label": "Deep RL from Human Preferences", "label_zh": "从人类偏好中学习奖励", "summary_zh": "放弃手写奖励,让人类对成对行为片段做偏好比较,用 Bradley–Terry 模型拟合奖励再做 RL;以极少标注学会难以言说的目标,是 RLHF 整条路线的种子。"}, + {"id": "paper:planet", "kind": "paper", "tier": "A", "topic": "world_models", "year": 2019, "label": "PlaNet", "label_zh": "PlaNet(潜空间动力学 + 规划)", "summary_zh": "纯从像素学一个结合确定性记忆与随机隐变量的潜动力学模型 RSSM,并完全在潜空间用 CEM 规划解决连续控制,样本效率高一个数量级,是 Dreamer 的前身。"}, + {"id": "insight:q_learning_max_is_optimistically_biased", "kind": "insight", "tier": "A", "topic": "deep_rl", "year": 2010, "label": "Q-learning's max is optimistically biased", "label_zh": "Q-learning 的 max 是系统性乐观偏差", "summary_zh": "由 Jensen 不等式 E[max]≥max E,对噪声 Q 取 max 在期望上必然高估真实价值;自举放大此偏差,是 DQN 不稳定的元凶,也是 Double-Q 的动机。"} + ], + "edges": [ + {"source": "paper:schulman2016_gae", "target": "paper:schulman2017_ppo", "rel": "feeds"}, + {"source": "paper:schulman2016_gae", "target": "paper:schulman2015_trpo", "rel": "feeds"}, + {"source": "paper:schulman2016_gae", "target": "paper:sac", "rel": "parallel"}, + + {"source": "paper:d4rl", "target": "paradigm:offline_rl", "rel": "validates"}, + {"source": "paper:d4rl", "target": "paper:cql", "rel": "validates"}, + {"source": "paper:d4rl", "target": "paper:iql", "rel": "validates"}, + {"source": "paper:d4rl", "target": "paper:decision_transformer", "rel": "validates"}, + {"source": "paper:d4rl", "target": "paper:levine_offline_rl_tutorial", "rel": "parallel"}, + + {"source": "paper:levine_offline_rl_tutorial", "target": "paradigm:offline_rl", "rel": "covers"}, + {"source": "paper:levine_offline_rl_tutorial", "target": "paper:cql", "rel": "covers"}, + {"source": "paper:levine_offline_rl_tutorial", "target": "paper:iql", "rel": "covers"}, + + {"source": "paper:spinning_up", "target": "paper:schulman2016_gae", "rel": "covers"}, + {"source": "paper:spinning_up", "target": 
"paper:schulman2017_ppo", "rel": "covers"}, + {"source": "paper:spinning_up", "target": "paper:schulman2015_trpo", "rel": "covers"}, + {"source": "paper:spinning_up", "target": "paper:sac", "rel": "covers"}, + {"source": "paper:spinning_up", "target": "paper:mnih2015_dqn", "rel": "covers"}, + + {"source": "paper:preference_learning", "target": "paper:rlhf_dpo", "rel": "feeds"}, + {"source": "paper:preference_learning", "target": "paper:schulman2017_ppo", "rel": "feeds"}, + + {"source": "paper:planet", "target": "paper:dreamer_v2", "rel": "feeds"}, + {"source": "paper:planet", "target": "paper:dreamer_v3", "rel": "feeds"}, + {"source": "paper:planet", "target": "paradigm:world_model_paradigm", "rel": "manifests"}, + {"source": "paper:planet", "target": "paradigm:model_based_rl", "rel": "manifests"}, + {"source": "paper:planet", "target": "paper:world_models", "rel": "extends"}, + {"source": "paper:planet", "target": "move:latent_imagination_rollout", "rel": "manifests"}, + {"source": "paper:planet", "target": "insight:world_models_let_planning_be_done_in_imagination", "rel": "manifests"}, + {"source": "paper:planet", "target": "paper:muzero", "rel": "parallel"}, + + {"source": "insight:q_learning_max_is_optimistically_biased", "target": "move:double_q_to_reduce_overestimation", "rel": "motivates"}, + {"source": "insight:q_learning_max_is_optimistically_biased", "target": "move:bootstrap_target_network_to_stabilize_off_policy_learning", "rel": "parallel"}, + {"source": "insight:q_learning_max_is_optimistically_biased", "target": "paper:mnih2015_dqn", "rel": "motivates"}, + {"source": "insight:q_learning_max_is_optimistically_biased", "target": "paper:sac", "rel": "manifests"}, + {"source": "insight:q_learning_max_is_optimistically_biased", "target": "paradigm:offline_rl", "rel": "motivates"} + ] +} diff --git a/docs/data/graph_extended.json b/docs/data/graph_extended.json index 1855286..0af6151 100644 --- a/docs/data/graph_extended.json +++ 
b/docs/data/graph_extended.json @@ -14,7 +14,7 @@ "labs": [ "lab:lab03" ], - "degree": 25 + "degree": 26 }, { "id": "paper:2210.14222", @@ -209,7 +209,7 @@ "phase": "prereq", "year": 2017, "card": "paper_vaswani2017.md", - "degree": 25 + "degree": 27 }, { "id": "paper:vit", @@ -221,7 +221,7 @@ "phase": "prereq", "year": 2020, "card": "paper_vit.md", - "degree": 21 + "degree": 22 }, { "id": "paper:carion2020", @@ -233,7 +233,7 @@ "phase": "prereq", "year": 2020, "card": "paper_carion2020.md", - "degree": 14 + "degree": 15 }, { "id": "paper:he2015_resnet", @@ -245,7 +245,7 @@ "phase": "prereq", "year": 2015, "card": "paper_he2015_resnet.md", - "degree": 6 + "degree": 7 }, { "id": "paper:gpt3", @@ -257,7 +257,7 @@ "phase": "prereq", "year": 2020, "card": "paper_gpt3.md", - "degree": 27 + "degree": 29 }, { "id": "paper:schulman2017_ppo", @@ -269,7 +269,7 @@ "phase": "core", "year": 2017, "card": "paper_schulman2017_ppo.md", - "degree": 13 + "degree": 16 }, { "id": "paper:mnih2015_dqn", @@ -281,7 +281,7 @@ "phase": "prereq", "year": 2015, "card": "paper_mnih2015_dqn.md", - "degree": 12 + "degree": 14 }, { "id": "paper:ross2011_dagger", @@ -296,7 +296,7 @@ "labs": [ "lab:lab02" ], - "degree": 9 + "degree": 10 }, { "id": "paper:silver2017_alphazero", @@ -392,7 +392,7 @@ "phase": "prereq", "year": 2023, "card": "paper_dinov2.md", - "degree": 15 + "degree": 16 }, { "id": "paper:sam", @@ -404,7 +404,7 @@ "phase": "prereq", "year": 2023, "card": "paper_sam.md", - "degree": 5 + "degree": 6 }, { "id": "paper:sutton_barto", @@ -428,7 +428,7 @@ "phase": "core", "year": 2023, "card": "paper_rlhf_dpo.md", - "degree": 14 + "degree": 15 }, { "id": "paper:world_models", @@ -440,7 +440,7 @@ "phase": "core", "year": 2018, "card": "paper_world_models.md", - "degree": 19 + "degree": 20 }, { "id": "paper:mamba", @@ -452,7 +452,7 @@ "phase": "frontier", "year": 2023, "card": "paper_mamba.md", - "degree": 4 + "degree": 5 }, { "id": "paper:diffuser", @@ -944,7 +944,7 @@ "year": 2019, 
"summary_zh": "MuZero 把基于模型的强化学习推到了不需要事先知道环境规则的程度。它在抽象隐空间里同时学习一个表示网络、一个动力学转移网络和一个预测网络,然后在这个隐空间里跑蒙特卡洛树搜索做规划,从而在围棋、国际象棋、将棋以及雅达利游戏上同时取得当时最强的成绩。", "label": "MuZero", - "degree": 8 + "degree": 9 }, { "id": "paper:dreamer_v2", @@ -956,7 +956,7 @@ "year": 2020, "summary_zh": "DreamerV2 在 PlaNet 和 DreamerV1 的基础上把世界模型的隐状态改成离散随机变量,并配合 KL 平衡和直通梯度估计,使得在雅达利套件上仅凭想象中的回放就能训练出与无模型强者相当的策略。它第一次证明纯粹在世界模型内部的想象中训练就可以超过同等数据预算的无模型方法。", "label": "DreamerV2", - "degree": 8 + "degree": 9 }, { "id": "paper:dreamer_v3", @@ -968,7 +968,7 @@ "year": 2023, "summary_zh": "DreamerV3 通过对回报、价值和奖励做对称对数变换以及一系列规范化技巧,让同一套超参数无需调参就能跨越雅达利、ProcGen、DMLab、Minecraft 等数十个不同动力学的任务取得领先成绩。它把世界模型方法从精细调参的研究原型变成了一个真正可以照搬使用的通用基线。", "label": "DreamerV3", - "degree": 8 + "degree": 9 }, { "id": "paper:iris_world_model", @@ -992,7 +992,7 @@ "year": 2018, "summary_zh": "SAC 把最大熵强化学习推广到连续动作空间,让策略在最大化期望回报的同时也最大化策略的熵。它自动调节温度系数来控制探索强度,配合双 Q 评论员减小过估计偏差,成为连续控制领域最稳定也最常用的基线算法之一。", "label": "Soft Actor-Critic", - "degree": 10 + "degree": 15 }, { "id": "paper:td3", @@ -1052,7 +1052,7 @@ "year": 2021, "summary_zh": "Decision Transformer 用一个简单的 GPT 风格 transformer,把过去的回报到 go、状态和动作组成的序列直接当成语言来建模,预测下一个动作。它完全跳过了价值函数和策略梯度,只靠监督式的下一个 token 预测就在 D4RL 等离线数据集上达到与离线 RL 专门算法相当的水平。", "label": "Decision Transformer", - "degree": 7 + "degree": 8 }, { "id": "paper:trajectory_transformer", @@ -1100,7 +1100,7 @@ "year": 2020, "summary_zh": "CQL 在离线强化学习的 Q 损失里额外加了一个项,使得对数据外动作的 Q 估计被显式压低,从而得到真实 Q 的下界。这种保守化让从离线数据学到的策略在部署时不再倾向于挑那些没见过却看起来高分的动作,极大地缓解了离线 RL 的分布偏移问题。", "label": "Conservative Q-Learning", - "degree": 6 + "degree": 8 }, { "id": "paper:iql", @@ -1112,7 +1112,7 @@ "year": 2021, "summary_zh": "IQL 用一个 expectile 回归来估计动作分布在状态下的高分位 Q,从而完全避免在 Bellman 备份里采样数据外动作。配合一个用优势加权的策略提取步骤,它在不显式接触分布外动作的情况下做出隐式的最大化,是当下最稳健的离线 RL 算法之一。", "label": "Implicit Q-Learning", - "degree": 7 + "degree": 9 }, { "id": "paper:calql", @@ -1448,7 +1448,7 @@ "year": 2015, "summary_zh": "为价值函数维护一份缓慢更新或定期复制的目标参数,让 Bellman 备份的目标值在短时间内保持不变。这显著抑制了 Q 学习在深度网络上常见的发散现象。", "label": "Use a 
target network to stabilize bootstrapping", - "degree": 4 + "degree": 5 }, { "id": "move:add_entropy_bonus_to_encourage_exploration", @@ -1460,7 +1460,7 @@ "year": 2017, "summary_zh": "在策略优化的目标里加入策略熵项,鼓励策略在高回报附近仍保留一定随机性。这一动作既缓解了局部最优陷阱,也让最大熵强化学习有了原则性的损失函数定义。", "label": "Add entropy bonus to encourage exploration", - "degree": 4 + "degree": 7 }, { "id": "move:turn_offline_dataset_into_supervised_sequence_prediction", @@ -1616,7 +1616,7 @@ "year": 2016, "summary_zh": "维护两个独立训练的 Q 网络,并用其中一个选取动作、另一个估值,或简单地在两者中取较小值。该动作有效地消除了 Q 学习以 max 操作为代价付出的系统性高估。", "label": "Double Q to reduce overestimation", - "degree": 5 + "degree": 6 }, { "id": "move:expert_iteration_self_distillation", @@ -1652,7 +1652,7 @@ "year": 2015, "summary_zh": "把策略更新限制在新旧策略 KL 散度不超过给定阈值的范围内,给出有理论保证的近似单调改进。从 TRPO 的硬约束到 PPO 的截断比都是这一动作的不同实现形式。", "label": "Trust region step for monotonic improvement", - "degree": 5 + "degree": 6 }, { "id": "move:expectile_or_quantile_target_for_distributional_robustness", @@ -1808,7 +1808,7 @@ "year": 2021, "summary_zh": "在城市路口或匝道,自车决策与他车行为相互耦合,单边的预测或单边的规划都无法捕捉真实博弈过程。如何在不让模型规模与计算复杂度爆炸的情况下表达这种多智能体交互,是行为预测和规划的关键挑战。", "label": "Multi-agent interaction modeling in dense traffic", - "degree": 2 + "degree": 4 }, { "id": "problem:rare_event_evaluation_with_no_ground_truth", @@ -1856,7 +1856,7 @@ "year": 2010, "summary_zh": "纯监督式的行为克隆只看见专家轨迹,一旦在部署时偏离哪怕一点点就会进入训练分布之外,下一步又在更偏的位置预测,错误像雪球一样滚大。这是 DAgger、对抗模仿学习等大量后续方法所共同针对的问题。", "label": "Behavior cloning compounds errors over time", - "degree": 4 + "degree": 6 }, { "id": "problem:reward_hacking_in_learned_objectives", @@ -1880,7 +1880,7 @@ "year": 2011, "summary_zh": "由于训练阶段只看专家状态而部署阶段必须自己应对自己产生的状态,模仿学习的误差会沿时间复合。任何想要把模仿学习推到长视野任务的方法都必须显式补救这一点,要么通过交互式重标注,要么通过引入价值或世界模型。", "label": "Imitation learning alone cannot recover from compounding errors", - "degree": 1 + "degree": 3 }, { "id": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", @@ -1904,7 +1904,7 @@ "year": 2016, "summary_zh": 
"一组好的示教不只是状态-动作对,更是对某个未明说的奖励函数的优解。一旦认识到这点,逆强化学习、偏好学习、扩散策略都可以被理解为不同方式去解码这份隐式奖励。", "label": "Human demonstrations compress an implicit reward function", - "degree": 2 + "degree": 7 }, { "id": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", @@ -1940,7 +1940,7 @@ "year": 2021, "summary_zh": "一旦把状态、动作和奖励都编码成离散 token,规划就变成了一个序列生成问题,可以直接套用 transformer、束搜索、扩散采样等成熟工具。这是 Decision Transformer、Trajeglish、CodeTrajectory 等工作背后的共同范式。", "label": "Tokenized trajectories let planning borrow from language modeling", - "degree": 3 + "degree": 5 }, { "id": "insight:bigger_model_plus_more_data_beats_clever_priors", @@ -1976,7 +1976,7 @@ "year": 2018, "summary_zh": "基于模型的 RL 把环境的转移和奖励学习成一个可查询的模型,再在该模型内用规划或想象训练策略。它的关键卖点是样本效率,代价是要承担模型偏差带来的策略偏离。", "label": "Model-based RL", - "degree": 8 + "degree": 9 }, { "id": "paradigm:model_free_rl", @@ -2000,7 +2000,7 @@ "year": 2020, "summary_zh": "离线 RL 从一份固定的历史数据集中学习策略,部署前不再与环境交互。它把强化学习与监督式机器学习的实践模式拉近,但必须正面解决分布外动作带来的过估计问题。", "label": "Offline RL", - "degree": 5 + "degree": 9 }, { "id": "paradigm:imitation_learning", @@ -2012,7 +2012,7 @@ "year": 1989, "summary_zh": "模仿学习把策略学习当作监督学习:用专家轨迹做标签训练策略去复现专家行为。它实现简单、训练稳定,但在分布偏移和奖励缺失两方面有原则性的限制。", "label": "Imitation learning", - "degree": 6 + "degree": 11 }, { "id": "paradigm:optimal_control", @@ -2276,7 +2276,7 @@ "year": 2023, "summary_zh": "RT-2 把动作输出重新编码成 PaLI-X 视觉语言模型词表中的文本 token,让一个原本只做视觉问答的大模型直接生成机器人动作序列。这一动作 token 化思想把控制问题转写成统一的自回归生成,是当前 VLA 范式的奠基工作。", "label": "RT-2", - "degree": 10 + "degree": 11 }, { "id": "paper:rtx", @@ -2888,7 +2888,7 @@ "year": 2023, "summary_zh": "语言模型在离散符号空间中训练,但要驱动机械臂或车辆就必须输出有度量含义的连续量,这之间存在天然的语义到物理的接地鸿沟。如何系统性地学到这种接地,而不是依赖手工动作 token 表,是 VLA 与具身智能的根本未解问题。", "label": "Grounding language to physical world", - "degree": 3 + "degree": 4 }, { "id": "problem:latency_budget_for_large_model_in_realtime_control", @@ -3044,7 +3044,7 @@ "year": 2023, "summary_zh": "一旦语言模型可以生成结构化函数调用并消费其返回值,它就从无状态文本生成器变成了可以查询事实、操作仿真器与控制车辆的环境接地执行者。这一洞察是 Toolformer、ReAct 与 
Agent-Driver 共同的方法论核心,也把对齐重心从输出文本搬到了选择动作。", "label": "Tool use extends LM into actor", - "degree": 3 + "degree": 5 }, { "id": "insight:counterfactual_replanning_separates_intent_from_execution", @@ -3140,7 +3140,7 @@ "year": 2024, "summary_zh": "世界模型范式主张把环境动力学独立学到一个可被规划器查询或可被策略联合训练的生成模型里,从而把决策与感知解耦。从 Ha 与 Schmidhuber 的 World Models 到 Dreamer、GAIA-1、DriveDreamer 和 Cosmos,这一范式贯穿强化学习与自动驾驶研究的多个时代。", "label": "World Model Paradigm", - "degree": 5 + "degree": 6 }, { "id": "paradigm:llm_agent_paradigm", @@ -3152,7 +3152,7 @@ "year": 2024, "summary_zh": "大模型代理范式把语言模型当成具备状态、工具、记忆的通用决策器,并通过工具循环、反思与层次分解构造长时域行为。它在驾驶领域以 Agent-Driver、DiLu、DriveVLM-Dual 等形态出现,是把语言模型常识转化为可执行驾驶策略的主要研究路径。", "label": "LLM Agent Paradigm", - "degree": 5 + "degree": 7 }, { "id": "paradigm:vla_paradigm", @@ -3385,7 +3385,7 @@ "summary_zh": "潜空间想象让策略和价值在一个学到的紧凑潜世界模型中进行多步推演,而非在像素或原始传感空间中代价高昂地展开。Ha 与 Schmidhuber 的 World Models 首次系统化它,Dreamer 系列把它做到大规模可训练,GAIA-1 与 DriveDreamer 把它扩展到驾驶视频。在自动驾驶中这一移动是稀缺真实事故数据条件下大规模 model-based 强化学习的关键。", "building_blocks": [], "label": "Latent Imagination Rollout", - "degree": 5 + "degree": 6 }, { "id": "move:spike_event_compute", @@ -3430,7 +3430,7 @@ "concept:ssl" ], "label": "Masked Prediction Yields Self-Supervised Signal", - "degree": 4 + "degree": 5 }, { "id": "insight:attention_is_typed_entity_communication", @@ -3447,7 +3447,7 @@ "concept:detr_query" ], "label": "Attention is Communication Between Typed Entities", - "degree": 7 + "degree": 8 }, { "id": "insight:contrastive_alignment_creates_zero_shot_transfer", @@ -3627,7 +3627,7 @@ "paper:gaia1" ], "label": "World Models Let Planning Be Done in Imagination", - "degree": 7 + "degree": 8 }, { "id": "insight:tokenization_collapses_modality_gap", @@ -4079,7 +4079,7 @@ "insight:imitation_data_compresses_unspecified_reward" ], "label": "Trace: Dataset Aggregation for Imitation", - "degree": 6 + "degree": 7 }, { "id": "validation:trace_world_model_in_latent_imagination", @@ -4243,7 +4243,7 @@ 
"insight:set_prediction_eliminates_postprocessing_heuristics" ], "label": "Paradigm: Modular Perception-to-Planning Pipeline", - "degree": 7 + "degree": 10 }, { "id": "paradigm:differentiable_end_to_end_imitation", @@ -4264,7 +4264,7 @@ "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong" ], "label": "Paradigm: Differentiable End-to-End Imitation", - "degree": 7 + "degree": 11 }, { "id": "paradigm:model_based_world_imagination_planning", @@ -4308,7 +4308,7 @@ "insight:in_context_learning_emerges_at_scale" ], "label": "Paradigm: Foundation Model Zero-shot Driving Agent", - "degree": 10 + "degree": 11 }, { "id": "paradigm:brain_inspired_event_sparse_compute", @@ -4368,7 +4368,7 @@ "insight:symbolic_intermediate_enables_interpretability_and_transfer" ], "label": "Paradigm: Knowledge-Driven Reflective Agent", - "degree": 7 + "degree": 8 }, { "id": "paradigm:scaling_data_with_self_supervision", @@ -4391,7 +4391,7 @@ "insight:scaling_laws_predict_capability_emergence" ], "label": "Paradigm: Scaling Data with Self-Supervision", - "degree": 9 + "degree": 13 }, { "id": "paper:nuplan", @@ -4456,7 +4456,7 @@ "year": 2024, "card": "paper_bench2drive.md", "summary_zh": "Bench2Drive 在 CARLA Leaderboard 2.0 之上提供 44 个能力分桶与统一训练协议,使 UniAD、VAD 等端到端模型可在同一闭环环境下被公平比较,揭示了离线 L2 与闭环成功率之间的弱相关性。", - "degree": 8 + "degree": 9 }, { "id": "paper:carla_lb2", @@ -5507,7 +5507,7 @@ "year": 2021, "summary_zh": "DETR3D 把二维检测中的 DETR 范式直接搬到多视角三维检测:用一组可学习的三维参考点作为目标查询,把每个三维查询点反投影到所有相机平面上去采样图像特征,再用 Transformer 解码器迭代更新目标位置。它绕开了显式的鸟瞰特征构建,证明了稀疏查询本身就是一种隐式的三维到二维对齐机制。", "label": "DETR3D", - "degree": 10 + "degree": 11 }, { "id": "paper:petr", @@ -5567,7 +5567,7 @@ "year": 2022, "summary_zh": "BEVFusion 把相机分支的鸟瞰特征和点云分支的鸟瞰特征在同一个鸟瞰栅格上对齐后再融合,而不是在感知头处晚融合或在图像-点云层面早融合。这种统一的中间表示使得任一模态失效时另一模态仍可独立工作,并且为后续在鸟瞰空间做规划、预测打开了模块解耦的接口。", "label": "BEVFusion", - "degree": 10 + "degree": 12 }, { "id": "paper:bevformer_v2", @@ -6191,7 +6191,7 @@ "year": 2019, "summary_zh": 
"标注三维框、占用、轨迹的人工成本远高于二维。自监督预训练、自动标注管线、神经重建辅助标注都尝试缩减成本,但目前业界仍依赖大量人工质检,这是开放数据集规模扩展和长尾覆盖的最大障碍之一。", "label": "Label efficiency for 3D annotation", - "degree": 3 + "degree": 5 }, { "id": "problem:unknown_geometry_in_distant_or_dark_regions", @@ -6394,7 +6394,7 @@ "label": "CARLA Leaderboard", "label_zh": "CARLA Leaderboard(闭环驾驶排行榜)", "phase": "core", - "degree": 1 + "degree": 3 }, { "id": "paper:apollo_autoware", @@ -6474,6496 +6474,7501 @@ "degree": 1 }, { - "id": "paper:chinchilla", - "label": "Chinchilla", - "label_zh": "Chinchilla(compute-optimal LLM scaling laws)", + "id": "paper:alvinn", + "kind": "paper", + "tier": "S", + "topic": "e2e_ad", + "year": 1989, + "label": "ALVINN", + "label_zh": "ALVINN(最早的神经网络端到端驾驶)", + "summary_zh": "Pomerleau 1989 用单隐层全连接网络把相机图像直接映射到转向,是端到端驾驶与行为克隆的源头;并用合成偏移样本预演了协变量偏移的工程解。", + "phase": "core", + "degree": 5 + }, + { + "id": "paper:lbc", "kind": "paper", "tier": "A", - "topic": "foundation_models", + "topic": "e2e_ad", + "year": 2019, + "label": "Learning by Cheating", + "label_zh": "Learning by Cheating(先作弊再蒸馏的端到端驾驶)", + "summary_zh": "先训练能看仿真器真值的特权教师把决策学透,再让只吃像素的学生在自己访问的状态上蒸馏教师,相当于无限 DAgger,是 CARLA 上的强 baseline。", "phase": "core", + "degree": 5 + }, + { + "id": "paper:tcp_carla", + "kind": "paper", + "tier": "A", + "topic": "e2e_ad", "year": 2022, - "summary_zh": "Chinchilla 通过在 400 余次训练运行上拟合损失曲线,给出了在固定算力下参数量与训练 token 数应该同步放大的最优比例(约 1:20)。它揭示了 GPT-3 等早期大模型把算力过度投向参数而 token 不足,是 LLM scaling 的 compute-optimal 修正标尺。", - "degree": 3 + "label": "TCP", + "label_zh": "TCP(轨迹引导的控制预测:轨迹分支 + 控制分支融合)", + "summary_zh": "把端到端驾驶的轨迹表征与底层控制表征做成同一网络的两分支并按场景自适应融合,单相机即取得当年很强的 CARLA 闭环成绩。", + "phase": "core", + "degree": 5 }, { - "id": "paper:watkins_dayan_qlearning", - "label": "Watkins & Dayan Q-learning", - "label_zh": "Watkins & Dayan Q 学习(收敛性证明)", + "id": "paper:gameformer", "kind": "paper", - "tier": "S", - "topic": "rl_foundations", - "phase": "prereq", - "year": 1992, - "summary_zh": "Watkins 1989 博士论文提出 Q-learning,Watkins & Dayan 1992 证明在所有 (s, 
a) 被无限访问且学习率满足 Robbins-Monro 条件下,Q 值会以概率 1 收敛到最优 Q*。这是几乎所有现代深度 RL 算法的理论起点。", - "degree": 2 + "tier": "A", + "topic": "planning", + "year": 2023, + "label": "GameFormer", + "label_zh": "GameFormer(用层级 transformer 做博弈式交互预测与规划)", + "summary_zh": "用层级 transformer 解码器迭代地让每个 agent 在他者上一层预测条件下更新自己,把 level-k 博弈推理可微化,统一交互预测与自车规划。", + "phase": "core", + "degree": 4 }, { - "id": "paper:bear", - "label": "BEAR", - "label_zh": "BEAR(行为约束的离线 RL)", + "id": "paper:drivegpt", "kind": "paper", "tier": "B", - "topic": "deep_rl", + "topic": "e2e_ad", + "year": 2023, + "label": "DriveGPT", + "label_zh": "DriveGPT(GPT 式自回归驾驶策略 / 自回归驾驶基座范式)", + "summary_zh": "把驾驶行为 token 化后用 GPT 式自回归在海量日志上做下一 token 预测,使规划变成续写;代表把 LLM 训练配方整体搬到驾驶的基座范式。", "phase": "core", - "year": 2019, - "summary_zh": "BEAR (Bootstrapping Error Accumulation Reduction) 在离线 RL 的 actor 损失里加入对行为策略的 MMD 距离约束,让学到的策略不偏离数据集支撑太远。它是 BCQ 之后离线 RL 显式行为约束路线的代表,启发了后续 CQL、IQL 的设计。", - "degree": 2 + "degree": 5 }, { - "id": "paper:ddpm", - "label": "DDPM", - "label_zh": "DDPM(去噪扩散概率模型)", + "id": "paper:codex", "kind": "paper", - "tier": "S", - "topic": "foundation_models", + "tier": "B", + "topic": "llm_agent", + "year": 2021, + "label": "Codex", + "label_zh": "Codex(代码大模型:code-as-action 驾驶 agent 的底座)", + "summary_zh": "在 GPT 上用海量代码继续训练并以单元测试(pass@k)评测,把程序确立为可生成又可精确求值的动作表征,是 code-as-action 智能体的语言底座。", "phase": "core", - "year": 2020, - "summary_zh": "DDPM 把生成模型重新表述为逐步加噪后再学习反向去噪的过程,把图像生成的可训练目标压成预测每一步的噪声。它的简洁性与稳定性让扩散模型在两年内取代 GAN 成为图像、视频、动作生成的默认范式,也奠定了 Diffusion Policy、世界模型视频生成的方法学基础。", - "degree": 4 + "degree": 5 }, { - "id": "paper:lora", - "label": "LoRA", - "label_zh": "LoRA(低秩适配的高效微调)", + "id": "paper:saycan", "kind": "paper", "tier": "A", - "topic": "foundation_models", + "topic": "llm_agent", + "year": 2022, + "label": "SayCan", + "label_zh": "SayCan(用可供性把 LLM 计划接到现实:会说不等于能做)", + "summary_zh": "用 LLM 给技能打语义有用性分(say)、用价值函数给当前状态成功概率打分(can),相乘选动作,把抽象语言计划锚到物理可行集。", "phase": "core", - "year": 2021, - "summary_zh": "LoRA 把大模型权重 W 的微调改写成 W + 
B·A 的低秩残差形式,其中 A、B 远小于 W。这使百亿参数模型可以在单卡几 GB 显存里完成下游微调,是开源大模型生态在学界规模化的关键工程使能。", - "degree": 2 + "degree": 5 }, { - "id": "paper:altman_constrained_mdp", - "label": "Altman Constrained MDP", - "label_zh": "Altman 1999《约束 MDP》", + "id": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "kind": "insight", + "tier": "insight", + "topic": "planning", + "year": 2023, + "label": "Multi-modal behavior is intrinsic to traffic scenes", + "label_zh": "交通场景的多模态性是内在的", + "summary_zh": "路口的未来是几个都对的答案;单峰回归把两种合理机动平均成落在低密度/违规区的无效轨迹,因此必须用分布式/生成式规划器与熵奖励。", + "phase": "core", + "degree": 6 + }, + { + "id": "paper:gail", "kind": "paper", "tier": "A", - "topic": "safety", - "phase": "prereq", - "year": 1999, - "summary_zh": "Altman 1999 的专著《约束马尔可夫决策过程》系统给出 CMDP 的形式化、可行性条件、对偶理论与求解算法。它是 CPO、Lagrangian Safe RL、RCPO、PID-Lagrangian 等所有现代安全 RL 算法的理论基底。", - "degree": 3 + "topic": "deep_rl", + "year": 2016, + "label": "GAIL", + "label_zh": "GAIL(生成对抗模仿学习)", + "summary_zh": "把模仿写成占用度量匹配的 GAN 博弈,跳过显式 reward 直接训出闭环策略;用判别器吃掉特征工程,缓解 BC 的复合误差。", + "phase": "core", + "degree": 6 }, { - "id": "paper:schulman2015_trpo", - "label": "TRPO", - "label_zh": "TRPO(信赖域策略优化)", + "id": "paper:airl", "kind": "paper", - "tier": "S", + "tier": "A", "topic": "deep_rl", + "year": 2018, + "label": "AIRL", + "label_zh": "AIRL(对抗逆强化学习)", + "summary_zh": "给 GAIL 的判别器一个 reward+势函数 shaping 结构,最优时分解出对动力学变化鲁棒、可迁移的奖励函数。", "phase": "core", - "year": 2015, - "summary_zh": "TRPO 把策略改进步骤限制在新旧策略 KL 散度不超过 δ 的信赖域内,并给出近似单调改进的理论保证。它的二阶求解(共轭梯度 + 自然梯度)虽然工程门槛高,但为 PPO、CPO 等后续算法奠定了信赖域几何基石。", - "degree": 3 + "degree": 4 }, { - "id": "paper:rcpo", - "label": "RCPO", - "label_zh": "RCPO(奖励约束的策略优化)", + "id": "paper:ng_russell_2000_irl", "kind": "paper", - "tier": "B", - "topic": "safety", + "tier": "A", + "topic": "rl_foundations", + "year": 2000, + "label": "Ng & Russell IRL", + "label_zh": "逆强化学习的算法(Ng & Russell 2000)", + "summary_zh": "IRL 奠基作:把'从行为反推 reward'形式化为线性规划,并点破 reward 不可辨识这一内在病灶与 reward shaping 不变性。", "phase": "core", 
- "year": 2018, - "summary_zh": "Tessler 等 2018 提出 Reward Constrained Policy Optimization (RCPO),把约束代价直接通过 Lagrangian 乘子加进奖励信号,再用标准 actor-critic 优化。它简化了 CPO 的二阶投影,是 Lagrangian Safe RL 的早期统一框架。", - "degree": 2 + "degree": 3 }, { - "id": "paper:pid_lagrangian", - "label": "PID-Lagrangian", - "label_zh": "PID-Lagrangian(PID 控制的乘子更新)", + "id": "paper:ziebart_max_ent_irl", "kind": "paper", - "tier": "B", - "topic": "safety", + "tier": "A", + "topic": "rl_foundations", + "year": 2008, + "label": "MaxEnt IRL", + "label_zh": "最大熵逆强化学习(Ziebart 2008)", + "summary_zh": "用最大熵原理在无数等价 reward 中挑唯一解,把 IRL 升级为良定的能量模型,并天然容纳人类示教的次优与带噪。", "phase": "core", - "year": 2020, - "summary_zh": "Stooke 等 2020 把安全 RL 中的乘子更新从纯梯度上升换成 PID 控制器,让约束响应在违反扩大或缩小时都更平滑。它解决了 Lagrangian Safe RL 长期存在的乘子震荡问题,是工业部署友好的安全 RL 改进。", - "degree": 2 + "degree": 6 }, { - "id": "paper:safe_rl_carla", - "label": "Safe RL on CARLA", - "label_zh": "驾驶场景下的安全 RL(CARLA / nuPlan 实证)", + "id": "paper:irl", "kind": "paper", "tier": "B", - "topic": "safety", - "phase": "frontier", - "year": 2022, - "summary_zh": "代表一类把 CPO / Lagrangian Safe RL 在 CARLA 或 nuPlan 闭环上做实证的工作(如 SafeDriver、ConstrainedDrive),把约束 MDP 框架与碰撞、违规、舒适度多约束驾驶任务直接对齐。它是安全 RL 从 Safety Gym 走向驾驶基准的桥梁。", - "degree": 2 - } - ], - "edges": [ - { - "source": "paper:vaswani2017", - "target": "paper:vit", - "rel": "prereq" + "topic": "rl_foundations", + "year": 2000, + "label": "Inverse RL", + "label_zh": "逆强化学习问题(总览)", + "summary_zh": "把 RL 的箭头反过来——给定专家行为反推其优化的 reward;问题天生不适定,全部算法的核心在于用什么原则从解族里挑一个。", + "phase": "core", + "degree": 5 }, { - "source": "paper:vaswani2017", - "target": "paper:carion2020", - "rel": "prereq" + "id": "paper:ross_bagnell_2010", + "kind": "paper", + "tier": "A", + "topic": "rl_foundations", + "year": 2010, + "label": "Efficient Reductions for IL", + "label_zh": "模仿学习的高效归约(Ross & Bagnell 2010)", + "summary_zh": "证明朴素 BC 的闭环误差以 O(εT²) 复合,并给出首个线性 O(εT) 的 forward training 归约,为 DAgger 奠定理论地基。", + "phase": "core", + "degree": 5 }, { - "source": 
"paper:vaswani2017", - "target": "paper:gpt3", - "rel": "prereq" + "id": "insight:policy_improvement_bounded_by_distribution_shift", + "kind": "insight", + "topic": "rl_foundations", + "year": 2010, + "label": "改进受分布偏移上界锁死", + "label_zh": "策略改进的上限被分布偏移锁死", + "summary_zh": "模仿、信赖域策略梯度、离线 RL 共享同一不等式:用旧分布评估新策略的可靠度正比于新旧分布接近度,分布偏移是改进的硬天花板。", + "tier": "insight", + "phase": "core", + "degree": 6 + }, + { + "id": "insight:max_entropy_closes_policy_value_duality", + "kind": "insight", + "topic": "rl_foundations", + "year": 2008, + "label": "最大熵缝合策略价值对偶", + "label_zh": "最大熵把策略与价值缝成一对偶", + "summary_zh": "加熵正则后最优策略变成价值的 softmax,配分函数把 reward、价值、策略绑定,使 RL(正向)与 IRL(逆向)成为同一对偶的两面。", + "tier": "insight", + "phase": "core", + "degree": 6 + }, + { + "id": "move:learn_set_predictor_with_hungarian_matching", + "kind": "move", + "topic": "scene_understanding", + "label": "learn_set_predictor_with_hungarian_matching", + "label_zh": "learn_set_predictor_with_hungarian_matching", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:share_object_query_across_tasks_for_e2e_planning", + "kind": "move", + "topic": "e2e_ad", + "label": "share_object_query_across_tasks_for_e2e_planning", + "label_zh": "share_object_query_across_tasks_for_e2e_planning", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "move:learn_open_vocabulary_classifier_via_language_anchor", + "kind": "move", + "topic": "foundation_models", + "label": "learn_open_vocabulary_classifier_via_language_anchor", + "label_zh": "learn_open_vocabulary_classifier_via_language_anchor", + "tier": "move", + "phase": "core", + "year": 2024, + "degree": 1 + }, + { + "id": "paper:pointpillars", + "kind": "paper", + "tier": "A", + "topic": "scene_understanding", + "year": 2019, + "label": "PointPillars", + "label_zh": "PointPillars(点云柱状编码)", + "summary_zh": "把点云在 z 轴不切分、只在 BEV 平面划成柱,每柱用微型 PointNet 编码成固定向量并散射成伪图像,之后全走 2D 卷积。用放弃高度体素化换来纯 2D 卷积的速度,长期是工业界实时 LiDAR 检测的事实基线。", + 
"phase": "core", + "degree": 4 + }, + { + "id": "paper:voxelnet", + "kind": "paper", + "tier": "A", + "topic": "scene_understanding", + "year": 2018, + "label": "VoxelNet", + "label_zh": "VoxelNet(体素特征端到端学习)", + "summary_zh": "第一次证明不需手工点云特征:体素化后用可学习的 Voxel Feature Encoding 直接从原始点学特征,再接 3D 卷积与 RPN 端到端检测。确立了体素 + 可学习编码这条主线,PointPillars、SECOND、CenterPoint 皆为其后裔。", + "phase": "core", + "degree": 4 + }, + { + "id": "paper:centerpoint", + "kind": "paper", + "tier": "A", + "topic": "scene_understanding", + "year": 2021, + "label": "CenterPoint", + "label_zh": "CenterPoint(中心点 3D 检测与跟踪)", + "summary_zh": "把 CenterNet 的中心点范式搬到点云 BEV:在热力图上预测目标中心,再回归尺寸、朝向、高度与速度,彻底去掉 anchor 与 NMS。anchor-free 天然处理任意朝向,速度回归又让多目标跟踪近乎免费。", + "phase": "core", + "degree": 4 + }, + { + "id": "paper:krizhevsky2012", + "kind": "paper", + "tier": "S", + "topic": "ssl_vision", + "year": 2012, + "label": "AlexNet", + "label_zh": "AlexNet(ImageNet 时刻)", + "summary_zh": "2012 年以巨大优势夺得 ImageNet 冠军,第一次让大数据 + GPU + 深度卷积网络在公开基准上压倒手工特征流水线。其具体技术会过时,但它点燃的范式转移——表示学习取代特征工程——是整个深度学习浪潮的起点。", + "phase": "core", + "degree": 3 + }, + { + "id": "paper:bert", + "kind": "paper", + "tier": "S", + "topic": "foundation_models", + "year": 2019, + "label": "BERT", + "label_zh": "BERT(掩码语言建模)", + "summary_zh": "用随机遮词、靠双向上下文填回的极简自监督目标在海量无标注文本上预训练通用语言表示,微调即横扫下游任务。把掩码预测确立为可规模化的自监督信号,此思想随后被 MAE 等整体搬到视觉。", + "phase": "core", + "degree": 3 + }, + { + "id": "paper:byol", + "kind": "paper", + "tier": "A", + "topic": "ssl_vision", + "year": 2020, + "label": "BYOL", + "label_zh": "BYOL(无负样本自监督)", + "summary_zh": "打破对比学习必须有负样本的共识:用同图两增广视图,让 online 网络预测缓慢更新的 target 网络表示。靠预测头不对称 + 动量目标 + 停梯度防止坍缩,无需任何负样本即可学出可迁移表示。", + "phase": "core", + "degree": 4 + }, + { + "id": "paper:vicreg", + "kind": "paper", + "tier": "A", + "topic": "ssl_vision", + "year": 2022, + "label": "VICReg", + "label_zh": "VICReg(方差-不变性-协方差正则)", + "summary_zh": "把防坍缩从负样本或架构技巧改为三条显式正则:不变性拉近配对表示、方差保证每维散度、协方差去除维度间冗余。无需负样本、动量与预测头,两分支还可异构,把防坍缩从隐晦工程抬升为可分析的目标函数。", + "phase": "core", + 
"degree": 2 + }, + { + "id": "paper:bahdanau2014_attention", + "kind": "paper", + "tier": "S", + "topic": "foundation_models", + "year": 2015, + "label": "Bahdanau Attention", + "label_zh": "Bahdanau 注意力(对齐即翻译)", + "summary_zh": "解决 encoder-decoder 翻译把整句压进定长向量的瓶颈:decoder 生成每词时用可学习对齐打分动态加权所有源端隐状态,得到该步专属上下文向量。注意力机制由此诞生,Transformer 只是把它推到极致。", + "phase": "core", + "degree": 3 + }, + { + "id": "paper:schulman2016_gae", + "kind": "paper", + "tier": "A", + "topic": "deep_rl", + "year": 2016, + "label": "GAE", + "label_zh": "GAE(广义优势估计)", + "summary_zh": "用一个标量 λ 把单步 TD(低方差有偏)和蒙特卡洛回报(无偏高方差)连续插值,给优势估计一个 bias/variance 旋钮,是 TRPO/PPO 默认的优势计算方式。", + "phase": "core", + "degree": 4 + }, + { + "id": "paper:d4rl", + "kind": "paper", + "tier": "A", + "topic": "evaluation_benchmark", + "year": 2020, + "label": "D4RL", + "label_zh": "D4RL(离线 RL 基准)", + "summary_zh": "把离线 RL 的数据集、环境与归一化打分标准化成一套公共基准,并刻意构造次优、窄覆盖、需缝合的数据分布来暴露算法弱点,让进展可横向比较。", + "phase": "core", + "degree": 5 + }, + { + "id": "paper:levine_offline_rl_tutorial", + "kind": "paper", + "tier": "B", + "topic": "deep_rl", + "year": 2020, + "label": "Offline RL Tutorial", + "label_zh": "离线 RL 综述(Levine 等)", + "summary_zh": "把离线 RL 的全部困难归结为分布偏移,系统梳理策略约束、价值正则、不确定性惩罚、模型基与序列建模五类应对,并诚实列出离线模型选择等开放问题。", + "phase": "core", + "degree": 4 + }, + { + "id": "paper:spinning_up", + "kind": "paper", + "tier": "B", + "topic": "deep_rl", + "year": 2018, + "label": "Spinning Up", + "label_zh": "Spinning Up in Deep RL", + "summary_zh": "OpenAI 的深度 RL 教学资源,把策略梯度、信赖域、actor-critic 一族算法统一在一个梯度式下并配极简可读实现,是从读公式到跑代码的最快入口。", + "phase": "core", + "degree": 5 + }, + { + "id": "paper:preference_learning", + "kind": "paper", + "tier": "A", + "topic": "alignment", + "year": 2017, + "label": "Deep RL from Human Preferences", + "label_zh": "从人类偏好中学习奖励", + "summary_zh": "放弃手写奖励,让人类对成对行为片段做偏好比较,用 Bradley–Terry 模型拟合奖励再做 RL;以极少标注学会难以言说的目标,是 RLHF 整条路线的种子。", + "phase": "core", + "degree": 2 + }, + { + "id": "paper:planet", + "kind": "paper", + "tier": "A", + "topic": 
"world_models", + "year": 2019, + "label": "PlaNet", + "label_zh": "PlaNet(潜空间动力学 + 规划)", + "summary_zh": "纯从像素学一个结合确定性记忆与随机隐变量的潜动力学模型 RSSM,并完全在潜空间用 CEM 规划解决连续控制,样本效率高一个数量级,是 Dreamer 的前身。", + "phase": "core", + "degree": 8 + }, + { + "id": "insight:q_learning_max_is_optimistically_biased", + "kind": "insight", + "tier": "A", + "topic": "deep_rl", + "year": 2010, + "label": "Q-learning's max is optimistically biased", + "label_zh": "Q-learning 的 max 是系统性乐观偏差", + "summary_zh": "由 Jensen 不等式 E[max]≥max E,对噪声 Q 取 max 在期望上必然高估真实价值;自举放大此偏差,是 DQN 不稳定的元凶,也是 Double-Q 的动机。", + "phase": "core", + "degree": 5 + }, + { + "id": "paper:chinchilla", + "label": "Chinchilla", + "label_zh": "Chinchilla(compute-optimal LLM scaling laws)", + "kind": "paper", + "tier": "A", + "topic": "foundation_models", + "phase": "core", + "year": 2022, + "summary_zh": "Chinchilla 通过在 400 余次训练运行上拟合损失曲线,给出了在固定算力下参数量与训练 token 数应该同步放大的最优比例(约 1:20)。它揭示了 GPT-3 等早期大模型把算力过度投向参数而 token 不足,是 LLM scaling 的 compute-optimal 修正标尺。", + "degree": 3 + }, + { + "id": "paper:watkins_dayan_qlearning", + "label": "Watkins & Dayan Q-learning", + "label_zh": "Watkins & Dayan Q 学习(收敛性证明)", + "kind": "paper", + "tier": "S", + "topic": "rl_foundations", + "phase": "prereq", + "year": 1992, + "summary_zh": "Watkins 1989 博士论文提出 Q-learning,Watkins & Dayan 1992 证明在所有 (s, a) 被无限访问且学习率满足 Robbins-Monro 条件下,Q 值会以概率 1 收敛到最优 Q*。这是几乎所有现代深度 RL 算法的理论起点。", + "degree": 2 + }, + { + "id": "paper:bear", + "label": "BEAR", + "label_zh": "BEAR(行为约束的离线 RL)", + "kind": "paper", + "tier": "B", + "topic": "deep_rl", + "phase": "core", + "year": 2019, + "summary_zh": "BEAR (Bootstrapping Error Accumulation Reduction) 在离线 RL 的 actor 损失里加入对行为策略的 MMD 距离约束,让学到的策略不偏离数据集支撑太远。它是 BCQ 之后离线 RL 显式行为约束路线的代表,启发了后续 CQL、IQL 的设计。", + "degree": 2 + }, + { + "id": "paper:ddpm", + "label": "DDPM", + "label_zh": "DDPM(去噪扩散概率模型)", + "kind": "paper", + "tier": "S", + "topic": "foundation_models", + "phase": "core", + "year": 2020, + "summary_zh": "DDPM 
把生成模型重新表述为逐步加噪后再学习反向去噪的过程,把图像生成的可训练目标压成预测每一步的噪声。它的简洁性与稳定性让扩散模型在两年内取代 GAN 成为图像、视频、动作生成的默认范式,也奠定了 Diffusion Policy、世界模型视频生成的方法学基础。", + "degree": 4 + }, + { + "id": "paper:lora", + "label": "LoRA", + "label_zh": "LoRA(低秩适配的高效微调)", + "kind": "paper", + "tier": "A", + "topic": "foundation_models", + "phase": "core", + "year": 2021, + "summary_zh": "LoRA 把大模型权重 W 的微调改写成 W + B·A 的低秩残差形式,其中 A、B 远小于 W。这使百亿参数模型可以在单卡几 GB 显存里完成下游微调,是开源大模型生态在学界规模化的关键工程使能。", + "degree": 2 + }, + { + "id": "paper:altman_constrained_mdp", + "label": "Altman Constrained MDP", + "label_zh": "Altman 1999《约束 MDP》", + "kind": "paper", + "tier": "A", + "topic": "safety", + "phase": "prereq", + "year": 1999, + "summary_zh": "Altman 1999 的专著《约束马尔可夫决策过程》系统给出 CMDP 的形式化、可行性条件、对偶理论与求解算法。它是 CPO、Lagrangian Safe RL、RCPO、PID-Lagrangian 等所有现代安全 RL 算法的理论基底。", + "degree": 3 + }, + { + "id": "paper:schulman2015_trpo", + "label": "TRPO", + "label_zh": "TRPO(信赖域策略优化)", + "kind": "paper", + "tier": "S", + "topic": "deep_rl", + "phase": "core", + "year": 2015, + "summary_zh": "TRPO 把策略改进步骤限制在新旧策略 KL 散度不超过 δ 的信赖域内,并给出近似单调改进的理论保证。它的二阶求解(共轭梯度 + 自然梯度)虽然工程门槛高,但为 PPO、CPO 等后续算法奠定了信赖域几何基石。", + "degree": 5 + }, + { + "id": "paper:rcpo", + "label": "RCPO", + "label_zh": "RCPO(奖励约束的策略优化)", + "kind": "paper", + "tier": "B", + "topic": "safety", + "phase": "core", + "year": 2018, + "summary_zh": "Tessler 等 2018 提出 Reward Constrained Policy Optimization (RCPO),把约束代价直接通过 Lagrangian 乘子加进奖励信号,再用标准 actor-critic 优化。它简化了 CPO 的二阶投影,是 Lagrangian Safe RL 的早期统一框架。", + "degree": 2 + }, + { + "id": "paper:pid_lagrangian", + "label": "PID-Lagrangian", + "label_zh": "PID-Lagrangian(PID 控制的乘子更新)", + "kind": "paper", + "tier": "B", + "topic": "safety", + "phase": "core", + "year": 2020, + "summary_zh": "Stooke 等 2020 把安全 RL 中的乘子更新从纯梯度上升换成 PID 控制器,让约束响应在违反扩大或缩小时都更平滑。它解决了 Lagrangian Safe RL 长期存在的乘子震荡问题,是工业部署友好的安全 RL 改进。", + "degree": 2 + }, + { + "id": "paper:safe_rl_carla", + "label": "Safe RL on CARLA", + "label_zh": "驾驶场景下的安全 RL(CARLA / nuPlan 实证)", 
+ "kind": "paper", + "tier": "B", + "topic": "safety", + "phase": "frontier", + "year": 2022, + "summary_zh": "代表一类把 CPO / Lagrangian Safe RL 在 CARLA 或 nuPlan 闭环上做实证的工作(如 SafeDriver、ConstrainedDrive),把约束 MDP 框架与碰撞、违规、舒适度多约束驾驶任务直接对齐。它是安全 RL 从 Safety Gym 走向驾驶基准的桥梁。", + "degree": 2 + } + ], + "edges": [ + { + "source": "paper:vaswani2017", + "target": "paper:vit", + "rel": "prereq" + }, + { + "source": "paper:vaswani2017", + "target": "paper:carion2020", + "rel": "prereq" + }, + { + "source": "paper:vaswani2017", + "target": "paper:gpt3", + "rel": "prereq" + }, + { + "source": "paper:vaswani2017", + "target": "paper:2307.01694", + "rel": "prereq" + }, + { + "source": "paper:vit", + "target": "paper:dinov2", + "rel": "prereq" + }, + { + "source": "paper:vit", + "target": "paper:sam", + "rel": "prereq" + }, + { + "source": "paper:vit", + "target": "paper:li2022bevformer", + "rel": "prereq" + }, + { + "source": "paper:vit", + "target": "paper:llava", + "rel": "prereq" + }, + { + "source": "paper:vit", + "target": "paper:2307.01694", + "rel": "prereq" + }, + { + "source": "paper:dinov2", + "target": "paper:2508.10104", + "rel": "prereq" + }, + { + "source": "paper:carion2020", + "target": "paper:li2022bevformer", + "rel": "prereq" + }, + { + "source": "paper:carion2020", + "target": "paper:2212.10156", + "rel": "prereq" + }, + { + "source": "paper:carion2020", + "target": "paper:2210.14222", + "rel": "prereq" + }, + { + "source": "paper:li2022bevformer", + "target": "paper:2212.10156", + "rel": "prereq" + }, + { + "source": "paper:he2015_resnet", + "target": "paper:vit", + "rel": "prereq" + }, + { + "source": "paper:he2015_resnet", + "target": "paper:2307.01694", + "rel": "prereq" + }, + { + "source": "paper:gpt3", + "target": "paper:llava", + "rel": "prereq" + }, + { + "source": "paper:gpt3", + "target": "paper:rlhf_dpo", + "rel": "prereq" + }, + { + "source": "paper:gpt3", + "target": "paper:2309.16292", + "rel": "prereq" + }, + { + "source": "paper:gpt3", + "target": 
"paper:2311.10813", + "rel": "prereq" + }, + { + "source": "paper:llava", + "target": "paper:2402.12289", + "rel": "prereq" + }, + { + "source": "paper:llava", + "target": "paper:2512.24426", + "rel": "prereq" + }, + { + "source": "course:zhao_rl", + "target": "course:cs285", + "rel": "prereq" + }, + { + "source": "course:zhao_rl", + "target": "paper:mnih2015_dqn", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "paper:schulman2017_ppo", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:mdp", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:bellman_eq", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:value_iteration", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:td_learning", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:policy_gradient", + "rel": "covers" + }, + { + "source": "course:zhao_rl", + "target": "concept:actor_critic", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "concept:imitation_learning", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "concept:covariate_shift", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "concept:dqn", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "concept:ppo", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "concept:replay_buffer", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "paper:ross2011_dagger", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "paper:rlhf_dpo", + "rel": "covers" + }, + { + "source": "course:cs285", + "target": "paper:world_models", + "rel": "covers" + }, + { + "source": "paper:sutton_barto", + "target": "course:zhao_rl", + "rel": "parallel" + }, + { + "source": "paper:sutton_barto", + "target": "essay:bitter_lesson", + "rel": "parallel" + }, + { + "source": "channel:3blue1brown", + "target": 
"concept:transformer", + "rel": "covers" + }, + { + "source": "channel:3blue1brown", + "target": "concept:self_attention", + "rel": "covers" + }, + { + "source": "channel:3blue1brown", + "target": "paper:vaswani2017", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:vaswani2017", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:vit", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:carion2020", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:gpt3", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:dinov2", + "rel": "covers" + }, + { + "source": "channel:mu_li_bilibili", + "target": "paper:he2015_resnet", + "rel": "covers" + }, + { + "source": "channel:ez_encoder_academy", + "target": "paper:gpt3", + "rel": "covers" + }, + { + "source": "channel:ez_encoder_academy", + "target": "essay:bitter_lesson", + "rel": "covers" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:silver2017_alphazero", + "rel": "covers" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:2309.16292", + "rel": "contrasts" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:2311.10813", + "rel": "contrasts" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:2210.14222", + "rel": "contrasts" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:2307.01694", + "rel": "contrasts" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:2508.10104", + "rel": "covers" + }, + { + "source": "essay:bitter_lesson", + "target": "paper:gpt3", + "rel": "covers" + }, + { + "source": "paper:2212.10156", + "target": "paper:vadv2", + "rel": "extends" + }, + { + "source": "paper:2212.10156", + "target": "paper:2210.14222", + "rel": "contrasts" + }, + { + "source": "paper:2212.10156", + "target": "paper:2402.12289", + "rel": "feeds" + }, + { + "source": "paper:2210.14222", + 
"target": "paper:transfuser", + "rel": "parallel" + }, + { + "source": "paper:2210.14222", + "target": "paper:2311.10813", + "rel": "feeds" + }, + { + "source": "paper:2402.12289", + "target": "paper:2512.24426", + "rel": "extends" + }, + { + "source": "paper:2311.10813", + "target": "paper:2402.12289", + "rel": "extends" + }, + { + "source": "paper:2311.10813", + "target": "paper:2309.16292", + "rel": "parallel" + }, + { + "source": "paper:2309.16292", + "target": "paper:2311.10813", + "rel": "parallel" + }, + { + "source": "paper:2508.10104", + "target": "paper:2212.10156", + "rel": "feeds" + }, + { + "source": "paper:2508.10104", + "target": "paper:2402.12289", + "rel": "feeds" + }, + { + "source": "paper:2307.01694", + "target": "paper:2508.10104", + "rel": "contrasts" + }, + { + "source": "paper:gaia1", + "target": "paper:2512.24426", + "rel": "parallel" + }, + { + "source": "paper:drivedreamer", + "target": "paper:gaia1", + "rel": "parallel" + }, + { + "source": "paper:world_models", + "target": "paper:gaia1", + "rel": "prereq" + }, + { + "source": "paper:world_models", + "target": "paper:drivedreamer", + "rel": "prereq" + }, + { + "source": "paper:world_models", + "target": "paper:2512.24426", + "rel": "prereq" + }, + { + "source": "paper:rlhf_dpo", + "target": "paper:2512.24426", + "rel": "prereq" + }, + { + "source": "paper:transfuser", + "target": "paper:2210.14222", + "rel": "feeds" + }, + { + "source": "paper:vadv2", + "target": "paper:2402.12289", + "rel": "parallel" + }, + { + "source": "paper:mamba", + "target": "paper:vaswani2017", + "rel": "contrasts" + }, + { + "source": "paper:diffuser", + "target": "paper:vadv2", + "rel": "parallel" + }, + { + "source": "paper:lingo2", + "target": "paper:2402.12289", + "rel": "parallel" + }, + { + "source": "paper:tesla_ai_day", + "target": "paper:2212.10156", + "rel": "parallel" + }, + { + "source": "paper:ad_benchmarks", + "target": "paper:2212.10156", + "rel": "covers" + }, + { + "source": 
"paper:ad_benchmarks", + "target": "paper:2402.12289", + "rel": "covers" + }, + { + "source": "paper:ad_benchmarks", + "target": "paper:2210.14222", + "rel": "covers" + }, + { + "source": "paper:ad_benchmarks", + "target": "paper:2311.10813", + "rel": "covers" + }, + { + "source": "paper:ad_benchmarks", + "target": "paper:2309.16292", + "rel": "covers" + }, + { + "source": "paper:mnih2015_dqn", + "target": "concept:dqn", + "rel": "covers" + }, + { + "source": "paper:mnih2015_dqn", + "target": "concept:replay_buffer", + "rel": "covers" + }, + { + "source": "paper:schulman2017_ppo", + "target": "concept:ppo", + "rel": "covers" + }, + { + "source": "paper:schulman2017_ppo", + "target": "concept:actor_critic", + "rel": "covers" + }, + { + "source": "paper:ross2011_dagger", + "target": "concept:imitation_learning", + "rel": "covers" + }, + { + "source": "paper:ross2011_dagger", + "target": "concept:covariate_shift", + "rel": "covers" }, { "source": "paper:vaswani2017", - "target": "paper:2307.01694", - "rel": "prereq" + "target": "concept:transformer", + "rel": "covers" }, { - "source": "paper:vit", - "target": "paper:dinov2", - "rel": "prereq" + "source": "paper:vaswani2017", + "target": "concept:self_attention", + "rel": "covers" }, { - "source": "paper:vit", - "target": "paper:sam", - "rel": "prereq" + "source": "paper:carion2020", + "target": "concept:detr_query", + "rel": "covers" }, { - "source": "paper:vit", - "target": "paper:li2022bevformer", - "rel": "prereq" + "source": "paper:li2022bevformer", + "target": "concept:bev", + "rel": "covers" }, { - "source": "paper:vit", - "target": "paper:llava", - "rel": "prereq" + "source": "paper:llava", + "target": "concept:vlm", + "rel": "covers" }, { - "source": "paper:vit", - "target": "paper:2307.01694", - "rel": "prereq" + "source": "paper:2402.12289", + "target": "concept:vla", + "rel": "covers" }, { - "source": "paper:dinov2", - "target": "paper:2508.10104", - "rel": "prereq" + "source": "paper:2402.12289", + 
"target": "concept:cot", + "rel": "covers" }, { - "source": "paper:carion2020", - "target": "paper:li2022bevformer", - "rel": "prereq" + "source": "paper:2311.10813", + "target": "concept:tool_use", + "rel": "covers" }, { - "source": "paper:carion2020", - "target": "paper:2212.10156", - "rel": "prereq" + "source": "paper:2309.16292", + "target": "concept:cot", + "rel": "covers" }, { - "source": "paper:carion2020", - "target": "paper:2210.14222", - "rel": "prereq" + "source": "paper:2512.24426", + "target": "concept:counterfactual", + "rel": "covers" }, { - "source": "paper:li2022bevformer", - "target": "paper:2212.10156", - "rel": "prereq" + "source": "paper:2512.24426", + "target": "concept:meta_action", + "rel": "covers" }, { - "source": "paper:he2015_resnet", - "target": "paper:vit", - "rel": "prereq" + "source": "paper:dinov2", + "target": "concept:ssl", + "rel": "covers" }, { - "source": "paper:he2015_resnet", - "target": "paper:2307.01694", - "rel": "prereq" + "source": "paper:2508.10104", + "target": "concept:ssl", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:llava", - "rel": "prereq" + "source": "paper:2307.01694", + "target": "concept:spiking_nn", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:rlhf_dpo", - "rel": "prereq" + "source": "paper:rlhf_dpo", + "target": "concept:rlhf", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:2309.16292", - "rel": "prereq" + "source": "essay:bitter_lesson", + "target": "concept:scaling_vs_knowledge", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:2311.10813", - "rel": "prereq" + "source": "lab:lab01", + "target": "course:zhao_rl", + "rel": "implements" }, { - "source": "paper:llava", - "target": "paper:2402.12289", - "rel": "prereq" + "source": "lab:lab01", + "target": "concept:value_iteration", + "rel": "implements" }, { - "source": "paper:llava", - "target": "paper:2512.24426", - "rel": "prereq" + "source": "lab:lab02", + "target": 
"course:cs285", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "course:cs285", - "rel": "prereq" + "source": "lab:lab02", + "target": "paper:ross2011_dagger", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "paper:mnih2015_dqn", - "rel": "covers" + "source": "lab:lab03", + "target": "paper:2212.10156", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "paper:schulman2017_ppo", - "rel": "covers" + "source": "lab:lab04", + "target": "paper:2210.14222", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:mdp", - "rel": "covers" + "source": "lab:lab05", + "target": "paper:2508.10104", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:bellman_eq", - "rel": "covers" + "source": "lab:lab06", + "target": "paper:2307.01694", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:value_iteration", - "rel": "covers" + "source": "lab:lab07", + "target": "paper:2309.16292", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:td_learning", - "rel": "covers" + "source": "lab:lab08", + "target": "paper:2311.10813", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:policy_gradient", - "rel": "covers" + "source": "lab:lab09", + "target": "paper:2402.12289", + "rel": "implements" }, { - "source": "course:zhao_rl", - "target": "concept:actor_critic", - "rel": "covers" + "source": "lab:lab10", + "target": "paper:2512.24426", + "rel": "implements" }, { - "source": "course:cs285", - "target": "concept:imitation_learning", - "rel": "covers" + "source": "paper:silver2017_alphazero", + "target": "paper:muzero", + "rel": "extends" }, { - "source": "course:cs285", - "target": "concept:covariate_shift", - "rel": "covers" + "source": "paper:world_models", + "target": "paper:dreamer_v2", + "rel": "prereq" }, { - "source": "course:cs285", - "target": "concept:dqn", - "rel": "covers" + "source": 
"paper:dreamer_v2", + "target": "paper:dreamer_v3", + "rel": "extends" }, { - "source": "course:cs285", - "target": "concept:ppo", - "rel": "covers" + "source": "paper:world_models", + "target": "paper:iris_world_model", + "rel": "prereq" }, { - "source": "course:cs285", - "target": "concept:replay_buffer", - "rel": "covers" + "source": "paper:vaswani2017", + "target": "paper:iris_world_model", + "rel": "prereq" }, { - "source": "course:cs285", - "target": "paper:ross2011_dagger", - "rel": "covers" + "source": "paper:iris_world_model", + "target": "paper:dreamer_v3", + "rel": "parallel" }, { - "source": "course:cs285", - "target": "paper:rlhf_dpo", - "rel": "covers" + "source": "paper:muzero", + "target": "paper:dreamer_v3", + "rel": "parallel" }, { - "source": "course:cs285", - "target": "paper:world_models", - "rel": "covers" + "source": "paper:mnih2015_dqn", + "target": "paper:sac", + "rel": "prereq" }, { - "source": "paper:sutton_barto", - "target": "course:zhao_rl", + "source": "paper:mnih2015_dqn", + "target": "paper:td3", + "rel": "prereq" + }, + { + "source": "paper:sac", + "target": "paper:redq", + "rel": "extends" + }, + { + "source": "paper:td3", + "target": "paper:sac", "rel": "parallel" }, { - "source": "paper:sutton_barto", - "target": "essay:bitter_lesson", + "source": "paper:mnih2015_dqn", + "target": "paper:a3c_a2c", "rel": "parallel" }, { - "source": "channel:3blue1brown", - "target": "concept:transformer", - "rel": "covers" + "source": "paper:a3c_a2c", + "target": "paper:impala", + "rel": "extends" }, { - "source": "channel:3blue1brown", - "target": "concept:self_attention", - "rel": "covers" + "source": "paper:schulman2017_ppo", + "target": "paper:openai_five", + "rel": "feeds" }, { - "source": "channel:3blue1brown", - "target": "paper:vaswani2017", - "rel": "covers" + "source": "paper:a3c_a2c", + "target": "paper:alphastar", + "rel": "feeds" + }, + { + "source": "paper:sac", + "target": "paper:mpo", + "rel": "parallel" }, { - "source": 
"channel:mu_li_bilibili", - "target": "paper:vaswani2017", - "rel": "covers" + "source": "paper:schulman2017_ppo", + "target": "paper:mpo", + "rel": "contrasts" }, { - "source": "channel:mu_li_bilibili", - "target": "paper:vit", - "rel": "covers" + "source": "paper:gpt3", + "target": "paper:decision_transformer", + "rel": "prereq" }, { - "source": "channel:mu_li_bilibili", - "target": "paper:carion2020", - "rel": "covers" + "source": "paper:decision_transformer", + "target": "paper:trajectory_transformer", + "rel": "parallel" }, { - "source": "channel:mu_li_bilibili", - "target": "paper:gpt3", - "rel": "covers" + "source": "paper:vaswani2017", + "target": "paper:trajectory_transformer", + "rel": "prereq" }, { - "source": "channel:mu_li_bilibili", - "target": "paper:dinov2", - "rel": "covers" + "source": "paper:diffuser", + "target": "paper:diffusion_policy_chi2023", + "rel": "parallel" }, { - "source": "channel:mu_li_bilibili", - "target": "paper:he2015_resnet", - "rel": "covers" + "source": "paper:diffuser", + "target": "paper:diffusion_planner", + "rel": "extends" }, { - "source": "channel:ez_encoder_academy", - "target": "paper:gpt3", - "rel": "covers" + "source": "paper:cql", + "target": "paper:iql", + "rel": "extends" }, { - "source": "channel:ez_encoder_academy", - "target": "essay:bitter_lesson", - "rel": "covers" + "source": "paper:cql", + "target": "paper:calql", + "rel": "extends" }, { - "source": "essay:bitter_lesson", - "target": "paper:silver2017_alphazero", - "rel": "covers" + "source": "paper:iql", + "target": "paper:calql", + "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "paper:2309.16292", - "rel": "contrasts" + "source": "paper:cpo_safe_rl", + "target": "paper:lagrangian_safe_rl", + "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "paper:2311.10813", + "source": "paper:lagrangian_safe_rl", + "target": "paper:shielded_rl", "rel": "contrasts" }, { - "source": "essay:bitter_lesson", - "target": 
"paper:2210.14222", - "rel": "contrasts" + "source": "paper:rlhf_dpo", + "target": "paper:pebble", + "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "paper:2307.01694", - "rel": "contrasts" + "source": "paper:pebble", + "target": "paper:bpref", + "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "paper:2508.10104", - "rel": "covers" + "source": "paper:rlhf_dpo", + "target": "paper:bpref", + "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "paper:gpt3", - "rel": "covers" + "source": "paper:lqr_classic", + "target": "paper:ilqr_classic", + "rel": "prereq" }, { - "source": "paper:2212.10156", - "target": "paper:vadv2", + "source": "paper:ilqr_classic", + "target": "paper:cilqr", "rel": "extends" }, { - "source": "paper:2212.10156", - "target": "paper:2210.14222", - "rel": "contrasts" + "source": "paper:lqr_classic", + "target": "paper:mpc_book", + "rel": "prereq" }, { - "source": "paper:2212.10156", - "target": "paper:2402.12289", - "rel": "feeds" + "source": "paper:mpc_book", + "target": "paper:cilqr", + "rel": "parallel" }, { - "source": "paper:2210.14222", - "target": "paper:transfuser", + "source": "paper:transfuser", + "target": "paper:interfuser", "rel": "parallel" }, { - "source": "paper:2210.14222", - "target": "paper:2311.10813", - "rel": "feeds" + "source": "paper:transfuser", + "target": "paper:roach", + "rel": "parallel" }, { - "source": "paper:2402.12289", - "target": "paper:2512.24426", - "rel": "extends" + "source": "paper:roach", + "target": "paper:thinktwice", + "rel": "feeds" }, { - "source": "paper:2311.10813", - "target": "paper:2402.12289", - "rel": "extends" + "source": "paper:interfuser", + "target": "paper:thinktwice", + "rel": "parallel" }, { - "source": "paper:2311.10813", - "target": "paper:2309.16292", - "rel": "parallel" + "source": "paper:world_models", + "target": "paper:mile_driving", + "rel": "prereq" }, { - "source": "paper:2309.16292", - "target": "paper:2311.10813", + 
"source": "paper:dreamer_v2", + "target": "paper:mile_driving", "rel": "parallel" }, { - "source": "paper:2508.10104", - "target": "paper:2212.10156", - "rel": "feeds" + "source": "paper:ad_benchmarks", + "target": "paper:nuplan_baselines", + "rel": "covers" }, { - "source": "paper:2508.10104", - "target": "paper:2402.12289", - "rel": "feeds" + "source": "paper:nuplan_baselines", + "target": "paper:interfuser", + "rel": "contrasts" }, { - "source": "paper:2307.01694", - "target": "paper:2508.10104", - "rel": "contrasts" + "source": "paper:vaswani2017", + "target": "paper:trajeglish", + "rel": "prereq" }, { - "source": "paper:gaia1", - "target": "paper:2512.24426", + "source": "paper:trajeglish", + "target": "paper:most_simagents", "rel": "parallel" }, { - "source": "paper:drivedreamer", - "target": "paper:gaia1", + "source": "paper:gpt3", + "target": "paper:codetraj", + "rel": "prereq" + }, + { + "source": "paper:2311.10813", + "target": "paper:codetraj", "rel": "parallel" }, { "source": "paper:world_models", - "target": "paper:gaia1", - "rel": "prereq" + "target": "paper:mbrl_pets", + "rel": "parallel" }, { - "source": "paper:world_models", - "target": "paper:drivedreamer", + "source": "paper:mbrl_pets", + "target": "paper:dreamer_v2", "rel": "prereq" }, { - "source": "paper:world_models", - "target": "paper:2512.24426", - "rel": "prereq" + "source": "move:learn_world_model_then_plan_in_latent_imagination", + "target": "paper:world_models", + "rel": "manifests" }, { - "source": "paper:rlhf_dpo", - "target": "paper:2512.24426", - "rel": "prereq" + "source": "move:learn_world_model_then_plan_in_latent_imagination", + "target": "paper:dreamer_v2", + "rel": "manifests" }, { - "source": "paper:transfuser", - "target": "paper:2210.14222", - "rel": "feeds" + "source": "move:learn_world_model_then_plan_in_latent_imagination", + "target": "paper:dreamer_v3", + "rel": "manifests" }, { - "source": "paper:vadv2", - "target": "paper:2402.12289", - "rel": "parallel" + "source": 
"move:learn_world_model_then_plan_in_latent_imagination", + "target": "paper:mile_driving", + "rel": "manifests" }, { - "source": "paper:mamba", - "target": "paper:vaswani2017", - "rel": "contrasts" + "source": "move:plan_with_mcts_in_learned_model", + "target": "paper:muzero", + "rel": "manifests" }, { - "source": "paper:diffuser", - "target": "paper:vadv2", - "rel": "parallel" + "source": "move:plan_with_mcts_in_learned_model", + "target": "paper:silver2017_alphazero", + "rel": "extends" }, { - "source": "paper:lingo2", - "target": "paper:2402.12289", - "rel": "parallel" + "source": "move:discrete_latent_state_for_world_model", + "target": "paper:dreamer_v2", + "rel": "manifests" }, { - "source": "paper:tesla_ai_day", - "target": "paper:2212.10156", - "rel": "parallel" + "source": "move:discrete_latent_state_for_world_model", + "target": "paper:dreamer_v3", + "rel": "manifests" }, { - "source": "paper:ad_benchmarks", - "target": "paper:2212.10156", - "rel": "covers" + "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", + "target": "paper:iris_world_model", + "rel": "manifests" }, { - "source": "paper:ad_benchmarks", - "target": "paper:2402.12289", - "rel": "covers" + "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", + "target": "paper:gaia1", + "rel": "manifests" }, { - "source": "paper:ad_benchmarks", - "target": "paper:2210.14222", - "rel": "covers" + "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", + "target": "paper:drivedreamer", + "rel": "manifests" + }, + { + "source": "move:replace_explicit_critic_with_diffusion_score", + "target": "paper:diffusion_policy_chi2023", + "rel": "manifests" + }, + { + "source": "move:replace_explicit_critic_with_diffusion_score", + "target": "paper:diffuser", + "rel": "manifests" + }, + { + "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", + "target": "paper:mnih2015_dqn", + "rel": "manifests" }, { - "source": "paper:ad_benchmarks", - 
"target": "paper:2311.10813", - "rel": "covers" + "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", + "target": "paper:sac", + "rel": "manifests" }, { - "source": "paper:ad_benchmarks", - "target": "paper:2309.16292", - "rel": "covers" + "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", + "target": "paper:td3", + "rel": "manifests" }, { - "source": "paper:mnih2015_dqn", - "target": "concept:dqn", - "rel": "covers" + "source": "move:add_entropy_bonus_to_encourage_exploration", + "target": "paper:sac", + "rel": "manifests" }, { - "source": "paper:mnih2015_dqn", - "target": "concept:replay_buffer", - "rel": "covers" + "source": "move:add_entropy_bonus_to_encourage_exploration", + "target": "paper:schulman2017_ppo", + "rel": "manifests" }, { - "source": "paper:schulman2017_ppo", - "target": "concept:ppo", - "rel": "covers" + "source": "move:add_entropy_bonus_to_encourage_exploration", + "target": "paper:a3c_a2c", + "rel": "manifests" }, { - "source": "paper:schulman2017_ppo", - "target": "concept:actor_critic", - "rel": "covers" + "source": "move:turn_offline_dataset_into_supervised_sequence_prediction", + "target": "paper:decision_transformer", + "rel": "manifests" }, { - "source": "paper:ross2011_dagger", - "target": "concept:imitation_learning", - "rel": "covers" + "source": "move:turn_offline_dataset_into_supervised_sequence_prediction", + "target": "paper:trajectory_transformer", + "rel": "manifests" }, { - "source": "paper:ross2011_dagger", - "target": "concept:covariate_shift", - "rel": "covers" + "source": "move:replace_value_function_with_implicit_max_via_expectile", + "target": "paper:iql", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "concept:transformer", - "rel": "covers" + "source": "move:use_pretrained_language_model_as_action_prior", + "target": "paper:codetraj", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "concept:self_attention", - "rel": "covers" 
+ "source": "move:use_pretrained_language_model_as_action_prior", + "target": "paper:2311.10813", + "rel": "manifests" }, { - "source": "paper:carion2020", - "target": "concept:detr_query", - "rel": "covers" + "source": "move:use_pretrained_language_model_as_action_prior", + "target": "paper:2309.16292", + "rel": "manifests" }, { - "source": "paper:li2022bevformer", - "target": "concept:bev", - "rel": "covers" + "source": "move:add_lagrangian_safety_constraint_to_actor_critic", + "target": "paper:lagrangian_safe_rl", + "rel": "manifests" }, { - "source": "paper:llava", - "target": "concept:vlm", - "rel": "covers" + "source": "move:add_lagrangian_safety_constraint_to_actor_critic", + "target": "paper:cpo_safe_rl", + "rel": "manifests" }, { - "source": "paper:2402.12289", - "target": "concept:vla", - "rel": "covers" + "source": "move:treat_planning_as_conditional_generation", + "target": "paper:diffuser", + "rel": "manifests" }, { - "source": "paper:2402.12289", - "target": "concept:cot", - "rel": "covers" + "source": "move:treat_planning_as_conditional_generation", + "target": "paper:diffusion_policy_chi2023", + "rel": "manifests" }, { - "source": "paper:2311.10813", - "target": "concept:tool_use", - "rel": "covers" + "source": "move:treat_planning_as_conditional_generation", + "target": "paper:diffusion_planner", + "rel": "manifests" }, { - "source": "paper:2309.16292", - "target": "concept:cot", - "rel": "covers" + "source": "move:treat_planning_as_conditional_generation", + "target": "paper:vadv2", + "rel": "manifests" }, { - "source": "paper:2512.24426", - "target": "concept:counterfactual", - "rel": "covers" + "source": "move:cast_continuous_action_as_discretized_token_sequence", + "target": "paper:trajectory_transformer", + "rel": "manifests" }, { - "source": "paper:2512.24426", - "target": "concept:meta_action", - "rel": "covers" + "source": "move:cast_continuous_action_as_discretized_token_sequence", + "target": "paper:trajeglish", + "rel": "manifests" }, { 
- "source": "paper:dinov2", - "target": "concept:ssl", - "rel": "covers" + "source": "move:cast_continuous_action_as_discretized_token_sequence", + "target": "paper:most_simagents", + "rel": "manifests" }, { - "source": "paper:2508.10104", - "target": "concept:ssl", - "rel": "covers" + "source": "move:use_n_step_returns_to_trade_bias_for_variance", + "target": "paper:mnih2015_dqn", + "rel": "enables" }, { - "source": "paper:2307.01694", - "target": "concept:spiking_nn", - "rel": "covers" + "source": "move:use_n_step_returns_to_trade_bias_for_variance", + "target": "paper:schulman2017_ppo", + "rel": "enables" }, { - "source": "paper:rlhf_dpo", - "target": "concept:rlhf", - "rel": "covers" + "source": "move:add_intrinsic_motivation_via_novelty_or_curiosity", + "target": "paper:pebble", + "rel": "enables" }, { - "source": "essay:bitter_lesson", - "target": "concept:scaling_vs_knowledge", - "rel": "covers" + "source": "move:apply_gae_to_smooth_advantage_estimation", + "target": "paper:schulman2017_ppo", + "rel": "manifests" }, { - "source": "lab:lab01", - "target": "course:zhao_rl", - "rel": "implements" + "source": "move:apply_gae_to_smooth_advantage_estimation", + "target": "paper:a3c_a2c", + "rel": "manifests" }, { - "source": "lab:lab01", - "target": "concept:value_iteration", - "rel": "implements" + "source": "move:apply_gae_to_smooth_advantage_estimation", + "target": "paper:openai_five", + "rel": "manifests" }, { - "source": "lab:lab02", - "target": "course:cs285", - "rel": "implements" + "source": "move:use_prioritized_replay_buffer", + "target": "paper:mnih2015_dqn", + "rel": "enables" }, { - "source": "lab:lab02", - "target": "paper:ross2011_dagger", - "rel": "implements" + "source": "move:cotrain_dynamics_model_with_policy_to_share_representations", + "target": "paper:mile_driving", + "rel": "manifests" }, { - "source": "lab:lab03", - "target": "paper:2212.10156", - "rel": "implements" + "source": 
"move:cotrain_dynamics_model_with_policy_to_share_representations", + "target": "paper:dreamer_v2", + "rel": "manifests" }, { - "source": "lab:lab04", - "target": "paper:2210.14222", - "rel": "implements" + "source": "move:warm_start_rl_with_imitation_then_anneal", + "target": "paper:alphastar", + "rel": "manifests" }, { - "source": "lab:lab05", - "target": "paper:2508.10104", - "rel": "implements" + "source": "move:warm_start_rl_with_imitation_then_anneal", + "target": "paper:roach", + "rel": "manifests" }, { - "source": "lab:lab06", - "target": "paper:2307.01694", - "rel": "implements" + "source": "move:warm_start_rl_with_imitation_then_anneal", + "target": "paper:calql", + "rel": "manifests" }, { - "source": "lab:lab07", - "target": "paper:2309.16292", - "rel": "implements" + "source": "move:double_q_to_reduce_overestimation", + "target": "paper:td3", + "rel": "manifests" }, { - "source": "lab:lab08", - "target": "paper:2311.10813", - "rel": "implements" + "source": "move:double_q_to_reduce_overestimation", + "target": "paper:sac", + "rel": "manifests" }, { - "source": "lab:lab09", - "target": "paper:2402.12289", - "rel": "implements" + "source": "move:double_q_to_reduce_overestimation", + "target": "paper:redq", + "rel": "extends" }, { - "source": "lab:lab10", - "target": "paper:2512.24426", - "rel": "implements" + "source": "move:expert_iteration_self_distillation", + "target": "paper:silver2017_alphazero", + "rel": "manifests" }, { - "source": "paper:silver2017_alphazero", + "source": "move:expert_iteration_self_distillation", "target": "paper:muzero", - "rel": "extends" + "rel": "manifests" }, { - "source": "paper:world_models", - "target": "paper:dreamer_v2", - "rel": "prereq" + "source": "move:distill_privileged_teacher_to_sensor_student", + "target": "paper:roach", + "rel": "manifests" }, { - "source": "paper:dreamer_v2", - "target": "paper:dreamer_v3", - "rel": "extends" + "source": "move:distill_privileged_teacher_to_sensor_student", + "target": 
"paper:transfuser", + "rel": "manifests" }, { - "source": "paper:world_models", - "target": "paper:iris_world_model", - "rel": "prereq" + "source": "move:trust_region_step_for_monotonic_improvement", + "target": "paper:schulman2017_ppo", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "paper:iris_world_model", - "rel": "prereq" + "source": "move:trust_region_step_for_monotonic_improvement", + "target": "paper:cpo_safe_rl", + "rel": "manifests" }, { - "source": "paper:iris_world_model", - "target": "paper:dreamer_v3", - "rel": "parallel" + "source": "move:trust_region_step_for_monotonic_improvement", + "target": "paper:mpo", + "rel": "manifests" }, { - "source": "paper:muzero", - "target": "paper:dreamer_v3", - "rel": "parallel" + "source": "move:expectile_or_quantile_target_for_distributional_robustness", + "target": "paper:iql", + "rel": "manifests" }, { - "source": "paper:mnih2015_dqn", + "source": "move:hindsight_experience_relabeling", "target": "paper:sac", - "rel": "prereq" + "rel": "enables" }, { - "source": "paper:mnih2015_dqn", - "target": "paper:td3", - "rel": "prereq" + "source": "move:safety_shield_filters_unsafe_actions", + "target": "paper:shielded_rl", + "rel": "manifests" }, { - "source": "paper:sac", - "target": "paper:redq", - "rel": "extends" + "source": "move:reward_model_from_pairwise_human_preferences", + "target": "paper:rlhf_dpo", + "rel": "manifests" }, { - "source": "paper:td3", - "target": "paper:sac", - "rel": "parallel" + "source": "move:reward_model_from_pairwise_human_preferences", + "target": "paper:pebble", + "rel": "manifests" }, { - "source": "paper:mnih2015_dqn", - "target": "paper:a3c_a2c", - "rel": "parallel" + "source": "move:reward_model_from_pairwise_human_preferences", + "target": "paper:bpref", + "rel": "manifests" }, { - "source": "paper:a3c_a2c", - "target": "paper:impala", - "rel": "extends" + "source": "move:guided_sampling_through_classifier_gradients_at_inference", + "target": "paper:diffuser", 
+ "rel": "manifests" }, { - "source": "paper:schulman2017_ppo", - "target": "paper:openai_five", - "rel": "feeds" + "source": "move:guided_sampling_through_classifier_gradients_at_inference", + "target": "paper:diffusion_planner", + "rel": "manifests" }, { - "source": "paper:a3c_a2c", - "target": "paper:alphastar", - "rel": "feeds" + "source": "move:plan_via_cross_entropy_method_on_dynamics_model", + "target": "paper:mbrl_pets", + "rel": "manifests" }, { - "source": "paper:sac", - "target": "paper:mpo", + "source": "move:plan_via_cross_entropy_method_on_dynamics_model", + "target": "paper:muzero", "rel": "parallel" }, { - "source": "paper:schulman2017_ppo", - "target": "paper:mpo", - "rel": "contrasts" + "source": "move:two_stage_coarse_to_fine_trajectory", + "target": "paper:thinktwice", + "rel": "manifests" }, { - "source": "paper:gpt3", - "target": "paper:decision_transformer", - "rel": "prereq" + "source": "move:two_stage_coarse_to_fine_trajectory", + "target": "paper:vadv2", + "rel": "manifests" }, { - "source": "paper:decision_transformer", - "target": "paper:trajectory_transformer", - "rel": "parallel" + "source": "move:league_play_for_policy_diversity", + "target": "paper:alphastar", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "paper:trajectory_transformer", - "rel": "prereq" + "source": "move:learn_world_model_then_plan_in_latent_imagination", + "target": "move:plan_with_mcts_in_learned_model", + "rel": "composes" }, { - "source": "paper:diffuser", - "target": "paper:diffusion_policy_chi2023", - "rel": "parallel" + "source": "move:discrete_latent_state_for_world_model", + "target": "move:learn_world_model_then_plan_in_latent_imagination", + "rel": "composes" }, { - "source": "paper:diffuser", - "target": "paper:diffusion_planner", - "rel": "extends" + "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", + "target": "move:learn_world_model_then_plan_in_latent_imagination", + "rel": "composes" }, { - "source": 
"paper:cql", - "target": "paper:iql", - "rel": "extends" + "source": "move:cast_continuous_action_as_discretized_token_sequence", + "target": "move:turn_offline_dataset_into_supervised_sequence_prediction", + "rel": "composes" }, { - "source": "paper:cql", - "target": "paper:calql", - "rel": "extends" + "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", + "target": "move:double_q_to_reduce_overestimation", + "rel": "composes" }, { - "source": "paper:iql", - "target": "paper:calql", - "rel": "parallel" + "source": "move:add_entropy_bonus_to_encourage_exploration", + "target": "move:double_q_to_reduce_overestimation", + "rel": "composes" }, { - "source": "paper:cpo_safe_rl", - "target": "paper:lagrangian_safe_rl", - "rel": "parallel" + "source": "move:trust_region_step_for_monotonic_improvement", + "target": "move:add_lagrangian_safety_constraint_to_actor_critic", + "rel": "composes" }, { - "source": "paper:lagrangian_safe_rl", - "target": "paper:shielded_rl", - "rel": "contrasts" + "source": "move:treat_planning_as_conditional_generation", + "target": "move:guided_sampling_through_classifier_gradients_at_inference", + "rel": "composes" }, { - "source": "paper:rlhf_dpo", - "target": "paper:pebble", - "rel": "parallel" + "source": "move:use_pretrained_language_model_as_action_prior", + "target": "move:treat_planning_as_conditional_generation", + "rel": "composes" }, { - "source": "paper:pebble", - "target": "paper:bpref", - "rel": "parallel" + "source": "move:reward_model_from_pairwise_human_preferences", + "target": "move:warm_start_rl_with_imitation_then_anneal", + "rel": "composes" }, { - "source": "paper:rlhf_dpo", - "target": "paper:bpref", - "rel": "parallel" + "source": "move:distill_privileged_teacher_to_sensor_student", + "target": "move:warm_start_rl_with_imitation_then_anneal", + "rel": "composes" }, { - "source": "paper:lqr_classic", - "target": "paper:ilqr_classic", - "rel": "prereq" + "source": 
"problem:behavior_cloning_compounds_errors_over_time", + "target": "paper:ross2011_dagger", + "rel": "motivates" }, { - "source": "paper:ilqr_classic", - "target": "paper:cilqr", - "rel": "extends" + "source": "problem:behavior_cloning_compounds_errors_over_time", + "target": "paper:roach", + "rel": "motivates" }, { - "source": "paper:lqr_classic", - "target": "paper:mpc_book", - "rel": "prereq" + "source": "problem:behavior_cloning_compounds_errors_over_time", + "target": "paper:transfuser", + "rel": "motivates" }, { - "source": "paper:mpc_book", - "target": "paper:cilqr", - "rel": "parallel" + "source": "problem:distributional_shift_between_offline_data_and_deployment", + "target": "paper:cql", + "rel": "motivates" }, { - "source": "paper:transfuser", - "target": "paper:interfuser", - "rel": "parallel" + "source": "problem:distributional_shift_between_offline_data_and_deployment", + "target": "paper:iql", + "rel": "motivates" }, { - "source": "paper:transfuser", - "target": "paper:roach", - "rel": "parallel" + "source": "problem:distributional_shift_between_offline_data_and_deployment", + "target": "paper:calql", + "rel": "motivates" }, { - "source": "paper:roach", - "target": "paper:thinktwice", - "rel": "feeds" + "source": "problem:distributional_shift_between_offline_data_and_deployment", + "target": "paper:decision_transformer", + "rel": "motivates" }, { - "source": "paper:interfuser", - "target": "paper:thinktwice", - "rel": "parallel" + "source": "problem:long_horizon_credit_assignment_in_driving", + "target": "paper:muzero", + "rel": "motivates" }, { - "source": "paper:world_models", - "target": "paper:mile_driving", - "rel": "prereq" + "source": "problem:long_horizon_credit_assignment_in_driving", + "target": "paper:diffuser", + "rel": "motivates" }, { - "source": "paper:dreamer_v2", - "target": "paper:mile_driving", - "rel": "parallel" + "source": "problem:reward_specification_for_safe_polite_driving", + "target": "paper:rlhf_dpo", + "rel": "motivates" 
}, { - "source": "paper:ad_benchmarks", - "target": "paper:nuplan_baselines", - "rel": "covers" + "source": "problem:reward_specification_for_safe_polite_driving", + "target": "paper:pebble", + "rel": "motivates" }, { - "source": "paper:nuplan_baselines", - "target": "paper:interfuser", - "rel": "contrasts" + "source": "problem:exploration_in_safety_critical_systems", + "target": "paper:cpo_safe_rl", + "rel": "motivates" }, { - "source": "paper:vaswani2017", - "target": "paper:trajeglish", - "rel": "prereq" + "source": "problem:exploration_in_safety_critical_systems", + "target": "paper:lagrangian_safe_rl", + "rel": "motivates" }, { - "source": "paper:trajeglish", - "target": "paper:most_simagents", - "rel": "parallel" + "source": "problem:exploration_in_safety_critical_systems", + "target": "paper:shielded_rl", + "rel": "motivates" }, { - "source": "paper:gpt3", - "target": "paper:codetraj", - "rel": "prereq" + "source": "problem:closed_loop_simulation_fidelity_gap", + "target": "paper:trajeglish", + "rel": "motivates" }, { - "source": "paper:2311.10813", - "target": "paper:codetraj", - "rel": "parallel" + "source": "problem:closed_loop_simulation_fidelity_gap", + "target": "paper:most_simagents", + "rel": "motivates" }, { - "source": "paper:world_models", - "target": "paper:mbrl_pets", - "rel": "parallel" + "source": "problem:closed_loop_simulation_fidelity_gap", + "target": "paper:gaia1", + "rel": "motivates" }, { - "source": "paper:mbrl_pets", - "target": "paper:dreamer_v2", - "rel": "prereq" + "source": "problem:closed_loop_simulation_fidelity_gap", + "target": "paper:drivedreamer", + "rel": "motivates" }, { - "source": "move:learn_world_model_then_plan_in_latent_imagination", - "target": "paper:world_models", - "rel": "manifests" + "source": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "target": "paper:trajeglish", + "rel": "motivates" }, { - "source": "move:learn_world_model_then_plan_in_latent_imagination", - "target": "paper:dreamer_v2", 
- "rel": "manifests" + "source": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "target": "paper:nuplan_baselines", + "rel": "motivates" }, { - "source": "move:learn_world_model_then_plan_in_latent_imagination", - "target": "paper:dreamer_v3", - "rel": "manifests" + "source": "problem:planning_horizon_vs_compute_budget_tradeoff", + "target": "paper:cilqr", + "rel": "motivates" }, { - "source": "move:learn_world_model_then_plan_in_latent_imagination", - "target": "paper:mile_driving", - "rel": "manifests" + "source": "problem:planning_horizon_vs_compute_budget_tradeoff", + "target": "paper:mpc_book", + "rel": "motivates" }, { - "source": "move:plan_with_mcts_in_learned_model", - "target": "paper:muzero", - "rel": "manifests" + "source": "problem:rare_event_evaluation_with_no_ground_truth", + "target": "paper:nuplan_baselines", + "rel": "motivates" }, { - "source": "move:plan_with_mcts_in_learned_model", - "target": "paper:silver2017_alphazero", - "rel": "extends" + "source": "problem:rare_event_evaluation_with_no_ground_truth", + "target": "paper:ad_benchmarks", + "rel": "motivates" }, { - "source": "move:discrete_latent_state_for_world_model", - "target": "paper:dreamer_v2", - "rel": "manifests" + "source": "problem:reward_hacking_in_learned_objectives", + "target": "paper:bpref", + "rel": "motivates" }, { - "source": "move:discrete_latent_state_for_world_model", - "target": "paper:dreamer_v3", - "rel": "manifests" + "source": "problem:reward_hacking_in_learned_objectives", + "target": "paper:rlhf_dpo", + "rel": "motivates" }, { - "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", - "target": "paper:iris_world_model", + "source": "problem:behavior_cloning_compounds_errors_over_time", + "target": "insight:imitation_learning_alone_cannot_recover_from_compounding_errors", "rel": "manifests" }, { - "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", - "target": "paper:gaia1", + "source": 
"insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", + "target": "paper:world_models", "rel": "manifests" }, { - "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", - "target": "paper:drivedreamer", + "source": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", + "target": "paper:dreamer_v3", "rel": "manifests" }, { - "source": "move:replace_explicit_critic_with_diffusion_score", - "target": "paper:diffusion_policy_chi2023", + "source": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", + "target": "paper:muzero", "rel": "manifests" }, { - "source": "move:replace_explicit_critic_with_diffusion_score", - "target": "paper:diffuser", + "source": "insight:human_demonstrations_compress_implicit_reward_function", + "target": "paper:diffusion_policy_chi2023", "rel": "manifests" }, { - "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", - "target": "paper:mnih2015_dqn", + "source": "insight:human_demonstrations_compress_implicit_reward_function", + "target": "paper:ross2011_dagger", "rel": "manifests" }, { - "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", - "target": "paper:sac", + "source": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", + "target": "paper:cpo_safe_rl", "rel": "manifests" }, { - "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", - "target": "paper:td3", + "source": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", + "target": "paper:lagrangian_safe_rl", "rel": "manifests" }, { - "source": "move:add_entropy_bonus_to_encourage_exploration", - "target": "paper:sac", + "source": "insight:offline_rl_is_actually_constrained_dynamic_programming", + "target": "paper:cql", "rel": "manifests" }, { - "source": "move:add_entropy_bonus_to_encourage_exploration", - "target": "paper:schulman2017_ppo", + "source": 
"insight:offline_rl_is_actually_constrained_dynamic_programming", + "target": "paper:iql", "rel": "manifests" }, { - "source": "move:add_entropy_bonus_to_encourage_exploration", - "target": "paper:a3c_a2c", + "source": "insight:offline_rl_is_actually_constrained_dynamic_programming", + "target": "paper:calql", "rel": "manifests" }, { - "source": "move:turn_offline_dataset_into_supervised_sequence_prediction", + "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "target": "paper:decision_transformer", "rel": "manifests" }, { - "source": "move:turn_offline_dataset_into_supervised_sequence_prediction", + "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "target": "paper:trajectory_transformer", "rel": "manifests" }, { - "source": "move:replace_value_function_with_implicit_max_via_expectile", - "target": "paper:iql", + "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", + "target": "paper:trajeglish", "rel": "manifests" }, { - "source": "move:use_pretrained_language_model_as_action_prior", - "target": "paper:codetraj", + "source": "insight:bigger_model_plus_more_data_beats_clever_priors", + "target": "paper:openai_five", "rel": "manifests" }, { - "source": "move:use_pretrained_language_model_as_action_prior", - "target": "paper:2311.10813", + "source": "insight:bigger_model_plus_more_data_beats_clever_priors", + "target": "paper:alphastar", "rel": "manifests" }, { - "source": "move:use_pretrained_language_model_as_action_prior", - "target": "paper:2309.16292", - "rel": "manifests" + "source": "insight:bigger_model_plus_more_data_beats_clever_priors", + "target": "essay:bitter_lesson", + "rel": "extends" }, { - "source": "move:add_lagrangian_safety_constraint_to_actor_critic", - "target": "paper:lagrangian_safe_rl", + "source": "insight:control_theory_and_rl_meet_in_optimal_control", + "target": "paper:lqr_classic", "rel": "manifests" }, { - "source": 
"move:add_lagrangian_safety_constraint_to_actor_critic", - "target": "paper:cpo_safe_rl", + "source": "insight:control_theory_and_rl_meet_in_optimal_control", + "target": "paper:mpc_book", "rel": "manifests" }, { - "source": "move:treat_planning_as_conditional_generation", - "target": "paper:diffuser", + "source": "insight:control_theory_and_rl_meet_in_optimal_control", + "target": "concept:bellman_eq", "rel": "manifests" }, { - "source": "move:treat_planning_as_conditional_generation", - "target": "paper:diffusion_policy_chi2023", + "source": "paradigm:model_based_rl", + "target": "paper:world_models", "rel": "manifests" }, { - "source": "move:treat_planning_as_conditional_generation", - "target": "paper:diffusion_planner", + "source": "paradigm:model_based_rl", + "target": "paper:dreamer_v2", "rel": "manifests" }, { - "source": "move:treat_planning_as_conditional_generation", - "target": "paper:vadv2", + "source": "paradigm:model_based_rl", + "target": "paper:dreamer_v3", "rel": "manifests" }, { - "source": "move:cast_continuous_action_as_discretized_token_sequence", - "target": "paper:trajectory_transformer", + "source": "paradigm:model_based_rl", + "target": "paper:muzero", "rel": "manifests" }, { - "source": "move:cast_continuous_action_as_discretized_token_sequence", - "target": "paper:trajeglish", + "source": "paradigm:model_based_rl", + "target": "paper:iris_world_model", "rel": "manifests" }, { - "source": "move:cast_continuous_action_as_discretized_token_sequence", - "target": "paper:most_simagents", + "source": "paradigm:model_based_rl", + "target": "paper:mbrl_pets", "rel": "manifests" }, { - "source": "move:use_n_step_returns_to_trade_bias_for_variance", + "source": "paradigm:model_free_rl", "target": "paper:mnih2015_dqn", - "rel": "enables" + "rel": "manifests" }, { - "source": "move:use_n_step_returns_to_trade_bias_for_variance", + "source": "paradigm:model_free_rl", "target": "paper:schulman2017_ppo", - "rel": "enables" + "rel": "manifests" }, { - 
"source": "move:add_intrinsic_motivation_via_novelty_or_curiosity", - "target": "paper:pebble", - "rel": "enables" + "source": "paradigm:model_free_rl", + "target": "paper:sac", + "rel": "manifests" }, { - "source": "move:apply_gae_to_smooth_advantage_estimation", - "target": "paper:schulman2017_ppo", + "source": "paradigm:model_free_rl", + "target": "paper:td3", "rel": "manifests" }, { - "source": "move:apply_gae_to_smooth_advantage_estimation", + "source": "paradigm:model_free_rl", "target": "paper:a3c_a2c", "rel": "manifests" }, { - "source": "move:apply_gae_to_smooth_advantage_estimation", - "target": "paper:openai_five", + "source": "paradigm:model_free_rl", + "target": "paper:impala", "rel": "manifests" }, { - "source": "move:use_prioritized_replay_buffer", - "target": "paper:mnih2015_dqn", - "rel": "enables" - }, - { - "source": "move:cotrain_dynamics_model_with_policy_to_share_representations", - "target": "paper:mile_driving", + "source": "paradigm:offline_rl", + "target": "paper:cql", "rel": "manifests" }, { - "source": "move:cotrain_dynamics_model_with_policy_to_share_representations", - "target": "paper:dreamer_v2", + "source": "paradigm:offline_rl", + "target": "paper:iql", "rel": "manifests" }, { - "source": "move:warm_start_rl_with_imitation_then_anneal", - "target": "paper:alphastar", + "source": "paradigm:offline_rl", + "target": "paper:calql", "rel": "manifests" }, { - "source": "move:warm_start_rl_with_imitation_then_anneal", - "target": "paper:roach", + "source": "paradigm:offline_rl", + "target": "paper:decision_transformer", "rel": "manifests" }, { - "source": "move:warm_start_rl_with_imitation_then_anneal", - "target": "paper:calql", + "source": "paradigm:imitation_learning", + "target": "paper:ross2011_dagger", "rel": "manifests" }, { - "source": "move:double_q_to_reduce_overestimation", - "target": "paper:td3", + "source": "paradigm:imitation_learning", + "target": "paper:diffusion_policy_chi2023", "rel": "manifests" }, { - "source": 
"move:double_q_to_reduce_overestimation", - "target": "paper:sac", + "source": "paradigm:imitation_learning", + "target": "paper:transfuser", "rel": "manifests" }, { - "source": "move:double_q_to_reduce_overestimation", - "target": "paper:redq", - "rel": "extends" - }, - { - "source": "move:expert_iteration_self_distillation", - "target": "paper:silver2017_alphazero", + "source": "paradigm:imitation_learning", + "target": "paper:interfuser", "rel": "manifests" }, { - "source": "move:expert_iteration_self_distillation", - "target": "paper:muzero", + "source": "paradigm:optimal_control", + "target": "paper:lqr_classic", "rel": "manifests" }, { - "source": "move:distill_privileged_teacher_to_sensor_student", - "target": "paper:roach", + "source": "paradigm:optimal_control", + "target": "paper:ilqr_classic", "rel": "manifests" }, { - "source": "move:distill_privileged_teacher_to_sensor_student", - "target": "paper:transfuser", + "source": "paradigm:optimal_control", + "target": "paper:cilqr", "rel": "manifests" }, { - "source": "move:trust_region_step_for_monotonic_improvement", - "target": "paper:schulman2017_ppo", + "source": "paradigm:optimal_control", + "target": "paper:mpc_book", "rel": "manifests" }, { - "source": "move:trust_region_step_for_monotonic_improvement", + "source": "paradigm:safe_rl", "target": "paper:cpo_safe_rl", "rel": "manifests" }, { - "source": "move:trust_region_step_for_monotonic_improvement", - "target": "paper:mpo", + "source": "paradigm:safe_rl", + "target": "paper:lagrangian_safe_rl", "rel": "manifests" }, { - "source": "move:expectile_or_quantile_target_for_distributional_robustness", - "target": "paper:iql", + "source": "paradigm:safe_rl", + "target": "paper:shielded_rl", "rel": "manifests" }, { - "source": "move:hindsight_experience_relabeling", - "target": "paper:sac", - "rel": "enables" + "source": "paradigm:sequence_modeling_for_decision", + "target": "paper:decision_transformer", + "rel": "manifests" }, { - "source": 
"move:safety_shield_filters_unsafe_actions", - "target": "paper:shielded_rl", + "source": "paradigm:sequence_modeling_for_decision", + "target": "paper:trajectory_transformer", "rel": "manifests" }, { - "source": "move:reward_model_from_pairwise_human_preferences", - "target": "paper:rlhf_dpo", + "source": "paradigm:sequence_modeling_for_decision", + "target": "paper:trajeglish", "rel": "manifests" }, { - "source": "move:reward_model_from_pairwise_human_preferences", - "target": "paper:pebble", + "source": "paradigm:sequence_modeling_for_decision", + "target": "paper:most_simagents", "rel": "manifests" }, { - "source": "move:reward_model_from_pairwise_human_preferences", - "target": "paper:bpref", + "source": "paradigm:sequence_modeling_for_decision", + "target": "paper:codetraj", "rel": "manifests" }, { - "source": "move:guided_sampling_through_classifier_gradients_at_inference", - "target": "paper:diffuser", - "rel": "manifests" + "source": "paradigm:model_free_rl", + "target": "paradigm:model_based_rl", + "rel": "contrasts" + }, + { + "source": "paradigm:imitation_learning", + "target": "paradigm:model_free_rl", + "rel": "contrasts" }, { - "source": "move:guided_sampling_through_classifier_gradients_at_inference", - "target": "paper:diffusion_planner", - "rel": "manifests" + "source": "paradigm:offline_rl", + "target": "paradigm:model_free_rl", + "rel": "contrasts" }, { - "source": "move:plan_via_cross_entropy_method_on_dynamics_model", - "target": "paper:mbrl_pets", - "rel": "manifests" + "source": "paradigm:sequence_modeling_for_decision", + "target": "paradigm:model_free_rl", + "rel": "contrasts" }, { - "source": "move:plan_via_cross_entropy_method_on_dynamics_model", - "target": "paper:muzero", + "source": "paradigm:optimal_control", + "target": "paradigm:model_free_rl", "rel": "parallel" }, { - "source": "move:two_stage_coarse_to_fine_trajectory", - "target": "paper:thinktwice", - "rel": "manifests" + "source": "paradigm:safe_rl", + "target": 
"paradigm:model_free_rl", + "rel": "extends" }, { - "source": "move:two_stage_coarse_to_fine_trajectory", - "target": "paper:vadv2", - "rel": "manifests" + "source": "problem:reward_hacking_in_learned_objectives", + "target": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", + "rel": "motivates" }, { - "source": "move:league_play_for_policy_diversity", - "target": "paper:alphastar", - "rel": "manifests" + "source": "problem:long_horizon_credit_assignment_in_driving", + "target": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", + "rel": "motivates" }, { - "source": "move:learn_world_model_then_plan_in_latent_imagination", - "target": "move:plan_with_mcts_in_learned_model", - "rel": "composes" + "source": "problem:distributional_shift_between_offline_data_and_deployment", + "target": "insight:offline_rl_is_actually_constrained_dynamic_programming", + "rel": "motivates" }, { - "source": "move:discrete_latent_state_for_world_model", - "target": "move:learn_world_model_then_plan_in_latent_imagination", - "rel": "composes" + "source": "concept:mdp", + "target": "paradigm:optimal_control", + "rel": "prereq" }, { - "source": "move:tokenize_pixel_frames_for_autoregressive_world_model", - "target": "move:learn_world_model_then_plan_in_latent_imagination", - "rel": "composes" + "source": "concept:bellman_eq", + "target": "paradigm:model_based_rl", + "rel": "prereq" }, { - "source": "move:cast_continuous_action_as_discretized_token_sequence", - "target": "move:turn_offline_dataset_into_supervised_sequence_prediction", - "rel": "composes" + "source": "concept:bellman_eq", + "target": "paradigm:model_free_rl", + "rel": "prereq" }, { - "source": "move:bootstrap_target_network_to_stabilize_off_policy_learning", - "target": "move:double_q_to_reduce_overestimation", - "rel": "composes" + "source": "concept:imitation_learning", + "target": "paradigm:imitation_learning", + "rel": "prereq" }, { - "source": 
"move:add_entropy_bonus_to_encourage_exploration", - "target": "move:double_q_to_reduce_overestimation", - "rel": "composes" + "source": "concept:replay_buffer", + "target": "move:use_prioritized_replay_buffer", + "rel": "prereq" }, { - "source": "move:trust_region_step_for_monotonic_improvement", - "target": "move:add_lagrangian_safety_constraint_to_actor_critic", - "rel": "composes" + "source": "concept:actor_critic", + "target": "paper:sac", + "rel": "prereq" }, { - "source": "move:treat_planning_as_conditional_generation", - "target": "move:guided_sampling_through_classifier_gradients_at_inference", - "rel": "composes" + "source": "concept:actor_critic", + "target": "paper:a3c_a2c", + "rel": "prereq" }, { - "source": "move:use_pretrained_language_model_as_action_prior", - "target": "move:treat_planning_as_conditional_generation", - "rel": "composes" + "source": "concept:actor_critic", + "target": "paper:mpo", + "rel": "prereq" }, { - "source": "move:reward_model_from_pairwise_human_preferences", - "target": "move:warm_start_rl_with_imitation_then_anneal", - "rel": "composes" + "source": "concept:actor_critic", + "target": "move:add_lagrangian_safety_constraint_to_actor_critic", + "rel": "prereq" }, { - "source": "move:distill_privileged_teacher_to_sensor_student", - "target": "move:warm_start_rl_with_imitation_then_anneal", - "rel": "composes" + "source": "paper:gpt3", + "target": "paper:gpt4", + "rel": "extends" }, { - "source": "problem:behavior_cloning_compounds_errors_over_time", - "target": "paper:ross2011_dagger", - "rel": "motivates" + "source": "paper:gpt4", + "target": "paper:gpt4v", + "rel": "extends" }, { - "source": "problem:behavior_cloning_compounds_errors_over_time", - "target": "paper:roach", - "rel": "motivates" + "source": "paper:gpt4", + "target": "paper:claude", + "rel": "parallel" }, { - "source": "problem:behavior_cloning_compounds_errors_over_time", - "target": "paper:transfuser", - "rel": "motivates" + "source": "paper:gpt4", + "target": 
"paper:gemini", + "rel": "parallel" }, { - "source": "problem:distributional_shift_between_offline_data_and_deployment", - "target": "paper:cql", - "rel": "motivates" + "source": "paper:gpt3", + "target": "paper:llama", + "rel": "parallel" }, { - "source": "problem:distributional_shift_between_offline_data_and_deployment", - "target": "paper:iql", - "rel": "motivates" + "source": "paper:llama", + "target": "paper:mistral", + "rel": "extends" }, { - "source": "problem:distributional_shift_between_offline_data_and_deployment", - "target": "paper:calql", - "rel": "motivates" + "source": "paper:llama", + "target": "paper:qwen", + "rel": "parallel" }, { - "source": "problem:distributional_shift_between_offline_data_and_deployment", - "target": "paper:decision_transformer", - "rel": "motivates" + "source": "paper:gpt3", + "target": "paper:instructgpt", + "rel": "extends" }, { - "source": "problem:long_horizon_credit_assignment_in_driving", - "target": "paper:muzero", - "rel": "motivates" + "source": "paper:instructgpt", + "target": "paper:gpt4", + "rel": "feeds" }, { - "source": "problem:long_horizon_credit_assignment_in_driving", - "target": "paper:diffuser", - "rel": "motivates" + "source": "paper:instructgpt", + "target": "paper:rlhf_dpo", + "rel": "covers" }, { - "source": "problem:reward_specification_for_safe_polite_driving", - "target": "paper:rlhf_dpo", - "rel": "motivates" + "source": "paper:rlhf_dpo", + "target": "paper:constitutional_ai", + "rel": "parallel" }, { - "source": "problem:reward_specification_for_safe_polite_driving", - "target": "paper:pebble", - "rel": "motivates" + "source": "paper:constitutional_ai", + "target": "paper:claude", + "rel": "feeds" }, { - "source": "problem:exploration_in_safety_critical_systems", - "target": "paper:cpo_safe_rl", - "rel": "motivates" + "source": "paper:gpt3", + "target": "paper:cot_wei2022", + "rel": "extends" }, { - "source": "problem:exploration_in_safety_critical_systems", - "target": "paper:lagrangian_safe_rl", 
- "rel": "motivates" + "source": "paper:cot_wei2022", + "target": "paper:self_consistency", + "rel": "extends" }, { - "source": "problem:exploration_in_safety_critical_systems", - "target": "paper:shielded_rl", - "rel": "motivates" + "source": "paper:cot_wei2022", + "target": "paper:react", + "rel": "extends" }, { - "source": "problem:closed_loop_simulation_fidelity_gap", - "target": "paper:trajeglish", - "rel": "motivates" + "source": "paper:cot_wei2022", + "target": "paper:tot", + "rel": "extends" }, { - "source": "problem:closed_loop_simulation_fidelity_gap", - "target": "paper:most_simagents", - "rel": "motivates" + "source": "paper:react", + "target": "paper:reflexion", + "rel": "extends" }, { - "source": "problem:closed_loop_simulation_fidelity_gap", - "target": "paper:gaia1", - "rel": "motivates" + "source": "paper:react", + "target": "paper:toolformer", + "rel": "parallel" }, { - "source": "problem:closed_loop_simulation_fidelity_gap", - "target": "paper:drivedreamer", - "rel": "motivates" + "source": "paper:react", + "target": "paper:voyager", + "rel": "feeds" }, { - "source": "problem:multi_agent_interaction_modeling_in_dense_traffic", - "target": "paper:trajeglish", - "rel": "motivates" + "source": "paper:reflexion", + "target": "paper:voyager", + "rel": "feeds" }, { - "source": "problem:multi_agent_interaction_modeling_in_dense_traffic", - "target": "paper:nuplan_baselines", - "rel": "motivates" + "source": "paper:tot", + "target": "paper:verifier", + "rel": "parallel" }, { - "source": "problem:planning_horizon_vs_compute_budget_tradeoff", - "target": "paper:cilqr", - "rel": "motivates" + "source": "paper:self_consistency", + "target": "paper:verifier", + "rel": "parallel" }, { - "source": "problem:planning_horizon_vs_compute_budget_tradeoff", - "target": "paper:mpc_book", - "rel": "motivates" + "source": "paper:debate", + "target": "paper:verifier", + "rel": "parallel" }, { - "source": "problem:rare_event_evaluation_with_no_ground_truth", - "target": 
"paper:nuplan_baselines", - "rel": "motivates" + "source": "paper:react", + "target": "paper:swiftsage", + "rel": "feeds" }, { - "source": "problem:rare_event_evaluation_with_no_ground_truth", - "target": "paper:ad_benchmarks", - "rel": "motivates" + "source": "paper:llava", + "target": "paper:flamingo", + "rel": "parallel" + }, + { + "source": "paper:flamingo", + "target": "paper:llava", + "rel": "prereq" }, { - "source": "problem:reward_hacking_in_learned_objectives", - "target": "paper:bpref", - "rel": "motivates" + "source": "paper:llava", + "target": "paper:internvl", + "rel": "extends" }, { - "source": "problem:reward_hacking_in_learned_objectives", - "target": "paper:rlhf_dpo", - "rel": "motivates" + "source": "paper:llava", + "target": "paper:cambrian", + "rel": "extends" }, { - "source": "problem:behavior_cloning_compounds_errors_over_time", - "target": "insight:imitation_learning_alone_cannot_recover_from_compounding_errors", - "rel": "manifests" + "source": "paper:florence", + "target": "paper:cambrian", + "rel": "parallel" }, { - "source": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", - "target": "paper:world_models", - "rel": "manifests" + "source": "paper:qwen", + "target": "paper:internvl", + "rel": "parallel" }, { - "source": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", - "target": "paper:dreamer_v3", - "rel": "manifests" + "source": "paper:llava", + "target": "paper:palme", + "rel": "parallel" }, { - "source": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", - "target": "paper:muzero", - "rel": "manifests" + "source": "paper:palme", + "target": "paper:rt2", + "rel": "prereq" }, { - "source": "insight:human_demonstrations_compress_implicit_reward_function", - "target": "paper:diffusion_policy_chi2023", - "rel": "manifests" + "source": "paper:rt1", + "target": "paper:rt2", + "rel": "extends" }, { - "source": "insight:human_demonstrations_compress_implicit_reward_function", - 
"target": "paper:ross2011_dagger", - "rel": "manifests" + "source": "paper:rt2", + "target": "paper:rtx", + "rel": "extends" }, { - "source": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", - "target": "paper:cpo_safe_rl", - "rel": "manifests" + "source": "paper:rtx", + "target": "paper:openvla", + "rel": "feeds" }, { - "source": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", - "target": "paper:lagrangian_safe_rl", - "rel": "manifests" + "source": "paper:rt2", + "target": "paper:openvla", + "rel": "extends" }, { - "source": "insight:offline_rl_is_actually_constrained_dynamic_programming", - "target": "paper:cql", - "rel": "manifests" + "source": "paper:rtx", + "target": "paper:octo", + "rel": "feeds" }, { - "source": "insight:offline_rl_is_actually_constrained_dynamic_programming", - "target": "paper:iql", - "rel": "manifests" + "source": "paper:openvla", + "target": "paper:octo", + "rel": "parallel" }, { - "source": "insight:offline_rl_is_actually_constrained_dynamic_programming", - "target": "paper:calql", - "rel": "manifests" + "source": "paper:vaswani2017", + "target": "paper:dit", + "rel": "prereq" }, { - "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", - "target": "paper:decision_transformer", - "rel": "manifests" + "source": "paper:dit", + "target": "paper:sora", + "rel": "prereq" }, { - "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", - "target": "paper:trajectory_transformer", - "rel": "manifests" + "source": "paper:dit", + "target": "paper:svd", + "rel": "prereq" }, { - "source": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", - "target": "paper:trajeglish", - "rel": "manifests" + "source": "paper:svd", + "target": "paper:sora", + "rel": "parallel" }, { - "source": "insight:bigger_model_plus_more_data_beats_clever_priors", - "target": "paper:openai_five", - "rel": "manifests" + "source": "paper:sora", + 
"target": "paper:veo", + "rel": "parallel" }, { - "source": "insight:bigger_model_plus_more_data_beats_clever_priors", - "target": "paper:alphastar", - "rel": "manifests" + "source": "paper:sora", + "target": "paper:cosmos", + "rel": "feeds" }, { - "source": "insight:bigger_model_plus_more_data_beats_clever_priors", - "target": "essay:bitter_lesson", - "rel": "extends" + "source": "paper:svd", + "target": "paper:cosmos", + "rel": "prereq" }, { - "source": "insight:control_theory_and_rl_meet_in_optimal_control", - "target": "paper:lqr_classic", - "rel": "manifests" + "source": "paper:gaia1", + "target": "paper:cosmos", + "rel": "parallel" }, { - "source": "insight:control_theory_and_rl_meet_in_optimal_control", - "target": "paper:mpc_book", - "rel": "manifests" + "source": "paper:drivedreamer", + "target": "paper:cosmos", + "rel": "parallel" }, { - "source": "insight:control_theory_and_rl_meet_in_optimal_control", - "target": "concept:bellman_eq", - "rel": "manifests" + "source": "paper:gaia1", + "target": "paper:prism1", + "rel": "parallel" }, { - "source": "paradigm:model_based_rl", - "target": "paper:world_models", - "rel": "manifests" + "source": "paper:drivedreamer", + "target": "paper:prism1", + "rel": "parallel" }, { - "source": "paradigm:model_based_rl", - "target": "paper:dreamer_v2", - "rel": "manifests" + "source": "paper:2402.12289", + "target": "paper:senna", + "rel": "parallel" }, { - "source": "paradigm:model_based_rl", - "target": "paper:dreamer_v3", - "rel": "manifests" + "source": "paper:gemini", + "target": "paper:emma", + "rel": "prereq" }, { - "source": "paradigm:model_based_rl", - "target": "paper:muzero", - "rel": "manifests" + "source": "paper:rt2", + "target": "paper:emma", + "rel": "parallel" }, { - "source": "paradigm:model_based_rl", - "target": "paper:iris_world_model", - "rel": "manifests" + "source": "paper:2402.12289", + "target": "paper:emma", + "rel": "parallel" }, { - "source": "paradigm:model_based_rl", - "target": 
"paper:mbrl_pets", - "rel": "manifests" + "source": "paper:2311.10813", + "target": "paper:drivemlm", + "rel": "parallel" }, { - "source": "paradigm:model_free_rl", - "target": "paper:mnih2015_dqn", - "rel": "manifests" + "source": "paper:2402.12289", + "target": "paper:drivemlm", + "rel": "parallel" }, { - "source": "paradigm:model_free_rl", - "target": "paper:schulman2017_ppo", - "rel": "manifests" + "source": "paper:llava", + "target": "paper:drivelm", + "rel": "feeds" }, { - "source": "paradigm:model_free_rl", - "target": "paper:sac", - "rel": "manifests" + "source": "paper:drivelm", + "target": "paper:senna", + "rel": "feeds" }, { - "source": "paradigm:model_free_rl", - "target": "paper:td3", - "rel": "manifests" + "source": "paper:gpt_driver", + "target": "paper:lmdrive", + "rel": "parallel" }, { - "source": "paradigm:model_free_rl", - "target": "paper:a3c_a2c", - "rel": "manifests" + "source": "paper:gpt3", + "target": "paper:gpt_driver", + "rel": "feeds" }, { - "source": "paradigm:model_free_rl", - "target": "paper:impala", - "rel": "manifests" + "source": "paper:llama", + "target": "paper:lmdrive", + "rel": "feeds" }, { - "source": "paradigm:offline_rl", - "target": "paper:cql", - "rel": "manifests" + "source": "paper:lingo2", + "target": "paper:senna", + "rel": "parallel" }, { - "source": "paradigm:offline_rl", - "target": "paper:iql", - "rel": "manifests" + "source": "paper:lingo2", + "target": "paper:prism1", + "rel": "parallel" }, { - "source": "paradigm:offline_rl", - "target": "paper:calql", - "rel": "manifests" + "source": "paper:emma", + "target": "paper:2512.24426", + "rel": "parallel" }, { - "source": "paradigm:offline_rl", - "target": "paper:decision_transformer", - "rel": "manifests" + "source": "paper:senna", + "target": "paper:2512.24426", + "rel": "feeds" }, { - "source": "paradigm:imitation_learning", - "target": "paper:ross2011_dagger", - "rel": "manifests" + "source": "paper:cosmos", + "target": "paper:2512.24426", + "rel": "feeds" }, { - 
"source": "paradigm:imitation_learning", - "target": "paper:diffusion_policy_chi2023", - "rel": "manifests" + "source": "paper:openvla", + "target": "paper:2512.24426", + "rel": "parallel" }, { - "source": "paradigm:imitation_learning", - "target": "paper:transfuser", - "rel": "manifests" + "source": "paper:gpt3", + "target": "move:scale_data_then_let_emergent_capabilities_appear", + "rel": "covers" }, { - "source": "paradigm:imitation_learning", - "target": "paper:interfuser", - "rel": "manifests" + "source": "paper:sora", + "target": "move:scale_data_then_let_emergent_capabilities_appear", + "rel": "covers" + }, + { + "source": "essay:bitter_lesson", + "target": "move:scale_data_then_let_emergent_capabilities_appear", + "rel": "covers" }, { - "source": "paradigm:optimal_control", - "target": "paper:lqr_classic", - "rel": "manifests" + "source": "paper:llava", + "target": "move:pretrain_with_contrastive_alignment_between_modalities", + "rel": "covers" }, { - "source": "paradigm:optimal_control", - "target": "paper:ilqr_classic", - "rel": "manifests" + "source": "paper:flamingo", + "target": "move:pretrain_with_contrastive_alignment_between_modalities", + "rel": "covers" }, { - "source": "paradigm:optimal_control", - "target": "paper:cilqr", - "rel": "manifests" + "source": "paper:instructgpt", + "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "rel": "covers" }, { - "source": "paradigm:optimal_control", - "target": "paper:mpc_book", - "rel": "manifests" + "source": "paper:constitutional_ai", + "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "rel": "covers" }, { - "source": "paradigm:safe_rl", - "target": "paper:cpo_safe_rl", - "rel": "manifests" + "source": "paper:rlhf_dpo", + "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "rel": "covers" }, { - "source": "paradigm:safe_rl", - "target": "paper:lagrangian_safe_rl", - "rel": "manifests" + "source": "paper:llava", + 
"target": "move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "rel": "covers" }, { - "source": "paradigm:safe_rl", - "target": "paper:shielded_rl", - "rel": "manifests" + "source": "paper:flamingo", + "target": "move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paper:decision_transformer", - "rel": "manifests" + "source": "paper:palme", + "target": "move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paper:trajectory_transformer", - "rel": "manifests" + "source": "paper:react", + "target": "move:wrap_language_model_with_tool_calling_loop", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paper:trajeglish", - "rel": "manifests" + "source": "paper:toolformer", + "target": "move:wrap_language_model_with_tool_calling_loop", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paper:most_simagents", - "rel": "manifests" + "source": "paper:2311.10813", + "target": "move:wrap_language_model_with_tool_calling_loop", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paper:codetraj", - "rel": "manifests" + "source": "paper:reflexion", + "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "rel": "covers" }, { - "source": "paradigm:model_free_rl", - "target": "paradigm:model_based_rl", - "rel": "contrasts" + "source": "paper:2309.16292", + "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "rel": "covers" }, { - "source": "paradigm:imitation_learning", - "target": "paradigm:model_free_rl", - "rel": "contrasts" + "source": "paper:voyager", + "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "rel": "covers" }, { - "source": "paradigm:offline_rl", - 
"target": "paradigm:model_free_rl", - "rel": "contrasts" + "source": "paper:rt2", + "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "rel": "covers" }, { - "source": "paradigm:sequence_modeling_for_decision", - "target": "paradigm:model_free_rl", - "rel": "contrasts" + "source": "paper:openvla", + "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "rel": "covers" }, { - "source": "paradigm:optimal_control", - "target": "paradigm:model_free_rl", - "rel": "parallel" + "source": "paper:gpt_driver", + "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "rel": "covers" }, { - "source": "paradigm:safe_rl", - "target": "paradigm:model_free_rl", - "rel": "extends" + "source": "paper:emma", + "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "rel": "covers" }, { - "source": "problem:reward_hacking_in_learned_objectives", - "target": "insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping", - "rel": "motivates" + "source": "paper:2512.24426", + "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "rel": "covers" }, { - "source": "problem:long_horizon_credit_assignment_in_driving", - "target": "insight:world_model_as_inner_simulator_unlocks_long_horizon_planning", - "rel": "motivates" + "source": "paper:cosmos", + "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "rel": "covers" }, { - "source": "problem:distributional_shift_between_offline_data_and_deployment", - "target": "insight:offline_rl_is_actually_constrained_dynamic_programming", - "rel": "motivates" + "source": "paper:drivedreamer", + "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "rel": "covers" }, { - "source": "concept:mdp", - "target": "paradigm:optimal_control", - "rel": "prereq" + "source": "paper:gaia1", + "target": 
"move:condition_video_generative_model_on_control_action_for_world_model", + "rel": "covers" }, { - "source": "concept:bellman_eq", - "target": "paradigm:model_based_rl", - "rel": "prereq" + "source": "paper:drivedreamer", + "target": "move:condition_video_generative_model_on_control_action_for_world_model", + "rel": "covers" }, { - "source": "concept:bellman_eq", - "target": "paradigm:model_free_rl", - "rel": "prereq" + "source": "paper:cosmos", + "target": "move:condition_video_generative_model_on_control_action_for_world_model", + "rel": "covers" }, { - "source": "concept:imitation_learning", - "target": "paradigm:imitation_learning", - "rel": "prereq" + "source": "paper:voyager", + "target": "move:use_retrieval_augmented_memory_to_extend_context", + "rel": "covers" }, { - "source": "concept:replay_buffer", - "target": "move:use_prioritized_replay_buffer", - "rel": "prereq" + "source": "paper:2309.16292", + "target": "move:use_retrieval_augmented_memory_to_extend_context", + "rel": "covers" }, { - "source": "concept:actor_critic", - "target": "paper:sac", - "rel": "prereq" + "source": "paper:tot", + "target": "move:cast_reasoning_as_search_over_thought_tree", + "rel": "covers" }, { - "source": "concept:actor_critic", - "target": "paper:a3c_a2c", - "rel": "prereq" + "source": "paper:silver2017_alphazero", + "target": "move:cast_reasoning_as_search_over_thought_tree", + "rel": "covers" }, { - "source": "concept:actor_critic", - "target": "paper:mpo", - "rel": "prereq" + "source": "paper:verifier", + "target": "move:cast_reasoning_as_search_over_thought_tree", + "rel": "covers" }, { - "source": "concept:actor_critic", - "target": "move:add_lagrangian_safety_constraint_to_actor_critic", - "rel": "prereq" + "source": "paper:rt2", + "target": "move:co_finetune_language_model_with_action_data_jointly", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:gpt4", - "rel": "extends" + "source": "paper:openvla", + "target": 
"move:co_finetune_language_model_with_action_data_jointly", + "rel": "covers" }, { - "source": "paper:gpt4", - "target": "paper:gpt4v", - "rel": "extends" + "source": "paper:emma", + "target": "move:co_finetune_language_model_with_action_data_jointly", + "rel": "covers" }, { - "source": "paper:gpt4", - "target": "paper:claude", - "rel": "parallel" + "source": "paper:silver2017_alphazero", + "target": "move:use_self_play_to_generate_unlimited_training_signal", + "rel": "covers" }, { - "source": "paper:gpt4", - "target": "paper:gemini", - "rel": "parallel" + "source": "paper:debate", + "target": "move:use_self_play_to_generate_unlimited_training_signal", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:llama", - "rel": "parallel" + "source": "paper:constitutional_ai", + "target": "move:use_self_play_to_generate_unlimited_training_signal", + "rel": "covers" }, { - "source": "paper:llama", - "target": "paper:mistral", - "rel": "extends" + "source": "paper:gpt4", + "target": "move:distill_large_model_into_specialist_for_deployment", + "rel": "covers" }, { - "source": "paper:llama", - "target": "paper:qwen", - "rel": "parallel" + "source": "paper:2402.12289", + "target": "move:distill_large_model_into_specialist_for_deployment", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:instructgpt", - "rel": "extends" + "source": "paper:senna", + "target": "move:distill_large_model_into_specialist_for_deployment", + "rel": "covers" }, { - "source": "paper:instructgpt", - "target": "paper:gpt4", - "rel": "feeds" + "source": "paper:gaia1", + "target": "move:rewrite_continuous_video_as_token_sequence_for_transformer_world_model", + "rel": "covers" }, { - "source": "paper:instructgpt", - "target": "paper:rlhf_dpo", + "source": "paper:cosmos", + "target": "move:rewrite_continuous_video_as_token_sequence_for_transformer_world_model", "rel": "covers" }, { - "source": "paper:rlhf_dpo", - "target": "paper:constitutional_ai", - "rel": "parallel" + 
"source": "paper:senna", + "target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "rel": "covers" }, { - "source": "paper:constitutional_ai", - "target": "paper:claude", - "rel": "feeds" + "source": "paper:2402.12289", + "target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:cot_wei2022", - "rel": "extends" + "source": "paper:2512.24426", + "target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "rel": "covers" }, { - "source": "paper:cot_wei2022", - "target": "paper:self_consistency", - "rel": "extends" + "source": "paper:gpt4", + "target": "move:cache_kv_state_to_amortize_long_context", + "rel": "covers" }, { - "source": "paper:cot_wei2022", - "target": "paper:react", - "rel": "extends" + "source": "paper:mistral", + "target": "move:cache_kv_state_to_amortize_long_context", + "rel": "covers" }, { - "source": "paper:cot_wei2022", - "target": "paper:tot", - "rel": "extends" + "source": "paper:mistral", + "target": "move:speculative_decoding_with_draft_model", + "rel": "covers" }, { - "source": "paper:react", - "target": "paper:reflexion", - "rel": "extends" + "source": "paper:gemini", + "target": "move:speculative_decoding_with_draft_model", + "rel": "covers" }, { - "source": "paper:react", - "target": "paper:toolformer", - "rel": "parallel" + "source": "paper:llava", + "target": "move:freeze_visual_encoder_and_only_train_connector", + "rel": "covers" }, { - "source": "paper:react", - "target": "paper:voyager", - "rel": "feeds" + "source": "paper:flamingo", + "target": "move:freeze_visual_encoder_and_only_train_connector", + "rel": "covers" }, { - "source": "paper:reflexion", - "target": "paper:voyager", - "rel": "feeds" + "source": "paper:cambrian", + "target": "move:freeze_visual_encoder_and_only_train_connector", + "rel": "covers" }, { - "source": "paper:tot", - "target": "paper:verifier", - "rel": "parallel" + "source": 
"paper:octo", + "target": "move:use_diffusion_head_for_continuous_action", + "rel": "covers" }, { - "source": "paper:self_consistency", - "target": "paper:verifier", - "rel": "parallel" + "source": "paper:diffuser", + "target": "move:use_diffusion_head_for_continuous_action", + "rel": "covers" }, { - "source": "paper:debate", - "target": "paper:verifier", - "rel": "parallel" + "source": "paper:gpt_driver", + "target": "move:treat_planning_as_autoregressive_trajectory_generation", + "rel": "covers" }, { - "source": "paper:react", - "target": "paper:swiftsage", - "rel": "feeds" + "source": "paper:emma", + "target": "move:treat_planning_as_autoregressive_trajectory_generation", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:flamingo", + "source": "paper:vadv2", + "target": "move:treat_planning_as_autoregressive_trajectory_generation", "rel": "parallel" }, { - "source": "paper:flamingo", - "target": "paper:llava", - "rel": "prereq" + "source": "paper:2512.24426", + "target": "move:use_world_model_rollout_as_critic_for_policy", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:internvl", - "rel": "extends" + "source": "paper:world_models", + "target": "move:use_world_model_rollout_as_critic_for_policy", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:cambrian", - "rel": "extends" + "source": "paper:cosmos", + "target": "move:use_world_model_rollout_as_critic_for_policy", + "rel": "covers" }, { - "source": "paper:florence", - "target": "paper:cambrian", - "rel": "parallel" + "source": "paper:voyager", + "target": "move:long_horizon_via_hierarchical_subgoal", + "rel": "covers" }, { - "source": "paper:qwen", - "target": "paper:internvl", - "rel": "parallel" + "source": "paper:swiftsage", + "target": "move:long_horizon_via_hierarchical_subgoal", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:palme", - "rel": "parallel" + "source": "paper:2311.10813", + "target": 
"move:long_horizon_via_hierarchical_subgoal", + "rel": "covers" }, { - "source": "paper:palme", - "target": "paper:rt2", - "rel": "prereq" + "source": "paper:debate", + "target": "move:prompt_chain_with_explicit_persona_roles", + "rel": "covers" }, { - "source": "paper:rt1", - "target": "paper:rt2", - "rel": "extends" + "source": "paper:swiftsage", + "target": "move:prompt_chain_with_explicit_persona_roles", + "rel": "covers" }, { - "source": "paper:rt2", - "target": "paper:rtx", - "rel": "extends" + "source": "paper:2311.10813", + "target": "move:prompt_chain_with_explicit_persona_roles", + "rel": "covers" }, { - "source": "paper:rtx", - "target": "paper:openvla", - "rel": "feeds" + "source": "paper:2512.24426", + "target": "move:contrast_corner_case_against_normal_case_in_training", + "rel": "covers" }, { - "source": "paper:rt2", - "target": "paper:openvla", - "rel": "extends" + "source": "paper:cosmos", + "target": "move:contrast_corner_case_against_normal_case_in_training", + "rel": "covers" }, { - "source": "paper:rtx", - "target": "paper:octo", - "rel": "feeds" + "source": "paper:ad_benchmarks", + "target": "move:evaluate_open_loop_then_close_loop_for_realism", + "rel": "covers" }, { - "source": "paper:openvla", - "target": "paper:octo", - "rel": "parallel" + "source": "paper:drivemlm", + "target": "move:evaluate_open_loop_then_close_loop_for_realism", + "rel": "covers" }, { - "source": "paper:vaswani2017", - "target": "paper:dit", - "rel": "prereq" + "source": "paper:lmdrive", + "target": "move:evaluate_open_loop_then_close_loop_for_realism", + "rel": "covers" }, { - "source": "paper:dit", - "target": "paper:sora", - "rel": "prereq" + "source": "paper:drivelm", + "target": "move:use_language_explanation_as_auxiliary_supervision", + "rel": "covers" }, { - "source": "paper:dit", - "target": "paper:svd", - "rel": "prereq" + "source": "paper:lingo2", + "target": "move:use_language_explanation_as_auxiliary_supervision", + "rel": "covers" }, { - "source": 
"paper:svd", - "target": "paper:sora", - "rel": "parallel" + "source": "paper:emma", + "target": "move:use_language_explanation_as_auxiliary_supervision", + "rel": "covers" }, { - "source": "paper:sora", - "target": "paper:veo", - "rel": "parallel" + "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", + "target": "paper:2512.24426", + "rel": "motivates" }, { - "source": "paper:sora", - "target": "paper:cosmos", - "rel": "feeds" + "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", + "target": "paper:senna", + "rel": "motivates" }, { - "source": "paper:svd", - "target": "paper:cosmos", - "rel": "prereq" + "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", + "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "rel": "motivates" }, { - "source": "paper:gaia1", - "target": "paper:cosmos", - "rel": "parallel" + "source": "problem:grounding_language_token_to_continuous_physical_world", + "target": "paper:rt2", + "rel": "motivates" }, { - "source": "paper:drivedreamer", - "target": "paper:cosmos", - "rel": "parallel" + "source": "problem:grounding_language_token_to_continuous_physical_world", + "target": "paper:palme", + "rel": "motivates" }, { - "source": "paper:gaia1", - "target": "paper:prism1", - "rel": "parallel" + "source": "problem:grounding_language_token_to_continuous_physical_world", + "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "rel": "motivates" }, { - "source": "paper:drivedreamer", - "target": "paper:prism1", - "rel": "parallel" + "source": "problem:latency_budget_for_large_model_in_realtime_control", + "target": "paper:2402.12289", + "rel": "motivates" }, { - "source": "paper:2402.12289", - "target": "paper:senna", - "rel": "parallel" + "source": "problem:latency_budget_for_large_model_in_realtime_control", + "target": "move:distill_large_model_into_specialist_for_deployment", + 
"rel": "motivates" }, { - "source": "paper:gemini", - "target": "paper:emma", - "rel": "prereq" + "source": "problem:latency_budget_for_large_model_in_realtime_control", + "target": "move:speculative_decoding_with_draft_model", + "rel": "motivates" }, { - "source": "paper:rt2", - "target": "paper:emma", - "rel": "parallel" + "source": "problem:latency_budget_for_large_model_in_realtime_control", + "target": "move:cache_kv_state_to_amortize_long_context", + "rel": "motivates" }, { - "source": "paper:2402.12289", - "target": "paper:emma", - "rel": "parallel" + "source": "problem:long_horizon_reasoning_with_finite_context_window", + "target": "paper:gemini", + "rel": "motivates" }, { - "source": "paper:2311.10813", - "target": "paper:drivemlm", - "rel": "parallel" + "source": "problem:long_horizon_reasoning_with_finite_context_window", + "target": "move:use_retrieval_augmented_memory_to_extend_context", + "rel": "motivates" }, { - "source": "paper:2402.12289", - "target": "paper:drivemlm", - "rel": "parallel" + "source": "problem:long_horizon_reasoning_with_finite_context_window", + "target": "move:long_horizon_via_hierarchical_subgoal", + "rel": "motivates" + }, + { + "source": "problem:zero_shot_generalization_to_unseen_driving_scenes", + "target": "paper:gpt4v", + "rel": "motivates" }, { - "source": "paper:llava", - "target": "paper:drivelm", - "rel": "feeds" + "source": "problem:zero_shot_generalization_to_unseen_driving_scenes", + "target": "paper:emma", + "rel": "motivates" }, { - "source": "paper:drivelm", - "target": "paper:senna", - "rel": "feeds" + "source": "problem:fine_grained_spatial_understanding_in_vision_language_model", + "target": "paper:cambrian", + "rel": "motivates" }, { - "source": "paper:gpt_driver", - "target": "paper:lmdrive", - "rel": "parallel" + "source": "problem:fine_grained_spatial_understanding_in_vision_language_model", + "target": "paper:florence", + "rel": "motivates" }, { - "source": "paper:gpt3", - "target": "paper:gpt_driver", - 
"rel": "feeds" + "source": "problem:counterfactual_reasoning_about_other_agents_intent", + "target": "paper:2512.24426", + "rel": "motivates" }, { - "source": "paper:llama", - "target": "paper:lmdrive", - "rel": "feeds" + "source": "problem:counterfactual_reasoning_about_other_agents_intent", + "target": "paper:gaia1", + "rel": "motivates" }, { - "source": "paper:lingo2", - "target": "paper:senna", - "rel": "parallel" + "source": "problem:counterfactual_reasoning_about_other_agents_intent", + "target": "move:use_world_model_rollout_as_critic_for_policy", + "rel": "motivates" }, { - "source": "paper:lingo2", - "target": "paper:prism1", - "rel": "parallel" + "source": "problem:open_world_corner_case_synthesis_for_training", + "target": "paper:cosmos", + "rel": "motivates" }, { - "source": "paper:emma", - "target": "paper:2512.24426", - "rel": "parallel" + "source": "problem:open_world_corner_case_synthesis_for_training", + "target": "paper:prism1", + "rel": "motivates" }, { - "source": "paper:senna", - "target": "paper:2512.24426", - "rel": "feeds" + "source": "problem:open_world_corner_case_synthesis_for_training", + "target": "move:contrast_corner_case_against_normal_case_in_training", + "rel": "motivates" }, { - "source": "paper:cosmos", - "target": "paper:2512.24426", - "rel": "feeds" + "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", + "target": "paper:drivemlm", + "rel": "motivates" }, { - "source": "paper:openvla", - "target": "paper:2512.24426", - "rel": "parallel" + "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", + "target": "paper:lmdrive", + "rel": "motivates" }, { - "source": "paper:gpt3", - "target": "move:scale_data_then_let_emergent_capabilities_appear", - "rel": "covers" + "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", + "target": "move:evaluate_open_loop_then_close_loop_for_realism", + "rel": "motivates" }, { - "source": "paper:sora", - "target": 
"move:scale_data_then_let_emergent_capabilities_appear", - "rel": "covers" + "source": "problem:catastrophic_forgetting_after_action_finetuning", + "target": "paper:rt2", + "rel": "motivates" }, { - "source": "essay:bitter_lesson", - "target": "move:scale_data_then_let_emergent_capabilities_appear", - "rel": "covers" + "source": "problem:catastrophic_forgetting_after_action_finetuning", + "target": "paper:openvla", + "rel": "motivates" }, { - "source": "paper:llava", - "target": "move:pretrain_with_contrastive_alignment_between_modalities", - "rel": "covers" + "source": "problem:catastrophic_forgetting_after_action_finetuning", + "target": "move:co_finetune_language_model_with_action_data_jointly", + "rel": "motivates" }, { - "source": "paper:flamingo", - "target": "move:pretrain_with_contrastive_alignment_between_modalities", + "source": "insight:language_is_compressed_world_model_for_human_priors", + "target": "paper:gpt3", "rel": "covers" }, { - "source": "paper:instructgpt", - "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "source": "insight:language_is_compressed_world_model_for_human_priors", + "target": "paper:2309.16292", "rel": "covers" }, { - "source": "paper:constitutional_ai", - "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "source": "insight:language_is_compressed_world_model_for_human_priors", + "target": "paper:palme", "rel": "covers" }, { - "source": "paper:rlhf_dpo", - "target": "move:fine_tune_with_instruction_data_then_align_with_preferences", + "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", + "target": "paper:gpt4", "rel": "covers" }, { - "source": "paper:llava", - "target": "move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", + "target": "essay:bitter_lesson", "rel": "covers" }, { - "source": "paper:flamingo", - "target": 
"move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", + "target": "move:scale_data_then_let_emergent_capabilities_appear", "rel": "covers" }, { - "source": "paper:palme", - "target": "move:plug_in_modality_encoder_to_frozen_language_model_via_projection", + "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", + "target": "paper:sora", "rel": "covers" }, { - "source": "paper:react", - "target": "move:wrap_language_model_with_tool_calling_loop", + "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", + "target": "paper:gaia1", "rel": "covers" }, { - "source": "paper:toolformer", - "target": "move:wrap_language_model_with_tool_calling_loop", + "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", + "target": "paper:cosmos", "rel": "covers" }, { - "source": "paper:2311.10813", - "target": "move:wrap_language_model_with_tool_calling_loop", + "source": "insight:agent_loop_is_just_iterated_conditional_generation", + "target": "paper:react", "rel": "covers" }, { - "source": "paper:reflexion", - "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "source": "insight:agent_loop_is_just_iterated_conditional_generation", + "target": "paper:reflexion", "rel": "covers" }, { - "source": "paper:2309.16292", - "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "source": "insight:agent_loop_is_just_iterated_conditional_generation", + "target": "paper:2311.10813", "rel": "covers" }, { - "source": "paper:voyager", - "target": "move:add_reflection_step_so_agent_critiques_its_own_output", + "source": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "target": "paper:toolformer", "rel": "covers" }, { - "source": "paper:rt2", - "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "source": 
"insight:tool_use_extends_language_model_into_environment_grounded_actor", + "target": "paper:react", "rel": "covers" }, { - "source": "paper:openvla", - "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "source": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "target": "paper:2311.10813", "rel": "covers" }, { - "source": "paper:gpt_driver", - "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "source": "insight:counterfactual_replanning_separates_intent_from_execution", + "target": "paper:2512.24426", "rel": "covers" }, { - "source": "paper:emma", - "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", + "source": "insight:counterfactual_replanning_separates_intent_from_execution", + "target": "move:use_world_model_rollout_as_critic_for_policy", "rel": "covers" }, { - "source": "paper:2512.24426", - "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "source": "insight:foundation_model_decouples_perception_from_task_specific_training", + "target": "paper:dinov2", "rel": "covers" }, { - "source": "paper:cosmos", - "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "source": "insight:foundation_model_decouples_perception_from_task_specific_training", + "target": "paper:sam", "rel": "covers" }, { - "source": "paper:drivedreamer", - "target": "move:augment_supervised_training_with_counterfactual_or_synthetic_data", + "source": "insight:foundation_model_decouples_perception_from_task_specific_training", + "target": "paper:florence", "rel": "covers" }, { - "source": "paper:gaia1", - "target": "move:condition_video_generative_model_on_control_action_for_world_model", + "source": "insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control", + "target": "paper:swiftsage", "rel": "covers" }, { - "source": "paper:drivedreamer", - "target": 
"move:condition_video_generative_model_on_control_action_for_world_model", + "source": "insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control", + "target": "paper:2402.12289", "rel": "covers" }, { - "source": "paper:cosmos", - "target": "move:condition_video_generative_model_on_control_action_for_world_model", + "source": "insight:emergent_planning_from_next_token_prediction_alone", + "target": "paper:gpt4", "rel": "covers" }, { - "source": "paper:voyager", - "target": "move:use_retrieval_augmented_memory_to_extend_context", + "source": "insight:emergent_planning_from_next_token_prediction_alone", + "target": "paper:cot_wei2022", "rel": "covers" }, { - "source": "paper:2309.16292", - "target": "move:use_retrieval_augmented_memory_to_extend_context", + "source": "insight:alignment_is_constraint_satisfaction_over_generation", + "target": "paper:instructgpt", "rel": "covers" }, { - "source": "paper:tot", - "target": "move:cast_reasoning_as_search_over_thought_tree", + "source": "insight:alignment_is_constraint_satisfaction_over_generation", + "target": "paper:rlhf_dpo", "rel": "covers" }, { - "source": "paper:silver2017_alphazero", - "target": "move:cast_reasoning_as_search_over_thought_tree", + "source": "insight:alignment_is_constraint_satisfaction_over_generation", + "target": "paper:constitutional_ai", "rel": "covers" }, { - "source": "paper:verifier", - "target": "move:cast_reasoning_as_search_over_thought_tree", + "source": "insight:open_weight_release_compounds_research_velocity", + "target": "paper:llama", "rel": "covers" }, { - "source": "paper:rt2", - "target": "move:co_finetune_language_model_with_action_data_jointly", + "source": "insight:open_weight_release_compounds_research_velocity", + "target": "paper:openvla", "rel": "covers" }, { - "source": "paper:openvla", - "target": "move:co_finetune_language_model_with_action_data_jointly", + "source": "insight:open_weight_release_compounds_research_velocity", + "target": "paper:qwen", 
"rel": "covers" }, { - "source": "paper:emma", - "target": "move:co_finetune_language_model_with_action_data_jointly", + "source": "insight:open_weight_release_compounds_research_velocity", + "target": "paper:mistral", "rel": "covers" }, { - "source": "paper:silver2017_alphazero", - "target": "move:use_self_play_to_generate_unlimited_training_signal", + "source": "paradigm:foundation_model_axis", + "target": "paper:gpt4", "rel": "covers" }, { - "source": "paper:debate", - "target": "move:use_self_play_to_generate_unlimited_training_signal", + "source": "paradigm:foundation_model_axis", + "target": "paper:llama", "rel": "covers" }, { - "source": "paper:constitutional_ai", - "target": "move:use_self_play_to_generate_unlimited_training_signal", + "source": "paradigm:foundation_model_axis", + "target": "paper:llava", "rel": "covers" }, { - "source": "paper:gpt4", - "target": "move:distill_large_model_into_specialist_for_deployment", - "rel": "covers" + "source": "paradigm:foundation_model_axis", + "target": "paradigm:vla_paradigm", + "rel": "parallel" }, { - "source": "paper:2402.12289", - "target": "move:distill_large_model_into_specialist_for_deployment", - "rel": "covers" + "source": "paradigm:foundation_model_axis", + "target": "paradigm:world_model_paradigm", + "rel": "parallel" }, { - "source": "paper:senna", - "target": "move:distill_large_model_into_specialist_for_deployment", - "rel": "covers" + "source": "paradigm:foundation_model_axis", + "target": "paradigm:llm_agent_paradigm", + "rel": "parallel" }, { - "source": "paper:gaia1", - "target": "move:rewrite_continuous_video_as_token_sequence_for_transformer_world_model", + "source": "paradigm:world_model_paradigm", + "target": "paper:world_models", "rel": "covers" }, { - "source": "paper:cosmos", - "target": "move:rewrite_continuous_video_as_token_sequence_for_transformer_world_model", + "source": "paradigm:world_model_paradigm", + "target": "paper:gaia1", "rel": "covers" }, { - "source": "paper:senna", - 
"target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "source": "paradigm:world_model_paradigm", + "target": "paper:cosmos", "rel": "covers" }, { - "source": "paper:2402.12289", - "target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "source": "paradigm:world_model_paradigm", + "target": "paper:sora", "rel": "covers" }, { - "source": "paper:2512.24426", - "target": "move:condition_on_language_meta_action_then_emit_low_level_action", + "source": "paradigm:llm_agent_paradigm", + "target": "paper:react", "rel": "covers" }, { - "source": "paper:gpt4", - "target": "move:cache_kv_state_to_amortize_long_context", + "source": "paradigm:llm_agent_paradigm", + "target": "paper:2311.10813", "rel": "covers" }, { - "source": "paper:mistral", - "target": "move:cache_kv_state_to_amortize_long_context", + "source": "paradigm:llm_agent_paradigm", + "target": "paper:2309.16292", "rel": "covers" }, { - "source": "paper:mistral", - "target": "move:speculative_decoding_with_draft_model", + "source": "paradigm:llm_agent_paradigm", + "target": "paper:voyager", "rel": "covers" }, { - "source": "paper:gemini", - "target": "move:speculative_decoding_with_draft_model", + "source": "paradigm:vla_paradigm", + "target": "paper:rt2", "rel": "covers" }, { - "source": "paper:llava", - "target": "move:freeze_visual_encoder_and_only_train_connector", + "source": "paradigm:vla_paradigm", + "target": "paper:openvla", "rel": "covers" }, { - "source": "paper:flamingo", - "target": "move:freeze_visual_encoder_and_only_train_connector", + "source": "paradigm:vla_paradigm", + "target": "paper:emma", "rel": "covers" }, { - "source": "paper:cambrian", - "target": "move:freeze_visual_encoder_and_only_train_connector", + "source": "paradigm:vla_paradigm", + "target": "paper:2512.24426", "rel": "covers" }, { - "source": "paper:octo", - "target": "move:use_diffusion_head_for_continuous_action", + "source": "paradigm:vla_paradigm", + "target": 
"paper:2402.12289", "rel": "covers" }, { - "source": "paper:diffuser", - "target": "move:use_diffusion_head_for_continuous_action", - "rel": "covers" + "source": "move:residual_connection", + "target": "insight:residual_learning_unlocks_arbitrary_depth", + "rel": "composes" }, { - "source": "paper:gpt_driver", - "target": "move:treat_planning_as_autoregressive_trajectory_generation", - "rel": "covers" + "source": "paper:he2015_resnet", + "target": "insight:residual_learning_unlocks_arbitrary_depth", + "rel": "composes" }, { - "source": "paper:emma", - "target": "move:treat_planning_as_autoregressive_trajectory_generation", - "rel": "covers" + "source": "move:masking_for_pretext", + "target": "insight:masked_prediction_yields_self_supervised_signal", + "rel": "composes" }, { - "source": "paper:vadv2", - "target": "move:treat_planning_as_autoregressive_trajectory_generation", - "rel": "parallel" + "source": "concept:ssl", + "target": "insight:masked_prediction_yields_self_supervised_signal", + "rel": "composes" }, { - "source": "paper:2512.24426", - "target": "move:use_world_model_rollout_as_critic_for_policy", - "rel": "covers" + "source": "move:cross_attention_query", + "target": "insight:attention_is_typed_entity_communication", + "rel": "composes" }, { - "source": "paper:world_models", - "target": "move:use_world_model_rollout_as_critic_for_policy", - "rel": "covers" + "source": "concept:self_attention", + "target": "insight:attention_is_typed_entity_communication", + "rel": "composes" }, { - "source": "paper:cosmos", - "target": "move:use_world_model_rollout_as_critic_for_policy", - "rel": "covers" + "source": "concept:detr_query", + "target": "insight:attention_is_typed_entity_communication", + "rel": "composes" }, { - "source": "paper:voyager", - "target": "move:long_horizon_via_hierarchical_subgoal", - "rel": "covers" + "source": "move:contrastive_alignment", + "target": "insight:contrastive_alignment_creates_zero_shot_transfer", + "rel": "composes" }, { - 
"source": "paper:swiftsage", - "target": "move:long_horizon_via_hierarchical_subgoal", - "rel": "covers" + "source": "concept:vlm", + "target": "insight:contrastive_alignment_creates_zero_shot_transfer", + "rel": "composes" }, { - "source": "paper:2311.10813", - "target": "move:long_horizon_via_hierarchical_subgoal", - "rel": "covers" + "source": "move:diffusion_denoise_sampling", + "target": "insight:diffusion_unifies_generation_and_decision", + "rel": "composes" }, { - "source": "paper:debate", - "target": "move:prompt_chain_with_explicit_persona_roles", - "rel": "covers" + "source": "paper:diffuser", + "target": "insight:diffusion_unifies_generation_and_decision", + "rel": "composes" }, { - "source": "paper:swiftsage", - "target": "move:prompt_chain_with_explicit_persona_roles", - "rel": "covers" + "source": "paper:2212.10156", + "target": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "rel": "composes" }, { - "source": "paper:2311.10813", - "target": "move:prompt_chain_with_explicit_persona_roles", - "rel": "covers" + "source": "essay:bitter_lesson", + "target": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "rel": "composes" }, { - "source": "paper:2512.24426", - "target": "move:contrast_corner_case_against_normal_case_in_training", - "rel": "covers" + "source": "move:dual_system_fast_slow", + "target": "insight:dual_system_handles_latency_quality_tradeoff", + "rel": "composes" }, { - "source": "paper:cosmos", - "target": "move:contrast_corner_case_against_normal_case_in_training", - "rel": "covers" + "source": "paper:2402.12289", + "target": "insight:dual_system_handles_latency_quality_tradeoff", + "rel": "composes" }, { - "source": "paper:ad_benchmarks", - "target": "move:evaluate_open_loop_then_close_loop_for_realism", - "rel": "covers" + "source": "concept:cot", + "target": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "rel": "composes" }, { - "source": "paper:drivemlm", - 
"target": "move:evaluate_open_loop_then_close_loop_for_realism", - "rel": "covers" + "source": "paper:2309.16292", + "target": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "rel": "composes" }, { - "source": "paper:lmdrive", - "target": "move:evaluate_open_loop_then_close_loop_for_realism", - "rel": "covers" + "source": "move:counterfactual_replan", + "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "rel": "composes" }, { - "source": "paper:drivelm", - "target": "move:use_language_explanation_as_auxiliary_supervision", - "rel": "covers" + "source": "paper:2512.24426", + "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "rel": "composes" }, { - "source": "paper:lingo2", - "target": "move:use_language_explanation_as_auxiliary_supervision", - "rel": "covers" + "source": "paper:drivedreamer", + "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "rel": "composes" }, { - "source": "paper:emma", - "target": "move:use_language_explanation_as_auxiliary_supervision", - "rel": "covers" + "source": "essay:bitter_lesson", + "target": "insight:scaling_laws_predict_capability_emergence", + "rel": "composes" }, { - "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", - "target": "paper:2512.24426", - "rel": "motivates" + "source": "concept:scaling_vs_knowledge", + "target": "insight:scaling_laws_predict_capability_emergence", + "rel": "composes" }, { - "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", - "target": "paper:senna", - "rel": "motivates" + "source": "paper:gpt3", + "target": "insight:scaling_laws_predict_capability_emergence", + "rel": "composes" }, { - "source": "problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop", - "target": "move:add_reflection_step_so_agent_critiques_its_own_output", - "rel": "motivates" + "source": "paper:dinov2", + "target": 
"insight:foundation_pretraining_decouples_data_from_task", + "rel": "composes" }, { - "source": "problem:grounding_language_token_to_continuous_physical_world", - "target": "paper:rt2", - "rel": "motivates" + "source": "paper:2508.10104", + "target": "insight:foundation_pretraining_decouples_data_from_task", + "rel": "composes" }, { - "source": "problem:grounding_language_token_to_continuous_physical_world", - "target": "paper:palme", - "rel": "motivates" + "source": "concept:ssl", + "target": "insight:foundation_pretraining_decouples_data_from_task", + "rel": "composes" }, { - "source": "problem:grounding_language_token_to_continuous_physical_world", - "target": "move:replace_explicit_action_head_with_tokenized_action_sequence", - "rel": "motivates" + "source": "move:self_play_with_search", + "target": "insight:test_time_compute_substitutes_train_time_via_search", + "rel": "composes" }, { - "source": "problem:latency_budget_for_large_model_in_realtime_control", - "target": "paper:2402.12289", - "rel": "motivates" + "source": "paper:silver2017_alphazero", + "target": "insight:test_time_compute_substitutes_train_time_via_search", + "rel": "composes" }, { - "source": "problem:latency_budget_for_large_model_in_realtime_control", - "target": "move:distill_large_model_into_specialist_for_deployment", - "rel": "motivates" + "source": "concept:imitation_learning", + "target": "insight:imitation_data_compresses_unspecified_reward", + "rel": "composes" }, { - "source": "problem:latency_budget_for_large_model_in_realtime_control", - "target": "move:speculative_decoding_with_draft_model", - "rel": "motivates" + "source": "paper:ross2011_dagger", + "target": "insight:imitation_data_compresses_unspecified_reward", + "rel": "composes" }, { - "source": "problem:latency_budget_for_large_model_in_realtime_control", - "target": "move:cache_kv_state_to_amortize_long_context", - "rel": "motivates" + "source": "move:latent_imagination_rollout", + "target": 
"insight:world_models_let_planning_be_done_in_imagination", + "rel": "composes" }, { - "source": "problem:long_horizon_reasoning_with_finite_context_window", - "target": "paper:gemini", - "rel": "motivates" + "source": "paper:world_models", + "target": "insight:world_models_let_planning_be_done_in_imagination", + "rel": "composes" }, { - "source": "problem:long_horizon_reasoning_with_finite_context_window", - "target": "move:use_retrieval_augmented_memory_to_extend_context", - "rel": "motivates" + "source": "paper:gaia1", + "target": "insight:world_models_let_planning_be_done_in_imagination", + "rel": "composes" }, { - "source": "problem:long_horizon_reasoning_with_finite_context_window", - "target": "move:long_horizon_via_hierarchical_subgoal", - "rel": "motivates" + "source": "move:tokenize_modalities", + "target": "insight:tokenization_collapses_modality_gap", + "rel": "composes" }, { - "source": "problem:zero_shot_generalization_to_unseen_driving_scenes", - "target": "paper:gpt4v", - "rel": "motivates" + "source": "move:patchify_tokenization", + "target": "insight:tokenization_collapses_modality_gap", + "rel": "composes" }, { - "source": "problem:zero_shot_generalization_to_unseen_driving_scenes", - "target": "paper:emma", - "rel": "motivates" + "source": "move:set_prediction_with_hungarian", + "target": "insight:set_prediction_eliminates_postprocessing_heuristics", + "rel": "composes" }, { - "source": "problem:fine_grained_spatial_understanding_in_vision_language_model", - "target": "paper:cambrian", - "rel": "motivates" + "source": "paper:carion2020", + "target": "insight:set_prediction_eliminates_postprocessing_heuristics", + "rel": "composes" }, { - "source": "problem:fine_grained_spatial_understanding_in_vision_language_model", - "target": "paper:florence", - "rel": "motivates" + "source": "paper:gpt3", + "target": "insight:in_context_learning_emerges_at_scale", + "rel": "composes" }, { - "source": 
"problem:counterfactual_reasoning_about_other_agents_intent", - "target": "paper:2512.24426", - "rel": "motivates" + "source": "concept:cot", + "target": "insight:in_context_learning_emerges_at_scale", + "rel": "composes" }, { - "source": "problem:counterfactual_reasoning_about_other_agents_intent", - "target": "paper:gaia1", - "rel": "motivates" + "source": "concept:policy_gradient", + "target": "insight:safety_constraints_via_lagrangian_dual", + "rel": "composes" }, { - "source": "problem:counterfactual_reasoning_about_other_agents_intent", - "target": "move:use_world_model_rollout_as_critic_for_policy", - "rel": "motivates" + "source": "concept:actor_critic", + "target": "insight:safety_constraints_via_lagrangian_dual", + "rel": "composes" }, { - "source": "problem:open_world_corner_case_synthesis_for_training", - "target": "paper:cosmos", - "rel": "motivates" + "source": "move:spike_event_compute", + "target": "insight:event_sparse_compute_matches_energy_budget", + "rel": "composes" }, { - "source": "problem:open_world_corner_case_synthesis_for_training", - "target": "paper:prism1", - "rel": "motivates" + "source": "concept:spiking_nn", + "target": "insight:event_sparse_compute_matches_energy_budget", + "rel": "composes" }, { - "source": "problem:open_world_corner_case_synthesis_for_training", - "target": "move:contrast_corner_case_against_normal_case_in_training", - "rel": "motivates" + "source": "paper:2307.01694", + "target": "insight:event_sparse_compute_matches_energy_budget", + "rel": "composes" }, { - "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", - "target": "paper:drivemlm", - "rel": "motivates" + "source": "paper:li2022bevformer", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", - "target": "paper:lmdrive", - "rel": "motivates" + "source": "paper:carion2020", + "target": 
"validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "problem:evaluation_gap_between_offline_benchmark_and_closed_loop", - "target": "move:evaluate_open_loop_then_close_loop_for_realism", - "rel": "motivates" + "source": "concept:bev", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "problem:catastrophic_forgetting_after_action_finetuning", - "target": "paper:rt2", - "rel": "motivates" + "source": "concept:detr_query", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "problem:catastrophic_forgetting_after_action_finetuning", - "target": "paper:openvla", - "rel": "motivates" + "source": "move:cross_attention_query", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "problem:catastrophic_forgetting_after_action_finetuning", - "target": "move:co_finetune_language_model_with_action_data_jointly", - "rel": "motivates" + "source": "move:set_prediction_with_hungarian", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "insight:language_is_compressed_world_model_for_human_priors", - "target": "paper:gpt3", - "rel": "covers" + "source": "concept:imitation_learning", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "insight:language_is_compressed_world_model_for_human_priors", - "target": "paper:2309.16292", - "rel": "covers" + "source": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "target": "validation:trace_unified_planning_oriented_e2e_driving", + "rel": "composes" }, { - "source": "insight:language_is_compressed_world_model_for_human_priors", - "target": "paper:palme", - "rel": "covers" + "source": "insight:attention_is_typed_entity_communication", + "target": "validation:trace_unified_planning_oriented_e2e_driving", 
+ "rel": "composes" }, { - "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", - "target": "paper:gpt4", - "rel": "covers" + "source": "validation:trace_unified_planning_oriented_e2e_driving", + "target": "paper:2212.10156", + "rel": "validates" }, { - "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", - "target": "essay:bitter_lesson", - "rel": "covers" + "source": "paper:vaswani2017", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models", - "target": "move:scale_data_then_let_emergent_capabilities_appear", - "rel": "covers" + "source": "paper:carion2020", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", - "target": "paper:sora", - "rel": "covers" + "source": "concept:transformer", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", - "target": "paper:gaia1", - "rel": "covers" + "source": "move:cross_attention_query", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:world_model_video_diffusion_is_implicit_physics_engine", - "target": "paper:cosmos", - "rel": "covers" + "source": "move:set_prediction_with_hungarian", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:agent_loop_is_just_iterated_conditional_generation", - "target": "paper:react", - "rel": "covers" + "source": "concept:imitation_learning", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:agent_loop_is_just_iterated_conditional_generation", - "target": "paper:reflexion", - "rel": "covers" + 
"source": "paper:ad_benchmarks", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:agent_loop_is_just_iterated_conditional_generation", - "target": "paper:2311.10813", - "rel": "covers" + "source": "paper:transfuser", + "target": "validation:trace_object_level_planning_transformer", + "rel": "composes" }, { - "source": "insight:tool_use_extends_language_model_into_environment_grounded_actor", - "target": "paper:toolformer", - "rel": "covers" + "source": "validation:trace_object_level_planning_transformer", + "target": "paper:2210.14222", + "rel": "validates" }, { - "source": "insight:tool_use_extends_language_model_into_environment_grounded_actor", - "target": "paper:react", - "rel": "covers" + "source": "paper:llava", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:tool_use_extends_language_model_into_environment_grounded_actor", - "target": "paper:2311.10813", - "rel": "covers" + "source": "concept:vlm", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:counterfactual_replanning_separates_intent_from_execution", - "target": "paper:2512.24426", - "rel": "covers" + "source": "concept:vla", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:counterfactual_replanning_separates_intent_from_execution", - "target": "move:use_world_model_rollout_as_critic_for_policy", - "rel": "covers" + "source": "concept:cot", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:foundation_model_decouples_perception_from_task_specific_training", - "target": "paper:dinov2", - "rel": "covers" + "source": "move:dual_system_fast_slow", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": 
"insight:foundation_model_decouples_perception_from_task_specific_training", - "target": "paper:sam", - "rel": "covers" + "source": "move:cross_attention_query", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:foundation_model_decouples_perception_from_task_specific_training", - "target": "paper:florence", - "rel": "covers" + "source": "insight:dual_system_handles_latency_quality_tradeoff", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control", - "target": "paper:swiftsage", - "rel": "covers" + "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" }, { - "source": "insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control", + "source": "paper:li2022bevformer", + "target": "validation:trace_vision_language_action_dual_loop", + "rel": "composes" + }, + { + "source": "validation:trace_vision_language_action_dual_loop", "target": "paper:2402.12289", - "rel": "covers" + "rel": "validates" }, { - "source": "insight:emergent_planning_from_next_token_prediction_alone", - "target": "paper:gpt4", - "rel": "covers" + "source": "paper:gpt3", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:emergent_planning_from_next_token_prediction_alone", - "target": "paper:cot_wei2022", - "rel": "covers" + "source": "concept:vlm", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:alignment_is_constraint_satisfaction_over_generation", - "target": "paper:instructgpt", - "rel": "covers" + "source": "concept:cot", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": 
"insight:alignment_is_constraint_satisfaction_over_generation", - "target": "paper:rlhf_dpo", - "rel": "covers" + "source": "concept:tool_use", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:alignment_is_constraint_satisfaction_over_generation", - "target": "paper:constitutional_ai", - "rel": "covers" + "source": "move:tool_use_function_calling", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:open_weight_release_compounds_research_velocity", - "target": "paper:llama", - "rel": "covers" + "source": "paper:2210.14222", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" + }, + { + "source": "insight:in_context_learning_emerges_at_scale", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:open_weight_release_compounds_research_velocity", - "target": "paper:openvla", - "rel": "covers" + "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "target": "validation:trace_llm_decision_agent_for_driving", + "rel": "composes" }, { - "source": "insight:open_weight_release_compounds_research_velocity", - "target": "paper:qwen", - "rel": "covers" + "source": "validation:trace_llm_decision_agent_for_driving", + "target": "paper:2311.10813", + "rel": "validates" }, { - "source": "insight:open_weight_release_compounds_research_velocity", - "target": "paper:mistral", - "rel": "covers" + "source": "paper:gpt3", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paper:gpt4", - "rel": "covers" + "source": "concept:vlm", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paper:llama", - "rel": "covers" + "source": "concept:cot", + "target": 
"validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paper:llava", - "rel": "covers" + "source": "concept:tool_use", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paradigm:vla_paradigm", - "rel": "parallel" + "source": "essay:bitter_lesson", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paradigm:world_model_paradigm", - "rel": "parallel" + "source": "paper:ad_benchmarks", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:foundation_model_axis", - "target": "paradigm:llm_agent_paradigm", - "rel": "parallel" + "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:world_model_paradigm", - "target": "paper:world_models", - "rel": "covers" + "source": "insight:in_context_learning_emerges_at_scale", + "target": "validation:trace_knowledge_driven_reflective_agent", + "rel": "composes" }, { - "source": "paradigm:world_model_paradigm", - "target": "paper:gaia1", - "rel": "covers" + "source": "validation:trace_knowledge_driven_reflective_agent", + "target": "paper:2309.16292", + "rel": "validates" }, { - "source": "paradigm:world_model_paradigm", - "target": "paper:cosmos", - "rel": "covers" + "source": "concept:spiking_nn", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:world_model_paradigm", - "target": "paper:sora", - "rel": "covers" + "source": "paper:vaswani2017", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:llm_agent_paradigm", - "target": "paper:react", - 
"rel": "covers" + "source": "paper:vit", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:llm_agent_paradigm", - "target": "paper:2311.10813", - "rel": "covers" + "source": "paper:he2015_resnet", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:llm_agent_paradigm", - "target": "paper:2309.16292", - "rel": "covers" + "source": "concept:self_attention", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:llm_agent_paradigm", - "target": "paper:voyager", - "rel": "covers" + "source": "move:residual_connection", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:vla_paradigm", - "target": "paper:rt2", - "rel": "covers" + "source": "move:spike_event_compute", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:vla_paradigm", - "target": "paper:openvla", - "rel": "covers" + "source": "insight:event_sparse_compute_matches_energy_budget", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:vla_paradigm", - "target": "paper:emma", - "rel": "covers" + "source": "essay:bitter_lesson", + "target": "validation:trace_brain_inspired_spike_attention", + "rel": "composes" }, { - "source": "paradigm:vla_paradigm", - "target": "paper:2512.24426", - "rel": "covers" + "source": "validation:trace_brain_inspired_spike_attention", + "target": "paper:2307.01694", + "rel": "validates" }, { - "source": "paradigm:vla_paradigm", - "target": "paper:2402.12289", - "rel": "covers" + "source": "paper:dinov2", + "target": "validation:trace_scalable_self_supervised_vision_backbone", + "rel": "composes" }, { - "source": "move:residual_connection", - "target": "insight:residual_learning_unlocks_arbitrary_depth", + "source": "paper:vit", + "target": 
"validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "paper:he2015_resnet", - "target": "insight:residual_learning_unlocks_arbitrary_depth", + "source": "concept:ssl", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { "source": "move:masking_for_pretext", - "target": "insight:masked_prediction_yields_self_supervised_signal", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "concept:ssl", - "target": "insight:masked_prediction_yields_self_supervised_signal", + "source": "essay:bitter_lesson", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "insight:attention_is_typed_entity_communication", + "source": "insight:foundation_pretraining_decouples_data_from_task", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "concept:self_attention", - "target": "insight:attention_is_typed_entity_communication", + "source": "insight:scaling_laws_predict_capability_emergence", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "concept:detr_query", - "target": "insight:attention_is_typed_entity_communication", + "source": "insight:masked_prediction_yields_self_supervised_signal", + "target": "validation:trace_scalable_self_supervised_vision_backbone", "rel": "composes" }, { - "source": "move:contrastive_alignment", - "target": "insight:contrastive_alignment_creates_zero_shot_transfer", - "rel": "composes" + "source": "validation:trace_scalable_self_supervised_vision_backbone", + "target": "paper:2508.10104", + "rel": "validates" }, { - "source": "concept:vlm", - "target": "insight:contrastive_alignment_creates_zero_shot_transfer", + "source": "paper:2402.12289", + "target": "validation:trace_counterfactual_vla_replanner", 
"rel": "composes" }, { - "source": "move:diffusion_denoise_sampling", - "target": "insight:diffusion_unifies_generation_and_decision", + "source": "paper:llava", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "paper:diffuser", - "target": "insight:diffusion_unifies_generation_and_decision", + "source": "paper:world_models", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "paper:2212.10156", - "target": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "source": "paper:gaia1", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "essay:bitter_lesson", - "target": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "source": "paper:drivedreamer", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "move:dual_system_fast_slow", - "target": "insight:dual_system_handles_latency_quality_tradeoff", + "source": "paper:rlhf_dpo", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "paper:2402.12289", - "target": "insight:dual_system_handles_latency_quality_tradeoff", + "source": "concept:vla", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "concept:cot", - "target": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "source": "concept:counterfactual", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "paper:2309.16292", - "target": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "source": "concept:meta_action", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { "source": "move:counterfactual_replan", - "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "target": "validation:trace_counterfactual_vla_replanner", "rel": 
"composes" }, { - "source": "paper:2512.24426", - "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "source": "move:latent_imagination_rollout", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "paper:drivedreamer", - "target": "insight:long_tail_solved_by_synthesis_not_data_alone", + "source": "insight:long_tail_solved_by_synthesis_not_data_alone", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "essay:bitter_lesson", - "target": "insight:scaling_laws_predict_capability_emergence", + "source": "insight:world_models_let_planning_be_done_in_imagination", + "target": "validation:trace_counterfactual_vla_replanner", "rel": "composes" }, { - "source": "concept:scaling_vs_knowledge", - "target": "insight:scaling_laws_predict_capability_emergence", + "source": "validation:trace_counterfactual_vla_replanner", + "target": "paper:2512.24426", + "rel": "validates" + }, + { + "source": "paper:vaswani2017", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "paper:gpt3", - "target": "insight:scaling_laws_predict_capability_emergence", + "source": "concept:transformer", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "paper:dinov2", - "target": "insight:foundation_pretraining_decouples_data_from_task", + "source": "concept:self_attention", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "paper:2508.10104", - "target": "insight:foundation_pretraining_decouples_data_from_task", + "source": "move:cross_attention_query", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "concept:ssl", - "target": "insight:foundation_pretraining_decouples_data_from_task", + "source": "move:set_prediction_with_hungarian", + "target": 
"validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "move:self_play_with_search", - "target": "insight:test_time_compute_substitutes_train_time_via_search", + "source": "insight:attention_is_typed_entity_communication", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "paper:silver2017_alphazero", - "target": "insight:test_time_compute_substitutes_train_time_via_search", + "source": "insight:set_prediction_eliminates_postprocessing_heuristics", + "target": "validation:trace_set_prediction_with_object_queries", "rel": "composes" }, { - "source": "concept:imitation_learning", - "target": "insight:imitation_data_compresses_unspecified_reward", + "source": "validation:trace_set_prediction_with_object_queries", + "target": "paper:carion2020", + "rel": "validates" + }, + { + "source": "paper:he2015_resnet", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "paper:ross2011_dagger", - "target": "insight:imitation_data_compresses_unspecified_reward", + "source": "concept:self_attention", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "move:latent_imagination_rollout", - "target": "insight:world_models_let_planning_be_done_in_imagination", + "source": "concept:transformer", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "paper:world_models", - "target": "insight:world_models_let_planning_be_done_in_imagination", + "source": "move:residual_connection", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "paper:gaia1", - "target": "insight:world_models_let_planning_be_done_in_imagination", + "source": "move:cross_attention_query", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "move:tokenize_modalities", - "target": 
"insight:tokenization_collapses_modality_gap", + "source": "insight:residual_learning_unlocks_arbitrary_depth", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "move:patchify_tokenization", - "target": "insight:tokenization_collapses_modality_gap", + "source": "insight:attention_is_typed_entity_communication", + "target": "validation:trace_self_attention_replaces_recurrence", "rel": "composes" }, { - "source": "move:set_prediction_with_hungarian", - "target": "insight:set_prediction_eliminates_postprocessing_heuristics", - "rel": "composes" + "source": "validation:trace_self_attention_replaces_recurrence", + "target": "paper:vaswani2017", + "rel": "validates" }, { - "source": "paper:carion2020", - "target": "insight:set_prediction_eliminates_postprocessing_heuristics", + "source": "paper:vaswani2017", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "paper:gpt3", - "target": "insight:in_context_learning_emerges_at_scale", + "source": "concept:transformer", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "concept:cot", - "target": "insight:in_context_learning_emerges_at_scale", + "source": "concept:self_attention", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "concept:policy_gradient", - "target": "insight:safety_constraints_via_lagrangian_dual", + "source": "move:patchify_tokenization", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "concept:actor_critic", - "target": "insight:safety_constraints_via_lagrangian_dual", + "source": "insight:tokenization_collapses_modality_gap", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "move:spike_event_compute", - "target": "insight:event_sparse_compute_matches_energy_budget", 
+ "source": "insight:scaling_laws_predict_capability_emergence", + "target": "validation:trace_image_transformer_via_patch_tokenization", "rel": "composes" }, { - "source": "concept:spiking_nn", - "target": "insight:event_sparse_compute_matches_energy_budget", - "rel": "composes" + "source": "validation:trace_image_transformer_via_patch_tokenization", + "target": "paper:vit", + "rel": "validates" }, { - "source": "paper:2307.01694", - "target": "insight:event_sparse_compute_matches_energy_budget", + "source": "paper:vaswani2017", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "paper:li2022bevformer", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "paper:vit", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { "source": "paper:carion2020", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "concept:bev", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "concept:transformer", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { "source": "concept:detr_query", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "concept:bev", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "move:set_prediction_with_hungarian", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "move:cross_attention_query", + "target": 
"validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "concept:imitation_learning", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "move:lift_2d_to_3d", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "source": "move:patchify_tokenization", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { "source": "insight:attention_is_typed_entity_communication", - "target": "validation:trace_unified_planning_oriented_e2e_driving", + "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", "rel": "composes" }, { - "source": "validation:trace_unified_planning_oriented_e2e_driving", - "target": "paper:2212.10156", + "source": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", + "target": "paper:li2022bevformer", "rel": "validates" }, { "source": "paper:vaswani2017", - "target": "validation:trace_object_level_planning_transformer", + "target": "validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "paper:carion2020", - "target": "validation:trace_object_level_planning_transformer", + "source": "concept:transformer", + "target": "validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_object_level_planning_transformer", + "source": "essay:bitter_lesson", + "target": "validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_object_level_planning_transformer", + "source": "concept:scaling_vs_knowledge", + "target": 
"validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "move:set_prediction_with_hungarian", - "target": "validation:trace_object_level_planning_transformer", + "source": "insight:scaling_laws_predict_capability_emergence", + "target": "validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "concept:imitation_learning", - "target": "validation:trace_object_level_planning_transformer", + "source": "insight:in_context_learning_emerges_at_scale", + "target": "validation:trace_few_shot_in_context_learning_at_scale", "rel": "composes" }, { - "source": "paper:ad_benchmarks", - "target": "validation:trace_object_level_planning_transformer", - "rel": "composes" + "source": "validation:trace_few_shot_in_context_learning_at_scale", + "target": "paper:gpt3", + "rel": "validates" }, { - "source": "paper:transfuser", - "target": "validation:trace_object_level_planning_transformer", + "source": "concept:policy_gradient", + "target": "validation:trace_clipped_policy_gradient_surrogate", "rel": "composes" }, { - "source": "validation:trace_object_level_planning_transformer", - "target": "paper:2210.14222", - "rel": "validates" + "source": "concept:actor_critic", + "target": "validation:trace_clipped_policy_gradient_surrogate", + "rel": "composes" }, { - "source": "paper:llava", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "concept:ppo", + "target": "validation:trace_clipped_policy_gradient_surrogate", "rel": "composes" }, { - "source": "concept:vlm", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "move:clipped_surrogate_objective", + "target": "validation:trace_clipped_policy_gradient_surrogate", "rel": "composes" }, { - "source": "concept:vla", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "course:zhao_rl", + "target": "validation:trace_clipped_policy_gradient_surrogate", "rel": "composes" }, { - "source": 
"concept:cot", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "paper:sutton_barto", + "target": "validation:trace_clipped_policy_gradient_surrogate", "rel": "composes" }, { - "source": "move:dual_system_fast_slow", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "validation:trace_clipped_policy_gradient_surrogate", + "target": "paper:schulman2017_ppo", + "rel": "validates" + }, + { + "source": "concept:mdp", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "concept:bellman_eq", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "insight:dual_system_handles_latency_quality_tradeoff", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "concept:td_learning", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "concept:dqn", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "paper:li2022bevformer", - "target": "validation:trace_vision_language_action_dual_loop", + "source": "concept:replay_buffer", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "validation:trace_vision_language_action_dual_loop", - "target": "paper:2402.12289", - "rel": "validates" + "source": "move:replay_and_target_net", + "target": "validation:trace_deep_q_network_with_replay_and_target", + "rel": "composes" }, { - "source": "paper:gpt3", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "course:zhao_rl", + "target": 
"validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "concept:vlm", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "paper:sutton_barto", + "target": "validation:trace_deep_q_network_with_replay_and_target", "rel": "composes" }, { - "source": "concept:cot", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "validation:trace_deep_q_network_with_replay_and_target", + "target": "paper:mnih2015_dqn", + "rel": "validates" + }, + { + "source": "concept:mdp", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": "concept:tool_use", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "concept:policy_gradient", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": "move:tool_use_function_calling", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "concept:actor_critic", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": "paper:2210.14222", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "essay:bitter_lesson", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": "insight:in_context_learning_emerges_at_scale", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "move:self_play_with_search", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", - "target": "validation:trace_llm_decision_agent_for_driving", + "source": "insight:test_time_compute_substitutes_train_time_via_search", + "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", "rel": "composes" }, { - "source": 
"validation:trace_llm_decision_agent_for_driving", - "target": "paper:2311.10813", + "source": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", + "target": "paper:silver2017_alphazero", "rel": "validates" }, { - "source": "paper:gpt3", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "concept:imitation_learning", + "target": "validation:trace_dataset_aggregation_for_imitation", "rel": "composes" }, { - "source": "concept:vlm", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "concept:covariate_shift", + "target": "validation:trace_dataset_aggregation_for_imitation", "rel": "composes" }, { - "source": "concept:cot", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "move:dataset_aggregation", + "target": "validation:trace_dataset_aggregation_for_imitation", "rel": "composes" }, { - "source": "concept:tool_use", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "course:cs285", + "target": "validation:trace_dataset_aggregation_for_imitation", "rel": "composes" }, { - "source": "essay:bitter_lesson", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "insight:imitation_data_compresses_unspecified_reward", + "target": "validation:trace_dataset_aggregation_for_imitation", + "rel": "composes" + }, + { + "source": "validation:trace_dataset_aggregation_for_imitation", + "target": "paper:ross2011_dagger", + "rel": "validates" + }, + { + "source": "concept:mdp", + "target": "validation:trace_world_model_in_latent_imagination", "rel": "composes" }, { - "source": "paper:ad_benchmarks", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "concept:policy_gradient", + "target": "validation:trace_world_model_in_latent_imagination", "rel": "composes" }, { - "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", - "target": "validation:trace_knowledge_driven_reflective_agent", + 
"source": "concept:actor_critic", + "target": "validation:trace_world_model_in_latent_imagination", "rel": "composes" }, { - "source": "insight:in_context_learning_emerges_at_scale", - "target": "validation:trace_knowledge_driven_reflective_agent", + "source": "move:latent_imagination_rollout", + "target": "validation:trace_world_model_in_latent_imagination", "rel": "composes" }, { - "source": "validation:trace_knowledge_driven_reflective_agent", - "target": "paper:2309.16292", - "rel": "validates" + "source": "insight:world_models_let_planning_be_done_in_imagination", + "target": "validation:trace_world_model_in_latent_imagination", + "rel": "composes" }, { - "source": "concept:spiking_nn", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "paper:world_models", + "target": "validation:trace_world_model_in_latent_imagination", "rel": "composes" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_brain_inspired_spike_attention", - "rel": "composes" + "source": "validation:trace_world_model_in_latent_imagination", + "target": "paper:world_models", + "rel": "validates" }, { "source": "paper:vit", - "target": "validation:trace_brain_inspired_spike_attention", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "paper:he2015_resnet", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "paper:vaswani2017", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "concept:self_attention", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "concept:vlm", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "move:residual_connection", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "move:contrastive_alignment", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { 
- "source": "move:spike_event_compute", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "concept:ssl", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "insight:event_sparse_compute_matches_energy_budget", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "insight:contrastive_alignment_creates_zero_shot_transfer", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "essay:bitter_lesson", - "target": "validation:trace_brain_inspired_spike_attention", + "source": "insight:foundation_pretraining_decouples_data_from_task", + "target": "validation:trace_vision_language_pretrained_dual_encoder", "rel": "composes" }, { - "source": "validation:trace_brain_inspired_spike_attention", - "target": "paper:2307.01694", + "source": "validation:trace_vision_language_pretrained_dual_encoder", + "target": "paper:llava", "rel": "validates" }, { - "source": "paper:dinov2", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "concept:imitation_learning", + "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", "rel": "composes" }, { - "source": "paper:vit", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "move:diffusion_denoise_sampling", + "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", "rel": "composes" }, { - "source": "concept:ssl", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "paper:diffuser", + "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", "rel": "composes" }, { - "source": "move:masking_for_pretext", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "insight:diffusion_unifies_generation_and_decision", + "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", "rel": 
"composes" }, { - "source": "essay:bitter_lesson", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "insight:imitation_data_compresses_unspecified_reward", + "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", "rel": "composes" }, { - "source": "insight:foundation_pretraining_decouples_data_from_task", - "target": "validation:trace_scalable_self_supervised_vision_backbone", - "rel": "composes" + "source": "validation:trace_diffusion_policy_as_score_based_action_sampler", + "target": "paper:diffuser", + "rel": "validates" }, { - "source": "insight:scaling_laws_predict_capability_emergence", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "paper:vit", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "insight:masked_prediction_yields_self_supervised_signal", - "target": "validation:trace_scalable_self_supervised_vision_backbone", + "source": "paper:carion2020", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "validation:trace_scalable_self_supervised_vision_backbone", - "target": "paper:2508.10104", - "rel": "validates" + "source": "paper:li2022bevformer", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", + "rel": "composes" }, { - "source": "paper:2402.12289", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "concept:bev", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "paper:llava", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "concept:detr_query", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "paper:world_models", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "move:lift_2d_to_3d", + "target": 
"validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "paper:gaia1", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "move:cross_attention_query", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "paper:drivedreamer", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "paper:tesla_ai_day", + "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", "rel": "composes" }, { - "source": "paper:rlhf_dpo", - "target": "validation:trace_counterfactual_vla_replanner", - "rel": "composes" + "source": "validation:trace_modular_perception_pipeline_with_bev_fusion", + "target": "paper:li2022bevformer", + "rel": "validates" }, { - "source": "concept:vla", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "paper:vit", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { - "source": "concept:counterfactual", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "paper:gaia1", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { - "source": "concept:meta_action", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "paper:drivedreamer", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { - "source": "move:counterfactual_replan", - "target": "validation:trace_counterfactual_vla_replanner", + "source": "paper:world_models", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { "source": "move:latent_imagination_rollout", - "target": "validation:trace_counterfactual_vla_replanner", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { "source": "insight:long_tail_solved_by_synthesis_not_data_alone", - "target": 
"validation:trace_counterfactual_vla_replanner", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { "source": "insight:world_models_let_planning_be_done_in_imagination", - "target": "validation:trace_counterfactual_vla_replanner", + "target": "validation:trace_neural_field_for_dynamic_driving_scene", "rel": "composes" }, { - "source": "validation:trace_counterfactual_vla_replanner", - "target": "paper:2512.24426", + "source": "validation:trace_neural_field_for_dynamic_driving_scene", + "target": "paper:gaia1", "rel": "validates" }, { "source": "paper:vaswani2017", - "target": "validation:trace_set_prediction_with_object_queries", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "paper:gpt3", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "concept:self_attention", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "concept:transformer", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "concept:imitation_learning", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "move:set_prediction_with_hungarian", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "move:tokenize_modalities", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "insight:attention_is_typed_entity_communication", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "insight:tokenization_collapses_modality_gap", + 
"target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "insight:set_prediction_eliminates_postprocessing_heuristics", - "target": "validation:trace_set_prediction_with_object_queries", + "source": "insight:imitation_data_compresses_unspecified_reward", + "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", "rel": "composes" }, { - "source": "validation:trace_set_prediction_with_object_queries", - "target": "paper:carion2020", + "source": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", + "target": "paper:diffuser", "rel": "validates" }, { - "source": "paper:he2015_resnet", - "target": "validation:trace_self_attention_replaces_recurrence", + "source": "concept:mdp", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "concept:self_attention", - "target": "validation:trace_self_attention_replaces_recurrence", + "source": "concept:policy_gradient", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_self_attention_replaces_recurrence", + "source": "concept:actor_critic", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "move:residual_connection", - "target": "validation:trace_self_attention_replaces_recurrence", + "source": "concept:ppo", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_self_attention_replaces_recurrence", + "source": "move:clipped_surrogate_objective", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "insight:residual_learning_unlocks_arbitrary_depth", - "target": 
"validation:trace_self_attention_replaces_recurrence", + "source": "insight:safety_constraints_via_lagrangian_dual", + "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", "rel": "composes" }, { - "source": "insight:attention_is_typed_entity_communication", - "target": "validation:trace_self_attention_replaces_recurrence", - "rel": "composes" + "source": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", + "target": "paper:schulman2017_ppo", + "rel": "validates" }, { - "source": "validation:trace_self_attention_replaces_recurrence", - "target": "paper:vaswani2017", - "rel": "validates" + "source": "paper:tesla_ai_day", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "paper:li2022bevformer", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "manifests" + }, + { + "source": "paper:transfuser", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "manifests" + }, + { + "source": "concept:bev", + "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "move:lift_2d_to_3d", + "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "composes" }, { - "source": "concept:self_attention", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "insight:set_prediction_eliminates_postprocessing_heuristics", + "target": "paradigm:modular_perception_to_planning_pipeline", "rel": "composes" }, { - "source": "move:patchify_tokenization", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "paper:2212.10156", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": 
"paper:2210.14222", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": "paper:vadv2", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": "paper:transfuser", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": "concept:imitation_learning", + "target": "paradigm:differentiable_end_to_end_imitation", "rel": "composes" }, { - "source": "insight:tokenization_collapses_modality_gap", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "move:cross_attention_query", + "target": "paradigm:differentiable_end_to_end_imitation", "rel": "composes" }, { - "source": "insight:scaling_laws_predict_capability_emergence", - "target": "validation:trace_image_transformer_via_patch_tokenization", + "source": "insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", + "target": "paradigm:differentiable_end_to_end_imitation", "rel": "composes" }, { - "source": "validation:trace_image_transformer_via_patch_tokenization", - "target": "paper:vit", - "rel": "validates" + "source": "paper:world_models", + "target": "paradigm:model_based_world_imagination_planning", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:gaia1", + "target": "paradigm:model_based_world_imagination_planning", + "rel": "manifests" }, { - "source": "paper:vit", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:drivedreamer", + "target": "paradigm:model_based_world_imagination_planning", + "rel": "manifests" }, { - "source": "paper:carion2020", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", + "source": "move:latent_imagination_rollout", + "target": 
"paradigm:model_based_world_imagination_planning", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", + "source": "move:diffusion_denoise_sampling", + "target": "paradigm:model_based_world_imagination_planning", "rel": "composes" }, { - "source": "concept:detr_query", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", + "source": "insight:world_models_let_planning_be_done_in_imagination", + "target": "paradigm:model_based_world_imagination_planning", "rel": "composes" }, { - "source": "concept:bev", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", + "source": "insight:long_tail_solved_by_synthesis_not_data_alone", + "target": "paradigm:model_based_world_imagination_planning", "rel": "composes" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:gpt3", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": "move:lift_2d_to_3d", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:llava", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": "move:patchify_tokenization", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:2402.12289", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": "insight:attention_is_typed_entity_communication", - "target": "validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "rel": "composes" + "source": "paper:2311.10813", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": 
"validation:trace_bird_eye_view_transformer_with_temporal_aggregation", - "target": "paper:li2022bevformer", - "rel": "validates" + "source": "paper:lingo2", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "concept:vla", + "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "composes" }, { - "source": "concept:transformer", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "concept:tool_use", + "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "composes" }, { - "source": "essay:bitter_lesson", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "move:dual_system_fast_slow", + "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "composes" }, { - "source": "concept:scaling_vs_knowledge", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "insight:dual_system_handles_latency_quality_tradeoff", + "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "composes" }, { - "source": "insight:scaling_laws_predict_capability_emergence", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "insight:in_context_learning_emerges_at_scale", + "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "composes" }, { - "source": "insight:in_context_learning_emerges_at_scale", - "target": "validation:trace_few_shot_in_context_learning_at_scale", + "source": "concept:spiking_nn", + "target": "paradigm:brain_inspired_event_sparse_compute", "rel": "composes" }, { - "source": "validation:trace_few_shot_in_context_learning_at_scale", - "target": "paper:gpt3", - "rel": "validates" + "source": "paper:2307.01694", + "target": "paradigm:brain_inspired_event_sparse_compute", + "rel": "manifests" }, { - "source": 
"concept:policy_gradient", - "target": "validation:trace_clipped_policy_gradient_surrogate", + "source": "move:spike_event_compute", + "target": "paradigm:brain_inspired_event_sparse_compute", "rel": "composes" }, { - "source": "concept:actor_critic", - "target": "validation:trace_clipped_policy_gradient_surrogate", + "source": "insight:event_sparse_compute_matches_energy_budget", + "target": "paradigm:brain_inspired_event_sparse_compute", "rel": "composes" }, { - "source": "concept:ppo", - "target": "validation:trace_clipped_policy_gradient_surrogate", - "rel": "composes" + "source": "paper:2512.24426", + "target": "paradigm:counterfactual_data_centric_safety", + "rel": "manifests" }, { - "source": "move:clipped_surrogate_objective", - "target": "validation:trace_clipped_policy_gradient_surrogate", - "rel": "composes" + "source": "paper:gaia1", + "target": "paradigm:counterfactual_data_centric_safety", + "rel": "manifests" }, { - "source": "course:zhao_rl", - "target": "validation:trace_clipped_policy_gradient_surrogate", - "rel": "composes" + "source": "paper:drivedreamer", + "target": "paradigm:counterfactual_data_centric_safety", + "rel": "manifests" }, { - "source": "paper:sutton_barto", - "target": "validation:trace_clipped_policy_gradient_surrogate", + "source": "concept:counterfactual", + "target": "paradigm:counterfactual_data_centric_safety", "rel": "composes" }, { - "source": "validation:trace_clipped_policy_gradient_surrogate", - "target": "paper:schulman2017_ppo", - "rel": "validates" - }, - { - "source": "concept:mdp", - "target": "validation:trace_deep_q_network_with_replay_and_target", + "source": "concept:meta_action", + "target": "paradigm:counterfactual_data_centric_safety", "rel": "composes" }, { - "source": "concept:bellman_eq", - "target": "validation:trace_deep_q_network_with_replay_and_target", + "source": "move:counterfactual_replan", + "target": "paradigm:counterfactual_data_centric_safety", "rel": "composes" }, { - "source": 
"concept:td_learning", - "target": "validation:trace_deep_q_network_with_replay_and_target", + "source": "insight:long_tail_solved_by_synthesis_not_data_alone", + "target": "paradigm:counterfactual_data_centric_safety", "rel": "composes" }, { - "source": "concept:dqn", - "target": "validation:trace_deep_q_network_with_replay_and_target", - "rel": "composes" + "source": "paper:gpt3", + "target": "paradigm:knowledge_driven_reflective_agent", + "rel": "manifests" }, { - "source": "concept:replay_buffer", - "target": "validation:trace_deep_q_network_with_replay_and_target", - "rel": "composes" + "source": "paper:2309.16292", + "target": "paradigm:knowledge_driven_reflective_agent", + "rel": "manifests" }, { - "source": "move:replay_and_target_net", - "target": "validation:trace_deep_q_network_with_replay_and_target", - "rel": "composes" + "source": "paper:2311.10813", + "target": "paradigm:knowledge_driven_reflective_agent", + "rel": "manifests" }, { - "source": "course:zhao_rl", - "target": "validation:trace_deep_q_network_with_replay_and_target", + "source": "concept:cot", + "target": "paradigm:knowledge_driven_reflective_agent", "rel": "composes" }, { - "source": "paper:sutton_barto", - "target": "validation:trace_deep_q_network_with_replay_and_target", + "source": "concept:tool_use", + "target": "paradigm:knowledge_driven_reflective_agent", "rel": "composes" }, { - "source": "validation:trace_deep_q_network_with_replay_and_target", - "target": "paper:mnih2015_dqn", - "rel": "validates" + "source": "essay:bitter_lesson", + "target": "paradigm:knowledge_driven_reflective_agent", + "rel": "contrasts" }, { - "source": "concept:mdp", - "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", + "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", + "target": "paradigm:knowledge_driven_reflective_agent", "rel": "composes" }, { - "source": "concept:policy_gradient", - "target": 
"validation:trace_alpha_zero_self_play_with_mcts_guided_policy", - "rel": "composes" + "source": "paper:dinov2", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "manifests" }, { - "source": "concept:actor_critic", - "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", - "rel": "composes" + "source": "paper:2508.10104", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "manifests" }, { - "source": "essay:bitter_lesson", - "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", - "rel": "composes" + "source": "paper:sam", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "manifests" }, { - "source": "move:self_play_with_search", - "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", + "source": "concept:ssl", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "insight:test_time_compute_substitutes_train_time_via_search", - "target": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", + "source": "move:masking_for_pretext", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "validation:trace_alpha_zero_self_play_with_mcts_guided_policy", - "target": "paper:silver2017_alphazero", - "rel": "validates" - }, - { - "source": "concept:imitation_learning", - "target": "validation:trace_dataset_aggregation_for_imitation", + "source": "move:contrastive_alignment", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "concept:covariate_shift", - "target": "validation:trace_dataset_aggregation_for_imitation", + "source": "insight:masked_prediction_yields_self_supervised_signal", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "move:dataset_aggregation", - "target": "validation:trace_dataset_aggregation_for_imitation", + "source": 
"insight:foundation_pretraining_decouples_data_from_task", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "course:cs285", - "target": "validation:trace_dataset_aggregation_for_imitation", + "source": "insight:scaling_laws_predict_capability_emergence", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "insight:imitation_data_compresses_unspecified_reward", - "target": "validation:trace_dataset_aggregation_for_imitation", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:nuplan", + "rel": "covers" }, { - "source": "validation:trace_dataset_aggregation_for_imitation", - "target": "paper:ross2011_dagger", - "rel": "validates" + "source": "paper:ad_benchmarks", + "target": "paper:waymo_motion", + "rel": "covers" }, - { - "source": "concept:mdp", - "target": "validation:trace_world_model_in_latent_imagination", - "rel": "composes" + { + "source": "paper:ad_benchmarks", + "target": "paper:argoverse2", + "rel": "covers" }, { - "source": "concept:policy_gradient", - "target": "validation:trace_world_model_in_latent_imagination", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:navsim", + "rel": "covers" }, { - "source": "concept:actor_critic", - "target": "validation:trace_world_model_in_latent_imagination", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:bench2drive", + "rel": "covers" }, { - "source": "move:latent_imagination_rollout", - "target": "validation:trace_world_model_in_latent_imagination", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:carla_lb2", + "rel": "covers" }, { - "source": "insight:world_models_let_planning_be_done_in_imagination", - "target": "validation:trace_world_model_in_latent_imagination", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:womd_pred", + "rel": "covers" }, { - "source": "paper:world_models", - "target": 
"validation:trace_world_model_in_latent_imagination", - "rel": "composes" + "source": "paper:ad_benchmarks", + "target": "paper:interaction_dataset", + "rel": "covers" }, { - "source": "validation:trace_world_model_in_latent_imagination", - "target": "paper:world_models", - "rel": "validates" + "source": "paper:nuplan", + "target": "paper:navsim", + "rel": "feeds" }, { - "source": "paper:vit", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:nuplan", + "target": "paper:bench2drive", + "rel": "parallel" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:navsim", + "target": "paper:bench2drive", + "rel": "parallel" }, { - "source": "concept:vlm", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:carla_lb2", + "target": "paper:bench2drive", + "rel": "feeds" }, { - "source": "move:contrastive_alignment", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:waymo_motion", + "target": "paper:womd_pred", + "rel": "feeds" }, { - "source": "concept:ssl", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:argoverse2", + "target": "paper:womd_pred", + "rel": "parallel" }, { - "source": "insight:contrastive_alignment_creates_zero_shot_transfer", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:interaction_dataset", + "target": "paper:waymo_motion", + "rel": "parallel" }, { - "source": "insight:foundation_pretraining_decouples_data_from_task", - "target": "validation:trace_vision_language_pretrained_dual_encoder", - "rel": "composes" + "source": "paper:bdd100k", + "target": "paper:argoverse2", + "rel": "parallel" }, { - "source": 
"validation:trace_vision_language_pretrained_dual_encoder", - "target": "paper:llava", - "rel": "validates" + "source": "paper:lyft_l5", + "target": "paper:waymo_motion", + "rel": "parallel" }, { - "source": "concept:imitation_learning", - "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "rel": "composes" + "source": "paper:pandaset", + "target": "paper:argoverse2", + "rel": "parallel" }, { - "source": "move:diffusion_denoise_sampling", - "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "rel": "composes" + "source": "paper:apolloscape", + "target": "paper:bdd100k", + "rel": "parallel" }, { - "source": "paper:diffuser", - "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "rel": "composes" + "source": "paper:highway_env", + "target": "paper:metadrive", + "rel": "prereq" }, { - "source": "insight:diffusion_unifies_generation_and_decision", - "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "rel": "composes" + "source": "paper:metadrive", + "target": "paper:smarts", + "rel": "parallel" }, { - "source": "insight:imitation_data_compresses_unspecified_reward", - "target": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "rel": "composes" + "source": "paper:smarts", + "target": "paper:commonroad", + "rel": "parallel" }, { - "source": "validation:trace_diffusion_policy_as_score_based_action_sampler", - "target": "paper:diffuser", - "rel": "validates" + "source": "paper:commonroad", + "target": "paper:carla_lb2", + "rel": "parallel" }, { - "source": "paper:vit", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:highway_env", + "target": "paper:smarts", + "rel": "prereq" }, { - "source": "paper:carion2020", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:tesla_ai_day", + "target": "paper:tesla_autolabel", + 
"rel": "covers" }, { - "source": "paper:li2022bevformer", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:tesla_autolabel", + "target": "paper:waymo_scenario_mining", + "rel": "parallel" }, { - "source": "concept:bev", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:tesla_autolabel", + "target": "paper:gs_for_ad", + "rel": "feeds" }, { - "source": "concept:detr_query", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:gs_for_ad", + "target": "paper:shift_dataset", + "rel": "parallel" }, { - "source": "move:lift_2d_to_3d", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:shift_dataset", + "target": "paper:v2x_sim", + "rel": "parallel" }, { - "source": "move:cross_attention_query", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:vaswani2017", + "target": "paper:flashattention", + "rel": "prereq" }, { - "source": "paper:tesla_ai_day", - "target": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "rel": "composes" + "source": "paper:vaswani2017", + "target": "paper:performer", + "rel": "prereq" }, { - "source": "validation:trace_modular_perception_pipeline_with_bev_fusion", - "target": "paper:li2022bevformer", - "rel": "validates" + "source": "paper:vaswani2017", + "target": "paper:linear_attention", + "rel": "prereq" }, { - "source": "paper:vit", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:performer", + "target": "paper:linear_attention", + "rel": "parallel" }, { - "source": "paper:gaia1", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:flashattention", + "target": "paper:performer", + "rel": 
"contrasts" }, { - "source": "paper:drivedreamer", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:flashattention", + "target": "paper:mamba", + "rel": "contrasts" }, { - "source": "paper:world_models", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:linear_attention", + "target": "paper:mamba", + "rel": "prereq" }, { - "source": "move:latent_imagination_rollout", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:llava", + "target": "paper:distill_vlm", + "rel": "feeds" }, { - "source": "insight:long_tail_solved_by_synthesis_not_data_alone", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:gptq", + "target": "paper:awq", + "rel": "parallel" }, { - "source": "insight:world_models_let_planning_be_done_in_imagination", - "target": "validation:trace_neural_field_for_dynamic_driving_scene", - "rel": "composes" + "source": "paper:gpt3", + "target": "paper:gptq", + "rel": "feeds" }, { - "source": "validation:trace_neural_field_for_dynamic_driving_scene", - "target": "paper:gaia1", - "rel": "validates" + "source": "paper:llava", + "target": "paper:gptq", + "rel": "feeds" }, { - "source": "paper:vaswani2017", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:llava", + "target": "paper:awq", + "rel": "feeds" }, { - "source": "paper:gpt3", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:distill_vlm", + "target": "paper:2402.12289", + "rel": "feeds" }, { - "source": "concept:transformer", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:2307.01694", + "target": "paper:loihi2", + "rel": "feeds" }, { - "source": 
"concept:imitation_learning", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:loihi2", + "target": "paper:truenorth", + "rel": "extends" }, { - "source": "move:tokenize_modalities", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:truenorth", + "target": "paper:tianjic", + "rel": "parallel" }, { - "source": "insight:tokenization_collapses_modality_gap", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:loihi2", + "target": "paper:tianjic", + "rel": "parallel" }, { - "source": "insight:imitation_data_compresses_unspecified_reward", - "target": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "rel": "composes" + "source": "paper:loihi2", + "target": "paper:grai", + "rel": "parallel" }, { - "source": "validation:trace_decision_transformer_offline_rl_via_sequence_modeling", - "target": "paper:diffuser", - "rel": "validates" + "source": "paper:dvs_event_camera", + "target": "paper:loihi2", + "rel": "feeds" }, { - "source": "concept:mdp", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:dvs_event_camera", + "target": "paper:2307.01694", + "rel": "feeds" }, { - "source": "concept:policy_gradient", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:iso26262", + "target": "paper:sotif_21448", + "rel": "prereq" }, { - "source": "concept:actor_critic", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:sotif_21448", + "target": "paper:ul4600", + "rel": "extends" }, { - "source": "concept:ppo", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:iso26262", + 
"target": "paper:ul4600", + "rel": "prereq" }, { - "source": "move:clipped_surrogate_objective", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:nuplan", + "target": "move:design_closed_loop_metric_correlated_with_real_world_safety", + "rel": "feeds" }, { - "source": "insight:safety_constraints_via_lagrangian_dual", - "target": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "rel": "composes" + "source": "paper:navsim", + "target": "move:design_closed_loop_metric_correlated_with_real_world_safety", + "rel": "feeds" }, { - "source": "validation:trace_safe_rl_via_lagrangian_constrained_optimization", - "target": "paper:schulman2017_ppo", - "rel": "validates" + "source": "paper:bench2drive", + "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", + "rel": "feeds" }, { - "source": "paper:tesla_ai_day", - "target": "paradigm:modular_perception_to_planning_pipeline", - "rel": "manifests" + "source": "paper:navsim", + "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", + "rel": "feeds" }, { - "source": "paper:li2022bevformer", - "target": "paradigm:modular_perception_to_planning_pipeline", - "rel": "manifests" + "source": "paper:nuplan", + "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", + "rel": "feeds" }, { - "source": "paper:transfuser", - "target": "paradigm:modular_perception_to_planning_pipeline", - "rel": "manifests" + "source": "paper:waymo_scenario_mining", + "target": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", + "rel": "feeds" }, { - "source": "concept:bev", - "target": "paradigm:modular_perception_to_planning_pipeline", - "rel": "composes" + "source": "paper:tesla_autolabel", + "target": "move:auto_label_with_offline_model_then_human_in_loop_validate", + "rel": "feeds" }, { - "source": "move:lift_2d_to_3d", - "target": 
"paradigm:modular_perception_to_planning_pipeline", - "rel": "composes" + "source": "paper:tesla_autolabel", + "target": "move:augment_dataset_via_offline_scenario_perturbation", + "rel": "feeds" }, { - "source": "insight:set_prediction_eliminates_postprocessing_heuristics", - "target": "paradigm:modular_perception_to_planning_pipeline", - "rel": "composes" + "source": "paper:gs_for_ad", + "target": "move:augment_dataset_via_offline_scenario_perturbation", + "rel": "feeds" }, { - "source": "paper:2212.10156", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "manifests" + "source": "paper:gs_for_ad", + "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", + "rel": "feeds" }, { - "source": "paper:2210.14222", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "manifests" + "source": "paper:lidar_cam_calib", + "target": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", + "rel": "feeds" }, { - "source": "paper:vadv2", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "manifests" + "source": "paper:flashattention", + "target": "move:tile_attention_to_fit_SRAM_for_speedup", + "rel": "covers" }, { - "source": "paper:transfuser", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "manifests" + "source": "paper:performer", + "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "rel": "covers" }, { - "source": "concept:imitation_learning", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "composes" + "source": "paper:linear_attention", + "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "rel": "covers" }, { - "source": "move:cross_attention_query", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "composes" + "source": "paper:gptq", + "target": "move:quantize_attention_to_int8_with_calibration", + "rel": "feeds" }, { - "source": 
"insight:end_to_end_differentiable_beats_handcraft_when_signal_strong", - "target": "paradigm:differentiable_end_to_end_imitation", - "rel": "composes" + "source": "paper:awq", + "target": "move:quantize_attention_to_int8_with_calibration", + "rel": "feeds" }, { - "source": "paper:world_models", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "manifests" + "source": "paper:distill_vlm", + "target": "move:distill_large_VLM_into_small_realtime_specialist", + "rel": "covers" }, { - "source": "paper:gaia1", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "manifests" + "source": "paper:flashattention", + "target": "move:cache_KV_state_across_frames_to_amortize_attention_cost", + "rel": "feeds" }, { - "source": "paper:drivedreamer", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "manifests" + "source": "paper:mamba", + "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "rel": "parallel" }, { - "source": "move:latent_imagination_rollout", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "composes" + "source": "paper:2307.01694", + "target": "move:replace_dense_attention_with_sparse_event_driven_attention", + "rel": "covers" }, { - "source": "move:diffusion_denoise_sampling", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "composes" + "source": "paper:2307.01694", + "target": "move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", + "rel": "feeds" }, { - "source": "insight:world_models_let_planning_be_done_in_imagination", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "composes" + "source": "paper:loihi2", + "target": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "rel": "feeds" }, { - "source": "insight:long_tail_solved_by_synthesis_not_data_alone", - "target": "paradigm:model_based_world_imagination_planning", - "rel": "composes" + "source": "paper:tianjic", + 
"target": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "rel": "feeds" }, { - "source": "paper:gpt3", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "manifests" + "source": "paper:grai", + "target": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "rel": "feeds" }, { - "source": "paper:llava", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "manifests" + "source": "paper:dvs_event_camera", + "target": "move:use_event_camera_microsecond_latency_for_emergency_braking", + "rel": "feeds" }, { - "source": "paper:2402.12289", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "manifests" + "source": "paper:dvs_event_camera", + "target": "move:replace_dense_attention_with_sparse_event_driven_attention", + "rel": "feeds" }, { - "source": "paper:2311.10813", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "manifests" + "source": "paper:iso26262", + "target": "move:formalize_safety_case_with_claim_evidence_assumption", + "rel": "feeds" }, { - "source": "paper:lingo2", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "manifests" + "source": "paper:sotif_21448", + "target": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "rel": "feeds" }, { - "source": "concept:vla", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "composes" + "source": "paper:sotif_21448", + "target": "move:treat_corner_case_as_OOD_detection_then_route_to_human", + "rel": "feeds" }, { - "source": "concept:tool_use", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "composes" + "source": "paper:ul4600", + "target": "move:formalize_safety_case_with_claim_evidence_assumption", + "rel": "covers" }, { - "source": "move:dual_system_fast_slow", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "composes" + "source": "paper:sotif_21448", + "target": 
"move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "rel": "feeds" }, { - "source": "insight:dual_system_handles_latency_quality_tradeoff", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "composes" + "source": "paper:sotif_21448", + "target": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", + "rel": "feeds" }, { - "source": "insight:in_context_learning_emerges_at_scale", - "target": "paradigm:foundation_model_zero_shot_driving_agent", - "rel": "composes" + "source": "paper:ul4600", + "target": "move:add_explanation_head_to_promote_interpretability", + "rel": "feeds" }, { - "source": "concept:spiking_nn", - "target": "paradigm:brain_inspired_event_sparse_compute", - "rel": "composes" + "source": "paper:metadrive", + "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", + "rel": "feeds" }, { - "source": "paper:2307.01694", - "target": "paradigm:brain_inspired_event_sparse_compute", - "rel": "manifests" + "source": "paper:smarts", + "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", + "rel": "feeds" }, - { - "source": "move:spike_event_compute", - "target": "paradigm:brain_inspired_event_sparse_compute", - "rel": "composes" + { + "source": "paper:metadrive", + "target": "move:replay_buffer_prioritize_safety_critical_transitions", + "rel": "feeds" }, { - "source": "insight:event_sparse_compute_matches_energy_budget", - "target": "paradigm:brain_inspired_event_sparse_compute", - "rel": "composes" + "source": "paper:highway_env", + "target": "move:replay_buffer_prioritize_safety_critical_transitions", + "rel": "feeds" }, { - "source": "paper:2512.24426", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "manifests" + "source": "paper:commonroad", + "target": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "rel": "feeds" }, { - "source": "paper:gaia1", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": 
"manifests" + "source": "paper:waymo_scenario_mining", + "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", + "rel": "feeds" }, { - "source": "paper:drivedreamer", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "manifests" + "source": "paper:shift_dataset", + "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", + "rel": "feeds" }, { - "source": "concept:counterfactual", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "composes" + "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", + "target": "problem:offline_metric_does_not_predict_closed_loop_performance", + "rel": "motivates" }, { - "source": "concept:meta_action", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "composes" + "source": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", + "target": "problem:offline_metric_does_not_predict_closed_loop_performance", + "rel": "motivates" }, { - "source": "move:counterfactual_replan", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "composes" + "source": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", + "target": "problem:offline_metric_does_not_predict_closed_loop_performance", + "rel": "motivates" }, { - "source": "insight:long_tail_solved_by_synthesis_not_data_alone", - "target": "paradigm:counterfactual_data_centric_safety", - "rel": "composes" + "source": "move:augment_dataset_via_offline_scenario_perturbation", + "target": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", + "rel": "motivates" }, { - "source": "paper:gpt3", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "manifests" + "source": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", + "target": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", + "rel": "motivates" }, { - "source": 
"paper:2309.16292", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "manifests" + "source": "move:replay_buffer_prioritize_safety_critical_transitions", + "target": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", + "rel": "motivates" }, { - "source": "paper:2311.10813", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "manifests" + "source": "move:distill_large_VLM_into_small_realtime_specialist", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "concept:cot", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "composes" + "source": "move:quantize_attention_to_int8_with_calibration", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "concept:tool_use", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "composes" + "source": "move:tile_attention_to_fit_SRAM_for_speedup", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "essay:bitter_lesson", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "contrasts" + "source": "move:cache_KV_state_across_frames_to_amortize_attention_cost", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "insight:symbolic_intermediate_enables_interpretability_and_transfer", - "target": "paradigm:knowledge_driven_reflective_agent", - "rel": "composes" + "source": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "paper:dinov2", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "manifests" + "source": "move:replace_dense_attention_with_sparse_event_driven_attention", + "target": 
"problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "paper:2508.10104", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "manifests" + "source": "move:perform_neural_architecture_search_with_latency_constraint", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "paper:sam", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "manifests" + "source": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "rel": "motivates" }, { - "source": "concept:ssl", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", + "target": "problem:sensor_calibration_drift_over_vehicle_lifetime", + "rel": "motivates" }, { - "source": "move:masking_for_pretext", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", + "target": "problem:label_noise_for_3d_object_categories", + "rel": "motivates" }, { - "source": "move:contrastive_alignment", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", + "target": "problem:label_noise_for_3d_object_categories", + "rel": "motivates" }, { - "source": "insight:masked_prediction_yields_self_supervised_signal", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "target": "problem:verification_of_neural_network_safety_properties_at_scale", + "rel": "motivates" }, { - "source": "insight:foundation_pretraining_decouples_data_from_task", - "target": 
"paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "target": "problem:verification_of_neural_network_safety_properties_at_scale", + "rel": "motivates" }, { - "source": "insight:scaling_laws_predict_capability_emergence", - "target": "paradigm:scaling_data_with_self_supervision", - "rel": "composes" + "source": "move:augment_dataset_via_offline_scenario_perturbation", + "target": "problem:realistic_other_agent_behavior_in_simulator", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:nuplan", - "rel": "covers" + "source": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", + "target": "problem:catastrophic_forgetting_under_continual_learning", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:waymo_motion", - "rel": "covers" + "source": "move:add_explanation_head_to_promote_interpretability", + "target": "problem:auditability_of_decisions_for_regulatory_compliance", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:argoverse2", - "rel": "covers" + "source": "move:formalize_safety_case_with_claim_evidence_assumption", + "target": "problem:auditability_of_decisions_for_regulatory_compliance", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:navsim", - "rel": "covers" + "source": "move:treat_corner_case_as_OOD_detection_then_route_to_human", + "target": "problem:auditability_of_decisions_for_regulatory_compliance", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:bench2drive", - "rel": "covers" + "source": "paper:gs_for_ad", + "target": "problem:simulator_visual_gap_breaks_perception_models", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:carla_lb2", - "rel": "covers" + "source": "paper:shift_dataset", + "target": "problem:simulator_visual_gap_breaks_perception_models", + 
"rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:womd_pred", - "rel": "covers" + "source": "paper:v2x_sim", + "target": "problem:realistic_other_agent_behavior_in_simulator", + "rel": "motivates" }, { - "source": "paper:ad_benchmarks", - "target": "paper:interaction_dataset", - "rel": "covers" + "source": "problem:offline_metric_does_not_predict_closed_loop_performance", + "target": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", + "rel": "composes" }, { - "source": "paper:nuplan", - "target": "paper:navsim", - "rel": "feeds" + "source": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", + "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "rel": "composes" }, { - "source": "paper:nuplan", - "target": "paper:bench2drive", - "rel": "parallel" + "source": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "target": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", + "rel": "composes" }, { - "source": "paper:navsim", - "target": "paper:bench2drive", - "rel": "parallel" + "source": "problem:energy_budget_too_small_for_full_transformer_at_30fps", + "target": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", + "rel": "composes" }, { - "source": "paper:carla_lb2", - "target": "paper:bench2drive", - "rel": "feeds" + "source": "problem:verification_of_neural_network_safety_properties_at_scale", + "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", + "rel": "composes" }, { - "source": "paper:waymo_motion", - "target": "paper:womd_pred", - "rel": "feeds" + "source": "problem:auditability_of_decisions_for_regulatory_compliance", + "target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", + "rel": "composes" }, { - "source": "paper:argoverse2", - "target": "paper:womd_pred", - "rel": "parallel" + "source": 
"problem:realistic_other_agent_behavior_in_simulator", + "target": "insight:simulator_realism_is_lower_bound_on_training_value", + "rel": "composes" }, { - "source": "paper:interaction_dataset", - "target": "paper:waymo_motion", - "rel": "parallel" + "source": "problem:simulator_visual_gap_breaks_perception_models", + "target": "insight:simulator_realism_is_lower_bound_on_training_value", + "rel": "composes" }, { - "source": "paper:bdd100k", - "target": "paper:argoverse2", - "rel": "parallel" + "source": "problem:catastrophic_forgetting_under_continual_learning", + "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "rel": "composes" }, { - "source": "paper:lyft_l5", - "target": "paper:waymo_motion", - "rel": "parallel" + "source": "problem:sensor_calibration_drift_over_vehicle_lifetime", + "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "rel": "composes" }, { - "source": "paper:pandaset", - "target": "paper:argoverse2", - "rel": "parallel" + "source": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", + "target": "insight:offline_metrics_co_evolve_with_methods_so_must_be_re_audited", + "rel": "composes" }, { - "source": "paper:apolloscape", - "target": "paper:bdd100k", - "rel": "parallel" + "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", + "target": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", + "rel": "composes" }, { - "source": "paper:highway_env", - "target": "paper:metadrive", - "rel": "prereq" + "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", + "rel": "composes" }, { - "source": "paper:metadrive", - "target": "paper:smarts", - "rel": "parallel" + "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", + "target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", + 
"rel": "composes" }, { - "source": "paper:smarts", - "target": "paper:commonroad", - "rel": "parallel" + "source": "move:treat_corner_case_as_OOD_detection_then_route_to_human", + "target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", + "rel": "composes" }, { - "source": "paper:commonroad", - "target": "paper:carla_lb2", - "rel": "parallel" + "source": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", + "rel": "composes" }, { - "source": "paper:highway_env", - "target": "paper:smarts", - "rel": "prereq" + "source": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "target": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", + "rel": "composes" }, { - "source": "paper:tesla_ai_day", - "target": "paper:tesla_autolabel", - "rel": "covers" + "source": "move:replace_dense_attention_with_sparse_event_driven_attention", + "target": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", + "rel": "composes" }, { - "source": "paper:tesla_autolabel", - "target": "paper:waymo_scenario_mining", - "rel": "parallel" + "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", + "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "rel": "composes" }, { - "source": "paper:tesla_autolabel", - "target": "paper:gs_for_ad", - "rel": "feeds" + "source": "move:augment_dataset_via_offline_scenario_perturbation", + "target": "insight:simulator_realism_is_lower_bound_on_training_value", + "rel": "composes" }, { - "source": "paper:gs_for_ad", - "target": "paper:shift_dataset", - "rel": "parallel" + "source": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", + "target": "paradigm:closed_loop_data_engine_centric_development", + "rel": "composes" }, { - "source": "paper:shift_dataset", - "target": "paper:v2x_sim", - "rel": "parallel" + 
"source": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "target": "paradigm:closed_loop_data_engine_centric_development", + "rel": "composes" }, { - "source": "paper:vaswani2017", - "target": "paper:flashattention", - "rel": "prereq" + "source": "insight:offline_metrics_co_evolve_with_methods_so_must_be_re_audited", + "target": "paradigm:closed_loop_data_engine_centric_development", + "rel": "composes" }, { - "source": "paper:vaswani2017", - "target": "paper:performer", - "rel": "prereq" + "source": "insight:safety_emerges_from_layered_constraints_not_single_objective", + "target": "paradigm:safety_by_constraint_layered_architecture", + "rel": "composes" }, { - "source": "paper:vaswani2017", - "target": "paper:linear_attention", - "rel": "prereq" + "source": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", + "target": "paradigm:safety_by_constraint_layered_architecture", + "rel": "composes" }, { - "source": "paper:performer", - "target": "paper:linear_attention", - "rel": "parallel" + "source": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", + "target": "paradigm:brain_inspired_neuromorphic_co_design", + "rel": "composes" }, { - "source": "paper:flashattention", - "target": "paper:performer", - "rel": "contrasts" + "source": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", + "target": "paradigm:brain_inspired_neuromorphic_co_design", + "rel": "composes" }, { - "source": "paper:flashattention", - "target": "paper:mamba", - "rel": "contrasts" + "source": "insight:simulator_realism_is_lower_bound_on_training_value", + "target": "paradigm:simulator_first_synthetic_data_centric", + "rel": "composes" }, { - "source": "paper:linear_attention", - "target": "paper:mamba", - "rel": "prereq" + "source": "paradigm:closed_loop_data_engine_centric_development", + "target": "paper:tesla_autolabel", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:distill_vlm", 
- "rel": "feeds" + "source": "paradigm:closed_loop_data_engine_centric_development", + "target": "paper:waymo_scenario_mining", + "rel": "covers" }, { - "source": "paper:gptq", - "target": "paper:awq", - "rel": "parallel" + "source": "paradigm:closed_loop_data_engine_centric_development", + "target": "paper:nuplan", + "rel": "covers" }, { - "source": "paper:gpt3", - "target": "paper:gptq", - "rel": "feeds" + "source": "paradigm:closed_loop_data_engine_centric_development", + "target": "paper:bench2drive", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:gptq", - "rel": "feeds" + "source": "paradigm:safety_by_constraint_layered_architecture", + "target": "paper:iso26262", + "rel": "covers" }, { - "source": "paper:llava", - "target": "paper:awq", - "rel": "feeds" + "source": "paradigm:safety_by_constraint_layered_architecture", + "target": "paper:sotif_21448", + "rel": "covers" }, { - "source": "paper:distill_vlm", - "target": "paper:2402.12289", - "rel": "feeds" + "source": "paradigm:safety_by_constraint_layered_architecture", + "target": "paper:ul4600", + "rel": "covers" }, { - "source": "paper:2307.01694", - "target": "paper:loihi2", - "rel": "feeds" + "source": "paradigm:brain_inspired_neuromorphic_co_design", + "target": "paper:2307.01694", + "rel": "covers" }, { - "source": "paper:loihi2", - "target": "paper:truenorth", - "rel": "extends" + "source": "paradigm:brain_inspired_neuromorphic_co_design", + "target": "paper:loihi2", + "rel": "covers" }, { - "source": "paper:truenorth", + "source": "paradigm:brain_inspired_neuromorphic_co_design", "target": "paper:tianjic", - "rel": "parallel" + "rel": "covers" }, { - "source": "paper:loihi2", - "target": "paper:tianjic", - "rel": "parallel" + "source": "paradigm:brain_inspired_neuromorphic_co_design", + "target": "paper:dvs_event_camera", + "rel": "covers" }, { - "source": "paper:loihi2", - "target": "paper:grai", - "rel": "parallel" + "source": "paradigm:simulator_first_synthetic_data_centric", + 
"target": "paper:carla_lb2", + "rel": "covers" }, { - "source": "paper:dvs_event_camera", - "target": "paper:loihi2", - "rel": "feeds" + "source": "paradigm:simulator_first_synthetic_data_centric", + "target": "paper:shift_dataset", + "rel": "covers" }, { - "source": "paper:dvs_event_camera", - "target": "paper:2307.01694", - "rel": "feeds" + "source": "paradigm:simulator_first_synthetic_data_centric", + "target": "paper:v2x_sim", + "rel": "covers" }, { - "source": "paper:iso26262", - "target": "paper:sotif_21448", - "rel": "prereq" + "source": "paradigm:simulator_first_synthetic_data_centric", + "target": "paper:gs_for_ad", + "rel": "covers" }, { - "source": "paper:sotif_21448", - "target": "paper:ul4600", - "rel": "extends" + "source": "paradigm:simulator_first_synthetic_data_centric", + "target": "paper:metadrive", + "rel": "covers" }, { - "source": "paper:iso26262", - "target": "paper:ul4600", - "rel": "prereq" + "source": "essay:bitter_lesson", + "target": "paradigm:closed_loop_data_engine_centric_development", + "rel": "parallel" }, { - "source": "paper:nuplan", - "target": "move:design_closed_loop_metric_correlated_with_real_world_safety", - "rel": "feeds" + "source": "essay:bitter_lesson", + "target": "paradigm:simulator_first_synthetic_data_centric", + "rel": "parallel" }, { - "source": "paper:navsim", - "target": "move:design_closed_loop_metric_correlated_with_real_world_safety", - "rel": "feeds" + "source": "essay:bitter_lesson", + "target": "paradigm:brain_inspired_neuromorphic_co_design", + "rel": "contrasts" }, { - "source": "paper:bench2drive", - "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", - "rel": "feeds" + "source": "essay:bitter_lesson", + "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", + "rel": "parallel" }, { - "source": "paper:navsim", - "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", - "rel": "feeds" + "source": "paper:2212.10156", + "target": 
"paper:bench2drive", + "rel": "covers" }, { - "source": "paper:nuplan", - "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", - "rel": "feeds" + "source": "paper:vadv2", + "target": "paper:nuplan", + "rel": "covers" }, { - "source": "paper:waymo_scenario_mining", - "target": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", - "rel": "feeds" + "source": "paper:transfuser", + "target": "paper:carla_lb2", + "rel": "covers" }, { - "source": "paper:tesla_autolabel", - "target": "move:auto_label_with_offline_model_then_human_in_loop_validate", + "source": "paper:2402.12289", + "target": "paper:distill_vlm", "rel": "feeds" }, { - "source": "paper:tesla_autolabel", - "target": "move:augment_dataset_via_offline_scenario_perturbation", - "rel": "feeds" + "source": "paper:diffuser", + "target": "paper:nuplan", + "rel": "parallel" }, { - "source": "paper:gs_for_ad", - "target": "move:augment_dataset_via_offline_scenario_perturbation", - "rel": "feeds" + "source": "concept:spiking_nn", + "target": "paper:loihi2", + "rel": "covers" + }, + { + "source": "concept:spiking_nn", + "target": "paper:tianjic", + "rel": "covers" }, { - "source": "paper:gs_for_ad", - "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", - "rel": "feeds" + "source": "concept:spiking_nn", + "target": "paper:truenorth", + "rel": "covers" }, { - "source": "paper:lidar_cam_calib", - "target": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", - "rel": "feeds" + "source": "concept:spiking_nn", + "target": "move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", + "rel": "covers" }, { - "source": "paper:flashattention", - "target": "move:tile_attention_to_fit_SRAM_for_speedup", + "source": "concept:self_attention", + "target": "paper:flashattention", "rel": "covers" }, { - "source": "paper:performer", - "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + 
"source": "concept:self_attention", + "target": "paper:performer", "rel": "covers" }, { - "source": "paper:linear_attention", - "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "source": "concept:transformer", + "target": "paper:flashattention", "rel": "covers" }, { - "source": "paper:gptq", - "target": "move:quantize_attention_to_int8_with_calibration", - "rel": "feeds" + "source": "lab:lab06", + "target": "move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", + "rel": "implements" }, { - "source": "paper:awq", - "target": "move:quantize_attention_to_int8_with_calibration", - "rel": "feeds" + "source": "lab:lab06", + "target": "move:replace_dense_attention_with_sparse_event_driven_attention", + "rel": "implements" }, { - "source": "paper:distill_vlm", - "target": "move:distill_large_VLM_into_small_realtime_specialist", - "rel": "covers" + "source": "lab:lab02", + "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", + "rel": "parallel" }, { - "source": "paper:flashattention", + "source": "lab:lab03", "target": "move:cache_KV_state_across_frames_to_amortize_attention_cost", - "rel": "feeds" + "rel": "parallel" }, { - "source": "paper:mamba", - "target": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "source": "lab:lab04", + "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", "rel": "parallel" }, { - "source": "paper:2307.01694", - "target": "move:replace_dense_attention_with_sparse_event_driven_attention", - "rel": "covers" + "source": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "target": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "rel": "feeds" }, { - "source": "paper:2307.01694", - "target": "move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", - "rel": "feeds" + "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "target": 
"move:treat_corner_case_as_OOD_detection_then_route_to_human", + "rel": "parallel" }, { - "source": "paper:loihi2", - "target": "move:co_design_silicon_with_algorithm_for_minimum_energy", + "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", + "target": "move:treat_corner_case_as_OOD_detection_then_route_to_human", "rel": "feeds" }, { - "source": "paper:tianjic", - "target": "move:co_design_silicon_with_algorithm_for_minimum_energy", - "rel": "feeds" + "source": "move:tile_attention_to_fit_SRAM_for_speedup", + "target": "move:cache_KV_state_across_frames_to_amortize_attention_cost", + "rel": "parallel" }, { - "source": "paper:grai", - "target": "move:co_design_silicon_with_algorithm_for_minimum_energy", - "rel": "feeds" + "source": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", + "target": "move:replace_dense_attention_with_sparse_event_driven_attention", + "rel": "parallel" }, { - "source": "paper:dvs_event_camera", - "target": "move:use_event_camera_microsecond_latency_for_emergency_braking", - "rel": "feeds" + "source": "move:quantize_attention_to_int8_with_calibration", + "target": "move:perform_neural_architecture_search_with_latency_constraint", + "rel": "parallel" }, { - "source": "paper:dvs_event_camera", - "target": "move:replace_dense_attention_with_sparse_event_driven_attention", + "source": "move:distill_large_VLM_into_small_realtime_specialist", + "target": "move:quantize_attention_to_int8_with_calibration", "rel": "feeds" }, { - "source": "paper:iso26262", - "target": "move:formalize_safety_case_with_claim_evidence_assumption", + "source": "move:augment_dataset_via_offline_scenario_perturbation", + "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", "rel": "feeds" }, { - "source": "paper:sotif_21448", - "target": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", + "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", 
+ "target": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", "rel": "feeds" }, { - "source": "paper:sotif_21448", - "target": "move:treat_corner_case_as_OOD_detection_then_route_to_human", + "source": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", + "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", "rel": "feeds" }, { - "source": "paper:ul4600", - "target": "move:formalize_safety_case_with_claim_evidence_assumption", - "rel": "covers" + "source": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", + "target": "move:auto_label_with_offline_model_then_human_in_loop_validate", + "rel": "prereq" }, { - "source": "paper:sotif_21448", - "target": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", + "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", + "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", "rel": "feeds" }, { - "source": "paper:sotif_21448", - "target": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", + "source": "move:formalize_safety_case_with_claim_evidence_assumption", + "target": "move:add_explanation_head_to_promote_interpretability", "rel": "feeds" }, { - "source": "paper:ul4600", - "target": "move:add_explanation_head_to_promote_interpretability", - "rel": "feeds" + "source": "paper:lift_splat_shoot", + "target": "paper:bevdet", + "rel": "prereq" }, { - "source": "paper:metadrive", - "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", - "rel": "feeds" + "source": "paper:bevdet", + "target": "paper:bevdet4d", + "rel": "extends" }, { - "source": "paper:smarts", - "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", - "rel": "feeds" + "source": "paper:lift_splat_shoot", + "target": "paper:li2022bevformer", + "rel": "parallel" }, { - "source": "paper:metadrive", - "target": 
"move:replay_buffer_prioritize_safety_critical_transitions", - "rel": "feeds" + "source": "paper:li2022bevformer", + "target": "paper:bevformer_v2", + "rel": "extends" }, { - "source": "paper:highway_env", - "target": "move:replay_buffer_prioritize_safety_critical_transitions", - "rel": "feeds" + "source": "paper:carion2020", + "target": "paper:detr3d", + "rel": "prereq" }, { - "source": "paper:commonroad", - "target": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", - "rel": "feeds" + "source": "paper:detr3d", + "target": "paper:petr", + "rel": "parallel" }, { - "source": "paper:waymo_scenario_mining", - "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", - "rel": "feeds" + "source": "paper:petr", + "target": "paper:petrv2", + "rel": "extends" }, { - "source": "paper:shift_dataset", - "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", - "rel": "feeds" + "source": "paper:petr", + "target": "paper:streampetr", + "rel": "extends" }, { - "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", - "target": "problem:offline_metric_does_not_predict_closed_loop_performance", - "rel": "motivates" + "source": "paper:detr3d", + "target": "paper:streampetr", + "rel": "prereq" }, { - "source": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", - "target": "problem:offline_metric_does_not_predict_closed_loop_performance", - "rel": "motivates" + "source": "paper:bevdet", + "target": "paper:bevfusion", + "rel": "feeds" }, { - "source": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", - "target": "problem:offline_metric_does_not_predict_closed_loop_performance", - "rel": "motivates" + "source": "paper:lift_splat_shoot", + "target": "paper:bevfusion", + "rel": "prereq" }, { - "source": "move:augment_dataset_via_offline_scenario_perturbation", - "target": 
"problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", - "rel": "motivates" + "source": "paper:lift_splat_shoot", + "target": "paper:simplebev", + "rel": "contrasts" }, { - "source": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", - "target": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", - "rel": "motivates" + "source": "paper:simplebev", + "target": "paper:bevdet", + "rel": "contrasts" }, { - "source": "move:replay_buffer_prioritize_safety_critical_transitions", - "target": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", - "rel": "motivates" + "source": "paper:occupancy_networks_tesla", + "target": "paper:surroundocc", + "rel": "feeds" }, { - "source": "move:distill_large_VLM_into_small_realtime_specialist", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:tesla_ai_day", + "target": "paper:occupancy_networks_tesla", + "rel": "covers" }, { - "source": "move:quantize_attention_to_int8_with_calibration", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:surroundocc", + "target": "paper:occ3d", + "rel": "parallel" }, { - "source": "move:tile_attention_to_fit_SRAM_for_speedup", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:occ3d", + "target": "paper:openocc_unic", + "rel": "parallel" }, { - "source": "move:cache_KV_state_across_frames_to_amortize_attention_cost", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:li2022bevformer", + "target": "paper:surroundocc", + "rel": "prereq" }, { - "source": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": 
"paper:vit", + "target": "paper:mae", + "rel": "prereq" }, { - "source": "move:replace_dense_attention_with_sparse_event_driven_attention", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:vit", + "target": "paper:beit", + "rel": "prereq" }, { - "source": "move:perform_neural_architecture_search_with_latency_constraint", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:mae", + "target": "paper:dinov2", + "rel": "parallel" }, { - "source": "move:co_design_silicon_with_algorithm_for_minimum_energy", - "target": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "rel": "motivates" + "source": "paper:beit", + "target": "paper:mae", + "rel": "parallel" }, { - "source": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", - "target": "problem:sensor_calibration_drift_over_vehicle_lifetime", - "rel": "motivates" + "source": "paper:vit", + "target": "paper:clip", + "rel": "prereq" + }, + { + "source": "paper:clip", + "target": "paper:blip2", + "rel": "prereq" }, { - "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", - "target": "problem:label_noise_for_3d_object_categories", - "rel": "motivates" + "source": "paper:clip", + "target": "paper:vilt", + "rel": "parallel" }, { - "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", - "target": "problem:label_noise_for_3d_object_categories", - "rel": "motivates" + "source": "paper:blip2", + "target": "paper:llava", + "rel": "prereq" }, { - "source": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", - "target": "problem:verification_of_neural_network_safety_properties_at_scale", - "rel": "motivates" + "source": "paper:vit", + "target": "paper:dinov1", + "rel": "prereq" }, { - "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", - "target": 
"problem:verification_of_neural_network_safety_properties_at_scale", - "rel": "motivates" + "source": "paper:dinov1", + "target": "paper:dinov2", + "rel": "prereq" }, { - "source": "move:augment_dataset_via_offline_scenario_perturbation", - "target": "problem:realistic_other_agent_behavior_in_simulator", - "rel": "motivates" + "source": "paper:simclr_mocov3", + "target": "paper:dinov1", + "rel": "contrasts" }, { - "source": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", - "target": "problem:catastrophic_forgetting_under_continual_learning", - "rel": "motivates" + "source": "paper:simclr_mocov3", + "target": "paper:mae", + "rel": "contrasts" }, { - "source": "move:add_explanation_head_to_promote_interpretability", - "target": "problem:auditability_of_decisions_for_regulatory_compliance", - "rel": "motivates" + "source": "paper:nerf", + "target": "paper:3dgs", + "rel": "contrasts" }, { - "source": "move:formalize_safety_case_with_claim_evidence_assumption", - "target": "problem:auditability_of_decisions_for_regulatory_compliance", - "rel": "motivates" + "source": "paper:nerf", + "target": "paper:emernerf", + "rel": "prereq" }, { - "source": "move:treat_corner_case_as_OOD_detection_then_route_to_human", - "target": "problem:auditability_of_decisions_for_regulatory_compliance", - "rel": "motivates" + "source": "paper:3dgs", + "target": "paper:drivinggaussian", + "rel": "prereq" }, { - "source": "paper:gs_for_ad", - "target": "problem:simulator_visual_gap_breaks_perception_models", - "rel": "motivates" + "source": "paper:emernerf", + "target": "paper:drivinggaussian", + "rel": "parallel" }, { - "source": "paper:shift_dataset", - "target": "problem:simulator_visual_gap_breaks_perception_models", - "rel": "motivates" + "source": "paper:dinov2", + "target": "paper:depth_anything", + "rel": "prereq" }, { - "source": "paper:v2x_sim", - "target": "problem:realistic_other_agent_behavior_in_simulator", - "rel": "motivates" + "source": 
"paper:depth_anything", + "target": "paper:lift_splat_shoot", + "rel": "feeds" }, { - "source": "problem:offline_metric_does_not_predict_closed_loop_performance", - "target": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", - "rel": "composes" + "source": "paper:depth_anything", + "target": "paper:bevdet", + "rel": "feeds" }, { - "source": "problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented", - "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "rel": "composes" + "source": "paper:vit", + "target": "paper:vggt", + "rel": "prereq" }, { - "source": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "target": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", - "rel": "composes" + "source": "paper:vggt", + "target": "paper:3dgs", + "rel": "feeds" }, { - "source": "problem:energy_budget_too_small_for_full_transformer_at_30fps", - "target": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", - "rel": "composes" + "source": "paper:li2022bevformer", + "target": "paper:2212.10156", + "rel": "feeds" }, { - "source": "problem:verification_of_neural_network_safety_properties_at_scale", - "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", - "rel": "composes" + "source": "paper:bevfusion", + "target": "paper:2212.10156", + "rel": "feeds" }, { - "source": "problem:auditability_of_decisions_for_regulatory_compliance", - "target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", - "rel": "composes" + "source": "paper:streampetr", + "target": "paper:2212.10156", + "rel": "feeds" }, { - "source": "problem:realistic_other_agent_behavior_in_simulator", - "target": "insight:simulator_realism_is_lower_bound_on_training_value", - "rel": "composes" + "source": "paper:occupancy_networks_tesla", + "target": "paper:2212.10156", + "rel": "feeds" }, { - "source": 
"problem:simulator_visual_gap_breaks_perception_models", - "target": "insight:simulator_realism_is_lower_bound_on_training_value", - "rel": "composes" + "source": "paper:occupancy_networks_tesla", + "target": "paper:vadv2", + "rel": "feeds" }, { - "source": "problem:catastrophic_forgetting_under_continual_learning", - "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "rel": "composes" + "source": "paper:dinov2", + "target": "paper:surroundocc", + "rel": "feeds" }, { - "source": "problem:sensor_calibration_drift_over_vehicle_lifetime", - "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "rel": "composes" + "source": "paper:2508.10104", + "target": "paper:surroundocc", + "rel": "feeds" }, { - "source": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", - "target": "insight:offline_metrics_co_evolve_with_methods_so_must_be_re_audited", - "rel": "composes" + "source": "paper:sam", + "target": "paper:depth_anything", + "rel": "parallel" }, { - "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", - "target": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", - "rel": "composes" + "source": "paper:clip", + "target": "paper:llava", + "rel": "prereq" }, { - "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", - "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", - "rel": "composes" + "source": "paper:lift_splat_shoot", + "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "rel": "covers" }, { - "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", - "target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", - "rel": "composes" + "source": "paper:bevdet", + "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "rel": "covers" }, { - "source": "move:treat_corner_case_as_OOD_detection_then_route_to_human", - 
"target": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", - "rel": "composes" + "source": "paper:bevfusion", + "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "rel": "covers" }, { - "source": "move:specify_safety_constraint_as_signal_temporal_logic_then_verify", - "target": "insight:safety_emerges_from_layered_constraints_not_single_objective", - "rel": "composes" + "source": "paper:carion2020", + "target": "move:treat_detection_as_set_prediction_with_learnable_queries", + "rel": "covers" }, { - "source": "move:co_design_silicon_with_algorithm_for_minimum_energy", - "target": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", - "rel": "composes" + "source": "paper:detr3d", + "target": "move:treat_detection_as_set_prediction_with_learnable_queries", + "rel": "covers" }, { - "source": "move:replace_dense_attention_with_sparse_event_driven_attention", - "target": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", - "rel": "composes" + "source": "paper:2212.10156", + "target": "move:treat_detection_as_set_prediction_with_learnable_queries", + "rel": "covers" }, { - "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", - "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "rel": "composes" + "source": "paper:detr3d", + "target": "move:reproject_3d_query_to_2d_for_feature_sampling", + "rel": "covers" }, { - "source": "move:augment_dataset_via_offline_scenario_perturbation", - "target": "insight:simulator_realism_is_lower_bound_on_training_value", - "rel": "composes" + "source": "paper:streampetr", + "target": "move:reproject_3d_query_to_2d_for_feature_sampling", + "rel": "covers" }, { - "source": "insight:closed_loop_evaluation_is_the_only_ground_truth_for_planners", - "target": "paradigm:closed_loop_data_engine_centric_development", - "rel": "composes" + "source": "paper:petr", + "target": 
"move:embed_camera_geometry_into_positional_encoding", + "rel": "covers" }, { - "source": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "target": "paradigm:closed_loop_data_engine_centric_development", - "rel": "composes" + "source": "paper:petrv2", + "target": "move:embed_camera_geometry_into_positional_encoding", + "rel": "covers" }, { - "source": "insight:offline_metrics_co_evolve_with_methods_so_must_be_re_audited", - "target": "paradigm:closed_loop_data_engine_centric_development", - "rel": "composes" + "source": "paper:nerf", + "target": "move:replace_explicit_module_with_implicit_function", + "rel": "covers" }, { - "source": "insight:safety_emerges_from_layered_constraints_not_single_objective", - "target": "paradigm:safety_by_constraint_layered_architecture", - "rel": "composes" + "source": "paper:3dgs", + "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", + "rel": "covers" }, { - "source": "insight:uncertainty_calibration_is_prerequisite_for_safe_delegation", - "target": "paradigm:safety_by_constraint_layered_architecture", - "rel": "composes" + "source": "paper:bevformer_v2", + "target": "move:add_auxiliary_perspective_supervision_to_bev", + "rel": "covers" }, { - "source": "insight:event_driven_computation_matches_natural_sparsity_of_driving_scene", - "target": "paradigm:brain_inspired_neuromorphic_co_design", - "rel": "composes" + "source": "paper:streampetr", + "target": "move:carry_object_query_across_time_as_recurrent_state", + "rel": "covers" }, { - "source": "insight:hardware_software_co_design_unlocks_orders_of_magnitude_efficiency", - "target": "paradigm:brain_inspired_neuromorphic_co_design", - "rel": "composes" + "source": "paper:bevdet4d", + "target": "move:carry_object_query_across_time_as_recurrent_state", + "rel": "covers" }, { - "source": "insight:simulator_realism_is_lower_bound_on_training_value", - "target": "paradigm:simulator_first_synthetic_data_centric", - "rel": "composes" + "source": 
"paper:bevfusion", + "target": "move:fuse_modalities_in_shared_intermediate_space", + "rel": "covers" }, { - "source": "paradigm:closed_loop_data_engine_centric_development", - "target": "paper:tesla_autolabel", + "source": "paper:occupancy_networks_tesla", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", "rel": "covers" }, { - "source": "paradigm:closed_loop_data_engine_centric_development", - "target": "paper:waymo_scenario_mining", + "source": "paper:surroundocc", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", "rel": "covers" }, { - "source": "paradigm:closed_loop_data_engine_centric_development", - "target": "paper:nuplan", + "source": "paper:mae", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", "rel": "covers" }, { - "source": "paradigm:closed_loop_data_engine_centric_development", - "target": "paper:bench2drive", + "source": "paper:dinov2", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", "rel": "covers" }, { - "source": "paradigm:safety_by_constraint_layered_architecture", - "target": "paper:iso26262", + "source": "paper:2508.10104", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", "rel": "covers" }, { - "source": "paradigm:safety_by_constraint_layered_architecture", - "target": "paper:sotif_21448", + "source": "paper:blip2", + "target": "move:freeze_giant_backbone_train_small_adapter", "rel": "covers" }, { - "source": "paradigm:safety_by_constraint_layered_architecture", - "target": "paper:ul4600", + "source": "paper:llava", + "target": "move:freeze_giant_backbone_train_small_adapter", "rel": "covers" }, { - "source": "paradigm:brain_inspired_neuromorphic_co_design", - "target": "paper:2307.01694", + "source": "paper:vit", + "target": "move:tokenize_continuous_signal_to_use_transformer", "rel": "covers" }, { - "source": "paradigm:brain_inspired_neuromorphic_co_design", - "target": "paper:loihi2", + "source": "paper:beit", + 
"target": "move:tokenize_continuous_signal_to_use_transformer", "rel": "covers" }, { - "source": "paradigm:brain_inspired_neuromorphic_co_design", - "target": "paper:tianjic", + "source": "paper:nerf", + "target": "move:use_geometry_as_self_supervision", "rel": "covers" }, { - "source": "paradigm:brain_inspired_neuromorphic_co_design", - "target": "paper:dvs_event_camera", + "source": "paper:emernerf", + "target": "move:use_geometry_as_self_supervision", "rel": "covers" }, { - "source": "paradigm:simulator_first_synthetic_data_centric", - "target": "paper:carla_lb2", + "source": "paper:2212.10156", + "target": "move:make_pipeline_differentiable_via_shared_latent", "rel": "covers" }, { - "source": "paradigm:simulator_first_synthetic_data_centric", - "target": "paper:shift_dataset", + "source": "paper:vadv2", + "target": "move:make_pipeline_differentiable_via_shared_latent", "rel": "covers" }, { - "source": "paradigm:simulator_first_synthetic_data_centric", - "target": "paper:v2x_sim", + "source": "paper:nerf", + "target": "move:rasterize_differentiable_renderer_for_inverse_problem", "rel": "covers" }, { - "source": "paradigm:simulator_first_synthetic_data_centric", - "target": "paper:gs_for_ad", + "source": "paper:3dgs", + "target": "move:rasterize_differentiable_renderer_for_inverse_problem", "rel": "covers" }, { - "source": "paradigm:simulator_first_synthetic_data_centric", - "target": "paper:metadrive", + "source": "paper:depth_anything", + "target": "move:distill_internet_data_into_small_specialist", "rel": "covers" }, { - "source": "essay:bitter_lesson", - "target": "paradigm:closed_loop_data_engine_centric_development", - "rel": "parallel" - }, - { - "source": "essay:bitter_lesson", - "target": "paradigm:simulator_first_synthetic_data_centric", - "rel": "parallel" + "source": "paper:sam", + "target": "move:distill_internet_data_into_small_specialist", + "rel": "covers" }, { - "source": "essay:bitter_lesson", - "target": 
"paradigm:brain_inspired_neuromorphic_co_design", - "rel": "contrasts" + "source": "paper:bevdet4d", + "target": "move:make_camera_only_temporal_match_lidar", + "rel": "covers" }, { - "source": "essay:bitter_lesson", - "target": "insight:data_engine_loop_is_more_valuable_than_static_dataset", - "rel": "parallel" + "source": "paper:streampetr", + "target": "move:make_camera_only_temporal_match_lidar", + "rel": "covers" }, { - "source": "paper:2212.10156", - "target": "paper:bench2drive", + "source": "paper:clip", + "target": "move:open_vocabulary_via_text_alignment", "rel": "covers" }, { - "source": "paper:vadv2", - "target": "paper:nuplan", + "source": "paper:dinov1", + "target": "move:emergent_segmentation_from_self_distillation", "rel": "covers" }, { - "source": "paper:transfuser", - "target": "paper:carla_lb2", + "source": "paper:dinov2", + "target": "move:emergent_segmentation_from_self_distillation", "rel": "covers" }, { - "source": "paper:2402.12289", - "target": "paper:distill_vlm", - "rel": "feeds" + "source": "paper:vggt", + "target": "move:replace_handcrafted_sfm_with_feedforward_transformer", + "rel": "covers" }, { - "source": "paper:diffuser", - "target": "paper:nuplan", - "rel": "parallel" + "source": "paper:emernerf", + "target": "move:decompose_scene_into_static_and_dynamic_streams", + "rel": "covers" }, { - "source": "concept:spiking_nn", - "target": "paper:loihi2", + "source": "paper:drivinggaussian", + "target": "move:decompose_scene_into_static_and_dynamic_streams", "rel": "covers" }, { - "source": "concept:spiking_nn", - "target": "paper:tianjic", + "source": "paper:drivinggaussian", + "target": "move:bridge_sim_and_real_via_neural_reconstruction", "rel": "covers" }, { - "source": "concept:spiking_nn", - "target": "paper:truenorth", + "source": "paper:emernerf", + "target": "move:bridge_sim_and_real_via_neural_reconstruction", "rel": "covers" }, { - "source": "concept:spiking_nn", - "target": 
"move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", + "source": "paper:drivinggaussian", + "target": "move:augment_via_counterfactual_object_insertion", "rel": "covers" }, { - "source": "concept:self_attention", - "target": "paper:flashattention", + "source": "paper:2212.10156", + "target": "move:share_queries_across_multiple_tasks", "rel": "covers" }, { - "source": "concept:self_attention", - "target": "paper:performer", + "source": "paper:petrv2", + "target": "move:share_queries_across_multiple_tasks", "rel": "covers" }, { - "source": "concept:transformer", - "target": "paper:flashattention", + "source": "paper:gaia1", + "target": "move:learn_motion_in_latent_space_then_decode", "rel": "covers" }, { - "source": "lab:lab06", - "target": "move:implement_spiking_neuron_with_surrogate_gradient_for_backprop", - "rel": "implements" + "source": "paper:drivedreamer", + "target": "move:learn_motion_in_latent_space_then_decode", + "rel": "covers" }, { - "source": "lab:lab06", - "target": "move:replace_dense_attention_with_sparse_event_driven_attention", - "rel": "implements" + "source": "paper:occ3d", + "target": "move:use_visibility_mask_to_filter_supervision", + "rel": "covers" }, { - "source": "lab:lab02", - "target": "move:use_difficulty_aware_curriculum_to_accelerate_RL", - "rel": "parallel" + "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "target": "paper:lift_splat_shoot", + "rel": "manifests" }, { - "source": "lab:lab03", - "target": "move:cache_KV_state_across_frames_to_amortize_attention_cost", - "rel": "parallel" + "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "target": "paper:bevdet", + "rel": "manifests" }, { - "source": "lab:lab04", - "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", - "rel": "parallel" + "source": "move:embed_camera_geometry_into_positional_encoding", + "target": "paper:petr", + "rel": "manifests" }, { - "source": 
"move:specify_safety_constraint_as_signal_temporal_logic_then_verify", - "target": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", - "rel": "feeds" + "source": "move:replace_class_specific_box_with_class_agnostic_occupancy", + "target": "paper:occupancy_networks_tesla", + "rel": "manifests" }, { - "source": "move:add_shield_layer_that_rejects_unsafe_actions_at_inference", - "target": "move:treat_corner_case_as_OOD_detection_then_route_to_human", - "rel": "parallel" + "source": "move:carry_object_query_across_time_as_recurrent_state", + "target": "paper:streampetr", + "rel": "manifests" }, { - "source": "move:apply_uncertainty_quantification_via_deep_ensemble_or_evidential_layer", - "target": "move:treat_corner_case_as_OOD_detection_then_route_to_human", - "rel": "feeds" + "source": "move:share_queries_across_multiple_tasks", + "target": "paper:2212.10156", + "rel": "manifests" }, { - "source": "move:tile_attention_to_fit_SRAM_for_speedup", - "target": "move:cache_KV_state_across_frames_to_amortize_attention_cost", - "rel": "parallel" + "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "target": "paper:bevfusion", + "rel": "composes" }, { - "source": "move:replace_softmax_attention_with_linear_kernel_for_long_sequence", - "target": "move:replace_dense_attention_with_sparse_event_driven_attention", - "rel": "parallel" + "source": "move:fuse_modalities_in_shared_intermediate_space", + "target": "paper:bevfusion", + "rel": "composes" }, { - "source": "move:quantize_attention_to_int8_with_calibration", - "target": "move:perform_neural_architecture_search_with_latency_constraint", - "rel": "parallel" + "source": "move:treat_detection_as_set_prediction_with_learnable_queries", + "target": "paper:detr3d", + "rel": "composes" }, { - "source": "move:distill_large_VLM_into_small_realtime_specialist", - "target": "move:quantize_attention_to_int8_with_calibration", - "rel": "feeds" + "source": 
"move:reproject_3d_query_to_2d_for_feature_sampling", + "target": "paper:detr3d", + "rel": "composes" }, { - "source": "move:augment_dataset_via_offline_scenario_perturbation", - "target": "move:run_replay_simulation_with_perturbed_initial_conditions_for_robustness", - "rel": "feeds" + "source": "move:treat_detection_as_set_prediction_with_learnable_queries", + "target": "paper:petr", + "rel": "composes" }, { - "source": "move:auto_label_with_offline_model_then_human_in_loop_validate", - "target": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", - "rel": "feeds" + "source": "move:embed_camera_geometry_into_positional_encoding", + "target": "paper:petr", + "rel": "composes" }, { - "source": "move:run_active_learning_loop_to_query_hardest_unlabeled_frames", - "target": "move:run_continual_learning_with_rehearsal_buffer_against_forgetting", - "rel": "feeds" + "source": "move:treat_detection_as_set_prediction_with_learnable_queries", + "target": "paper:2212.10156", + "rel": "composes" }, { - "source": "move:share_LiDAR_camera_calibration_via_continuous_time_optimization", - "target": "move:auto_label_with_offline_model_then_human_in_loop_validate", - "rel": "prereq" + "source": "move:share_queries_across_multiple_tasks", + "target": "paper:2212.10156", + "rel": "composes" }, { - "source": "move:design_closed_loop_metric_correlated_with_real_world_safety", - "target": "move:track_metric_correlation_offline_vs_closed_loop_to_select_models", - "rel": "feeds" + "source": "move:make_pipeline_differentiable_via_shared_latent", + "target": "paper:2212.10156", + "rel": "composes" }, { - "source": "move:formalize_safety_case_with_claim_evidence_assumption", - "target": "move:add_explanation_head_to_promote_interpretability", - "rel": "feeds" + "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "target": "paper:li2022bevformer", + "rel": "composes" }, { - "source": "paper:lift_splat_shoot", - "target": "paper:bevdet", - "rel": "prereq" + 
"source": "move:treat_detection_as_set_prediction_with_learnable_queries", + "target": "paper:li2022bevformer", + "rel": "composes" }, { - "source": "paper:bevdet", - "target": "paper:bevdet4d", - "rel": "extends" + "source": "move:add_auxiliary_perspective_supervision_to_bev", + "target": "paper:bevformer_v2", + "rel": "composes" }, { - "source": "paper:lift_splat_shoot", - "target": "paper:li2022bevformer", - "rel": "parallel" + "source": "move:replace_explicit_module_with_implicit_function", + "target": "paper:nerf", + "rel": "composes" }, { - "source": "paper:li2022bevformer", - "target": "paper:bevformer_v2", - "rel": "extends" + "source": "move:rasterize_differentiable_renderer_for_inverse_problem", + "target": "paper:nerf", + "rel": "composes" }, { - "source": "paper:carion2020", - "target": "paper:detr3d", - "rel": "prereq" + "source": "move:swap_implicit_for_explicit_primitives_when_compute_allows", + "target": "paper:3dgs", + "rel": "composes" }, { - "source": "paper:detr3d", - "target": "paper:petr", - "rel": "parallel" + "source": "move:decompose_scene_into_static_and_dynamic_streams", + "target": "paper:emernerf", + "rel": "composes" }, { - "source": "paper:petr", - "target": "paper:petrv2", - "rel": "extends" + "source": "move:bridge_sim_and_real_via_neural_reconstruction", + "target": "paper:drivinggaussian", + "rel": "composes" }, { - "source": "paper:petr", + "source": "move:carry_object_query_across_time_as_recurrent_state", "target": "paper:streampetr", - "rel": "extends" + "rel": "composes" }, { - "source": "paper:detr3d", - "target": "paper:streampetr", - "rel": "prereq" + "source": "move:make_camera_only_temporal_match_lidar", + "target": "paper:bevdet4d", + "rel": "composes" }, { - "source": "paper:bevdet", - "target": "paper:bevfusion", - "rel": "feeds" + "source": "problem:long_tail_object_categories_in_open_world", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", + "rel": "motivates" }, { - "source": 
"paper:lift_splat_shoot", - "target": "paper:bevfusion", - "rel": "prereq" + "source": "problem:long_tail_object_categories_in_open_world", + "target": "move:open_vocabulary_via_text_alignment", + "rel": "motivates" }, { - "source": "paper:lift_splat_shoot", - "target": "paper:simplebev", - "rel": "contrasts" + "source": "problem:long_tail_object_categories_in_open_world", + "target": "move:augment_via_counterfactual_object_insertion", + "rel": "motivates" }, { - "source": "paper:simplebev", - "target": "paper:bevdet", - "rel": "contrasts" + "source": "problem:sim_to_real_gap_in_camera_only_perception", + "target": "move:bridge_sim_and_real_via_neural_reconstruction", + "rel": "motivates" }, { - "source": "paper:occupancy_networks_tesla", - "target": "paper:surroundocc", - "rel": "feeds" + "source": "problem:temporal_consistency_in_bev_segmentation", + "target": "move:carry_object_query_across_time_as_recurrent_state", + "rel": "motivates" }, { - "source": "paper:tesla_ai_day", - "target": "paper:occupancy_networks_tesla", - "rel": "covers" + "source": "problem:occlusion_reasoning_without_dense_lidar", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", + "rel": "motivates" }, { - "source": "paper:surroundocc", - "target": "paper:occ3d", - "rel": "parallel" + "source": "problem:occlusion_reasoning_without_dense_lidar", + "target": "move:learn_motion_in_latent_space_then_decode", + "rel": "motivates" }, { - "source": "paper:occ3d", - "target": "paper:openocc_unic", - "rel": "parallel" + "source": "problem:label_efficiency_for_3d_annotation", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", + "rel": "motivates" }, { - "source": "paper:li2022bevformer", - "target": "paper:surroundocc", - "rel": "prereq" + "source": "problem:label_efficiency_for_3d_annotation", + "target": "move:use_geometry_as_self_supervision", + "rel": "motivates" }, { - "source": "paper:vit", - "target": "paper:mae", - "rel": "prereq" + "source": 
"problem:label_efficiency_for_3d_annotation", + "target": "move:distill_internet_data_into_small_specialist", + "rel": "motivates" }, { - "source": "paper:vit", - "target": "paper:beit", - "rel": "prereq" + "source": "problem:unknown_geometry_in_distant_or_dark_regions", + "target": "move:make_camera_only_temporal_match_lidar", + "rel": "motivates" }, { - "source": "paper:mae", - "target": "paper:dinov2", - "rel": "parallel" + "source": "problem:multi_modal_calibration_drift", + "target": "move:fuse_modalities_in_shared_intermediate_space", + "rel": "motivates" }, { - "source": "paper:beit", - "target": "paper:mae", - "rel": "parallel" + "source": "problem:rendering_speed_vs_quality_tradeoff", + "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", + "rel": "motivates" }, { - "source": "paper:vit", - "target": "paper:clip", - "rel": "prereq" + "source": "problem:catastrophic_failure_on_rare_weather", + "target": "move:augment_via_counterfactual_object_insertion", + "rel": "motivates" }, { - "source": "paper:clip", - "target": "paper:blip2", - "rel": "prereq" + "source": "problem:annotation_inconsistency_across_datasets", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", + "rel": "motivates" }, { - "source": "paper:clip", - "target": "paper:vilt", - "rel": "parallel" + "source": "problem:depth_ambiguity_in_low_parallax", + "target": "move:make_camera_only_temporal_match_lidar", + "rel": "motivates" }, { - "source": "paper:blip2", - "target": "paper:llava", - "rel": "prereq" + "source": "problem:long_tail_object_categories_in_open_world", + "target": "paper:detr3d", + "rel": "unsolved_by" }, { - "source": "paper:vit", - "target": "paper:dinov1", - "rel": "prereq" + "source": "problem:long_tail_object_categories_in_open_world", + "target": "paper:bevdet", + "rel": "unsolved_by" }, { - "source": "paper:dinov1", - "target": "paper:dinov2", - "rel": "prereq" + "source": "problem:occlusion_reasoning_without_dense_lidar", + 
"target": "paper:detr3d", + "rel": "unsolved_by" }, { - "source": "paper:simclr_mocov3", - "target": "paper:dinov1", - "rel": "contrasts" + "source": "problem:catastrophic_failure_on_rare_weather", + "target": "paper:bevfusion", + "rel": "unsolved_by" }, { - "source": "paper:simclr_mocov3", - "target": "paper:mae", - "rel": "contrasts" + "source": "problem:depth_ambiguity_in_low_parallax", + "target": "paper:lift_splat_shoot", + "rel": "unsolved_by" }, { - "source": "paper:nerf", - "target": "paper:3dgs", - "rel": "contrasts" + "source": "problem:rendering_speed_vs_quality_tradeoff", + "target": "paper:nerf", + "rel": "unsolved_by" }, { - "source": "paper:nerf", - "target": "paper:emernerf", - "rel": "prereq" + "source": "problem:annotation_inconsistency_across_datasets", + "target": "paper:occ3d", + "rel": "unsolved_by" }, { - "source": "paper:3dgs", - "target": "paper:drivinggaussian", - "rel": "prereq" + "source": "insight:multi_view_geometry_as_free_supervision", + "target": "paper:nerf", + "rel": "manifests" }, { - "source": "paper:emernerf", - "target": "paper:drivinggaussian", - "rel": "parallel" + "source": "insight:multi_view_geometry_as_free_supervision", + "target": "paper:emernerf", + "rel": "manifests" }, { - "source": "paper:dinov2", + "source": "insight:multi_view_geometry_as_free_supervision", "target": "paper:depth_anything", - "rel": "prereq" + "rel": "manifests" }, { - "source": "paper:depth_anything", - "target": "paper:lift_splat_shoot", - "rel": "feeds" + "source": "insight:foundation_features_transfer_without_finetune", + "target": "paper:dinov2", + "rel": "manifests" }, { - "source": "paper:depth_anything", - "target": "paper:bevdet", - "rel": "feeds" + "source": "insight:foundation_features_transfer_without_finetune", + "target": "paper:2508.10104", + "rel": "manifests" }, { - "source": "paper:vit", - "target": "paper:vggt", - "rel": "prereq" + "source": "insight:foundation_features_transfer_without_finetune", + "target": "paper:blip2", + 
"rel": "manifests" }, { - "source": "paper:vggt", - "target": "paper:3dgs", - "rel": "feeds" + "source": "insight:occupancy_unifies_static_and_dynamic_scene", + "target": "paper:occupancy_networks_tesla", + "rel": "manifests" }, { - "source": "paper:li2022bevformer", - "target": "paper:2212.10156", - "rel": "feeds" + "source": "insight:occupancy_unifies_static_and_dynamic_scene", + "target": "paper:surroundocc", + "rel": "manifests" }, { - "source": "paper:bevfusion", - "target": "paper:2212.10156", - "rel": "feeds" + "source": "insight:open_vocabulary_via_language_anchoring", + "target": "paper:clip", + "rel": "manifests" }, { - "source": "paper:streampetr", - "target": "paper:2212.10156", - "rel": "feeds" + "source": "insight:open_vocabulary_via_language_anchoring", + "target": "paper:llava", + "rel": "manifests" }, { - "source": "paper:occupancy_networks_tesla", - "target": "paper:2212.10156", - "rel": "feeds" + "source": "insight:implicit_vs_explicit_is_a_continuum", + "target": "paper:nerf", + "rel": "manifests" }, { - "source": "paper:occupancy_networks_tesla", - "target": "paper:vadv2", - "rel": "feeds" + "source": "insight:implicit_vs_explicit_is_a_continuum", + "target": "paper:3dgs", + "rel": "manifests" }, { - "source": "paper:dinov2", - "target": "paper:surroundocc", - "rel": "feeds" + "source": "insight:bev_is_planning_friendly_intermediate", + "target": "paper:li2022bevformer", + "rel": "manifests" }, { - "source": "paper:2508.10104", - "target": "paper:surroundocc", - "rel": "feeds" + "source": "insight:bev_is_planning_friendly_intermediate", + "target": "paper:bevfusion", + "rel": "manifests" }, { - "source": "paper:sam", - "target": "paper:depth_anything", - "rel": "parallel" + "source": "insight:bev_is_planning_friendly_intermediate", + "target": "paper:2212.10156", + "rel": "manifests" }, { - "source": "paper:clip", - "target": "paper:llava", - "rel": "prereq" + "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", + "target": 
"paper:bevdet4d", + "rel": "manifests" }, { - "source": "paper:lift_splat_shoot", - "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "rel": "covers" + "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", + "target": "paper:streampetr", + "rel": "manifests" }, { - "source": "paper:bevdet", - "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "rel": "covers" + "source": "insight:differentiable_rendering_is_universal_inverse_solver", + "target": "paper:nerf", + "rel": "manifests" }, { - "source": "paper:bevfusion", - "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "rel": "covers" + "source": "insight:differentiable_rendering_is_universal_inverse_solver", + "target": "paper:3dgs", + "rel": "manifests" }, { - "source": "paper:carion2020", - "target": "move:treat_detection_as_set_prediction_with_learnable_queries", - "rel": "covers" + "source": "insight:differentiable_rendering_is_universal_inverse_solver", + "target": "paper:drivinggaussian", + "rel": "manifests" }, { - "source": "paper:detr3d", - "target": "move:treat_detection_as_set_prediction_with_learnable_queries", - "rel": "covers" + "source": "insight:multi_view_geometry_as_free_supervision", + "target": "move:use_geometry_as_self_supervision", + "rel": "enables" }, { - "source": "paper:2212.10156", - "target": "move:treat_detection_as_set_prediction_with_learnable_queries", - "rel": "covers" + "source": "insight:foundation_features_transfer_without_finetune", + "target": "move:freeze_giant_backbone_train_small_adapter", + "rel": "enables" }, { - "source": "paper:detr3d", - "target": "move:reproject_3d_query_to_2d_for_feature_sampling", - "rel": "covers" + "source": "insight:foundation_features_transfer_without_finetune", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", + "rel": "enables" }, { - "source": "paper:streampetr", - "target": "move:reproject_3d_query_to_2d_for_feature_sampling", - "rel": 
"covers" + "source": "insight:occupancy_unifies_static_and_dynamic_scene", + "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", + "rel": "enables" }, { - "source": "paper:petr", - "target": "move:embed_camera_geometry_into_positional_encoding", - "rel": "covers" + "source": "insight:open_vocabulary_via_language_anchoring", + "target": "move:open_vocabulary_via_text_alignment", + "rel": "enables" }, { - "source": "paper:petrv2", - "target": "move:embed_camera_geometry_into_positional_encoding", - "rel": "covers" + "source": "insight:implicit_vs_explicit_is_a_continuum", + "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", + "rel": "enables" }, { - "source": "paper:nerf", - "target": "move:replace_explicit_module_with_implicit_function", - "rel": "covers" + "source": "insight:bev_is_planning_friendly_intermediate", + "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", + "rel": "enables" }, { - "source": "paper:3dgs", - "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", - "rel": "covers" + "source": "insight:bev_is_planning_friendly_intermediate", + "target": "move:fuse_modalities_in_shared_intermediate_space", + "rel": "enables" }, { - "source": "paper:bevformer_v2", - "target": "move:add_auxiliary_perspective_supervision_to_bev", - "rel": "covers" + "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", + "target": "move:make_camera_only_temporal_match_lidar", + "rel": "enables" }, { - "source": "paper:streampetr", + "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", "target": "move:carry_object_query_across_time_as_recurrent_state", - "rel": "covers" + "rel": "enables" }, { - "source": "paper:bevdet4d", - "target": "move:carry_object_query_across_time_as_recurrent_state", - "rel": "covers" + "source": "insight:differentiable_rendering_is_universal_inverse_solver", + "target": "move:rasterize_differentiable_renderer_for_inverse_problem", + 
"rel": "enables" }, { - "source": "paper:bevfusion", - "target": "move:fuse_modalities_in_shared_intermediate_space", - "rel": "covers" + "source": "insight:differentiable_rendering_is_universal_inverse_solver", + "target": "move:replace_explicit_module_with_implicit_function", + "rel": "enables" }, { - "source": "paper:occupancy_networks_tesla", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "covers" + "source": "paradigm:camera_first_autonomy", + "target": "insight:temporal_aggregation_buys_what_depth_sensor_buys", + "rel": "composes" }, { - "source": "paper:surroundocc", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "covers" + "source": "paradigm:camera_first_autonomy", + "target": "insight:bev_is_planning_friendly_intermediate", + "rel": "composes" }, { - "source": "paper:mae", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", - "rel": "covers" + "source": "paradigm:camera_first_autonomy", + "target": "paper:lift_splat_shoot", + "rel": "manifests" }, { - "source": "paper:dinov2", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", - "rel": "covers" + "source": "paradigm:camera_first_autonomy", + "target": "paper:occupancy_networks_tesla", + "rel": "manifests" }, { - "source": "paper:2508.10104", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", - "rel": "covers" + "source": "paradigm:camera_first_autonomy", + "target": "paper:tesla_ai_day", + "rel": "manifests" }, { - "source": "paper:blip2", - "target": "move:freeze_giant_backbone_train_small_adapter", - "rel": "covers" + "source": "paradigm:neural_scene_reconstruction_as_engine", + "target": "insight:differentiable_rendering_is_universal_inverse_solver", + "rel": "composes" }, { - "source": "paper:llava", - "target": "move:freeze_giant_backbone_train_small_adapter", - "rel": "covers" + "source": "paradigm:neural_scene_reconstruction_as_engine", + "target": 
"move:bridge_sim_and_real_via_neural_reconstruction", + "rel": "composes" }, { - "source": "paper:vit", - "target": "move:tokenize_continuous_signal_to_use_transformer", - "rel": "covers" + "source": "paradigm:neural_scene_reconstruction_as_engine", + "target": "paper:drivinggaussian", + "rel": "manifests" }, { - "source": "paper:beit", - "target": "move:tokenize_continuous_signal_to_use_transformer", - "rel": "covers" + "source": "paradigm:neural_scene_reconstruction_as_engine", + "target": "paper:emernerf", + "rel": "manifests" }, { - "source": "paper:nerf", - "target": "move:use_geometry_as_self_supervision", - "rel": "covers" + "source": "paradigm:neural_scene_reconstruction_as_engine", + "target": "paper:gaia1", + "rel": "parallel" }, { - "source": "paper:emernerf", - "target": "move:use_geometry_as_self_supervision", + "source": "essay:bitter_lesson", + "target": "move:replace_handcrafted_sfm_with_feedforward_transformer", "rel": "covers" }, { - "source": "paper:2212.10156", - "target": "move:make_pipeline_differentiable_via_shared_latent", + "source": "essay:bitter_lesson", + "target": "move:distill_internet_data_into_small_specialist", "rel": "covers" }, { - "source": "paper:vadv2", - "target": "move:make_pipeline_differentiable_via_shared_latent", + "source": "essay:bitter_lesson", + "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", "rel": "covers" }, { - "source": "paper:nerf", - "target": "move:rasterize_differentiable_renderer_for_inverse_problem", + "source": "concept:bev", + "target": "paper:lift_splat_shoot", "rel": "covers" }, { - "source": "paper:3dgs", - "target": "move:rasterize_differentiable_renderer_for_inverse_problem", + "source": "concept:bev", + "target": "paper:bevfusion", "rel": "covers" }, { - "source": "paper:depth_anything", - "target": "move:distill_internet_data_into_small_specialist", + "source": "concept:bev", + "target": "insight:bev_is_planning_friendly_intermediate", "rel": "covers" }, { - "source": 
"paper:sam", - "target": "move:distill_internet_data_into_small_specialist", + "source": "concept:detr_query", + "target": "paper:detr3d", "rel": "covers" }, { - "source": "paper:bevdet4d", - "target": "move:make_camera_only_temporal_match_lidar", + "source": "concept:detr_query", + "target": "move:treat_detection_as_set_prediction_with_learnable_queries", "rel": "covers" }, { - "source": "paper:streampetr", - "target": "move:make_camera_only_temporal_match_lidar", + "source": "concept:ssl", + "target": "paper:mae", "rel": "covers" }, { - "source": "paper:clip", - "target": "move:open_vocabulary_via_text_alignment", + "source": "concept:ssl", + "target": "paper:dinov1", "rel": "covers" }, { - "source": "paper:dinov1", - "target": "move:emergent_segmentation_from_self_distillation", + "source": "concept:ssl", + "target": "paper:simclr_mocov3", "rel": "covers" }, { - "source": "paper:dinov2", - "target": "move:emergent_segmentation_from_self_distillation", + "source": "concept:ssl", + "target": "paper:clip", "rel": "covers" }, { - "source": "paper:vggt", - "target": "move:replace_handcrafted_sfm_with_feedforward_transformer", - "rel": "covers" + "source": "paper:bench2drive", + "target": "paper:carla_leaderboard", + "rel": "parallel" }, { - "source": "paper:emernerf", - "target": "move:decompose_scene_into_static_and_dynamic_streams", - "rel": "covers" + "source": "paradigm:modular_perception_to_planning_pipeline", + "target": "paper:apollo_autoware", + "rel": "manifests" }, { - "source": "paper:drivinggaussian", - "target": "move:decompose_scene_into_static_and_dynamic_streams", - "rel": "covers" + "source": "move:apply_dual_lagrangian_to_safety_constraint", + "target": "paper:lagrangian_safe_rl", + "rel": "composes" }, { - "source": "paper:drivinggaussian", - "target": "move:bridge_sim_and_real_via_neural_reconstruction", - "rel": "covers" + "source": "move:treat_planner_as_policy_optimisation_with_constraints", + "target": "paper:cpo_safe_rl", + "rel": "composes" 
}, { - "source": "paper:emernerf", - "target": "move:bridge_sim_and_real_via_neural_reconstruction", - "rel": "covers" + "source": "move:carry_recurrent_hidden_state_across_long_videos", + "target": "paper:dreamer_v3", + "rel": "composes" }, { - "source": "paper:drivinggaussian", - "target": "move:augment_via_counterfactual_object_insertion", - "rel": "covers" + "source": "move:joint_attention_over_multi_view_3d_queries", + "target": "paper:li2022bevformer", + "rel": "composes" }, { - "source": "paper:2212.10156", - "target": "move:share_queries_across_multiple_tasks", - "rel": "covers" + "source": "move:gather_diverse_pretraining_data_then_filter_by_quality", + "target": "paper:llama", + "rel": "composes" }, { - "source": "paper:petrv2", - "target": "move:share_queries_across_multiple_tasks", - "rel": "covers" + "source": "move:add_noise_then_denoise_for_score_based_generation", + "target": "paper:ddpm", + "rel": "composes" }, { - "source": "paper:gaia1", - "target": "move:learn_motion_in_latent_space_then_decode", - "rel": "covers" + "source": "paper:alvinn", + "target": "paradigm:imitation_learning", + "rel": "manifests" }, { - "source": "paper:drivedreamer", - "target": "move:learn_motion_in_latent_space_then_decode", - "rel": "covers" + "source": "paper:alvinn", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "motivates" }, { - "source": "paper:occ3d", - "target": "move:use_visibility_mask_to_filter_supervision", - "rel": "covers" + "source": "paper:alvinn", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "contrasts" }, { - "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "target": "paper:lift_splat_shoot", - "rel": "manifests" + "source": "paper:alvinn", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "motivates" }, { - "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "target": "paper:bevdet", + "source": "paper:lbc", + "target": 
"paradigm:differentiable_end_to_end_imitation", "rel": "manifests" }, { - "source": "move:embed_camera_geometry_into_positional_encoding", - "target": "paper:petr", - "rel": "manifests" + "source": "paper:lbc", + "target": "paper:alvinn", + "rel": "extends" }, { - "source": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "target": "paper:occupancy_networks_tesla", - "rel": "manifests" + "source": "paper:lbc", + "target": "paper:carla_leaderboard", + "rel": "validates" }, { - "source": "move:carry_object_query_across_time_as_recurrent_state", - "target": "paper:streampetr", + "source": "paper:lbc", + "target": "paradigm:imitation_learning", "rel": "manifests" }, { - "source": "move:share_queries_across_multiple_tasks", - "target": "paper:2212.10156", + "source": "paper:tcp_carla", + "target": "paradigm:differentiable_end_to_end_imitation", "rel": "manifests" }, { - "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "target": "paper:bevfusion", - "rel": "composes" - }, - { - "source": "move:fuse_modalities_in_shared_intermediate_space", - "target": "paper:bevfusion", - "rel": "composes" + "source": "paper:tcp_carla", + "target": "paper:lbc", + "rel": "extends" }, { - "source": "move:treat_detection_as_set_prediction_with_learnable_queries", - "target": "paper:detr3d", - "rel": "composes" + "source": "paper:tcp_carla", + "target": "paper:carla_leaderboard", + "rel": "validates" }, { - "source": "move:reproject_3d_query_to_2d_for_feature_sampling", - "target": "paper:detr3d", - "rel": "composes" + "source": "paper:tcp_carla", + "target": "paper:bench2drive", + "rel": "feeds" }, { - "source": "move:treat_detection_as_set_prediction_with_learnable_queries", - "target": "paper:petr", - "rel": "composes" + "source": "paper:gameformer", + "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "rel": "covers" }, { - "source": "move:embed_camera_geometry_into_positional_encoding", - "target": "paper:petr", - "rel": 
"composes" + "source": "paper:gameformer", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "manifests" }, { - "source": "move:treat_detection_as_set_prediction_with_learnable_queries", - "target": "paper:2212.10156", - "rel": "composes" + "source": "paper:gameformer", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "extends" }, { - "source": "move:share_queries_across_multiple_tasks", - "target": "paper:2212.10156", - "rel": "composes" + "source": "paper:gameformer", + "target": "paper:tcp_carla", + "rel": "contrasts" }, { - "source": "move:make_pipeline_differentiable_via_shared_latent", - "target": "paper:2212.10156", - "rel": "composes" + "source": "paper:drivegpt", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" }, { - "source": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "target": "paper:li2022bevformer", - "rel": "composes" + "source": "paper:drivegpt", + "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", + "rel": "manifests" }, { - "source": "move:treat_detection_as_set_prediction_with_learnable_queries", - "target": "paper:li2022bevformer", - "rel": "composes" + "source": "paper:drivegpt", + "target": "paper:gpt3", + "rel": "extends" }, { - "source": "move:add_auxiliary_perspective_supervision_to_bev", - "target": "paper:bevformer_v2", + "source": "paper:drivegpt", + "target": "paradigm:imitation_learning", "rel": "composes" }, { - "source": "move:replace_explicit_module_with_implicit_function", - "target": "paper:nerf", - "rel": "composes" + "source": "paper:drivegpt", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "manifests" }, { - "source": "move:rasterize_differentiable_renderer_for_inverse_problem", - "target": "paper:nerf", - "rel": "composes" + "source": "paper:codex", + "target": "paper:gpt3", + "rel": "extends" }, { - "source": 
"move:swap_implicit_for_explicit_primitives_when_compute_allows", - "target": "paper:3dgs", - "rel": "composes" + "source": "paper:codex", + "target": "paradigm:llm_agent_paradigm", + "rel": "enables" }, { - "source": "move:decompose_scene_into_static_and_dynamic_streams", - "target": "paper:emernerf", - "rel": "composes" + "source": "paper:codex", + "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "rel": "manifests" }, { - "source": "move:bridge_sim_and_real_via_neural_reconstruction", - "target": "paper:drivinggaussian", - "rel": "composes" + "source": "paper:codex", + "target": "paradigm:knowledge_driven_reflective_agent", + "rel": "enables" }, { - "source": "move:carry_object_query_across_time_as_recurrent_state", - "target": "paper:streampetr", - "rel": "composes" + "source": "paper:saycan", + "target": "paradigm:llm_agent_paradigm", + "rel": "manifests" }, { - "source": "move:make_camera_only_temporal_match_lidar", - "target": "paper:bevdet4d", - "rel": "composes" + "source": "paper:saycan", + "target": "problem:grounding_language_token_to_continuous_physical_world", + "rel": "covers" }, { - "source": "problem:long_tail_object_categories_in_open_world", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "motivates" + "source": "paper:saycan", + "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "rel": "manifests" }, { - "source": "problem:long_tail_object_categories_in_open_world", - "target": "move:open_vocabulary_via_text_alignment", - "rel": "motivates" + "source": "paper:saycan", + "target": "paper:rt2", + "rel": "parallel" }, { - "source": "problem:long_tail_object_categories_in_open_world", - "target": "move:augment_via_counterfactual_object_insertion", - "rel": "motivates" + "source": "paper:saycan", + "target": "paper:codex", + "rel": "contrasts" }, { - "source": "problem:sim_to_real_gap_in_camera_only_perception", - "target": 
"move:bridge_sim_and_real_via_neural_reconstruction", + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "move:add_entropy_bonus_to_encourage_exploration", "rel": "motivates" }, { - "source": "problem:temporal_consistency_in_bev_segmentation", - "target": "move:carry_object_query_across_time_as_recurrent_state", - "rel": "motivates" + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", + "rel": "feeds" }, { - "source": "problem:occlusion_reasoning_without_dense_lidar", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "motivates" + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "rel": "feeds" }, { - "source": "problem:occlusion_reasoning_without_dense_lidar", - "target": "move:learn_motion_in_latent_space_then_decode", + "source": "paper:gail", + "target": "problem:behavior_cloning_compounds_errors_over_time", "rel": "motivates" }, { - "source": "problem:label_efficiency_for_3d_annotation", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", - "rel": "motivates" + "source": "paper:gail", + "target": "paradigm:imitation_learning", + "rel": "extends" }, { - "source": "problem:label_efficiency_for_3d_annotation", - "target": "move:use_geometry_as_self_supervision", - "rel": "motivates" + "source": "paper:gail", + "target": "insight:human_demonstrations_compress_implicit_reward_function", + "rel": "manifests" }, { - "source": "problem:label_efficiency_for_3d_annotation", - "target": "move:distill_internet_data_into_small_specialist", - "rel": "motivates" + "source": "paper:gail", + "target": "move:add_entropy_bonus_to_encourage_exploration", + "rel": "composes" }, { - "source": "problem:unknown_geometry_in_distant_or_dark_regions", - "target": 
"move:make_camera_only_temporal_match_lidar", - "rel": "motivates" + "source": "paper:gail", + "target": "insight:max_entropy_closes_policy_value_duality", + "rel": "manifests" }, { - "source": "problem:multi_modal_calibration_drift", - "target": "move:fuse_modalities_in_shared_intermediate_space", - "rel": "motivates" + "source": "paper:airl", + "target": "paper:gail", + "rel": "extends" }, { - "source": "problem:rendering_speed_vs_quality_tradeoff", - "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", - "rel": "motivates" + "source": "paper:airl", + "target": "paper:ziebart_max_ent_irl", + "rel": "extends" }, { - "source": "problem:catastrophic_failure_on_rare_weather", - "target": "move:augment_via_counterfactual_object_insertion", - "rel": "motivates" + "source": "paper:airl", + "target": "insight:human_demonstrations_compress_implicit_reward_function", + "rel": "manifests" }, { - "source": "problem:annotation_inconsistency_across_datasets", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "motivates" + "source": "paper:airl", + "target": "paper:irl", + "rel": "feeds" }, { - "source": "problem:depth_ambiguity_in_low_parallax", - "target": "move:make_camera_only_temporal_match_lidar", - "rel": "motivates" + "source": "paper:ng_russell_2000_irl", + "target": "paper:irl", + "rel": "covers" }, { - "source": "problem:long_tail_object_categories_in_open_world", - "target": "paper:detr3d", - "rel": "unsolved_by" + "source": "paper:ng_russell_2000_irl", + "target": "insight:human_demonstrations_compress_implicit_reward_function", + "rel": "manifests" }, { - "source": "problem:long_tail_object_categories_in_open_world", - "target": "paper:bevdet", - "rel": "unsolved_by" + "source": "paper:ziebart_max_ent_irl", + "target": "paper:ng_russell_2000_irl", + "rel": "extends" }, { - "source": "problem:occlusion_reasoning_without_dense_lidar", - "target": "paper:detr3d", - "rel": "unsolved_by" + "source": 
"paper:ziebart_max_ent_irl", + "target": "insight:max_entropy_closes_policy_value_duality", + "rel": "manifests" }, { - "source": "problem:catastrophic_failure_on_rare_weather", - "target": "paper:bevfusion", - "rel": "unsolved_by" + "source": "paper:ziebart_max_ent_irl", + "target": "paper:sac", + "rel": "parallel" }, { - "source": "problem:depth_ambiguity_in_low_parallax", - "target": "paper:lift_splat_shoot", - "rel": "unsolved_by" + "source": "paper:ziebart_max_ent_irl", + "target": "paper:irl", + "rel": "feeds" }, { - "source": "problem:rendering_speed_vs_quality_tradeoff", - "target": "paper:nerf", - "rel": "unsolved_by" + "source": "paper:irl", + "target": "paradigm:imitation_learning", + "rel": "contrasts" }, { - "source": "problem:annotation_inconsistency_across_datasets", - "target": "paper:occ3d", - "rel": "unsolved_by" + "source": "paper:irl", + "target": "insight:human_demonstrations_compress_implicit_reward_function", + "rel": "manifests" }, { - "source": "insight:multi_view_geometry_as_free_supervision", - "target": "paper:nerf", - "rel": "manifests" + "source": "paper:ross_bagnell_2010", + "target": "problem:behavior_cloning_compounds_errors_over_time", + "rel": "validates" }, { - "source": "insight:multi_view_geometry_as_free_supervision", - "target": "paper:emernerf", - "rel": "manifests" + "source": "paper:ross_bagnell_2010", + "target": "paper:ross2011_dagger", + "rel": "motivates" }, { - "source": "insight:multi_view_geometry_as_free_supervision", - "target": "paper:depth_anything", + "source": "paper:ross_bagnell_2010", + "target": "insight:imitation_learning_alone_cannot_recover_from_compounding_errors", "rel": "manifests" }, { - "source": "insight:foundation_features_transfer_without_finetune", - "target": "paper:dinov2", - "rel": "manifests" + "source": "paper:ross_bagnell_2010", + "target": "insight:policy_improvement_bounded_by_distribution_shift", + "rel": "feeds" }, { - "source": "insight:foundation_features_transfer_without_finetune", 
- "target": "paper:2508.10104", + "source": "insight:policy_improvement_bounded_by_distribution_shift", + "target": "paper:ross_bagnell_2010", "rel": "manifests" }, { - "source": "insight:foundation_features_transfer_without_finetune", - "target": "paper:blip2", + "source": "insight:policy_improvement_bounded_by_distribution_shift", + "target": "move:trust_region_step_for_monotonic_improvement", "rel": "manifests" }, { - "source": "insight:occupancy_unifies_static_and_dynamic_scene", - "target": "paper:occupancy_networks_tesla", + "source": "insight:policy_improvement_bounded_by_distribution_shift", + "target": "paradigm:offline_rl", "rel": "manifests" }, { - "source": "insight:occupancy_unifies_static_and_dynamic_scene", - "target": "paper:surroundocc", - "rel": "manifests" + "source": "insight:policy_improvement_bounded_by_distribution_shift", + "target": "insight:imitation_learning_alone_cannot_recover_from_compounding_errors", + "rel": "feeds" }, { - "source": "insight:open_vocabulary_via_language_anchoring", - "target": "paper:clip", + "source": "insight:policy_improvement_bounded_by_distribution_shift", + "target": "validation:trace_dataset_aggregation_for_imitation", "rel": "manifests" }, { - "source": "insight:open_vocabulary_via_language_anchoring", - "target": "paper:llava", + "source": "insight:max_entropy_closes_policy_value_duality", + "target": "paper:ziebart_max_ent_irl", "rel": "manifests" }, { - "source": "insight:implicit_vs_explicit_is_a_continuum", - "target": "paper:nerf", + "source": "insight:max_entropy_closes_policy_value_duality", + "target": "paper:sac", "rel": "manifests" }, { - "source": "insight:implicit_vs_explicit_is_a_continuum", - "target": "paper:3dgs", + "source": "insight:max_entropy_closes_policy_value_duality", + "target": "move:add_entropy_bonus_to_encourage_exploration", "rel": "manifests" }, { - "source": "insight:bev_is_planning_friendly_intermediate", - "target": "paper:li2022bevformer", - "rel": "manifests" + "source": 
"insight:max_entropy_closes_policy_value_duality", + "target": "insight:human_demonstrations_compress_implicit_reward_function", + "rel": "composes" }, { - "source": "insight:bev_is_planning_friendly_intermediate", - "target": "paper:bevfusion", - "rel": "manifests" + "source": "move:learn_set_predictor_with_hungarian_matching", + "target": "paper:carion2020", + "rel": "composes" }, { - "source": "insight:bev_is_planning_friendly_intermediate", + "source": "move:share_object_query_across_tasks_for_e2e_planning", "target": "paper:2212.10156", - "rel": "manifests" + "rel": "composes" }, { - "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", - "target": "paper:bevdet4d", - "rel": "manifests" + "source": "move:learn_open_vocabulary_classifier_via_language_anchor", + "target": "paper:sam", + "rel": "composes" }, { - "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", - "target": "paper:streampetr", - "rel": "manifests" + "source": "paper:pointpillars", + "target": "paper:voxelnet", + "rel": "extends" }, { - "source": "insight:differentiable_rendering_is_universal_inverse_solver", - "target": "paper:nerf", - "rel": "manifests" + "source": "paper:pointpillars", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "composes" }, { - "source": "insight:differentiable_rendering_is_universal_inverse_solver", - "target": "paper:3dgs", - "rel": "manifests" + "source": "paper:pointpillars", + "target": "paper:bevfusion", + "rel": "feeds" }, { - "source": "insight:differentiable_rendering_is_universal_inverse_solver", - "target": "paper:drivinggaussian", - "rel": "manifests" + "source": "paper:voxelnet", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "composes" }, { - "source": "insight:multi_view_geometry_as_free_supervision", - "target": "move:use_geometry_as_self_supervision", - "rel": "enables" + "source": "paper:voxelnet", + "target": "problem:label_efficiency_for_3d_annotation", + "rel": 
"motivates" }, { - "source": "insight:foundation_features_transfer_without_finetune", - "target": "move:freeze_giant_backbone_train_small_adapter", - "rel": "enables" + "source": "paper:centerpoint", + "target": "paper:voxelnet", + "rel": "extends" }, { - "source": "insight:foundation_features_transfer_without_finetune", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", - "rel": "enables" + "source": "paper:centerpoint", + "target": "paper:pointpillars", + "rel": "extends" }, { - "source": "insight:occupancy_unifies_static_and_dynamic_scene", - "target": "move:replace_class_specific_box_with_class_agnostic_occupancy", - "rel": "enables" + "source": "paper:centerpoint", + "target": "paper:detr3d", + "rel": "parallel" }, { - "source": "insight:open_vocabulary_via_language_anchoring", - "target": "move:open_vocabulary_via_text_alignment", - "rel": "enables" + "source": "paper:centerpoint", + "target": "paper:bevfusion", + "rel": "feeds" }, { - "source": "insight:implicit_vs_explicit_is_a_continuum", - "target": "move:swap_implicit_for_explicit_primitives_when_compute_allows", - "rel": "enables" + "source": "paper:krizhevsky2012", + "target": "paper:he2015_resnet", + "rel": "prereq" }, { - "source": "insight:bev_is_planning_friendly_intermediate", - "target": "move:lift_2d_features_to_3d_via_learned_depth_distribution", - "rel": "enables" + "source": "paper:krizhevsky2012", + "target": "paper:vit", + "rel": "prereq" }, { - "source": "insight:bev_is_planning_friendly_intermediate", - "target": "move:fuse_modalities_in_shared_intermediate_space", - "rel": "enables" + "source": "paper:krizhevsky2012", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "motivates" }, { - "source": "insight:temporal_aggregation_buys_what_depth_sensor_buys", - "target": "move:make_camera_only_temporal_match_lidar", - "rel": "enables" + "source": "paper:bert", + "target": "paper:vaswani2017", + "rel": "extends" }, { - "source": 
"insight:temporal_aggregation_buys_what_depth_sensor_buys", - "target": "move:carry_object_query_across_time_as_recurrent_state", - "rel": "enables" + "source": "paper:bert", + "target": "insight:masked_prediction_yields_self_supervised_signal", + "rel": "manifests" }, { - "source": "insight:differentiable_rendering_is_universal_inverse_solver", - "target": "move:rasterize_differentiable_renderer_for_inverse_problem", - "rel": "enables" + "source": "paper:bert", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "composes" }, { - "source": "insight:differentiable_rendering_is_universal_inverse_solver", - "target": "move:replace_explicit_module_with_implicit_function", - "rel": "enables" + "source": "paper:byol", + "target": "paradigm:scaling_data_with_self_supervision", + "rel": "composes" }, { - "source": "paradigm:camera_first_autonomy", - "target": "insight:temporal_aggregation_buys_what_depth_sensor_buys", - "rel": "composes" + "source": "paper:byol", + "target": "paper:dinov2", + "rel": "feeds" }, { - "source": "paradigm:camera_first_autonomy", - "target": "insight:bev_is_planning_friendly_intermediate", + "source": "paper:byol", + "target": "problem:label_efficiency_for_3d_annotation", + "rel": "motivates" + }, + { + "source": "paper:vicreg", + "target": "paper:byol", + "rel": "contrasts" + }, + { + "source": "paper:vicreg", + "target": "paradigm:scaling_data_with_self_supervision", "rel": "composes" }, { - "source": "paradigm:camera_first_autonomy", - "target": "paper:lift_splat_shoot", - "rel": "manifests" + "source": "paper:bahdanau2014_attention", + "target": "paper:vaswani2017", + "rel": "prereq" }, { - "source": "paradigm:camera_first_autonomy", - "target": "paper:occupancy_networks_tesla", - "rel": "manifests" + "source": "paper:bahdanau2014_attention", + "target": "insight:attention_is_typed_entity_communication", + "rel": "motivates" }, { - "source": "paradigm:camera_first_autonomy", - "target": "paper:tesla_ai_day", - "rel": 
"manifests" + "source": "paper:bahdanau2014_attention", + "target": "paper:mamba", + "rel": "contrasts" }, { - "source": "paradigm:neural_scene_reconstruction_as_engine", - "target": "insight:differentiable_rendering_is_universal_inverse_solver", - "rel": "composes" + "source": "paper:schulman2016_gae", + "target": "paper:schulman2017_ppo", + "rel": "feeds" }, { - "source": "paradigm:neural_scene_reconstruction_as_engine", - "target": "move:bridge_sim_and_real_via_neural_reconstruction", - "rel": "composes" + "source": "paper:schulman2016_gae", + "target": "paper:schulman2015_trpo", + "rel": "feeds" }, { - "source": "paradigm:neural_scene_reconstruction_as_engine", - "target": "paper:drivinggaussian", - "rel": "manifests" + "source": "paper:schulman2016_gae", + "target": "paper:sac", + "rel": "parallel" }, { - "source": "paradigm:neural_scene_reconstruction_as_engine", - "target": "paper:emernerf", - "rel": "manifests" + "source": "paper:d4rl", + "target": "paradigm:offline_rl", + "rel": "validates" }, { - "source": "paradigm:neural_scene_reconstruction_as_engine", - "target": "paper:gaia1", + "source": "paper:d4rl", + "target": "paper:cql", + "rel": "validates" + }, + { + "source": "paper:d4rl", + "target": "paper:iql", + "rel": "validates" + }, + { + "source": "paper:d4rl", + "target": "paper:decision_transformer", + "rel": "validates" + }, + { + "source": "paper:d4rl", + "target": "paper:levine_offline_rl_tutorial", "rel": "parallel" }, { - "source": "essay:bitter_lesson", - "target": "move:replace_handcrafted_sfm_with_feedforward_transformer", + "source": "paper:levine_offline_rl_tutorial", + "target": "paradigm:offline_rl", "rel": "covers" }, { - "source": "essay:bitter_lesson", - "target": "move:distill_internet_data_into_small_specialist", + "source": "paper:levine_offline_rl_tutorial", + "target": "paper:cql", "rel": "covers" }, { - "source": "essay:bitter_lesson", - "target": "move:scale_pretraining_then_fine_tune_with_minimal_labels", + "source": 
"paper:levine_offline_rl_tutorial", + "target": "paper:iql", "rel": "covers" }, { - "source": "concept:bev", - "target": "paper:lift_splat_shoot", + "source": "paper:spinning_up", + "target": "paper:schulman2016_gae", "rel": "covers" }, { - "source": "concept:bev", - "target": "paper:bevfusion", + "source": "paper:spinning_up", + "target": "paper:schulman2017_ppo", "rel": "covers" }, { - "source": "concept:bev", - "target": "insight:bev_is_planning_friendly_intermediate", + "source": "paper:spinning_up", + "target": "paper:schulman2015_trpo", "rel": "covers" }, { - "source": "concept:detr_query", - "target": "paper:detr3d", + "source": "paper:spinning_up", + "target": "paper:sac", "rel": "covers" }, { - "source": "concept:detr_query", - "target": "move:treat_detection_as_set_prediction_with_learnable_queries", + "source": "paper:spinning_up", + "target": "paper:mnih2015_dqn", "rel": "covers" }, { - "source": "concept:ssl", - "target": "paper:mae", - "rel": "covers" + "source": "paper:preference_learning", + "target": "paper:rlhf_dpo", + "rel": "feeds" }, { - "source": "concept:ssl", - "target": "paper:dinov1", - "rel": "covers" + "source": "paper:preference_learning", + "target": "paper:schulman2017_ppo", + "rel": "feeds" }, { - "source": "concept:ssl", - "target": "paper:simclr_mocov3", - "rel": "covers" + "source": "paper:planet", + "target": "paper:dreamer_v2", + "rel": "feeds" }, { - "source": "concept:ssl", - "target": "paper:clip", - "rel": "covers" + "source": "paper:planet", + "target": "paper:dreamer_v3", + "rel": "feeds" }, { - "source": "paper:bench2drive", - "target": "paper:carla_leaderboard", - "rel": "parallel" + "source": "paper:planet", + "target": "paradigm:world_model_paradigm", + "rel": "manifests" }, { - "source": "paradigm:modular_perception_to_planning_pipeline", - "target": "paper:apollo_autoware", + "source": "paper:planet", + "target": "paradigm:model_based_rl", "rel": "manifests" }, { - "source": 
"move:apply_dual_lagrangian_to_safety_constraint", - "target": "paper:lagrangian_safe_rl", - "rel": "composes" + "source": "paper:planet", + "target": "paper:world_models", + "rel": "extends" }, { - "source": "move:treat_planner_as_policy_optimisation_with_constraints", - "target": "paper:cpo_safe_rl", - "rel": "composes" + "source": "paper:planet", + "target": "move:latent_imagination_rollout", + "rel": "manifests" }, { - "source": "move:carry_recurrent_hidden_state_across_long_videos", - "target": "paper:dreamer_v3", - "rel": "composes" + "source": "paper:planet", + "target": "insight:world_models_let_planning_be_done_in_imagination", + "rel": "manifests" }, { - "source": "move:joint_attention_over_multi_view_3d_queries", - "target": "paper:li2022bevformer", - "rel": "composes" + "source": "paper:planet", + "target": "paper:muzero", + "rel": "parallel" }, { - "source": "move:gather_diverse_pretraining_data_then_filter_by_quality", - "target": "paper:llama", - "rel": "composes" + "source": "insight:q_learning_max_is_optimistically_biased", + "target": "move:double_q_to_reduce_overestimation", + "rel": "motivates" }, { - "source": "move:add_noise_then_denoise_for_score_based_generation", - "target": "paper:ddpm", - "rel": "composes" + "source": "insight:q_learning_max_is_optimistically_biased", + "target": "move:bootstrap_target_network_to_stabilize_off_policy_learning", + "rel": "parallel" + }, + { + "source": "insight:q_learning_max_is_optimistically_biased", + "target": "paper:mnih2015_dqn", + "rel": "motivates" + }, + { + "source": "insight:q_learning_max_is_optimistically_biased", + "target": "paper:sac", + "rel": "manifests" + }, + { + "source": "insight:q_learning_max_is_optimistically_biased", + "target": "paradigm:offline_rl", + "rel": "motivates" }, { "source": "paper:gpt3", diff --git a/docs/data/graph_extended.stats.json b/docs/data/graph_extended.stats.json index 20e293d..1e8d859 100644 --- a/docs/data/graph_extended.stats.json +++ 
b/docs/data/graph_extended.stats.json @@ -1,51 +1,51 @@ { - "node_count": 507, - "edge_count": 1448, + "node_count": 541, + "edge_count": 1568, "by_kind": { - "paper": 186, + "paper": 213, "channel": 3, "course": 2, "essay": 1, "concept": 25, "lab": 11, - "move": 135, + "move": 138, "problem": 41, - "insight": 54, + "insight": 58, "paradigm": 25, "validation": 24 }, "by_tier": { "spine": 14, - "S": 39, - "A": 75, - "B": 64, + "S": 43, + "A": 94, + "B": 69, "concept": 25, "lab": 11, - "move": 135, + "move": 138, "problem": 41, - "insight": 54, + "insight": 57, "paradigm": 25, "validation": 24 }, "by_topic": { - "e2e_ad": 24, + "e2e_ad": 29, "vlm_vla": 71, "brain_inspired": 7, - "ssl_vision": 37, + "ssl_vision": 40, "math_foundations": 14, "companion_media": 2, - "rl_foundations": 9, - "deep_rl": 80, + "rl_foundations": 15, + "deep_rl": 86, "meta_philosophy": 3, - "world_models": 33, - "planning": 16, + "world_models": 34, + "planning": 18, "control": 7, "safety": 16, - "foundation_models": 18, - "alignment": 4, - "llm_agent": 14, + "foundation_models": 21, + "alignment": 5, + "llm_agent": 16, "reasoning": 8, - "evaluation_benchmark": 18, + "evaluation_benchmark": 19, "simulator": 8, "dataset": 6, "efficient_computing": 14, @@ -54,23 +54,23 @@ "safety_standard": 14, "geometry_3d": 27, "sensor_fusion": 3, - "scene_understanding": 27, + "scene_understanding": 31, "data_engineering": 1, "methodology": 1 }, "by_rel": { - "prereq": 89, - "covers": 283, - "parallel": 111, - "contrasts": 23, - "extends": 37, - "feeds": 88, + "prereq": 92, + "covers": 294, + "parallel": 118, + "contrasts": 29, + "extends": 51, + "feeds": 104, "implements": 14, - "manifests": 169, - "enables": 58, - "composes": 361, - "motivates": 144, - "validates": 64, + "manifests": 200, + "enables": 60, + "composes": 372, + "motivates": 156, + "validates": 71, "unsolved_by": 7 }, "by_source": { @@ -81,6 +81,11 @@ "methodology_axis": 84, "perception_axis": 75, "round2_integrity": 8, + "round4_driving": 8, 
+ "round4_irl": 8, + "round4_orphans": 3, + "round4_perception": 8, + "round4_rlinfra": 7, "wave_e_stubs": 10 } } diff --git a/docs/data/lens_fragments/driving.json b/docs/data/lens_fragments/driving.json new file mode 100644 index 0000000..bac93a9 --- /dev/null +++ b/docs/data/lens_fragments/driving.json @@ -0,0 +1,20 @@ +{ + "paper:alvinn": { + "assumption": "人类驾驶演示 $\\mathcal{D}_\\text{expert}$ 覆盖了部署时会遇到的状态分布,于是把每帧 $(I,a)$ 当独立样本做监督回归 $\\min_\\theta\\mathbb{E}\\,\\|f_\\theta(I)-a\\|^2$ 就足以学会开车——感知与决策可以被一个端到端函数一次性吞掉,无需车道线检测等手工模块。", + "failure": "纯演示几乎只含正常行驶,模型从没见过\"已偏离车道该如何回正\",一旦轻微出错就进入演示未覆盖区、误差以 $\\mathcal{O}(\\varepsilon T^2)$ 复合发散(见 [DAgger](paper_ross2011_dagger.md))。Pomerleau 的补救是用几何变换合成偏移位姿样本——这正暴露了\"端到端模仿不会自带纠偏\"这一原则性缺口。", + "experiment": "在闭环里人为把车摆到偏离中心 $d$ 米处,测纯演示训练的网络与\"加合成偏移样本\"训练的网络拉回成功率随 $d$ 的曲线。可证伪点:若不加合成样本恢复率也不随 $d$ 急剧下降,则\"演示覆盖不足是主要失效因\"不成立。", + "isomorphism": "与控制论里开环 vs 闭环稳定性、语言模型自回归的暴露偏差、数值积分误差随步数累积同构;其\"合成偏移样本\"则是 [DAgger](paper_ross2011_dagger.md) 数据聚合的早期手工版,也预演了后来的影子模式数据飞轮。" + }, + "paper:lbc": { + "assumption": "决策与感知是难度悬殊的两个问题:在仿真器特权真值 $\\phi(s)$ 上学决策几乎免费且近乎完美,于是先训出\"作弊\"教师 $\\pi^\\text{priv}$,再把它当作可在任意状态、任意高层指令下查询的标注器,让像素学生 $\\pi^\\text{sens}$ 在自己访问的分布 $d_{\\pi^\\text{sens}}$ 上蒸馏——等价于一次取之不尽的在线 [DAgger](paper_ross2011_dagger.md)。", + "failure": "教师的能力上限钉死学生:任何漏进特权输入的感知缺陷、或教师未覆盖的长尾交规,学生都无从学起。方法还强依赖\"能提供真值的仿真器\",真实世界没有这种上帝视角信息源,迁移到路测只能用离线日志+高精地图近似教师,红利大打折扣。", + "experiment": "消融蒸馏的 on-policy 成分:对比\"在学生 rollout 状态上向教师索标注\"与\"仅用教师离线轨迹离线模仿\",画 CARLA 闭环 driving score 差距。可证伪点:若两者闭环分接近,则 LBC 优势并非来自\"在场教师\"的无限 DAgger,而仅是更好的离线监督。", + "isomorphism": "与知识蒸馏\"大教师→小学生\"、特权信息学习(learning using privileged information, LUPI)、以及 AlphaZero 用搜索结果回标网络同构——核心都是\"先用一个昂贵/特权的过程造出高质量监督,再蒸馏进受限的部署模型\"。" + }, + "paper:saycan": { + "assumption": "语言知识与物理可行性是两类必须分别获得再融合的信号:LLM 给\"语义有用性\" $p_\\text{LLM}(\\ell_a\\mid i,h)$,价值函数给\"当前状态成功概率\" $V_a^\\pi(s)$,两者相乘 $a^\\star=\\arg\\max_a p_\\text{LLM}\\cdot V_a^\\pi$ 即能在\"既相关又可行\"的动作上做出接地的选择,无需微调 LLM。", + "failure": 
"技能库覆盖即能力上限——库里没有的动作再聪明的 LLM 也唤不出。逐步乘积是贪心分解,缺长程回溯,复杂任务会走进死胡同;而 $V_a^\\pi(s)$ 一旦高估就会选到其实做不到的危险动作(驾驶里等于\"以为来得及变道\"),安全攸关时价值校准比语言更关键。", + "experiment": "把价值函数 $V_a^\\pi(s)$ 系统性加噪/偏置,测任务成功率与\"选到不可行动作\"的频率随价值误差的曲线,并与只用 LLM 打分(去掉 can 项)对照。可证伪点:若去掉 can 项成功率几乎不变,则\"可供性必须被显式估计\"这一论断对该任务不成立。", + "isomorphism": "与 [Codex](paper_codex.md) 的 code-as-action 是 grounding 的两条路(可供性价值 vs 可执行可验证代码);其\"提议×可行性\"结构同构于 MPC 在可行域内优化、束搜索的\"分数×约束\"剪枝、以及强化学习里 actor 提议、critic 把关的分工。" + } +} diff --git a/docs/data/lens_fragments/irl.json b/docs/data/lens_fragments/irl.json new file mode 100644 index 0000000..7ced0e9 --- /dev/null +++ b/docs/data/lens_fragments/irl.json @@ -0,0 +1,20 @@ +{ + "paper:gail": { + "assumption": "承重假设:模仿等价于占用度量匹配,即只要让策略与专家的状态-动作分布 $\\rho_\\pi=\\rho_{\\pi_E}$ 在 Jensen-Shannon 散度意义下重合,行为就等价;且能负担大量 on-policy 环境交互来让判别器在策略自身访问的分布 $d_\\pi$ 上打分。", + "failure": "失效边界:当环境交互昂贵或只有离线日志时无法做 on-policy rollout,框架失效;判别器一旦过强会使 reward 信号 $-\\log D$ 饱和、策略梯度方差爆炸;若状态表示含可作弊的捷径特征(时间戳、id),$D$ 学到伪差异而非真实行为差异,对抗优化退化。", + "experiment": "可证伪的下一步:在固定交互预算下,把 GAIL 与同等数据下的 [DAgger](paper_ross2011_dagger.md) 和 BC 在 CARLA 闭环里做样本效率对照——预测当专家可被随时查询时 DAgger 的样本效率应优于 GAIL;若 GAIL 在低交互预算下反超,则'分布匹配优于数据聚合'的假设被推翻。", + "isomorphism": "同构迁移:与 GAN 的生成器-判别器博弈、统计物理里用密度比估计两个分布的差异、以及 [AIRL](paper_airl.md) 把判别器结构化恢复 reward,是同一密度比/占用匹配结构在不同领域的具现。" + }, + "paper:ziebart_max_ent_irl": { + "assumption": "承重假设:专家行为服从 Boltzmann 分布 $P(\\tau)\\propto\\exp(\\theta^\\top \\mathbf{f}_\\tau)$,即高 reward 轨迹概率高但不独占;reward 在手写线性特征 $\\mathbf{f}(s,a)$ 张成的空间里可表示;动力学可枚举/确定,配分函数与状态访问频率可用 forward-backward 精确算出。", + "failure": "失效边界:状态空间大或随机动力学下配分函数 $Z(\\theta)$ 无法精确求,必须采样近似而引入噪声;reward 真实结构若超出线性特征基的张成范围则永远学不到;专家若严重偏离软最优(如新手或对抗驾驶),Boltzmann 模型设定错误,反推的 reward 随之失真。", + "experiment": "可证伪的下一步:在行人轨迹预测上,用最大熵 IRL 输出的多模态可达路径分布与一个纯 [GAIL](paper_gail.md) 判别器的输出对比覆盖率与校准度;可证伪预测是——当特征基覆盖目的地语义时 MaxEnt 的多模态校准应不劣于对抗方法,若系统性更差则说明手写特征是瓶颈。", + "isomorphism": "同构迁移:同一最大熵/配分函数结构出现在统计物理的自由能 $-T\\log Z$、概率图模型的对数配分函数、以及最大熵 forward RL([SAC](paper_sac.md) 的 soft 价值 
$V_{\\mathrm{soft}}=\\alpha\\log\\sum_a e^{Q/\\alpha}$)——正逆两向共享同一对偶。" + }, + "paper:ross_bagnell_2010": { + "assumption": "承重假设:模仿可归约为一串监督学习子问题,单步犯错率 $\\epsilon$ 有界且可估计;存在可在任意状态被查询的交互式专家,能在策略实际到达的分布上提供标注;horizon $T$ 有限。其核心界 $J(\\hat\\pi)\\le J(\\pi^*)+O(\\epsilon T^2)$ 成立依赖训练分布 $d_{\\pi^*}$ 与部署分布 $d_{\\hat\\pi}$ 的总变差随时间累积这一机制。", + "failure": "失效边界:当专家不可交互查询(只有静态日志)时 forward training 无法构造分布正确的训练集;$T$ 很大时为每步训独立策略不可行(这正是 [DAgger](paper_ross2011_dagger.md) 用单策略+聚合取代它的原因);若单步误差 $\\epsilon$ 本身随状态强烈异质,统一上界过松,二次/线性的区分不再准确刻画真实复合行为。", + "experiment": "可证伪的下一步:在可控仿真里系统改变 horizon $T$,测量 BC 与 DAgger 的闭环失败率随 $T$ 的标度——可证伪预测是 BC 失败率应近似 $T^2$、DAgger 近似 $T$;若 BC 也呈线性标度,则二次复合机制(或其前提)被推翻。", + "isomorphism": "同构迁移:$O(\\epsilon T^2)$ 的分布偏移复合与 [策略改进被分布偏移上界锁死](insight_policy_improvement_bounded_by_distribution_shift.md) 的内核一致,同样的'用旧分布评估新分布'结构出现在 [TRPO](paper_schulman2015_trpo.md) 的信赖域、离线 RL 的支撑约束、以及监督学习的协变量偏移泛化界中。" + } +} diff --git a/docs/data/lens_fragments/perception.json b/docs/data/lens_fragments/perception.json new file mode 100644 index 0000000..6d7dc52 --- /dev/null +++ b/docs/data/lens_fragments/perception.json @@ -0,0 +1,20 @@ +{ + "paper:pointpillars": { + "assumption": "自动驾驶里的目标几乎都贴地,$z$ 方向分布很窄,因此把整列高度压成一个柱(pillar)几乎不损失可分性,可以用便宜的 2D 卷积彻底替代 3D 卷积。它还默认柱内固定采样 $N$ 个点足以代表该柱,超出的点可随机丢弃。", + "failure": "在多层立体结构(立交桥下的车、货架式场景、悬空横杆)里高度信息恰恰是判别关键,压扁 $z$ 会把上下不同物体混进同一柱而无法区分。柱内固定采样 $N$ 在近处高密度区会丢点,导致大目标边界与小目标召回受损;与保留体素的 [VoxelNet](paper_voxelnet.md) 相比,这正是它牺牲的部分。", + "experiment": "按高度分层构造测试集:把目标按其上方是否存在另一物体分成单层 vs 多层两组,对比 [PointPillars](paper_pointpillars.md) 与保留 $z$ 维体素的 [VoxelNet](paper_voxelnet.md) 在两组上的 AP 差。可证伪预测:两者在单层组接近、在多层组 PointPillars 显著落后;若多层组也无明显差距,则压扁 $z$ 的代价被高估。", + "isomorphism": "与 [Lift-Splat-Shoot](paper_lift_splat_shoot.md) 把相机特征压成 BEV、占用栅格沿高度边缘化、乃至把图像投影成俯视图都是同一招——为换取规则的 2D 计算与 planning 友好的 BEV 表示而牺牲一个维度的细节,区别只在压扁的是相机射线还是激光点。" + }, + "paper:krizhevsky2012": { + "assumption": "在足够大的标注数据(ImageNet)与足够强的算力(GPU)下,一个端到端学习层级特征的深度卷积网络,会全面优于任何手工设计的特征流水线(SIFT/HOG + 
SVM)。即瓶颈是数据与算力,不是人类对特征的巧思。", + "failure": "当标注数据稀缺或下游分布与 ImageNet 差异巨大时,纯监督预训练的迁移会退化;它对大量人工标签的依赖本身就是瓶颈,这正是后来 [BYOL](paper_byol.md)、[VICReg](paper_vicreg.md) 等自监督要摆脱的。其具体设计(LRN、双 GPU 拆分)也是时代权宜,盲目照搬会引入无谓约束。", + "experiment": "固定架构,沿两个轴做消融:(1) 把训练标签量按 $\\{1\\%, 10\\%, 100\\%\\}$ 抽取,(2) 把算力/训练步数按档位缩放,画测试精度对数据量与算力的响应面。可证伪预测:精度随数据与算力近似单调上升且未饱和;若在小数据档手工特征反超,则\"瓶颈是数据算力而非特征工程\"在该 regime 不成立。", + "isomorphism": "与 [BERT](paper_bert.md) 在语言、[AlphaGo](essay_bitter_lesson.md) 在搜索里的胜利同型——都是\"通用、可随算力扩展的学习方法\"取代\"精巧但难扩展的人类设计\",是 bitter lesson 在不同模态上的同一剧本,区别只在被取代的手工组件是 SIFT、语言学规则还是开局棋谱。" + }, + "paper:bert": { + "assumption": "一个极通用、无需人工标签的自监督目标——遮住约 $15\\%$ 的 token 让模型用双向上下文填回——就能从海量文本中学出可迁移的通用语义表示,且这个目标随数据与算力放大持续受益。下游任务只需在预训练表示上加轻量头微调。", + "failure": "`[MASK]` 只在预训练出现、微调与推理不出现,造成天然分布失配,必须用 $80/10/10$ 混合补偿,否则迁移受损。掩码目标偏向理解/抽取,直接做生成需改造;当下游语料与预训练语料严重失配(专业领域、低资源语言)时表示迁移也会退化。", + "experiment": "在固定算力下对比掩码比例 $\\{5\\%, 15\\%, 40\\%, 80\\%\\}$ 与训练数据量档位对下游 GLUE 类任务的影响,并消融 NSP 任务。可证伪预测:存在一个最优掩码比例区间,且加大数据与训练步数(去掉弱信号的 NSP)比调目标更有效(RoBERTa 现象);若性能对数据规模不敏感,则\"可规模化的简单目标\"论断被削弱。", + "isomorphism": "与视觉的 MAE/BEiT 遮像素块、[VICReg](paper_vicreg.md)/[BYOL](paper_byol.md) 从无标签图像学表示是同一主线——用一个不需人工标签、能吃下任意规模数据的自监督目标学通用表示,区别只在被预测/被对齐的是被遮 token、被遮像素,还是同图的另一增广视图。" + } +} diff --git a/docs/data/lens_fragments/rlinfra.json b/docs/data/lens_fragments/rlinfra.json new file mode 100644 index 0000000..e165f44 --- /dev/null +++ b/docs/data/lens_fragments/rlinfra.json @@ -0,0 +1,20 @@ +{ + "paper:schulman2016_gae": { + "assumption": "数据严格 on-policy(来自当前策略),且学到的价值函数 $V$ 在策略实际访问的状态上偏差有界,从而 TD 残差 $\\delta_t = r_t + \\gamma V(s_{t+1}) - V(s_t)$ 是一个低方差但只有小偏差的优势近似。隐含相信 $\\lambda$ 这一个标量足以在 bias/variance 谱上找到一个对所有任务都接近最优的折中点。", + "failure": "当 $V$ 严重欠拟合(训练早期、或非平稳奖励)时,小 $\\lambda$ 会把价值函数的偏差直接灌进梯度,策略学歪;而长 horizon 任务取大 $\\lambda$ 又让方差随步数爆炸。一旦数据不再 on-policy(误塞进 replay buffer 复用旧 rollout),GAE 的无偏性前提崩塌,整条优势估计失真。", + "experiment": "固定 [PPO](paper_schulman2017_ppo.md) 其余超参,在一个长 horizon 连续控制任务上扫 $\\lambda \\in \\{0, 0.9, 0.95, 0.97, 
0.99, 1.0\\}$,画最终回报与样本效率随 $\\lambda$ 的 U 形曲线,并叠加价值函数解释方差(explained variance)。可证伪预测:存在一个中间 $\\lambda^\\star$ 严格优于两端;若 $\\lambda=1$(纯蒙特卡洛)始终最优,则\"价值自举降方差有净收益\"的核心论断被推翻。", + "isomorphism": "与 TD($\\lambda$) 的 eligibility trace 是同一递推($\\lambda$ 即\"信任 bootstrap 多深\");也对应时间序列预测里 multi-step vs one-step forecast 的偏差-方差权衡、以及数值微分中步长选择对截断误差与舍入误差的折中——都是\"用一个尺度参数在两类误差间滑动\"。" + }, + "paper:d4rl": { + "assumption": "用\"数据生成策略的质量谱\"(random / medium / expert / medium-replay / medium-expert)构造的难度梯度,能代表真实离线 RL 部署中会遇到的分布困难;且把任务回报线性归一化到 $[0,100]$(random≈0、expert≈100)后,跨任务平均分是一个有意义、可比的进展标量。", + "failure": "固定基准被长期刷榜后,算法会过拟合到 D4RL 特定数据集的人为结构(尤其 AntMaze 的稀疏奖励与 stitching 布局),归一化分上涨却不蕴含真实长尾分布上的泛化。AntMaze 的 done/奖励偏移处理、以及 D4RL 跨版本的数据差异,还会让\"同一张表里的数字\"其实不可比。", + "experiment": "取在 D4RL 上排名靠前的若干离线算法(如 [CQL](paper_cql.md)、[IQL](paper_iql.md)、[Decision Transformer](paper_decision_transformer.md)),迁移到一个分布结构不同的离线任务集(如真实驾驶日志重采样),比较 D4RL 排名与新基准排名的秩相关。可证伪点:若两套排名高度一致,则\"过拟合基准结构\"的担忧被削弱;若秩相关接近零,则\"D4RL 分数预测迁移性能\"被推翻。", + "isomorphism": "与 ImageNet 之于视觉、GLUE 之于 NLP 同构——一个被广泛采用的固定基准既加速进展(提供统一尺子)又诱发过拟合(榜单饱和后脱离真实分布);也对应计量经济学里\"用历史数据评估反事实策略\"的不可实验困境,呼应 [离线 RL 范式](paradigm_offline_rl.md) 的核心张力。" + }, + "paper:planet": { + "assumption": "环境的可控动力学可被一个低维潜空间模型(RSSM:确定性记忆 $s_t$ + 随机隐变量 $z_t$)以足够保真度学到,使得完全在潜空间里展开的多步预测,其累积奖励估计与真实环境足够相关——从而 CEM 规划可以全程不解码回像素、不碰真实环境。", + "failure": "潜动力学在数据稀疏区失真,CEM 规划会专门挑到模型高估回报的动作序列(model exploitation),在想象里很强、上车就崩。去掉 RSSM 的确定性骨干会让长程预测迅速发散,去掉随机分量又无法建模环境噪声;像素重建损失还会把模型容量耗在视觉细节而非任务相关动力学上。", + "experiment": "用 PlaNet 在像素观测任务上展开 $H=1\\ldots 50$ 步,画\"想象累积奖励\"与\"真实累积奖励\"的相关系数随 $H$ 的衰减曲线,定位相关性跌破 0.5 的临界 $H^\\star$;再对比保留/移除 latent overshooting 时 $H^\\star$ 的变化。可证伪点:若 $H^\\star$ 与最终规划性能无关,则\"长程预测保真度主导规划质量\"不成立。", + "isomorphism": "等同于模型预测控制(MPC)在学得动力学上滚动优化、卡尔曼滤波维持内部状态做前瞻、以及 [MuZero](paper_muzero.md) 在隐空间做树搜索——都是\"用一个可查询的环境替身把昂贵真实试错搬进廉价内部推演\",是 [世界模型让规划在想象中进行](insight_world_models_let_planning_be_done_in_imagination.md) 的潜空间-规划实例。" + } +} diff --git a/docs/data/research_lens.json 
b/docs/data/research_lens.json index 5194a8e..e1122ce 100644 --- a/docs/data/research_lens.json +++ b/docs/data/research_lens.json @@ -94,5 +94,119 @@ "failure": "强依赖一个完美、廉价、可回溯的环境模型——这正是驾驶不具备的([MuZero](paper_muzero.md) 改为学模型来松绑)。自博弈要求对称零和或可自我对弈的结构;搜索成本随分支因子爆炸,实时控制预算下难以照搬。", "experiment": "复刻 AlphaZero 自博弈循环(本仓库 [validation trace](validation_trace_alpha_zero_self_play_with_mcts_guided_policy.md)),系统削减每步 MCTS 模拟次数,测棋力随搜索预算的衰减,定位\"网络先验单独能达到的下界\"。可证伪点:若零搜索的纯网络先验已接近满搜索棋力,则\"测试时搜索是棋力主因\"被推翻。", "isomorphism": "是 [测试时算力替代训练时算力](insight_test_time_compute_substitutes_train_time_via_search.md) 的奠基范例,与大模型推理时的多次采样投票、规划里的 MPC 滚动优化同构;自蒸馏搜索结果也对应\"用慢而准的过程教快而糙的模型\"。" + }, + "paper:vit": { + "assumption": "图像可以被切成 patch 当作 token 序列,交给纯 Transformer 处理;只要数据量足够大,自注意力能学到卷积的局部性先验,无需把它硬编码进结构。相信\"规模 + 弱先验\"在大数据下胜过\"强先验 + 小数据\"。", + "failure": "在中小数据集(ImageNet-1k 从零训)上,缺少平移等变与局部性先验的 ViT 不如 CNN,必须靠 JFT 级预训练或强增广补偿;$\\mathcal{O}(n^2)$ 注意力也让高分辨率输入昂贵。归纳偏置不是没用,只是被推迟到\"数据买得起\"时再丢弃。", + "experiment": "固定算力,扫描预训练数据规模,画 ViT 与 [ResNet](paper_he2015_resnet.md) 的精度交叉点 $N^\\star$:小于 $N^\\star$ 时 CNN 先验占优,大于则 ViT 反超。可证伪点:若任意数据规模下 ViT 都不反超,则\"先验可被规模替代\"在视觉上不成立。", + "isomorphism": "与 [Transformer](paper_vaswani2017.md) 在语言上抛弃循环先验、[Bitter Lesson](essay_bitter_lesson.md) 中\"通用方法 + 算力\"反复胜出是同一脉络;patch 化也对应 [把图像 token 化](move_patchify_tokenization.md) 这一可复用动作。" + }, + "paper:carion2020": { + "assumption": "目标检测可以写成\"集合预测\":用一组可学习的 object query 直接输出无序框集合,配二分图匹配损失,从而删掉 NMS、anchor 这些手工后处理。相信端到端可微的集合损失能学出比启发式去重更好的归纳。", + "failure": "DETR 收敛极慢(数百 epoch),小目标精度差,根因是匈牙利匹配在训练早期不稳定、query 与目标的对应关系漂移;Deformable-DETR 等后续才缓解。把后处理塞进损失不是免费的——它把难度转移到了优化稳定性上。", + "experiment": "消融 query 数量与匹配策略:固定 backbone,测匹配从匈牙利换成固定先验分配时收敛速度与小目标 AP 的变化,定位\"集合损失\"真正的代价来源。可证伪点:若去掉二分匹配后精度不降反升,则集合预测的必要性被削弱。", + "isomorphism": "object query 与 [BEVFormer](paper_li2022bevformer.md)、[UniAD](paper_2212.10156_uniad.md) 的查询总线、[把检测当集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md) 同源;集合损失也对应排序学习、点云配准里的可微匹配。" + }, + 
"paper:li2022bevformer": { + "assumption": "多相机特征可以通过一组 BEV query 用时空注意力\"拉\"到统一的鸟瞰平面,时序上聚合历史 BEV 即可补足单帧缺失的速度与遮挡信息。相信显式的 BEV 中间表示比各相机各自为政更利于下游规划。", + "failure": "BEV 投影依赖准确的相机外参与自车位姿,标定漂移会让特征\"投歪\";远处与小目标在 BEV 网格上分辨率不足;时序注意力对动态物体的运动补偿有限。统一表示的代价是对几何精度的强依赖。", + "experiment": "注入可控的外参扰动,测 BEV 检测/分割精度随标定误差的衰减曲线,并与 [LSS](paper_lift_splat_shoot.md) 的深度分布投影对比鲁棒性。可证伪点:若精度对标定扰动不敏感,则\"BEV 强依赖几何\"的判断需修正。", + "isomorphism": "BEV 作为规划友好的中间层,是 [洞察:BEV 是规划友好的中间表示](insight_bev_is_planning_friendly_intermediate.md) 的具现;与 [占据栅格统一动静态场景](insight_occupancy_unifies_static_and_dynamic_scene.md)、SLAM 的全局地图同属\"投影到一个利于决策的公共坐标系\"。" + }, + "paper:sac": { + "assumption": "最优策略应在最大化回报的同时最大化熵 $J(\\pi)=\\sum_t \\mathbb{E}[r_t+\\alpha\\mathcal{H}(\\pi(\\cdot\\mid s_t))]$,熵项既鼓励探索又让策略对模型误差更鲁棒。相信\"最大熵 + off-policy\"能兼得样本效率与稳定。", + "failure": "温度 $\\alpha$ 决定探索-利用平衡,固定值难调(自动调 $\\alpha$ 才实用);连续高维动作下双 Q 仍有残余高估;奖励尺度变化会让熵项相对权重漂移。最大熵的好处在确定性最优任务上反而稀释。", + "experiment": "扫描温度 $\\alpha$(含自动调节),画\"探索充分度-最终回报\"曲线,并测把熵项置零(退化为 DDPG 式)后的崩溃频率。可证伪点:若熵项对最终性能与稳定性都无显著影响,则最大熵框架在该任务上是冗余的。", + "isomorphism": "最大熵 RL 与统计力学的玻尔兹曼分布、概率推断里的变分自由能、[洞察:最大熵打通策略-价值对偶](insight_max_entropy_closes_policy_value_duality.md) 同构——都把\"带温度的最优化\"写成同一个软极大形式。" + }, + "paper:muzero": { + "assumption": "不需要真实环境模型,只需学一个\"对预测价值/策略/奖励够用\"的隐空间模型,就能在隐空间里做 MCTS。相信模型只要在决策相关量上自洽,不必重建观测的每个像素。", + "failure": "隐模型只对训练分布内的展开可信,长程或分布外展开会累积误差;学模型 + 搜索 + 表征三者联合训练对超参与算力极敏感。\"够用即可\"的隐模型在需要精确长程预测的安全场景里可能不够。", + "experiment": "对比 MuZero 的隐模型与 [Dreamer](paper_dreamer_v3.md) 的重建式世界模型在相同算力下的样本效率与长程展开误差,定位\"重建观测\"是否必要。可证伪点:若重建式模型在所有指标上都不优,则\"价值等价模型足矣\"得到强支持。", + "isomorphism": "把 [AlphaZero](paper_silver2017_alphazero.md) 的完美模型替换为学得模型,是\"价值等价模型\"思想的代表;与卡尔曼滤波只维护决策所需的充分统计量、强化学习里的双模拟度量(bisimulation)同构。" + }, + "insight:bev_is_planning_friendly_intermediate": { + "assumption": "把多传感器信息统一投影到自车中心的鸟瞰平面,能得到一个尺度一致、利于度量距离与碰撞的表示;下游规划在该平面上做几何推理比在透视图里更自然。相信存在一个\"对规划友好\"的中间坐标系值得显式构造。", + "failure": "BEV 
丢失了高度信息(对高架、地形、悬空障碍不友好),远处分辨率稀疏;它假设地面近似平面,在坡道/复杂三维结构上失真。\"友好\"是相对规划而言,对感知细节是有损压缩。", + "experiment": "在含坡道与悬空障碍的场景里,对比 BEV 规划与保留高度的 [占据栅格](insight_occupancy_unifies_static_and_dynamic_scene.md) 规划的碰撞率,量化\"压扁高度\"的代价。可证伪点:若两者碰撞率无差异,则高度信息对规划冗余。", + "isomorphism": "与机器人里的代价地图(costmap)、SLAM 的占据栅格、GIS 的正射投影同构——都是\"为某类决策选一个最省力的公共坐标系\",[BEVFormer](paper_li2022bevformer.md)、[LSS](paper_lift_splat_shoot.md) 是其学习化实现。" + }, + "paradigm:foundation_model_zero_shot_driving_agent": { + "assumption": "在海量网络数据上预训练的视觉-语言基础模型,已隐含足够的常识与场景理解,可零样本或少样本迁移到驾驶决策,无需大规模驾驶专用标注。相信通用先验能覆盖驾驶长尾。", + "failure": "基础模型的常识是\"陈述性\"的,未必对应可执行的精确控制;它在数值空间推理(距离、速度、时序)上弱,且会自信地编造(hallucination);推理延迟与实时控制预算冲突。零样本的代价是不可控的可靠性。", + "experiment": "构造驾驶专用的反事实探针(轻微改动场景看决策是否一致),测基础模型零样本决策对场景扰动的稳定性,与少样本微调对比。可证伪点:若零样本决策已足够稳定且可执行,则驾驶专用对齐数据的必要性被削弱。", + "isomorphism": "与 [RT-2](paper_rt2.md)、[VLA 范式](paradigm_vla_paradigm.md) 把通才基座迁移到具身控制同源;也对应 NLP 里\"预训练-提示\"取代任务专用模型、[洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)。" + }, + "paper:alvinn": { + "assumption": "人类驾驶演示 $\\mathcal{D}_\\text{expert}$ 覆盖了部署时会遇到的状态分布,于是把每帧 $(I,a)$ 当独立样本做监督回归 $\\min_\\theta\\mathbb{E}\\,\\|f_\\theta(I)-a\\|^2$ 就足以学会开车——感知与决策可以被一个端到端函数一次性吞掉,无需车道线检测等手工模块。", + "failure": "纯演示几乎只含正常行驶,模型从没见过\"已偏离车道该如何回正\",一旦轻微出错就进入演示未覆盖区、误差以 $\\mathcal{O}(\\varepsilon T^2)$ 复合发散(见 [DAgger](paper_ross2011_dagger.md))。Pomerleau 的补救是用几何变换合成偏移位姿样本——这正暴露了\"端到端模仿不会自带纠偏\"这一原则性缺口。", + "experiment": "在闭环里人为把车摆到偏离中心 $d$ 米处,测纯演示训练的网络与\"加合成偏移样本\"训练的网络拉回成功率随 $d$ 的曲线。可证伪点:若不加合成样本恢复率也不随 $d$ 急剧下降,则\"演示覆盖不足是主要失效因\"不成立。", + "isomorphism": "与控制论里开环 vs 闭环稳定性、语言模型自回归的暴露偏差、数值积分误差随步数累积同构;其\"合成偏移样本\"则是 [DAgger](paper_ross2011_dagger.md) 数据聚合的早期手工版,也预演了后来的影子模式数据飞轮。" + }, + "paper:lbc": { + "assumption": "决策与感知是难度悬殊的两个问题:在仿真器特权真值 $\\phi(s)$ 上学决策几乎免费且近乎完美,于是先训出\"作弊\"教师 $\\pi^\\text{priv}$,再把它当作可在任意状态、任意高层指令下查询的标注器,让像素学生 $\\pi^\\text{sens}$ 在自己访问的分布 $d_{\\pi^\\text{sens}}$ 上蒸馏——等价于一次取之不尽的在线 [DAgger](paper_ross2011_dagger.md)。", + "failure": 
"教师的能力上限钉死学生:任何漏进特权输入的感知缺陷、或教师未覆盖的长尾交规,学生都无从学起。方法还强依赖\"能提供真值的仿真器\",真实世界没有这种上帝视角信息源,迁移到路测只能用离线日志+高精地图近似教师,红利大打折扣。", + "experiment": "消融蒸馏的 on-policy 成分:对比\"在学生 rollout 状态上向教师索标注\"与\"仅用教师离线轨迹离线模仿\",画 CARLA 闭环 driving score 差距。可证伪点:若两者闭环分接近,则 LBC 优势并非来自\"在场教师\"的无限 DAgger,而仅是更好的离线监督。", + "isomorphism": "与知识蒸馏\"大教师→小学生\"、特权信息学习(learning using privileged information, LUPI)、以及 AlphaZero 用搜索结果回标网络同构——核心都是\"先用一个昂贵/特权的过程造出高质量监督,再蒸馏进受限的部署模型\"。" + }, + "paper:saycan": { + "assumption": "语言知识与物理可行性是两类必须分别获得再融合的信号:LLM 给\"语义有用性\" $p_\\text{LLM}(\\ell_a\\mid i,h)$,价值函数给\"当前状态成功概率\" $V_a^\\pi(s)$,两者相乘 $a^\\star=\\arg\\max_a p_\\text{LLM}\\cdot V_a^\\pi$ 即能在\"既相关又可行\"的动作上做出接地的选择,无需微调 LLM。", + "failure": "技能库覆盖即能力上限——库里没有的动作再聪明的 LLM 也唤不出。逐步乘积是贪心分解,缺长程回溯,复杂任务会走进死胡同;而 $V_a^\\pi(s)$ 一旦高估就会选到其实做不到的危险动作(驾驶里等于\"以为来得及变道\"),安全攸关时价值校准比语言更关键。", + "experiment": "把价值函数 $V_a^\\pi(s)$ 系统性加噪/偏置,测任务成功率与\"选到不可行动作\"的频率随价值误差的曲线,并与只用 LLM 打分(去掉 can 项)对照。可证伪点:若去掉 can 项成功率几乎不变,则\"可供性必须被显式估计\"这一论断对该任务不成立。", + "isomorphism": "与 [Codex](paper_codex.md) 的 code-as-action 是 grounding 的两条路(可供性价值 vs 可执行可验证代码);其\"提议×可行性\"结构同构于 MPC 在可行域内优化、束搜索的\"分数×约束\"剪枝、以及强化学习里 actor 提议、critic 把关的分工。" + }, + "paper:gail": { + "assumption": "承重假设:模仿等价于占用度量匹配,即只要让策略与专家的状态-动作分布 $\\rho_\\pi=\\rho_{\\pi_E}$ 在 Jensen-Shannon 散度意义下重合,行为就等价;且能负担大量 on-policy 环境交互来让判别器在策略自身访问的分布 $d_\\pi$ 上打分。", + "failure": "失效边界:当环境交互昂贵或只有离线日志时无法做 on-policy rollout,框架失效;判别器一旦过强会使 reward 信号 $-\\log D$ 饱和、策略梯度方差爆炸;若状态表示含可作弊的捷径特征(时间戳、id),$D$ 学到伪差异而非真实行为差异,对抗优化退化。", + "experiment": "可证伪的下一步:在固定交互预算下,把 GAIL 与同等数据下的 [DAgger](paper_ross2011_dagger.md) 和 BC 在 CARLA 闭环里做样本效率对照——预测当专家可被随时查询时 DAgger 的样本效率应优于 GAIL;若 GAIL 在低交互预算下反超,则'分布匹配优于数据聚合'的假设被推翻。", + "isomorphism": "同构迁移:与 GAN 的生成器-判别器博弈、统计物理里用密度比估计两个分布的差异、以及 [AIRL](paper_airl.md) 把判别器结构化恢复 reward,是同一密度比/占用匹配结构在不同领域的具现。" + }, + "paper:ziebart_max_ent_irl": { + "assumption": "承重假设:专家行为服从 Boltzmann 分布 $P(\\tau)\\propto\\exp(\\theta^\\top \\mathbf{f}_\\tau)$,即高 reward 轨迹概率高但不独占;reward 在手写线性特征 $\\mathbf{f}(s,a)$ 
张成的空间里可表示;动力学可枚举/确定,配分函数与状态访问频率可用 forward-backward 精确算出。", + "failure": "失效边界:状态空间大或随机动力学下配分函数 $Z(\\theta)$ 无法精确求,必须采样近似而引入噪声;reward 真实结构若超出线性特征基的张成范围则永远学不到;专家若严重偏离软最优(如新手或对抗驾驶),Boltzmann 模型设定错误,反推的 reward 随之失真。", + "experiment": "可证伪的下一步:在行人轨迹预测上,用最大熵 IRL 输出的多模态可达路径分布与一个纯 [GAIL](paper_gail.md) 判别器的输出对比覆盖率与校准度;可证伪预测是——当特征基覆盖目的地语义时 MaxEnt 的多模态校准应不劣于对抗方法,若系统性更差则说明手写特征是瓶颈。", + "isomorphism": "同构迁移:同一最大熵/配分函数结构出现在统计物理的自由能 $-T\\log Z$、概率图模型的对数配分函数、以及最大熵 forward RL([SAC](paper_sac.md) 的 soft 价值 $V_{\\mathrm{soft}}=\\alpha\\log\\sum_a e^{Q/\\alpha}$)——正逆两向共享同一对偶。" + }, + "paper:ross_bagnell_2010": { + "assumption": "承重假设:模仿可归约为一串监督学习子问题,单步犯错率 $\\epsilon$ 有界且可估计;存在可在任意状态被查询的交互式专家,能在策略实际到达的分布上提供标注;horizon $T$ 有限。其核心界 $J(\\hat\\pi)\\le J(\\pi^*)+O(\\epsilon T^2)$ 成立依赖训练分布 $d_{\\pi^*}$ 与部署分布 $d_{\\hat\\pi}$ 的总变差随时间累积这一机制。", + "failure": "失效边界:当专家不可交互查询(只有静态日志)时 forward training 无法构造分布正确的训练集;$T$ 很大时为每步训独立策略不可行(这正是 [DAgger](paper_ross2011_dagger.md) 用单策略+聚合取代它的原因);若单步误差 $\\epsilon$ 本身随状态强烈异质,统一上界过松,二次/线性的区分不再准确刻画真实复合行为。", + "experiment": "可证伪的下一步:在可控仿真里系统改变 horizon $T$,测量 BC 与 DAgger 的闭环失败率随 $T$ 的标度——可证伪预测是 BC 失败率应近似 $T^2$、DAgger 近似 $T$;若 BC 也呈线性标度,则二次复合机制(或其前提)被推翻。", + "isomorphism": "同构迁移:$O(\\epsilon T^2)$ 的分布偏移复合与 [策略改进被分布偏移上界锁死](insight_policy_improvement_bounded_by_distribution_shift.md) 的内核一致,同样的'用旧分布评估新分布'结构出现在 [TRPO](paper_schulman2015_trpo.md) 的信赖域、离线 RL 的支撑约束、以及监督学习的协变量偏移泛化界中。" + }, + "paper:pointpillars": { + "assumption": "自动驾驶里的目标几乎都贴地,$z$ 方向分布很窄,因此把整列高度压成一个柱(pillar)几乎不损失可分性,可以用便宜的 2D 卷积彻底替代 3D 卷积。它还默认柱内固定采样 $N$ 个点足以代表该柱,超出的点可随机丢弃。", + "failure": "在多层立体结构(立交桥下的车、货架式场景、悬空横杆)里高度信息恰恰是判别关键,压扁 $z$ 会把上下不同物体混进同一柱而无法区分。柱内固定采样 $N$ 在近处高密度区会丢点,导致大目标边界与小目标召回受损;与保留体素的 [VoxelNet](paper_voxelnet.md) 相比,这正是它牺牲的部分。", + "experiment": "按高度分层构造测试集:把目标按其上方是否存在另一物体分成单层 vs 多层两组,对比 [PointPillars](paper_pointpillars.md) 与保留 $z$ 维体素的 [VoxelNet](paper_voxelnet.md) 在两组上的 AP 差。可证伪预测:两者在单层组接近、在多层组 PointPillars 显著落后;若多层组也无明显差距,则压扁 $z$ 的代价被高估。", + "isomorphism": "与 
[Lift-Splat-Shoot](paper_lift_splat_shoot.md) 把相机特征压成 BEV、占用栅格沿高度边缘化、乃至把图像投影成俯视图都是同一招——为换取规则的 2D 计算与 planning 友好的 BEV 表示而牺牲一个维度的细节,区别只在压扁的是相机射线还是激光点。" + }, + "paper:krizhevsky2012": { + "assumption": "在足够大的标注数据(ImageNet)与足够强的算力(GPU)下,一个端到端学习层级特征的深度卷积网络,会全面优于任何手工设计的特征流水线(SIFT/HOG + SVM)。即瓶颈是数据与算力,不是人类对特征的巧思。", + "failure": "当标注数据稀缺或下游分布与 ImageNet 差异巨大时,纯监督预训练的迁移会退化;它对大量人工标签的依赖本身就是瓶颈,这正是后来 [BYOL](paper_byol.md)、[VICReg](paper_vicreg.md) 等自监督要摆脱的。其具体设计(LRN、双 GPU 拆分)也是时代权宜,盲目照搬会引入无谓约束。", + "experiment": "固定架构,沿两个轴做消融:(1) 把训练标签量按 $\\{1\\%, 10\\%, 100\\%\\}$ 抽取,(2) 把算力/训练步数按档位缩放,画测试精度对数据量与算力的响应面。可证伪预测:精度随数据与算力近似单调上升且未饱和;若在小数据档手工特征反超,则\"瓶颈是数据算力而非特征工程\"在该 regime 不成立。", + "isomorphism": "与 [BERT](paper_bert.md) 在语言、[AlphaGo](essay_bitter_lesson.md) 在搜索里的胜利同型——都是\"通用、可随算力扩展的学习方法\"取代\"精巧但难扩展的人类设计\",是 bitter lesson 在不同模态上的同一剧本,区别只在被取代的手工组件是 SIFT、语言学规则还是开局棋谱。" + }, + "paper:bert": { + "assumption": "一个极通用、无需人工标签的自监督目标——遮住约 $15\\%$ 的 token 让模型用双向上下文填回——就能从海量文本中学出可迁移的通用语义表示,且这个目标随数据与算力放大持续受益。下游任务只需在预训练表示上加轻量头微调。", + "failure": "`[MASK]` 只在预训练出现、微调与推理不出现,造成天然分布失配,必须用 $80/10/10$ 混合补偿,否则迁移受损。掩码目标偏向理解/抽取,直接做生成需改造;当下游语料与预训练语料严重失配(专业领域、低资源语言)时表示迁移也会退化。", + "experiment": "在固定算力下对比掩码比例 $\\{5\\%, 15\\%, 40\\%, 80\\%\\}$ 与训练数据量档位对下游 GLUE 类任务的影响,并消融 NSP 任务。可证伪预测:存在一个最优掩码比例区间,且加大数据与训练步数(去掉弱信号的 NSP)比调目标更有效(RoBERTa 现象);若性能对数据规模不敏感,则\"可规模化的简单目标\"论断被削弱。", + "isomorphism": "与视觉的 MAE/BEiT 遮像素块、[VICReg](paper_vicreg.md)/[BYOL](paper_byol.md) 从无标签图像学表示是同一主线——用一个不需人工标签、能吃下任意规模数据的自监督目标学通用表示,区别只在被预测/被对齐的是被遮 token、被遮像素,还是同图的另一增广视图。" + }, + "paper:schulman2016_gae": { + "assumption": "数据严格 on-policy(来自当前策略),且学到的价值函数 $V$ 在策略实际访问的状态上偏差有界,从而 TD 残差 $\\delta_t = r_t + \\gamma V(s_{t+1}) - V(s_t)$ 是一个低方差但只有小偏差的优势近似。隐含相信 $\\lambda$ 这一个标量足以在 bias/variance 谱上找到一个对所有任务都接近最优的折中点。", + "failure": "当 $V$ 严重欠拟合(训练早期、或非平稳奖励)时,小 $\\lambda$ 会把价值函数的偏差直接灌进梯度,策略学歪;而长 horizon 任务取大 $\\lambda$ 又让方差随步数爆炸。一旦数据不再 on-policy(误塞进 replay buffer 复用旧 rollout),GAE 的无偏性前提崩塌,整条优势估计失真。", + "experiment": "固定 [PPO](paper_schulman2017_ppo.md) 
其余超参,在一个长 horizon 连续控制任务上扫 $\\lambda \\in \\{0, 0.9, 0.95, 0.97, 0.99, 1.0\\}$,画最终回报与样本效率随 $\\lambda$ 的 U 形曲线,并叠加价值函数解释方差(explained variance)。可证伪预测:存在一个中间 $\\lambda^\\star$ 严格优于两端;若 $\\lambda=1$(纯蒙特卡洛)始终最优,则\"价值自举降方差有净收益\"的核心论断被推翻。", + "isomorphism": "与 TD($\\lambda$) 的 eligibility trace 是同一递推($\\lambda$ 即\"信任 bootstrap 多深\");也对应时间序列预测里 multi-step vs one-step forecast 的偏差-方差权衡、以及数值微分中步长选择对截断误差与舍入误差的折中——都是\"用一个尺度参数在两类误差间滑动\"。" + }, + "paper:d4rl": { + "assumption": "用\"数据生成策略的质量谱\"(random / medium / expert / medium-replay / medium-expert)构造的难度梯度,能代表真实离线 RL 部署中会遇到的分布困难;且把任务回报线性归一化到 $[0,100]$(random≈0、expert≈100)后,跨任务平均分是一个有意义、可比的进展标量。", + "failure": "固定基准被长期刷榜后,算法会过拟合到 D4RL 特定数据集的人为结构(尤其 AntMaze 的稀疏奖励与 stitching 布局),归一化分上涨却不蕴含真实长尾分布上的泛化。AntMaze 的 done/奖励偏移处理、以及 D4RL 跨版本的数据差异,还会让\"同一张表里的数字\"其实不可比。", + "experiment": "取在 D4RL 上排名靠前的若干离线算法(如 [CQL](paper_cql.md)、[IQL](paper_iql.md)、[Decision Transformer](paper_decision_transformer.md)),迁移到一个分布结构不同的离线任务集(如真实驾驶日志重采样),比较 D4RL 排名与新基准排名的秩相关。可证伪点:若两套排名高度一致,则\"过拟合基准结构\"的担忧被削弱;若秩相关接近零,则\"D4RL 分数预测迁移性能\"被推翻。", + "isomorphism": "与 ImageNet 之于视觉、GLUE 之于 NLP 同构——一个被广泛采用的固定基准既加速进展(提供统一尺子)又诱发过拟合(榜单饱和后脱离真实分布);也对应计量经济学里\"用历史数据评估反事实策略\"的不可实验困境,呼应 [离线 RL 范式](paradigm_offline_rl.md) 的核心张力。" + }, + "paper:planet": { + "assumption": "环境的可控动力学可被一个低维潜空间模型(RSSM:确定性记忆 $s_t$ + 随机隐变量 $z_t$)以足够保真度学到,使得完全在潜空间里展开的多步预测,其累积奖励估计与真实环境足够相关——从而 CEM 规划可以全程不解码回像素、不碰真实环境。", + "failure": "潜动力学在数据稀疏区失真,CEM 规划会专门挑到模型高估回报的动作序列(model exploitation),在想象里很强、上车就崩。去掉 RSSM 的确定性骨干会让长程预测迅速发散,去掉随机分量又无法建模环境噪声;像素重建损失还会把模型容量耗在视觉细节而非任务相关动力学上。", + "experiment": "用 PlaNet 在像素观测任务上展开 $H=1\\ldots 50$ 步,画\"想象累积奖励\"与\"真实累积奖励\"的相关系数随 $H$ 的衰减曲线,定位相关性跌破 0.5 的临界 $H^\\star$;再对比保留/移除 latent overshooting 时 $H^\\star$ 的变化。可证伪点:若 $H^\\star$ 与最终规划性能无关,则\"长程预测保真度主导规划质量\"不成立。", + "isomorphism": "等同于模型预测控制(MPC)在学得动力学上滚动优化、卡尔曼滤波维持内部状态做前瞻、以及 [MuZero](paper_muzero.md) 在隐空间做树搜索——都是\"用一个可查询的环境替身把昂贵真实试错搬进廉价内部推演\",是 [世界模型让规划在想象中进行](insight_world_models_let_planning_be_done_in_imagination.md) 的潜空间-规划实例。" 
} } diff --git a/tools/.link_baseline.json b/tools/.link_baseline.json index c29bb4b..17b2f41 100644 --- a/tools/.link_baseline.json +++ b/tools/.link_baseline.json @@ -1,4 +1,4 @@ { - "max_inert": 45, - "max_orphans": 4 + "max_inert": 0, + "max_orphans": 0 } diff --git a/tools/build_research_lens.py b/tools/build_research_lens.py index fe99497..fccf1d4 100644 --- a/tools/build_research_lens.py +++ b/tools/build_research_lens.py @@ -133,12 +133,80 @@ "isomorphism": r"""是 [测试时算力替代训练时算力](insight_test_time_compute_substitutes_train_time_via_search.md) 的奠基范例,与大模型推理时的多次采样投票、规划里的 MPC 滚动优化同构;自蒸馏搜索结果也对应"用慢而准的过程教快而糙的模型"。""", }, +"paper:vit": { + "assumption": r"""图像可以被切成 patch 当作 token 序列,交给纯 Transformer 处理;只要数据量足够大,自注意力能学到卷积的局部性先验,无需把它硬编码进结构。相信"规模 + 弱先验"在大数据下胜过"强先验 + 小数据"。""", + "failure": r"""在中小数据集(ImageNet-1k 从零训)上,缺少平移等变与局部性先验的 ViT 不如 CNN,必须靠 JFT 级预训练或强增广补偿;$\mathcal{O}(n^2)$ 注意力也让高分辨率输入昂贵。归纳偏置不是没用,只是被推迟到"数据买得起"时再丢弃。""", + "experiment": r"""固定算力,扫描预训练数据规模,画 ViT 与 [ResNet](paper_he2015_resnet.md) 的精度交叉点 $N^\star$:小于 $N^\star$ 时 CNN 先验占优,大于则 ViT 反超。可证伪点:若任意数据规模下 ViT 都不反超,则"先验可被规模替代"在视觉上不成立。""", + "isomorphism": r"""与 [Transformer](paper_vaswani2017.md) 在语言上抛弃循环先验、[Bitter Lesson](essay_bitter_lesson.md) 中"通用方法 + 算力"反复胜出是同一脉络;patch 化也对应 [把图像 token 化](move_patchify_tokenization.md) 这一可复用动作。""", +}, + +"paper:carion2020": { + "assumption": r"""目标检测可以写成"集合预测":用一组可学习的 object query 直接输出无序框集合,配二分图匹配损失,从而删掉 NMS、anchor 这些手工后处理。相信端到端可微的集合损失能学出比启发式去重更好的归纳。""", + "failure": r"""DETR 收敛极慢(数百 epoch),小目标精度差,根因是匈牙利匹配在训练早期不稳定、query 与目标的对应关系漂移;Deformable-DETR 等后续才缓解。把后处理塞进损失不是免费的——它把难度转移到了优化稳定性上。""", + "experiment": r"""消融 query 数量与匹配策略:固定 backbone,测匹配从匈牙利换成固定先验分配时收敛速度与小目标 AP 的变化,定位"集合损失"真正的代价来源。可证伪点:若去掉二分匹配后精度不降反升,则集合预测的必要性被削弱。""", + "isomorphism": r"""object query 与 [BEVFormer](paper_li2022bevformer.md)、[UniAD](paper_2212.10156_uniad.md) 的查询总线、[把检测当集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md) 同源;集合损失也对应排序学习、点云配准里的可微匹配。""", +}, + +"paper:li2022bevformer": { + 
"assumption": r"""多相机特征可以通过一组 BEV query 用时空注意力"拉"到统一的鸟瞰平面,时序上聚合历史 BEV 即可补足单帧缺失的速度与遮挡信息。相信显式的 BEV 中间表示比各相机各自为政更利于下游规划。""", + "failure": r"""BEV 投影依赖准确的相机外参与自车位姿,标定漂移会让特征"投歪";远处与小目标在 BEV 网格上分辨率不足;时序注意力对动态物体的运动补偿有限。统一表示的代价是对几何精度的强依赖。""", + "experiment": r"""注入可控的外参扰动,测 BEV 检测/分割精度随标定误差的衰减曲线,并与 [LSS](paper_lift_splat_shoot.md) 的深度分布投影对比鲁棒性。可证伪点:若精度对标定扰动不敏感,则"BEV 强依赖几何"的判断需修正。""", + "isomorphism": r"""BEV 作为规划友好的中间层,是 [洞察:BEV 是规划友好的中间表示](insight_bev_is_planning_friendly_intermediate.md) 的具现;与 [占据栅格统一动静态场景](insight_occupancy_unifies_static_and_dynamic_scene.md)、SLAM 的全局地图同属"投影到一个利于决策的公共坐标系"。""", +}, + +"paper:sac": { + "assumption": r"""最优策略应在最大化回报的同时最大化熵 $J(\pi)=\sum_t \mathbb{E}[r_t+\alpha\mathcal{H}(\pi(\cdot\mid s_t))]$,熵项既鼓励探索又让策略对模型误差更鲁棒。相信"最大熵 + off-policy"能兼得样本效率与稳定。""", + "failure": r"""温度 $\alpha$ 决定探索-利用平衡,固定值难调(自动调 $\alpha$ 才实用);连续高维动作下双 Q 仍有残余高估;奖励尺度变化会让熵项相对权重漂移。最大熵的好处在确定性最优任务上反而稀释。""", + "experiment": r"""扫描温度 $\alpha$(含自动调节),画"探索充分度-最终回报"曲线,并测把熵项置零(退化为 DDPG 式)后的崩溃频率。可证伪点:若熵项对最终性能与稳定性都无显著影响,则最大熵框架在该任务上是冗余的。""", + "isomorphism": r"""最大熵 RL 与统计力学的玻尔兹曼分布、概率推断里的变分自由能、[洞察:最大熵打通策略-价值对偶](insight_max_entropy_closes_policy_value_duality.md) 同构——都把"带温度的最优化"写成同一个软极大形式。""", +}, + +"paper:muzero": { + "assumption": r"""不需要真实环境模型,只需学一个"对预测价值/策略/奖励够用"的隐空间模型,就能在隐空间里做 MCTS。相信模型只要在决策相关量上自洽,不必重建观测的每个像素。""", + "failure": r"""隐模型只对训练分布内的展开可信,长程或分布外展开会累积误差;学模型 + 搜索 + 表征三者联合训练对超参与算力极敏感。"够用即可"的隐模型在需要精确长程预测的安全场景里可能不够。""", + "experiment": r"""对比 MuZero 的隐模型与 [Dreamer](paper_dreamer_v3.md) 的重建式世界模型在相同算力下的样本效率与长程展开误差,定位"重建观测"是否必要。可证伪点:若重建式模型在所有指标上都不优,则"价值等价模型足矣"得到强支持。""", + "isomorphism": r"""把 [AlphaZero](paper_silver2017_alphazero.md) 的完美模型替换为学得模型,是"价值等价模型"思想的代表;与卡尔曼滤波只维护决策所需的充分统计量、强化学习里的双模拟度量(bisimulation)同构。""", +}, + +"insight:bev_is_planning_friendly_intermediate": { + "assumption": r"""把多传感器信息统一投影到自车中心的鸟瞰平面,能得到一个尺度一致、利于度量距离与碰撞的表示;下游规划在该平面上做几何推理比在透视图里更自然。相信存在一个"对规划友好"的中间坐标系值得显式构造。""", + "failure": r"""BEV 
丢失了高度信息(对高架、地形、悬空障碍不友好),远处分辨率稀疏;它假设地面近似平面,在坡道/复杂三维结构上失真。"友好"是相对规划而言,对感知细节是有损压缩。""", + "experiment": r"""在含坡道与悬空障碍的场景里,对比 BEV 规划与保留高度的 [占据栅格](insight_occupancy_unifies_static_and_dynamic_scene.md) 规划的碰撞率,量化"压扁高度"的代价。可证伪点:若两者碰撞率无差异,则高度信息对规划冗余。""", + "isomorphism": r"""与机器人里的代价地图(costmap)、SLAM 的占据栅格、GIS 的正射投影同构——都是"为某类决策选一个最省力的公共坐标系",[BEVFormer](paper_li2022bevformer.md)、[LSS](paper_lift_splat_shoot.md) 是其学习化实现。""", +}, + +"paradigm:foundation_model_zero_shot_driving_agent": { + "assumption": r"""在海量网络数据上预训练的视觉-语言基础模型,已隐含足够的常识与场景理解,可零样本或少样本迁移到驾驶决策,无需大规模驾驶专用标注。相信通用先验能覆盖驾驶长尾。""", + "failure": r"""基础模型的常识是"陈述性"的,未必对应可执行的精确控制;它在数值空间推理(距离、速度、时序)上弱,且会自信地编造(hallucination);推理延迟与实时控制预算冲突。零样本的代价是不可控的可靠性。""", + "experiment": r"""构造驾驶专用的反事实探针(轻微改动场景看决策是否一致),测基础模型零样本决策对场景扰动的稳定性,与少样本微调对比。可证伪点:若零样本决策已足够稳定且可执行,则驾驶专用对齐数据的必要性被削弱。""", + "isomorphism": r"""与 [RT-2](paper_rt2.md)、[VLA 范式](paradigm_vla_paradigm.md) 把通才基座迁移到具身控制同源;也对应 NLP 里"预训练-提示"取代任务专用模型、[洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)。""", +}, + } +FRAGMENTS = OUT.parent / "lens_fragments" + + def main() -> int: - OUT.write_text(json.dumps(LENS, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - print(f"wrote {OUT.relative_to(OUT.parents[2])}: {len(LENS)} research lenses") + merged = dict(LENS) + # Merge any per-domain fragment files (authored separately to avoid edit + # collisions). Flagship entries above win on conflict. 
+ frag_count = 0 + if FRAGMENTS.exists(): + for f in sorted(FRAGMENTS.glob("*.json")): + try: + data = json.loads(f.read_text(encoding="utf-8")) + except Exception as ex: + print(f"skip {f.name}: {ex}") + continue + for nid, fields in data.items(): + if nid not in merged and isinstance(fields, dict): + merged[nid] = fields + frag_count += 1 + OUT.write_text(json.dumps(merged, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + print(f"wrote {OUT.relative_to(OUT.parents[2])}: {len(merged)} lenses " + f"({len(LENS)} flagship + {frag_count} from fragments)") return 0 From 0134211b41dbba52bd22bb7aac6ea9c85e2e02a9 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 04:00:40 +0000 Subject: [PATCH 6/8] Round 4.1: README reflects current state (research lens, accurate counts) README still advertised the removed 'dynamic insight' panel and stale counts (499 nodes / 1440 edges). Update to the real artifact: 541 nodes / 1568 edges, 0 dead internal links, ~270 hand-written cards, and the new research-lens layer (load-bearing assumption / failure boundary / falsifiable next experiment / cross-domain isomorphism) on core nodes, with honest graph-derived navigation replacing the old synthetic insight. --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 77a31d6..a1d25c8 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Atlas live site (graph + side panel)

-

点击图片直达"打开 UniAD 卡片"的可分享 permalink。当前图谱 499 节点 / 1440 条带类型边,其中 200+ 个 paradigm / insight / validation / move / problem / paper 节点附带手写深度卡片,每张卡片自动追加一段基于邻接的动态洞察。

+

点击图片直达"打开 UniAD 卡片"的可分享 permalink。当前图谱 541 节点 / 1568 条带类型边,内部交叉链接全部可跳转(0 条断链)。约 270 个 paradigm / insight / validation / move / problem / paper 节点附带手写深度卡片;其中的核心节点还各配一面「研究透镜」,逐条拆解它成立依赖的前提、失效的边界、一个可证伪的下一步实验,以及它在其它学科里的同型结构。

2x2 map of AD research paradigms @@ -28,7 +28,7 @@ 一个针对**已具备 ML/CV 基础的自动驾驶研究者 / 博士生**整理的、可交互的**学习地图**。它不是论文列表、不是综述、不是教程合集;它做三件事: 1. **画出研究范式的全景图。** 把 8 篇论文 + 3 个视频频道 + 2 门课 + 1 篇 Sutton 的短文(共 14 个 spine 节点),加上一组经典基石、平行路线与定位参考节点,在一张可交互的知识图谱里按 *拓扑 / 时间轴 / playbook* 三种视图组织起来。 -2. **每个节点配一张深度阅读卡片。** 每张卡片包含 *TL;DR · 数学锚点 · 架构直觉 · 工程实现 · 深度链接(PDF 页内锚 + 视频时间戳)· Bitter-Lesson 视角*,最大程度减少"再去 Google 一遍"的成本。 +2. **每个节点配一张深度阅读卡片,核心节点再加一面「研究透镜」。** 卡片包含 *TL;DR · 数学锚点 · 架构直觉 · 工程实现 · 深度链接(PDF 页内锚 + 视频时间戳)· Bitter-Lesson 视角*;研究透镜则进一步追问四件事——它成立依赖什么前提、在什么条件下会崩、可以用哪个可证伪的实验把它往前推或推翻、以及同一套结构在别的学科里出现在哪里。卡片底部不再堆砌泛泛的"洞察",只保留由关系网络直接给出的下一步线索(下游可读 / 上游组件 / 它正面回答的开放问题)。 3. **每篇主线论文配一份可跑通的 lab。** 11 个根目录 Jupyter 笔记本由 CI 用 `Mock` 后端跑一遍,全绿才算"全部跑通";涉及 LLM/VLM 的 lab 通过统一 `llm_provider.py` 抽象支持 OpenAI / Ollama / HF / Mock 四种后端。另有 3 份长程复现 demo 进驻 `labs/<domain>/<lab_name>/`:`rl_decision/lab_dqn_ppo_sac_cartpole`(DQN / PPO / SAC 同台对比 + 取消 target net 的发散 ablation)、`world_models/lab_dreamer_cartpole_pixels`(CartPole 像素观测下学世界模型再做 latent imagination)、`rl_decision/lab_cql_offline_minigrid`(CQL vs BC vs DQN,附 Q 值过估计可视化与 α 自调 ablation)。 > **风格定位**:客观中立、专业但克制、有明确启发。每张卡片只在 *Bitter-Lesson 视角* 一节明确表态。 @@ -93,11 +93,12 @@ Autonomous-Driving-Learning-Atlas/ ├── docs/ # GitHub-Pages 根目录(交互站点) │ ├── index.html · atlas3d.css │ ├── js/ # atlas-main · atlas-render · atlas-physics · -│ │ # atlas-cards (含 Mermaid 渲染 + 动态洞察) +│ │ # atlas-cards (Mermaid + 研究透镜 + 结构导航) · math-katex │ ├── vendor/ # KaTeX + auto-render · Mermaid · DOMPurify · marked · Three.js │ └── data/ │ ├── graph.json · schema.json -│ ├── graph_extended.json # 489 节点 / 1440 边 (含 paradigm/insight/validation/move/problem) +│ ├── graph_extended.json # 541 节点 / 1568 边 (含 paradigm/insight/validation/move/problem) +│ ├── research_lens.json # 核心节点的「研究透镜」(承重假设/失效边界/可证伪实验/同构迁移) │ ├── layout_positions.json # 由 tools/precompute_layout.py 预烤的稳定 3D 位置 │ ├── generated/ # 多维度生成轴(decision / foundation / methodology / perception / wave-E stubs) │ └── cards/ From 
300b27579dce9f6558fa5696babc9d43a973cb6c Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 04:10:14 +0000 Subject: [PATCH 7/8] Round 5: apply adversarial-review fixes + expand lenses to 49 A read-only correctness reviewer audited the 31 new cards + lenses (equations all verified correct). Applied its findings: - ALVINN: corrected venue/year to NeurIPS 1988 (matching its own deep link), fixed the input-layer description (30x32 video retina + 8x32 range-finder retina + 1 road-intensity feedback unit -> 29 hidden -> 45 outputs), and the mislabeled '[NVIDIA PilotNet](paper_transfuser.md)' link (PilotNet != TransFuser). - GAE: removed the anachronism (original TRPO 2015 used single-path/vine, not GAE 2016) and scoped the claim. - Learning-by-Cheating: reframed CARLA Leaderboard as the later standardisation of the closed-loop eval lineage (LBC used the original CARLA/NoCrash protocol). - Multimodal-behavior insight: softened an invented-looking exact KL bound to an honest order-of-magnitude scaling relation. - Normalised 270 seed-card cross-links across 75 extended cards to use ../ so they resolve in GitHub's raw markdown view too (they already jumped correctly in-app). Research lenses expanded 35 -> 49: added the four new insight nodes plus AIRL, VoxelNet, CenterPoint, BYOL, VICReg, Bahdanau attention, preference learning, TCP, GameFormer, Codex. QC: 0 dead links, 0 meta-language, 0/231 lint findings, math test green, every lens link resolves. 
--- ...is_just_iterated_conditional_generation.md | 2 +- ...constraint_satisfaction_over_generation.md | 6 +- ...attention_is_typed_entity_communication.md | 10 +- ...t_bev_is_planning_friendly_intermediate.md | 6 +- ...n_is_the_only_ground_truth_for_planners.md | 2 +- ...e_rendering_is_universal_inverse_solver.md | 2 +- ...rries_reactive_and_deliberative_control.md | 4 +- ...system_handles_latency_quality_tradeoff.md | 2 +- ...able_beats_handcraft_when_signal_strong.md | 2 +- ...tion_features_transfer_without_finetune.md | 6 +- ...on_pretraining_decouples_data_from_task.md | 10 +- ..._cannot_recover_from_compounding_errors.md | 2 +- ...ht_in_context_learning_emerges_at_scale.md | 4 +- ...tail_solved_by_synthesis_not_data_alone.md | 6 +- ...rediction_yields_self_supervised_signal.md | 2 +- ...behavior_is_intrinsic_to_traffic_scenes.md | 2 +- ...multi_view_geometry_as_free_supervision.md | 4 +- ...upancy_unifies_static_and_dynamic_scene.md | 4 +- ...sidual_learning_unlocks_arbitrary_depth.md | 6 +- ...abilities_not_present_in_smaller_models.md | 6 +- ...aling_laws_predict_capability_emergence.md | 10 +- ...on_eliminates_postprocessing_heuristics.md | 10 +- ...ealism_is_lower_bound_on_training_value.md | 4 +- ...aggregation_buys_what_depth_sensor_buys.md | 4 +- ...ght_tokenization_collapses_modality_gap.md | 4 +- ..._planning_borrow_from_language_modeling.md | 4 +- ...e_model_into_environment_grounded_actor.md | 2 +- ...eo_diffusion_is_implicit_physics_engine.md | 6 +- ...els_let_planning_be_done_in_imagination.md | 6 +- .../data/cards/extended/move_lift_2d_to_3d.md | 4 +- docs/data/cards/extended/paper_alvinn.md | 14 +- docs/data/cards/extended/paper_chinchilla.md | 4 +- docs/data/cards/extended/paper_ddpm.md | 4 +- .../cards/extended/paper_depth_anything.md | 2 +- .../cards/extended/paper_he2015_resnet.md | 4 +- docs/data/cards/extended/paper_lbc.md | 2 +- docs/data/cards/extended/paper_llama.md | 4 +- docs/data/cards/extended/paper_qwen.md | 4 +- 
.../cards/extended/paper_schulman2016_gae.md | 2 +- .../data/cards/extended/paper_tesla_ai_day.md | 2 +- .../paradigm_camera_first_autonomy.md | 10 +- ...digm_counterfactual_data_centric_safety.md | 6 +- ...igm_differentiable_end_to_end_imitation.md | 10 +- .../paradigm_foundation_model_axis.md | 12 +- ...oundation_model_zero_shot_driving_agent.md | 6 +- .../extended/paradigm_imitation_learning.md | 4 +- .../extended/paradigm_llm_agent_paradigm.md | 2 +- ..._model_based_world_imagination_planning.md | 6 +- ...m_neural_scene_reconstruction_as_engine.md | 8 +- ...digm_scaling_data_with_self_supervision.md | 4 +- ..._simulator_first_synthetic_data_centric.md | 6 +- .../cards/extended/paradigm_vla_paradigm.md | 4 +- .../extended/paradigm_world_model_paradigm.md | 6 +- ...vior_cloning_compounds_errors_over_time.md | 4 +- ...lem_closed_loop_simulation_fidelity_gap.md | 6 +- ...blem_label_efficiency_for_3d_annotation.md | 2 +- ...ng_horizon_credit_assignment_in_driving.md | 4 +- ...on_reasoning_with_finite_context_window.md | 4 +- ...occlusion_reasoning_without_dense_lidar.md | 4 +- ...oes_not_predict_closed_loop_performance.md | 4 +- ...orld_corner_case_synthesis_for_training.md | 4 +- ...ning_horizon_vs_compute_budget_tradeoff.md | 4 +- ...ate_real_risk_but_are_under_represented.md | 2 +- ...w_transformer_with_temporal_aggregation.md | 12 +- ...tion_trace_counterfactual_vla_replanner.md | 8 +- ...former_offline_rl_via_sequence_modeling.md | 10 +- ...on_policy_as_score_based_action_sampler.md | 10 +- ...e_few_shot_in_context_learning_at_scale.md | 6 +- ...mage_transformer_via_patch_tokenization.md | 12 +- ...on_trace_llm_decision_agent_for_driving.md | 2 +- ...lar_perception_pipeline_with_bev_fusion.md | 14 +- ..._neural_field_for_dynamic_driving_scene.md | 12 +- ...calable_self_supervised_vision_backbone.md | 10 +- ...race_self_attention_replaces_recurrence.md | 6 +- ...race_set_prediction_with_object_queries.md | 10 +- ...e_unified_planning_oriented_e2e_driving.md | 6 
+- ..._trace_vision_language_action_dual_loop.md | 6 +- ...vision_language_pretrained_dual_encoder.md | 10 +- ...trace_world_model_in_latent_imagination.md | 2 +- docs/data/generated/round4_driving.json | 209 ++++++++++++++---- docs/data/graph_extended.json | 2 +- docs/data/research_lens.json | 84 +++++++ tools/build_research_lens.py | 98 ++++++++ 83 files changed, 571 insertions(+), 264 deletions(-) diff --git a/docs/data/cards/extended/insight_agent_loop_is_just_iterated_conditional_generation.md b/docs/data/cards/extended/insight_agent_loop_is_just_iterated_conditional_generation.md index e709e87..0295f64 100644 --- a/docs/data/cards/extended/insight_agent_loop_is_just_iterated_conditional_generation.md +++ b/docs/data/cards/extended/insight_agent_loop_is_just_iterated_conditional_generation.md @@ -44,4 +44,4 @@ $$a_t \sim p_\theta(a_t \mid c_t),\quad c_{t+1} = \text{Aggregate}(c_t, a_t, o_{ ## 推演链路 -[`paper:gpt3`](paper_gpt3.md)(in-context 能力)→ [ReAct](paper_react.md)(thought-action-observation 协议)→ [Reflexion](paper_reflexion.md)(错误—反思—修正)→ [Toolformer](paper_toolformer.md) 与 [VOYAGER](paper_voyager.md)(工具与技能内化)→ [`paper:2311.10813_agent_driver`](paper_2311.10813_agent_driver.md) 与 [`paper:2309.16292_dilu`](paper_2309.16292_dilu.md)(驾驶 agent 的具体形态)→ [DriveVLM-Dual](paper_2402.12289_drivevlm.md)(agent 作为慢回路的工程落地)。 +[`paper:gpt3`](../paper_gpt3.md)(in-context 能力)→ [ReAct](paper_react.md)(thought-action-observation 协议)→ [Reflexion](paper_reflexion.md)(错误—反思—修正)→ [Toolformer](paper_toolformer.md) 与 [VOYAGER](paper_voyager.md)(工具与技能内化)→ [`paper:2311.10813_agent_driver`](paper_2311.10813_agent_driver.md) 与 [`paper:2309.16292_dilu`](paper_2309.16292_dilu.md)(驾驶 agent 的具体形态)→ [DriveVLM-Dual](paper_2402.12289_drivevlm.md)(agent 作为慢回路的工程落地)。 diff --git a/docs/data/cards/extended/insight_alignment_is_constraint_satisfaction_over_generation.md b/docs/data/cards/extended/insight_alignment_is_constraint_satisfaction_over_generation.md index 16e3903..3fe3d7c 100644 --- 
a/docs/data/cards/extended/insight_alignment_is_constraint_satisfaction_over_generation.md +++ b/docs/data/cards/extended/insight_alignment_is_constraint_satisfaction_over_generation.md @@ -19,14 +19,14 @@ $$\pi^\star(a\mid s)\ =\ \arg\min_\pi\ D_\text{KL}\!\big(\pi\,\|\,\pi_0\big)\ \ | 大语言模型 | pretrain LM | helpfulness + harmlessness | | 图像扩散 | unconditional 扩散 | classifier-free guidance / negative prompt | | 机器人策略 | BC pretrain | 安全约束、能耗约束 | -| 自动驾驶规划 | 模仿预训练 | 碰撞 / 舒适 / 合规 ([VADv2](paper_vadv2.md)) | +| 自动驾驶规划 | 模仿预训练 | 碰撞 / 舒适 / 合规 ([VADv2](../paper_vadv2.md)) | | 程序生成 | code LM | 单元测试 + 静态检查 | | 推荐系统 | 历史点击 | 多样性 / 公平性 | | 对抗鲁棒训练 | clean 模型 | 局部 Lipschitz 约束 | ## 这条洞察对自动驾驶的意义 -奖励工程在驾驶规划里向来是隐形泥潭——把"安全 + 进度 + 舒适"压成单一标量 cost 时,工程师必须手工调权重,而权重在新场景里几乎一定要再调。约束工程把这一难题拆开:碰撞、合规、舒适分别写成 $\mathcal C_k$,每个约束都有可监控的违反率作为诊断信号,对应的对偶变量 $\lambda_k$ 由优化器自适应推进。[VADv2](paper_vadv2.md) 在端到端轨迹规划上直接用偏好约束替代手工 cost,[`insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping`](insight_safety_emerges_from_constraint_lagrangian_not_reward_shaping.md) 把同一思路推广到通用 safe RL。 +奖励工程在驾驶规划里向来是隐形泥潭——把"安全 + 进度 + 舒适"压成单一标量 cost 时,工程师必须手工调权重,而权重在新场景里几乎一定要再调。约束工程把这一难题拆开:碰撞、合规、舒适分别写成 $\mathcal C_k$,每个约束都有可监控的违反率作为诊断信号,对应的对偶变量 $\lambda_k$ 由优化器自适应推进。[VADv2](../paper_vadv2.md) 在端到端轨迹规划上直接用偏好约束替代手工 cost,[`insight:safety_emerges_from_constraint_lagrangian_not_reward_shaping`](insight_safety_emerges_from_constraint_lagrangian_not_reward_shaping.md) 把同一思路推广到通用 safe RL。 闭式解 $\pi^\star \propto \pi_0 \exp(-\beta\,\mathcal C)$ 也意味着基模型可被多次复用:[`paradigm:imitation_learning`](paradigm_imitation_learning.md) 训出的轨迹分布可以同时被 RLHF 风格的偏好约束、DPO 风格的对比对约束、以及反事实约束([CF-VLA](paper_2512.24426_cfvla.md))独立加权。[`paradigm:vla_paradigm`](paradigm_vla_paradigm.md) 与 [`paradigm:knowledge_driven_reflective_agent`](paradigm_knowledge_driven_reflective_agent.md) 都共享这一基模型加约束的层叠结构。 @@ -46,4 +46,4 @@ $$\pi^\star(a\mid s)\ =\ \arg\min_\pi\ D_\text{KL}\!\big(\pi\,\|\,\pi_0\big)\ \ ## 推演链路 -Maximum Entropy IRL(Ziebart 等,2008)→ 
[InstructGPT](paper_instructgpt.md)(首次大规模 RLHF)→ [RLHF / DPO](paper_rlhf_dpo.md)(把对齐写成偏好对损失)→ [Constitutional AI](paper_constitutional_ai.md)(自我对齐)→ [VADv2](paper_vadv2.md) / [CF-VLA](paper_2512.24426_cfvla.md)(驾驶域的偏好对齐与反事实约束)→ [Cal-QL](paper_calql.md) / [CPO](paper_cpo_safe_rl.md)(约束 RL 的对偶解法)。 +Maximum Entropy IRL(Ziebart 等,2008)→ [InstructGPT](paper_instructgpt.md)(首次大规模 RLHF)→ [RLHF / DPO](paper_rlhf_dpo.md)(把对齐写成偏好对损失)→ [Constitutional AI](paper_constitutional_ai.md)(自我对齐)→ [VADv2](../paper_vadv2.md) / [CF-VLA](paper_2512.24426_cfvla.md)(驾驶域的偏好对齐与反事实约束)→ [Cal-QL](paper_calql.md) / [CPO](paper_cpo_safe_rl.md)(约束 RL 的对偶解法)。 diff --git a/docs/data/cards/extended/insight_attention_is_typed_entity_communication.md b/docs/data/cards/extended/insight_attention_is_typed_entity_communication.md index bc514de..f988c82 100644 --- a/docs/data/cards/extended/insight_attention_is_typed_entity_communication.md +++ b/docs/data/cards/extended/insight_attention_is_typed_entity_communication.md @@ -52,15 +52,15 @@ graph TB ## 这条洞察对自动驾驶研究的具体意义 -把研究对象抽象为一类 query,是注意力时代最便宜的接入方式。可行驶区域、对手意图、ego 未来若干秒、社会规范,每一类都可以分配一组可学习 query embedding,让它们去 cross-attend 主特征图——[UniAD](paper_2212.10156_uniad.md) 把跟踪、运动、规划全部表达为这种 query 串接,[DETR](paper_carion2020.md) 与 [BEVFormer](paper_li2022bevformer.md) 也是同源思路。这种方式比"为新对象设计新模块头"更稳定,因为接口是统一的注意力协议。 +把研究对象抽象为一类 query,是注意力时代最便宜的接入方式。可行驶区域、对手意图、ego 未来若干秒、社会规范,每一类都可以分配一组可学习 query embedding,让它们去 cross-attend 主特征图——[UniAD](paper_2212.10156_uniad.md) 把跟踪、运动、规划全部表达为这种 query 串接,[DETR](../paper_carion2020.md) 与 [BEVFormer](../paper_li2022bevformer.md) 也是同源思路。这种方式比"为新对象设计新模块头"更稳定,因为接口是统一的注意力协议。 -跨模态融合也由此摆脱了早期 / 晚期 concat 的二分。给每个模态(图像、LiDAR、自然语言、地图)分配自己的 token 集合,再让所有模态相互 cross-attend,相当于让模态共享同一张可微表征空间,[LLaVA](paper_llava.md) 与 [RT-2](paper_rt2.md) 都依赖这一结构把视觉与动作 token 拼到同一序列里。 +跨模态融合也由此摆脱了早期 / 晚期 concat 的二分。给每个模态(图像、LiDAR、自然语言、地图)分配自己的 token 集合,再让所有模态相互 cross-attend,相当于让模态共享同一张可微表征空间,[LLaVA](../paper_llava.md) 与 [RT-2](paper_rt2.md) 都依赖这一结构把视觉与动作 token 拼到同一序列里。 
-规划层也可以套同一壳子——把每条候选轨迹看作一个 query,让它去 cross-attend 占据、地图、其他 agent,从而"问出"自己的风险评估与可行性分数;[VADv2](paper_vadv2.md) 的多模态轨迹打分就是这种 query-as-trajectory 的范例。一个附赠收益是注意力权重本身就是可观察元数据,无须额外解释头就能给出每个决策的归因热力图。 +规划层也可以套同一壳子——把每条候选轨迹看作一个 query,让它去 cross-attend 占据、地图、其他 agent,从而"问出"自己的风险评估与可行性分数;[VADv2](../paper_vadv2.md) 的多模态轨迹打分就是这种 query-as-trajectory 的范例。一个附赠收益是注意力权重本身就是可观察元数据,无须额外解释头就能给出每个决策的归因热力图。 ## 这条洞察什么时候被滥用 -把任意结构都改写成 attention 并不必然提升性能。当数据中的"实体"本质上没有相互关系(典型例子是密集时序信号里相邻样本之间的随机噪声),softmax 加权和退化为一个昂贵的近恒等变换,几乎所有计算预算都被浪费。CNN 与卷积归纳偏置在这种情形下反而更高效,[ViT](paper_vit.md) 之所以能在视觉上跑赢 CNN,前提是数据规模足够大、能让"任意 patch 之间可对话"的协议学到有用结构。 +把任意结构都改写成 attention 并不必然提升性能。当数据中的"实体"本质上没有相互关系(典型例子是密集时序信号里相邻样本之间的随机噪声),softmax 加权和退化为一个昂贵的近恒等变换,几乎所有计算预算都被浪费。CNN 与卷积归纳偏置在这种情形下反而更高效,[ViT](../paper_vit.md) 之所以能在视觉上跑赢 CNN,前提是数据规模足够大、能让"任意 patch 之间可对话"的协议学到有用结构。 当 query 数量极大时,$O(N^2)$ 的注意力代价本身成为瓶颈——例如在百万级点云或长视频里直接做全注意力,显存与时延都会爆炸。FlashAttention、Linear Attention、Performer 的真正动机不是"提升精度"而是"让大 $N$ 仍可行";忽视这一点,把注意力盲目套到长上下文会撞到工程墙。 @@ -71,4 +71,4 @@ graph TB ## 推演链路 -[`paper:bahdanau2014_attention`](paper_bahdanau2014_attention.md) → [`paper:vaswani2017`](paper_vaswani2017.md) → [`paper:vit`](paper_vit.md) → [`paper:carion2020`](paper_carion2020.md) → [`paper:li2022bevformer`](paper_li2022bevformer.md) → [`paper:2212.10156`](paper_2212.10156_uniad.md) → [`paper:2402.12289`](paper_2402.12289_drivevlm.md)。 +[`paper:bahdanau2014_attention`](paper_bahdanau2014_attention.md) → [`paper:vaswani2017`](../paper_vaswani2017.md) → [`paper:vit`](../paper_vit.md) → [`paper:carion2020`](../paper_carion2020.md) → [`paper:li2022bevformer`](../paper_li2022bevformer.md) → [`paper:2212.10156`](paper_2212.10156_uniad.md) → [`paper:2402.12289`](paper_2402.12289_drivevlm.md)。 diff --git a/docs/data/cards/extended/insight_bev_is_planning_friendly_intermediate.md b/docs/data/cards/extended/insight_bev_is_planning_friendly_intermediate.md index acbb517..2ad9d15 100644 --- a/docs/data/cards/extended/insight_bev_is_planning_friendly_intermediate.md 
+++ b/docs/data/cards/extended/insight_bev_is_planning_friendly_intermediate.md @@ -40,7 +40,7 @@ $$J(\tau) = \int_0^T c\big(\tau(t),\, \mathcal{S}(t)\big)\, dt$$ | 领域 | "为规划友好的中间表示" 的具体形态 | |---|---| -| 自动驾驶 | BEV 特征 + 占用栅格,详见 [BEVFormer](paper_li2022bevformer.md)、[BEVFusion](paper_bevfusion.md) | +| 自动驾驶 | BEV 特征 + 占用栅格,详见 [BEVFormer](../paper_li2022bevformer.md)、[BEVFusion](paper_bevfusion.md) | | 机器人导航 | 2D 占用栅格地图(occupancy grid),Dijkstra / A* 直接在其上跑 | | 工业 SLAM | 平面地图 + 高度图,用于扫地机器人、AGV | | 视频游戏 AI | mini-map / 战术地图,玩家视角与决策视角分离 | @@ -52,7 +52,7 @@ $$J(\tau) = \int_0^T c\big(\tau(t),\, \mathcal{S}(t)\big)\, dt$$ ## 这条洞察对自动驾驶的意义 -1. **把感知模块的输出从"对象列表"升级为"BEV 特征图"**:让下游规划直接消费稠密表示,避免对象抽取造成的信息损失。这一升级是 [BEVFormer](paper_li2022bevformer.md) 与 [UniAD](paper_2212.10156_uniad.md) 共同的设计原则。 +1. **把感知模块的输出从"对象列表"升级为"BEV 特征图"**:让下游规划直接消费稠密表示,避免对象抽取造成的信息损失。这一升级是 [BEVFormer](../paper_li2022bevformer.md) 与 [UniAD](paper_2212.10156_uniad.md) 共同的设计原则。 2. **多模态融合应发生在 BEV 空间而非传感器原始空间**:[BEVFusion](paper_bevfusion.md) 把 LiDAR 编码也变成 BEV 特征,与相机 BEV 直接 concat,绕过传感器原始数据格式的差异。这是 [在共享中间表示空间中融合多种模态](move_fuse_modalities_in_shared_intermediate_space.md) 的直接动机。 3. **占用栅格作为 BEV 的 3D 推广**:Tesla 占用网络把 BEV 升到 3D 体素,代价是分辨率,但保留了"规划友好"的核心性质。 4. 
**规划损失可以反传到 BEV 主干**:由于 BEV 是可微表示,从 trajectory 损失开始的梯度可以一路反传到图像主干,使得 [可微端到端模仿](paradigm_differentiable_end_to_end_imitation.md) 在 BEV 上自然成立。 @@ -73,4 +73,4 @@ $$J(\tau) = \int_0^T c\big(\tau(t),\, \mathcal{S}(t)\big)\, dt$$ ## 推演链路 -[模块化感知到规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 提出对"统一中间表示"的工程需求 → [Lift-Splat-Shoot](paper_lift_splat_shoot.md) 给出第一份可微升维 → [BEVFormer](paper_li2022bevformer.md) 把升维写成 transformer cross-attention → [BEVFusion](paper_bevfusion.md) 把它推广到多模态 → [UniAD](paper_2212.10156_uniad.md) 把它推广到多任务 → Tesla 占用网络把它推广到 3D 体素 → [DrivingGaussian](paper_drivinggaussian.md) 把它推广到可重渲染的神经场。 +[模块化感知到规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 提出对"统一中间表示"的工程需求 → [Lift-Splat-Shoot](paper_lift_splat_shoot.md) 给出第一份可微升维 → [BEVFormer](../paper_li2022bevformer.md) 把升维写成 transformer cross-attention → [BEVFusion](paper_bevfusion.md) 把它推广到多模态 → [UniAD](paper_2212.10156_uniad.md) 把它推广到多任务 → Tesla 占用网络把它推广到 3D 体素 → [DrivingGaussian](paper_drivinggaussian.md) 把它推广到可重渲染的神经场。 diff --git a/docs/data/cards/extended/insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md b/docs/data/cards/extended/insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md index d7e64e0..2c2d1a5 100644 --- a/docs/data/cards/extended/insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md +++ b/docs/data/cards/extended/insight_closed_loop_evaluation_is_the_only_ground_truth_for_planners.md @@ -29,7 +29,7 @@ $$M_\text{cl}(\pi)\ =\ \mathbb E_{s_0\sim p_0}\!\Big[\sum_{t=0}^{T-1} c\big(s_t, ## 这条洞察对自动驾驶的意义 - **基准选型直接决定研究范式**:[`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 的全部技术栈是这条洞察的工程化。 -- **必须把仿真当作 *评测器* 而不仅是 *训练器***:[CARLA / nuScenes / NAVSIM / Bench2Drive](benchmarks_ad.md) 在每代都被重新审视。 +- **必须把仿真当作 *评测器* 而不仅是 *训练器***:[CARLA / nuScenes / NAVSIM / Bench2Drive](../benchmarks_ad.md) 在每代都被重新审视。 - 
**离线代理需要被持续审计**:[`move:design_closed_loop_metric_correlated_with_real_world_safety`](move_design_closed_loop_metric_correlated_with_real_world_safety.md) 不是一次性投入,是周期工作。 - **数据引擎闭环化**:把"模型出问题—回放—生成对抗场景—重新训练"做成 CI 流水线。 - **决策评估必须用 seed 多样性度量**:单条闭环 trace 信噪比过低,需统计置信。 diff --git a/docs/data/cards/extended/insight_differentiable_rendering_is_universal_inverse_solver.md b/docs/data/cards/extended/insight_differentiable_rendering_is_universal_inverse_solver.md index f0d3740..14274dd 100644 --- a/docs/data/cards/extended/insight_differentiable_rendering_is_universal_inverse_solver.md +++ b/docs/data/cards/extended/insight_differentiable_rendering_is_universal_inverse_solver.md @@ -50,4 +50,4 @@ $$\theta^* = \arg\min_\theta \sum_{(I, \pi) \in \mathcal{D}} \mathcal{L}\big(\ma ## 推演链路 -可微 SfM (Bundle Adjustment 的梯度版本) → [NeRF](paper_nerf.md) (把场景写成 MLP,体积渲染可微) → Instant-NGP / Plenoxels (加速 NeRF) → [3D Gaussian Splatting](paper_3dgs.md) (从隐式回到显式但保持可微) → [EmerNeRF](paper_emernerf.md) / [DrivingGaussian](paper_drivinggaussian.md) (推广到动态驾驶场) → 接入闭环评估、反事实生成、数据扩增等下游工程 → 跟 [GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md) 的生成式路径在 [Cosmos](paper_cosmos.md) 等基础模型尝试中合流。 +可微 SfM (Bundle Adjustment 的梯度版本) → [NeRF](paper_nerf.md) (把场景写成 MLP,体积渲染可微) → Instant-NGP / Plenoxels (加速 NeRF) → [3D Gaussian Splatting](paper_3dgs.md) (从隐式回到显式但保持可微) → [EmerNeRF](paper_emernerf.md) / [DrivingGaussian](paper_drivinggaussian.md) (推广到动态驾驶场) → 接入闭环评估、反事实生成、数据扩增等下游工程 → 跟 [GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md) 的生成式路径在 [Cosmos](paper_cosmos.md) 等基础模型尝试中合流。 diff --git a/docs/data/cards/extended/insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md b/docs/data/cards/extended/insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md index d888d24..b1b78e4 100644 --- a/docs/data/cards/extended/insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md +++ 
b/docs/data/cards/extended/insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md @@ -79,7 +79,7 @@ flowchart TD 调度器本身错判是最直接的失败模式。把"该慢"的场景(行人意图歧义、施工区临时绕行)误判为"该快",会错失关键审议机会,导致大模型本应纠正的盲区直接通过快回路输出动作;这是 [`problem:hallucinated_action_from_vision_language_model_in_safety_critical_loop`](problem_hallucinated_action_from_vision_language_model_in_safety_critical_loop.md) 的一个根因。 -两条回路若目标不一致也会破坏整体——当 $\pi_\text{slow}$ 优化"解释合理"而 $\pi_\text{fast}$ 优化"轨迹平滑",调度切换时控制信号会震荡,工程上表现为车辆在边界场景轻微抖动。最后,慢回路本身可能过慢:大模型推理超过 500 ms 时,系统延迟与可控性同时失稳,[`paper:gpt3`](paper_gpt3.md) 的 CoT 在 7B 上跑得通但在车载预算下未必。 +两条回路若目标不一致也会破坏整体——当 $\pi_\text{slow}$ 优化"解释合理"而 $\pi_\text{fast}$ 优化"轨迹平滑",调度切换时控制信号会震荡,工程上表现为车辆在边界场景轻微抖动。最后,慢回路本身可能过慢:大模型推理超过 500 ms 时,系统延迟与可控性同时失稳,[`paper:gpt3`](../paper_gpt3.md) 的 CoT 在 7B 上跑得通但在车载预算下未必。 ## 这条洞察可以孵化的下一步研究 @@ -89,4 +89,4 @@ flowchart TD ## 在图谱里的邻居 -它在算法上以 [`paper:silver2017_alphazero`](paper_silver2017_alphazero.md) 的快策略加 MCTS 为先驱——策略网络给出候选,搜索给出深度评估;以 [`paper:gpt3`](paper_gpt3.md) 的 CoT 为语言模型侧的最早可行性证据。落地范式集中在 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md);最相邻的洞察是 [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md),它从另一侧解释为什么"慢回路其实是用推理替代训练"。 +它在算法上以 [`paper:silver2017_alphazero`](paper_silver2017_alphazero.md) 的快策略加 MCTS 为先驱——策略网络给出候选,搜索给出深度评估;以 [`paper:gpt3`](../paper_gpt3.md) 的 CoT 为语言模型侧的最早可行性证据。落地范式集中在 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md);最相邻的洞察是 [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md),它从另一侧解释为什么"慢回路其实是用推理替代训练"。 diff --git a/docs/data/cards/extended/insight_dual_system_handles_latency_quality_tradeoff.md b/docs/data/cards/extended/insight_dual_system_handles_latency_quality_tradeoff.md index 3e14c48..935a090 100644 --- 
a/docs/data/cards/extended/insight_dual_system_handles_latency_quality_tradeoff.md +++ b/docs/data/cards/extended/insight_dual_system_handles_latency_quality_tradeoff.md @@ -43,4 +43,4 @@ $$\bar Q\ =\ (1-\Pr[g=1])\,Q_f\ +\ \Pr[g=1]\,Q_s$$ ## 推演链路 -Kahneman《Thinking, Fast and Slow》(认知科学源头)→ [AlphaGo / AlphaZero](paper_silver2017_alphazero.md)(快策略 + 慢搜索的工程化)→ [GPT-3](paper_gpt3.md) + CoT(语言模型版本)→ ReAct / Reflexion / [Toolformer](paper_toolformer.md)(带工具的慢回路)→ [DriveVLM / DriveVLM-Dual](paper_2402.12289_drivevlm.md)(驾驶域的快慢双系统)→ [CF-VLA](paper_2512.24426_cfvla.md)(慢回路的反事实重规划)→ OpenAI o1(把"思考时间"作为可调旋钮)。手把手实验见 [`../../../labs/lab09_drivevlm_dual_pipeline.ipynb`](../../../labs/lab09_drivevlm_dual_pipeline.ipynb)。 +Kahneman《Thinking, Fast and Slow》(认知科学源头)→ [AlphaGo / AlphaZero](paper_silver2017_alphazero.md)(快策略 + 慢搜索的工程化)→ [GPT-3](../paper_gpt3.md) + CoT(语言模型版本)→ ReAct / Reflexion / [Toolformer](paper_toolformer.md)(带工具的慢回路)→ [DriveVLM / DriveVLM-Dual](paper_2402.12289_drivevlm.md)(驾驶域的快慢双系统)→ [CF-VLA](paper_2512.24426_cfvla.md)(慢回路的反事实重规划)→ OpenAI o1(把"思考时间"作为可调旋钮)。手把手实验见 [`../../../labs/lab09_drivevlm_dual_pipeline.ipynb`](../../../labs/lab09_drivevlm_dual_pipeline.ipynb)。 diff --git a/docs/data/cards/extended/insight_end_to_end_differentiable_beats_handcraft_when_signal_strong.md b/docs/data/cards/extended/insight_end_to_end_differentiable_beats_handcraft_when_signal_strong.md index d09d60a..224c53b 100644 --- a/docs/data/cards/extended/insight_end_to_end_differentiable_beats_handcraft_when_signal_strong.md +++ b/docs/data/cards/extended/insight_end_to_end_differentiable_beats_handcraft_when_signal_strong.md @@ -34,4 +34,4 @@ ## 谱系 / Lineage in this atlas -[ResNet 残差](paper_he2015_resnet.md) → [Transformer](paper_vaswani2017.md) → [DETR](paper_carion2020.md) → [BEVFormer](paper_li2022bevformer.md) → [UniAD](paper_2212.10156_uniad.md) → [DriveVLM](paper_2402.12289_drivevlm.md) → [CF-VLA](paper_2512.24426_cfvla.md) +[ResNet 残差](paper_he2015_resnet.md) → 
[Transformer](../paper_vaswani2017.md) → [DETR](../paper_carion2020.md) → [BEVFormer](../paper_li2022bevformer.md) → [UniAD](paper_2212.10156_uniad.md) → [DriveVLM](paper_2402.12289_drivevlm.md) → [CF-VLA](paper_2512.24426_cfvla.md) diff --git a/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md b/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md index 9d118b1..9ed49a6 100644 --- a/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md +++ b/docs/data/cards/extended/insight_foundation_features_transfer_without_finetune.md @@ -14,11 +14,11 @@ $$\hat{y} = W_\mathcal{T}\, \phi_\theta(\mathbf{x}) + b_\mathcal{T}$$ | 领域 | 冻结特征 + 轻量适配的典型形态 | |---|---| -| ImageNet 分类 | [DINOv2](paper_dinov2.md) 冻结 + 线性头,top-1 接近全 finetune | +| ImageNet 分类 | [DINOv2](../paper_dinov2.md) 冻结 + 线性头,top-1 接近全 finetune | | 语义分割 | [DINOv3](paper_2508.10104_dinov3.md) 冻结 + 线性 head,Cityscapes mIoU 接近 SOTA | | 视觉问答 | [BLIP-2](paper_blip2.md) 冻结视觉基座与语言基座,只训中间 Q-Former | | 开放词汇检测 | [CLIP](paper_clip.md) 冻结视觉文本编码器,只学少量 anchor | -| 提示式分割 | [SAM](paper_sam.md) 训练时把 image encoder 与 prompt encoder 分离,推理时 encoder 输出可缓存复用 | +| 提示式分割 | [SAM](../paper_sam.md) 训练时把 image encoder 与 prompt encoder 分离,推理时 encoder 输出可缓存复用 | | 机器人操作 | RT-2 把视觉基座冻结,只 finetune 动作 head | | 蛋白结构 | ESM 把蛋白序列嵌入冻结,只在下游接 MLP 头 | @@ -48,4 +48,4 @@ $$\hat{y} = W_\mathcal{T}\, \phi_\theta(\mathbf{x}) + b_\mathcal{T}$$ ## 推演链路 -[BERT / GPT-3](paper_gpt3.md) 在语言上首次观察到"linear probing 即可接近 finetune" → [SimCLR / MoCo](paper_simclr_mocov3.md) 在视觉对比学习上复现 → [DINOv2](paper_dinov2.md) 把这一性质做到 SOTA → [DINOv3](paper_2508.10104_dinov3.md) 进一步扩展到 patch 级别 → [BLIP-2](paper_blip2.md) 把"冻结基座"做成多模态的标准实践 → [SAM](paper_sam.md) 把"image encoder 一次性算完,prompt 反复用"做成工程默认 → 投影到自动驾驶:冻结主干 + 多任务 head 成为感知栈的事实标准。 +[BERT / GPT-3](../paper_gpt3.md) 在语言上首次观察到"linear probing 即可接近 finetune" → [SimCLR / MoCo](paper_simclr_mocov3.md) 在视觉对比学习上复现 → [DINOv2](../paper_dinov2.md) 把这一性质做到 SOTA → 
[DINOv3](paper_2508.10104_dinov3.md) 进一步扩展到 patch 级别 → [BLIP-2](paper_blip2.md) 把"冻结基座"做成多模态的标准实践 → [SAM](../paper_sam.md) 把"image encoder 一次性算完,prompt 反复用"做成工程默认 → 投影到自动驾驶:冻结主干 + 多任务 head 成为感知栈的事实标准。 diff --git a/docs/data/cards/extended/insight_foundation_pretraining_decouples_data_from_task.md b/docs/data/cards/extended/insight_foundation_pretraining_decouples_data_from_task.md index 6fa0f2a..254c280 100644 --- a/docs/data/cards/extended/insight_foundation_pretraining_decouples_data_from_task.md +++ b/docs/data/cards/extended/insight_foundation_pretraining_decouples_data_from_task.md @@ -16,11 +16,11 @@ $$\theta = \arg\min_\theta \mathcal{L}_\text{pretrain}(\phi_\theta;\, \mathcal{D |---|---|---| | 语言理解 | masked LM (BERT) | GLUE 子任务标注量为预训练的万分之一 | | 语言生成 | next-token prediction (GPT) | few-shot prompting 几乎为零 | -| 视觉表征 | 自蒸馏 + 多视图 ([DINOv2](paper_dinov2.md), [DINOv3](paper_2508.10104_dinov3.md)) | linear probing 即接近 SOTA | +| 视觉表征 | 自蒸馏 + 多视图 ([DINOv2](../paper_dinov2.md), [DINOv3](paper_2508.10104_dinov3.md)) | linear probing 即接近 SOTA | | 视觉语言对齐 | 图文对比 ([CLIP](paper_clip.md)) | 零样本分类 | -| 图像分割 | 提示式 mask 预测 ([SAM](paper_sam.md)) | 一点击即出 mask | +| 图像分割 | 提示式 mask 预测 ([SAM](../paper_sam.md)) | 一点击即出 mask | | 机器人控制 | 行为克隆 + 大规模演示 | 一次性 finetune 适配新机器人 | -| 自动驾驶感知 | masked patch + 时序对比 | 1/10 标注量微调 [BEVFormer](paper_li2022bevformer.md) | +| 自动驾驶感知 | masked patch + 时序对比 | 1/10 标注量微调 [BEVFormer](../paper_li2022bevformer.md) | | 蛋白结构 | masked MSA + 几何先验 (ESM, AlphaFold) | 单条序列即出结构 | 每一栏的共同点都是:**任务无关阶段消耗 99% 的算力与数据,任务相关阶段消耗 99% 的研究关注**。 @@ -28,7 +28,7 @@ $$\theta = \arg\min_\theta \mathcal{L}_\text{pretrain}(\phi_\theta;\, \mathcal{D ## 这条洞察对自动驾驶的意义 1. **3D 标注成本不再是瓶颈**:百万小时驾驶视频做自监督主干,nuScenes 标注量级足以做下游检测的微调。直接缓解 [3D 标注的标签效率](problem_label_efficiency_for_3d_annotation.md)。 -2. **开放词汇感知变得可行**:[CLIP](paper_clip.md) + [SAM](paper_sam.md) 的组合让驾驶系统在不重训的情况下识别长尾物体类别,直接缓解 [开放世界长尾对象类别](problem_long_tail_object_categories_in_open_world.md)。 +2. 
**开放词汇感知变得可行**:[CLIP](paper_clip.md) + [SAM](../paper_sam.md) 的组合让驾驶系统在不重训的情况下识别长尾物体类别,直接缓解 [开放世界长尾对象类别](problem_long_tail_object_categories_in_open_world.md)。 3. **跨车队、跨地区迁移**:一个在全球数据上训好的视觉基座,部署到任何车型只需小数据微调,极大降低区域适配成本。 4. **多任务架构变得自然**:同一个 [DINOv3](paper_2508.10104_dinov3.md) 主干同时支撑 BEV 检测、占用预测、轨迹预测,所有任务都从同一份冻结特征出发。 5. **研究投入结构改变**:小型实验室不再需要预训练能力,只需聚焦于 task head 与 finetune 配方。 @@ -49,4 +49,4 @@ $$\theta = \arg\min_\theta \mathcal{L}_\text{pretrain}(\phi_\theta;\, \mathcal{D ## 推演链路 -[Attention Is All You Need](paper_vaswani2017.md) → [BERT / GPT](paper_gpt3.md) (首次在语言上把数据与任务解耦) → [CLIP](paper_clip.md) (视觉语言版本) → [DINOv2](paper_dinov2.md) → [DINOv3](paper_2508.10104_dinov3.md) (纯视觉版本) → [SAM](paper_sam.md) (分割版本) → 投影到自动驾驶 → 驾驶视频自监督主干 → 下游 BEV、占用、规划任务以 1/10 数据量达到 SOTA。 +[Attention Is All You Need](../paper_vaswani2017.md) → [BERT / GPT](../paper_gpt3.md) (首次在语言上把数据与任务解耦) → [CLIP](paper_clip.md) (视觉语言版本) → [DINOv2](../paper_dinov2.md) → [DINOv3](paper_2508.10104_dinov3.md) (纯视觉版本) → [SAM](../paper_sam.md) (分割版本) → 投影到自动驾驶 → 驾驶视频自监督主干 → 下游 BEV、占用、规划任务以 1/10 数据量达到 SOTA。 diff --git a/docs/data/cards/extended/insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md b/docs/data/cards/extended/insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md index 6e7011e..8aae450 100644 --- a/docs/data/cards/extended/insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md +++ b/docs/data/cards/extended/insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md @@ -44,4 +44,4 @@ $$\mathbb{E}[\text{regret}] \le O(T^2 \cdot \epsilon)$$ ## 推演链路 -[Pomerleau 1989 ALVINN](paper_alvinn.md)(早期 BC 失败案例)→ Bagnell 早期模仿学习理论 → [Ross & Bagnell 2010 efficient reductions](paper_ross_bagnell_2010.md) → [Ross 等 2011 DAgger](paper_ross2011_dagger.md) → [离线 RL](paradigm_offline_rl.md) 整条线(CQL/IQL)→ [Diffusion Policy](paper_diffusion_policy_chi2023.md) 用多模态损失缓解 → 自动驾驶里 
[TransFuser](paper_transfuser.md)、[InterFuser](paper_interfuser.md)、[UniAD](paper_2212.10156_uniad.md) 都各自给出工程化补救。 +[Pomerleau 1989 ALVINN](paper_alvinn.md)(早期 BC 失败案例)→ Bagnell 早期模仿学习理论 → [Ross & Bagnell 2010 efficient reductions](paper_ross_bagnell_2010.md) → [Ross 等 2011 DAgger](paper_ross2011_dagger.md) → [离线 RL](paradigm_offline_rl.md) 整条线(CQL/IQL)→ [Diffusion Policy](paper_diffusion_policy_chi2023.md) 用多模态损失缓解 → 自动驾驶里 [TransFuser](../paper_transfuser.md)、[InterFuser](paper_interfuser.md)、[UniAD](paper_2212.10156_uniad.md) 都各自给出工程化补救。 diff --git a/docs/data/cards/extended/insight_in_context_learning_emerges_at_scale.md b/docs/data/cards/extended/insight_in_context_learning_emerges_at_scale.md index 4009a6b..c88a4b7 100644 --- a/docs/data/cards/extended/insight_in_context_learning_emerges_at_scale.md +++ b/docs/data/cards/extended/insight_in_context_learning_emerges_at_scale.md @@ -26,7 +26,7 @@ $$p_\theta(y_q \mid c) \approx \int p(y_q \mid x_q, \mathcal{T})\, p(\mathcal{T} 最直接的工程价值是城市与工况之间的快速适配。把当地交规、左转规则、施工区典型处置写进 prompt,agent 无须重新训练就能切换决策习惯——[DiLu](paper_2309.16292_dilu.md) 与 [Agent-Driver](paper_2311.10813_agent_driver.md) 都把"知识库 + 检索 + few-shot prompt"作为底层架构,把传统需要数月微调的城市部署压缩到几天。等价地,教练员通过编写 few-shot 例子就能把驾驶经验沉淀进 agent,无须触碰参数。 -跨车型与跨硬件的复用也由此变得便宜。同一 [LLaVA](paper_llava.md) 或 [DriveVLM](paper_2402.12289_drivevlm.md) 主干,配上描述各家传感器布局与执行机构延迟的 prompt,就能服务不同硬件平台——参数不变、知识切换。这对车厂战略意味着 backbone 投资可以在多个车型上摊销。 +跨车型与跨硬件的复用也由此变得便宜。同一 [LLaVA](../paper_llava.md) 或 [DriveVLM](paper_2402.12289_drivevlm.md) 主干,配上描述各家传感器布局与执行机构延迟的 prompt,就能服务不同硬件平台——参数不变、知识切换。这对车厂战略意味着 backbone 投资可以在多个车型上摊销。 罕见场景的零样本应对则是涌现能力最戏剧化的一面:通过给出几个相似 corner case 的处理示例,agent 在 prompt 里类比组合就能在从未见过的组合场景里给出可解释决策。这正是 [`paradigm:knowledge_driven_reflective_agent`](paradigm_knowledge_driven_reflective_agent.md) 的核心论据——知识本身就是 in-context 例子的集合,而非需要梯度更新的参数。 @@ -44,4 +44,4 @@ $$p_\theta(y_q \mid c) \approx \int p(y_q \mid x_q, \mathcal{T})\, p(\mathcal{T} ## 推演链路 -[`paper:vaswani2017`](paper_vaswani2017.md) → 
`paper:gpt2`(首次观察零样本能力)→ [`paper:gpt3`](paper_gpt3.md)(涌现的实证)→ [Chinchilla scaling laws](paper_chinchilla.md)(解释何时阈值被跨越)→ [`paper:llava`](paper_llava.md)(多模态 in-context)→ [`paper:2402.12289_drivevlm`](paper_2402.12289_drivevlm.md)(驾驶领域的 in-context 落地)→ [`paper:2309.16292_dilu`](paper_2309.16292_dilu.md) 与 [`paper:2311.10813_agent_driver`](paper_2311.10813_agent_driver.md)(驾驶 agent 的 prompt 工程化)。 +[`paper:vaswani2017`](../paper_vaswani2017.md) → `paper:gpt2`(首次观察零样本能力)→ [`paper:gpt3`](../paper_gpt3.md)(涌现的实证)→ [Chinchilla scaling laws](paper_chinchilla.md)(解释何时阈值被跨越)→ [`paper:llava`](../paper_llava.md)(多模态 in-context)→ [`paper:2402.12289_drivevlm`](paper_2402.12289_drivevlm.md)(驾驶领域的 in-context 落地)→ [`paper:2309.16292_dilu`](paper_2309.16292_dilu.md) 与 [`paper:2311.10813_agent_driver`](paper_2311.10813_agent_driver.md)(驾驶 agent 的 prompt 工程化)。 diff --git a/docs/data/cards/extended/insight_long_tail_solved_by_synthesis_not_data_alone.md b/docs/data/cards/extended/insight_long_tail_solved_by_synthesis_not_data_alone.md index d18e336..1decc9a 100644 --- a/docs/data/cards/extended/insight_long_tail_solved_by_synthesis_not_data_alone.md +++ b/docs/data/cards/extended/insight_long_tail_solved_by_synthesis_not_data_alone.md @@ -28,13 +28,13 @@ $$\text{合成总成本}(k) = c_\text{model} + (c_\text{generate} + c_\text{labe 它给出了一个具体的工程切换点:当目标事件概率落到 $10^{-6}$ 量级以下时,被动收集进入边际收益逼近零的区段——一辆车一年开 $10^5$ km,一万辆车队全年才能积累几次该事件,统计不显著。这是 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md) 接手的切换信号:从这里起,合成器(视频扩散世界模型、NeRF、高斯泼溅)成为长尾覆盖的主要工具,被动里程则降为校准而非来源。 -工程上的连锁效应是数据基础设施重写。[Tesla AI Day](paper_tesla_ai_day.md) 的影子模式与 corner case mining、[DriveDreamer](paper_drivedreamer.md) 的视频扩散世界模型、[Cosmos](paper_cosmos.md) 的合成基础设施都把"长尾合成"做成可调配的训练数据源。这进一步指向一种新的数据集发布形态——发布的不再是几十万张图,而是一组可参数化的场景生成器附上覆盖度证书与保真度评估。 +工程上的连锁效应是数据基础设施重写。[Tesla AI Day](paper_tesla_ai_day.md) 的影子模式与 corner case mining、[DriveDreamer](../paper_drivedreamer.md) 的视频扩散世界模型、[Cosmos](paper_cosmos.md) 
的合成基础设施都把"长尾合成"做成可调配的训练数据源。这进一步指向一种新的数据集发布形态——发布的不再是几十万张图,而是一组可参数化的场景生成器附上覆盖度证书与保真度评估。 反事实编辑也变成可执行的安全声明。"如果这一帧把行人挪到马路中央,模型怎么决策"由 [CF-VLA](paper_2512.24426_cfvla.md) 形式化为可比对的闭环评估,回归测试因此可以由合成承担——每个修过的 bug 转为一条永久合成轨迹,进入 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 的 CI 流水线,避免回归同一类失效。 ## 什么时候被误用 -最危险的误用是把合成当成真实分布的无偏估计。当合成器的偏差 $D(p^\star \| \hat p)$ 比缺失稀有事件的统计强度还大时,整套合成反而把策略推向错误模式——一份完全在 [DriveDreamer](paper_drivedreamer.md) 生成视频上训练的规划器,可能在真实场景里对生成器从未学到的高频小细节(夜间反光、雨滴粒子)系统性失效。 +最危险的误用是把合成当成真实分布的无偏估计。当合成器的偏差 $D(p^\star \| \hat p)$ 比缺失稀有事件的统计强度还大时,整套合成反而把策略推向错误模式——一份完全在 [DriveDreamer](../paper_drivedreamer.md) 生成视频上训练的规划器,可能在真实场景里对生成器从未学到的高频小细节(夜间反光、雨滴粒子)系统性失效。 合成器与下游模型共训会触发第二种病态。合成器学到的"哪些场景有用"会被下游模型的损失梯度反馈塑造,最终 over-fit 在自家生成器上——下游模型看似在合成 benchmark 上进步,离开生成器分布后性能崩坏。工程上的对策是评估必须分两层:合成上的相对提升只能预筛,真实回归仍要按 [`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md) 同等怀疑。 @@ -46,4 +46,4 @@ $$\text{合成总成本}(k) = c_\text{model} + (c_\text{generate} + c_\text{labe ## 推演链路 -域随机化 (Tobin 等)(机器人长尾的最早合成成功)→ DreamFusion / Magic3D(文本到 3D 用扩散合成稀有物体)→ [Tesla AI Day](paper_tesla_ai_day.md)(工业级 corner case mining + 影子模式)→ [SHIFT](paper_shift_dataset.md) / [V2X-Sim](paper_v2x_sim.md)(合成数据集形式化进入驾驶)→ [DriveDreamer](paper_drivedreamer.md)(视频扩散世界模型作为通用合成器)→ [CF-VLA](paper_2512.24426_cfvla.md)(反事实生成显式接入 VLA 闭环)→ [Cosmos](paper_cosmos.md)(把"合成长尾"做成基础设施)。 +域随机化 (Tobin 等)(机器人长尾的最早合成成功)→ DreamFusion / Magic3D(文本到 3D 用扩散合成稀有物体)→ [Tesla AI Day](paper_tesla_ai_day.md)(工业级 corner case mining + 影子模式)→ [SHIFT](paper_shift_dataset.md) / [V2X-Sim](paper_v2x_sim.md)(合成数据集形式化进入驾驶)→ [DriveDreamer](../paper_drivedreamer.md)(视频扩散世界模型作为通用合成器)→ [CF-VLA](paper_2512.24426_cfvla.md)(反事实生成显式接入 VLA 闭环)→ [Cosmos](paper_cosmos.md)(把"合成长尾"做成基础设施)。 diff --git a/docs/data/cards/extended/insight_masked_prediction_yields_self_supervised_signal.md 
b/docs/data/cards/extended/insight_masked_prediction_yields_self_supervised_signal.md index 8fe2503..146f59a 100644 --- a/docs/data/cards/extended/insight_masked_prediction_yields_self_supervised_signal.md +++ b/docs/data/cards/extended/insight_masked_prediction_yields_self_supervised_signal.md @@ -40,6 +40,6 @@ mask ratio 选低了是另一类典型失败。当遮蔽比例只占 10%–20% ## 推演链路 -[BERT / GPT-3](paper_gpt3.md) → [MAE](paper_mae.md) / [BEiT](paper_beit.md)(视觉 patch mask 成立)→ [DINOv2](paper_dinov2.md)(对比 + mask 融合)→ [DINOv3](paper_2508.10104_dinov3.md)(patch-level MAE 加进 DINOv2 打破瓶颈)→ 驾驶专用变体:BEV 占据 mask、video mask、trajectory mask。 +[BERT / GPT-3](../paper_gpt3.md) → [MAE](paper_mae.md) / [BEiT](paper_beit.md)(视觉 patch mask 成立)→ [DINOv2](../paper_dinov2.md)(对比 + mask 融合)→ [DINOv3](paper_2508.10104_dinov3.md)(patch-level MAE 加进 DINOv2 打破瓶颈)→ 驾驶专用变体:BEV 占据 mask、video mask、trajectory mask。 [`move:use_geometry_as_self_supervision`](move_use_geometry_as_self_supervision.md) 是这条洞察的方法学兄弟:它用几何一致性而非 mask,但内核相同——免费的监督信号。 diff --git a/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md b/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md index 83adb4b..a9a694d 100644 --- a/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md +++ b/docs/data/cards/extended/insight_multi_modal_behavior_is_intrinsic_to_traffic_scenes.md @@ -14,7 +14,7 @@ $$\hat y=\mathbb{E}[y\mid s]=\tfrac{1}{2}(y_A+y_B)$$ 当两个模态分得够开($\|y_A-y_B\|\gg\sigma$)时,均值点 $\hat y$ 落在两峰之间的**低密度谷**里:$p(\hat y\mid s)\approx 0$。物理上这条"平均轨迹"可能是"以一半速度开进路口正中"——既不是通过也不是等待,是任何一个真实驾驶员都不会选、且常常违规或致碰的动作。 -形式化代价:单峰高斯对双峰目标的最优拟合,其 KL 散度有不可消除的下界,约为 $\mathrm{KL}\big(p\,\|\,q^\star\big)\gtrsim \tfrac{1}{8}\|y_A-y_B\|^2/\sigma^2$(随模态间距平方增长)——模态越分离,单峰损失越大,且这部分损失**靠加数据、加参数都压不下去**,因为它源于假设类与真实分布的结构错配,而非估计误差。补救只有两条:要么让输出分布本身多模态(GMM / 扩散 / 自回归 token 
采样),要么在训练中用熵/多样性奖励阻止模态塌缩([`move:add_entropy_bonus_to_encourage_exploration`](move_add_entropy_bonus_to_encourage_exploration.md))。 +形式化代价:单峰高斯对双峰目标的最优拟合,其 KL 散度有一个不可消除的正下界,在标度上随模态间距平方增长 $\mathrm{KL}\big(p\,\|\,q^\star\big)\sim \|y_A-y_B\|^2/\sigma^2$(这里给出的是量级标度关系,而非精确常数)——模态越分离,单峰损失越大,且这部分损失**靠加数据、加参数都压不下去**,因为它源于假设类与真实分布的结构错配,而非估计误差。补救只有两条:要么让输出分布本身多模态(GMM / 扩散 / 自回归 token 采样),要么在训练中用熵/多样性奖励阻止模态塌缩([`move:add_entropy_bonus_to_encourage_exploration`](move_add_entropy_bonus_to_encourage_exploration.md))。 ## 在不同领域的具现 diff --git a/docs/data/cards/extended/insight_multi_view_geometry_as_free_supervision.md b/docs/data/cards/extended/insight_multi_view_geometry_as_free_supervision.md index 9ba9628..510c370 100644 --- a/docs/data/cards/extended/insight_multi_view_geometry_as_free_supervision.md +++ b/docs/data/cards/extended/insight_multi_view_geometry_as_free_supervision.md @@ -17,7 +17,7 @@ $\Pi_{j\to i}$ 是基于深度的 warp 投影,$D_\theta$ 可以是深度估计 | 单目深度估计 | 立体对或时序连续帧 | photometric reprojection loss | | 多视图 stereo | 多机位静态拍摄 | feature volume + plane-sweep | | 辐射场重建 | 不同视角图像 | volumetric rendering 一致性 ([NeRF](paper_nerf.md)) | -| 自监督特征 | 同图的随机视图增强 | invariance to view transform (DINO / [DINOv2](paper_dinov2.md)) | +| 自监督特征 | 同图的随机视图增强 | invariance to view transform (DINO / [DINOv2](../paper_dinov2.md)) | | 驾驶动静解耦场 | 自车多时刻 | 静态背景 + 动态 flow 残差 ([EmerNeRF](paper_emernerf.md)) | | 通用单目深度 | 千万级互联网图 + 跨数据集对齐 | 相对深度排序 ([Depth Anything](paper_depth_anything.md)) | | 物体姿态估计 | 多视图渲染 | rendering-comparison loss | @@ -44,4 +44,4 @@ $\Pi_{j\to i}$ 是基于深度的 warp 投影,$D_\theta$ 可以是深度估计 ## 推演链路 -Multi-view stereo 经典工作(Furukawa & Ponce)→ Monodepth / Monodepth2(单目自监督深度)→ [NeRF](paper_nerf.md)(体积渲染将几何一致性几何化)→ [EmerNeRF](paper_emernerf.md)(驾驶域动静解耦)→ [Depth Anything](paper_depth_anything.md)(用大规模无标签数据训通用深度)→ [DINOv2](paper_dinov2.md) / [DINOv3](paper_2508.10104_dinov3.md)(把多视图不变性推到 patch-level 表征)→ 占用预测中的几何蒸馏 ([SurroundOcc](paper_surroundocc.md))。同步实验 
[`../../../labs/lab05_dinov3_features_minidata.ipynb`](../../../labs/lab05_dinov3_features_minidata.ipynb)。 +Multi-view stereo 经典工作(Furukawa & Ponce)→ Monodepth / Monodepth2(单目自监督深度)→ [NeRF](paper_nerf.md)(体积渲染将几何一致性几何化)→ [EmerNeRF](paper_emernerf.md)(驾驶域动静解耦)→ [Depth Anything](paper_depth_anything.md)(用大规模无标签数据训通用深度)→ [DINOv2](../paper_dinov2.md) / [DINOv3](paper_2508.10104_dinov3.md)(把多视图不变性推到 patch-level 表征)→ 占用预测中的几何蒸馏 ([SurroundOcc](paper_surroundocc.md))。同步实验 [`../../../labs/lab05_dinov3_features_minidata.ipynb`](../../../labs/lab05_dinov3_features_minidata.ipynb)。 diff --git a/docs/data/cards/extended/insight_occupancy_unifies_static_and_dynamic_scene.md b/docs/data/cards/extended/insight_occupancy_unifies_static_and_dynamic_scene.md index 0703535..5db326e 100644 --- a/docs/data/cards/extended/insight_occupancy_unifies_static_and_dynamic_scene.md +++ b/docs/data/cards/extended/insight_occupancy_unifies_static_and_dynamic_scene.md @@ -28,7 +28,7 @@ $$\partial_t o\ +\ \nabla\!\cdot(o\,v)\ =\ 0$$ 体素表示天然支持遮挡推理——每个体素显式带 free / occupied / unknown 三态,对应观测的有 / 无 / 未观测,[`problem:occlusion_reasoning_without_dense_lidar`](problem_occlusion_reasoning_without_dense_lidar.md) 在这种结构里有自然落点。[`problem:annotation_inconsistency_across_datasets`](problem_annotation_inconsistency_across_datasets.md) 也被压平:跨数据集的类别命名歧义在占用世界里被规约成"体素分辨率与坐标系是否对齐",工程治理更直接。 -规划友好性是它在端到端栈里被青睐的最终原因。占用 + 流速可直接喂给 cost map 或 MPC,比"先画框、再跟踪、再回归未来轨迹"的传递链短得多,[UniAD](paper_2212.10156_uniad.md) 因此把占用 head 加入其感知层。在生成侧,[`paradigm:world_model_paradigm`](paradigm_world_model_paradigm.md) 把占用 token 化作为视频世界模型的中间层,让 [DriveDreamer](paper_drivedreamer.md) 这种生成器在 3D 空间一致性上有显式抓手。 +规划友好性是它在端到端栈里被青睐的最终原因。占用 + 流速可直接喂给 cost map 或 MPC,比"先画框、再跟踪、再回归未来轨迹"的传递链短得多,[UniAD](paper_2212.10156_uniad.md) 因此把占用 head 加入其感知层。在生成侧,[`paradigm:world_model_paradigm`](paradigm_world_model_paradigm.md) 把占用 token 化作为视频世界模型的中间层,让 [DriveDreamer](../paper_drivedreamer.md) 这种生成器在 3D 空间一致性上有显式抓手。 ## 什么时候被误用 @@ -44,4 +44,4 @@ $$\partial_t o\ +\ 
\nabla\!\cdot(o\,v)\ =\ 0$$ ## 推演链路 -OctoMap / TSDF(机器人占用地图传统)→ [Tesla 占用网络](paper_occupancy_networks_tesla.md)(首次工业级使用占用栅格作为感知-规划接口)→ [SurroundOcc](paper_surroundocc.md)(多相机 3D 占用预测)→ [OpenOccupancy / UniOcc](paper_openocc_unic.md)(占用 benchmark 标准化)→ [UniAD](paper_2212.10156_uniad.md) 在感知层引入占用 head → [世界模型 + 占用](paper_drivedreamer.md) 把占用作为生成对象。 +OctoMap / TSDF(机器人占用地图传统)→ [Tesla 占用网络](paper_occupancy_networks_tesla.md)(首次工业级使用占用栅格作为感知-规划接口)→ [SurroundOcc](paper_surroundocc.md)(多相机 3D 占用预测)→ [OpenOccupancy / UniOcc](paper_openocc_unic.md)(占用 benchmark 标准化)→ [UniAD](paper_2212.10156_uniad.md) 在感知层引入占用 head → [世界模型 + 占用](../paper_drivedreamer.md) 把占用作为生成对象。 diff --git a/docs/data/cards/extended/insight_residual_learning_unlocks_arbitrary_depth.md b/docs/data/cards/extended/insight_residual_learning_unlocks_arbitrary_depth.md index 92e8bd4..71368c7 100644 --- a/docs/data/cards/extended/insight_residual_learning_unlocks_arbitrary_depth.md +++ b/docs/data/cards/extended/insight_residual_learning_unlocks_arbitrary_depth.md @@ -19,7 +19,7 @@ $$\min_\theta\ \ell\big(x + F_\theta(x), y\big)$$ | 领域 | 基线 (identity) | 残差对象 | |---|---|---| | 视觉识别 | $x$ 自身 | [ResNet](paper_he2015_resnet.md) 的卷积残差块 | -| Transformer | sub-layer 输入 | self-attention / MLP 的残差路径 ([Vaswani 2017](paper_vaswani2017.md)) | +| Transformer | sub-layer 输入 | self-attention / MLP 的残差路径 ([Vaswani 2017](../paper_vaswani2017.md)) | | 扩散模型 | 加噪样本 | 预测的噪声残差 | | 流模型 | 标准基分布 | 可逆变换的残差形式 | | 强化学习控制 | 规则控制器输出 | residual policy 叠加 | @@ -32,7 +32,7 @@ $$\min_\theta\ \ell\big(x + F_\theta(x), y\big)$$ 可证安全因此更容易达到——规则部分可以通过形式化方法(CBF、MPC + 约束)验证,残差部分被约束在小幅值范围内,整体安全核心可被审计。这与 [`paradigm:safety_by_constraint_layered_architecture`](paradigm_safety_by_constraint_layered_architecture.md) 的分层思想天然契合:规则是底层、神经是顶层,两者通过残差关系组合。 -训练稳定性也随之提升。在驾驶轨迹空间里,残差形式让目标分布与基线分布之间的距离比直接拟合显著缩小,行为克隆的损失景观更平坦、收敛更快。它还支持渐进部署——发布初期把残差幅值截断到 0(等价于纯规则),随后逐步放宽阈值,实车迭代风险被分级。残差不仅是工程小技巧,它也是 [Transformer](paper_vaswani2017.md) 与 [DDPM](paper_ddpm.md) 
等下游能力的隐式前提:现代驾驶模型几乎都建在残差结构之上。 +训练稳定性也随之提升。在驾驶轨迹空间里,残差形式让目标分布与基线分布之间的距离比直接拟合显著缩小,行为克隆的损失景观更平坦、收敛更快。它还支持渐进部署——发布初期把残差幅值截断到 0(等价于纯规则),随后逐步放宽阈值,实车迭代风险被分级。残差不仅是工程小技巧,它也是 [Transformer](../paper_vaswani2017.md) 与 [DDPM](paper_ddpm.md) 等下游能力的隐式前提:现代驾驶模型几乎都建在残差结构之上。 ## 什么时候被误用 @@ -48,4 +48,4 @@ $$\min_\theta\ \ell\big(x + F_\theta(x), y\big)$$ ## 推演链路 -[He 2015 ResNet](paper_he2015_resnet.md)(视觉百层网络)→ Highway Networks(残差思想的前身)→ [Vaswani 2017 Transformer](paper_vaswani2017.md)(在序列模型里复用残差子层)→ [DDPM](paper_ddpm.md)(预测噪声残差,扩散模型基石)→ [LoRA](paper_lora.md) / Adapter(低秩残差用于微调)→ Residual Policy Learning(机器人控制)→ [Roach](paper_roach.md) / 工业规划栈中的 residual MPC(规则 + 学习增量)。 +[He 2015 ResNet](paper_he2015_resnet.md)(视觉百层网络)→ Highway Networks(残差思想的前身)→ [Vaswani 2017 Transformer](../paper_vaswani2017.md)(在序列模型里复用残差子层)→ [DDPM](paper_ddpm.md)(预测噪声残差,扩散模型基石)→ [LoRA](paper_lora.md) / Adapter(低秩残差用于微调)→ Residual Policy Learning(机器人控制)→ [Roach](paper_roach.md) / 工业规划栈中的 residual MPC(规则 + 学习增量)。 diff --git a/docs/data/cards/extended/insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md b/docs/data/cards/extended/insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md index 977e084..d1637ea 100644 --- a/docs/data/cards/extended/insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md +++ b/docs/data/cards/extended/insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md @@ -49,9 +49,9 @@ graph LR ## 这条洞察对自动驾驶的意义 -它给出"VLM 直接做规划"路线的时机判断。当前 7B 级开源 VLM([LLaVA](paper_llava.md) 量级)在轨迹规划上仍在挣扎——缺少 in-context learning 与 CoT 的稳定支持;推测 70B+ 级在 corner case 推理上将出现质变。这意味着即使现在投入这条路线的投资回报曲线呈低值,也有理由把数据基础设施、闭环评估管线、训练集群按 70B 规模提前布好。 +它给出"VLM 直接做规划"路线的时机判断。当前 7B 级开源 VLM([LLaVA](../paper_llava.md) 量级)在轨迹规划上仍在挣扎——缺少 in-context learning 与 CoT 的稳定支持;推测 70B+ 级在 corner case 推理上将出现质变。这意味着即使现在投入这条路线的投资回报曲线呈低值,也有理由把数据基础设施、闭环评估管线、训练集群按 70B 规模提前布好。 -闭环 self-play 是同一阈值思维的另一面。当模型规模足够大、[GAIA-1](paper_gaia1.md) / [Cosmos](paper_cosmos.md) 级别的世界模型足够好时,闭环 RL 
可能从"工程难度极高"突变为"几乎免费"——大模型的策略改写能力会让"在想象空间训练"变成主流路径,这是 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md) 的潜在落地条件。 +闭环 self-play 是同一阈值思维的另一面。当模型规模足够大、[GAIA-1](../paper_gaia1.md) / [Cosmos](paper_cosmos.md) 级别的世界模型足够好时,闭环 RL 可能从"工程难度极高"突变为"几乎免费"——大模型的策略改写能力会让"在想象空间训练"变成主流路径,这是 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md) 的潜在落地条件。 多车队 / 多城市数据规模回报也呈阶跃。在某个数据量阈值之后,跨城市泛化可能突然变得稳定——这是 [Chinchilla](paper_chinchilla.md) 标定的最优配比在驾驶域的具体表现。两条推论由此得到:第一,不要用小模型实验的负结果否定大模型可能性;第二,应观察 scaling 曲线的二阶导数本身,损失曲线在加大模型时出现"变曲"往往是涌现前兆。 @@ -69,4 +69,4 @@ graph LR ## 推演链路 -[`paper:vaswani2017`](paper_vaswani2017.md) → [`paper:gpt3`](paper_gpt3.md)(第一份显式 emergent capability 的工作)→ [Chinchilla scaling laws](paper_chinchilla.md) → [`paper:llava`](paper_llava.md) → [`paper:2402.12289`](paper_2402.12289_drivevlm.md) → [`paper:rt2`](paper_rt2.md) → 推演给驾驶 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md)。 +[`paper:vaswani2017`](../paper_vaswani2017.md) → [`paper:gpt3`](../paper_gpt3.md)(第一份显式 emergent capability 的工作)→ [Chinchilla scaling laws](paper_chinchilla.md) → [`paper:llava`](../paper_llava.md) → [`paper:2402.12289`](paper_2402.12289_drivevlm.md) → [`paper:rt2`](paper_rt2.md) → 推演给驾驶 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md)。 diff --git a/docs/data/cards/extended/insight_scaling_laws_predict_capability_emergence.md b/docs/data/cards/extended/insight_scaling_laws_predict_capability_emergence.md index 679f53e..731952d 100644 --- a/docs/data/cards/extended/insight_scaling_laws_predict_capability_emergence.md +++ b/docs/data/cards/extended/insight_scaling_laws_predict_capability_emergence.md @@ -18,10 +18,10 @@ $$L(N, D) = \left(\frac{N_c}{N}\right)^{\alpha_N} + \left(\frac{D_c}{D}\right)^{ | 领域 | 拟合曲线的关键变量 | 在多大规模上验证 | |---|---|---| -| 语言 
([GPT-3](paper_gpt3.md), [Chinchilla](paper_chinchilla.md)) | 参数 + token 数 | 175B / 70B | -| 视觉 ([DINOv2](paper_dinov2.md), [DINOv3](paper_2508.10104_dinov3.md)) | 参数 + 视频 / 图像帧数 | 1B+ | -| 多模态 ([Flamingo](paper_flamingo.md), [LLaVA](paper_llava.md)) | 参数 + 图文对数 | 7B–80B | -| 视频生成 ([Sora](paper_sora.md), [GAIA-1](paper_gaia1.md)) | 参数 + 视频小时数 | 数 B | +| 语言 ([GPT-3](../paper_gpt3.md), [Chinchilla](paper_chinchilla.md)) | 参数 + token 数 | 175B / 70B | +| 视觉 ([DINOv2](../paper_dinov2.md), [DINOv3](paper_2508.10104_dinov3.md)) | 参数 + 视频 / 图像帧数 | 1B+ | +| 多模态 ([Flamingo](paper_flamingo.md), [LLaVA](../paper_llava.md)) | 参数 + 图文对数 | 7B–80B | +| 视频生成 ([Sora](paper_sora.md), [GAIA-1](../paper_gaia1.md)) | 参数 + 视频小时数 | 数 B | | 驾驶 VLA ([EMMA](paper_emma.md)) | 参数 + 驾驶视频小时数 | 10B+ | | 机器人 VLA ([RT-2](paper_rt2.md), [OpenVLA](paper_openvla.md)) | 参数 + 跨本体演示 | 7B | @@ -49,4 +49,4 @@ Chinchilla 给出的数据—参数最优配比让研究者不再盲目堆参数 ## 推演链路 -[`paper:vaswani2017`](paper_vaswani2017.md)(统一架构的可能性)→ Kaplan 2020(首次拟合 LM 的 scaling laws)→ [`paper:gpt3`](paper_gpt3.md)(涌现的实证)→ [Chinchilla](paper_chinchilla.md)(参数 / 数据最优配比)→ [`paper:dinov2`](paper_dinov2.md)(视觉侧验证)→ [`paper:llava`](paper_llava.md) 与 [`paper:flamingo`](paper_flamingo.md)(多模态侧验证)→ [EMMA](paper_emma.md) 与 [RT-2](paper_rt2.md)(驾驶 / 机器人 VLA 的规模化)→ [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的工程信仰固化。 +[`paper:vaswani2017`](../paper_vaswani2017.md)(统一架构的可能性)→ Kaplan 2020(首次拟合 LM 的 scaling laws)→ [`paper:gpt3`](../paper_gpt3.md)(涌现的实证)→ [Chinchilla](paper_chinchilla.md)(参数 / 数据最优配比)→ [`paper:dinov2`](../paper_dinov2.md)(视觉侧验证)→ [`paper:llava`](../paper_llava.md) 与 [`paper:flamingo`](paper_flamingo.md)(多模态侧验证)→ [EMMA](paper_emma.md) 与 [RT-2](paper_rt2.md)(驾驶 / 机器人 VLA 的规模化)→ [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的工程信仰固化。 diff --git a/docs/data/cards/extended/insight_set_prediction_eliminates_postprocessing_heuristics.md 
b/docs/data/cards/extended/insight_set_prediction_eliminates_postprocessing_heuristics.md index 3eb7f21..c41051e 100644 --- a/docs/data/cards/extended/insight_set_prediction_eliminates_postprocessing_heuristics.md +++ b/docs/data/cards/extended/insight_set_prediction_eliminates_postprocessing_heuristics.md @@ -1,6 +1,6 @@ # 跨学科洞察 · 集合预测消除后处理启发式 -> 把"多个对象的输出"建模为集合并用匈牙利匹配计算损失,可以一次性消除非极大值抑制、anchor 设计、阈值调节等大量手工后处理。这条洞察在 [DETR](paper_carion2020.md) 中第一次系统化,随后在 DETR3D、Sparse R-CNN、UniAD、PlanT 中反复出现,几乎重塑了整个感知与规划的输出接口设计。 +> 把"多个对象的输出"建模为集合并用匈牙利匹配计算损失,可以一次性消除非极大值抑制、anchor 设计、阈值调节等大量手工后处理。这条洞察在 [DETR](../paper_carion2020.md) 中第一次系统化,随后在 DETR3D、Sparse R-CNN、UniAD、PlanT 中反复出现,几乎重塑了整个感知与规划的输出接口设计。 下图把同一"K 个 query + 匈牙利匹配"接口在多个领域的具现并排画出来: @@ -41,11 +41,11 @@ $$\hat{\sigma} = \arg\min_{\sigma \in \mathfrak{S}_K} \sum_{j=1}^M \mathcal{L}_\ | 领域 | 集合预测的具体形态 | |---|---| -| 2D 检测 | [DETR](paper_carion2020.md) 用 100 个 query 取代 anchor + NMS | +| 2D 检测 | [DETR](../paper_carion2020.md) 用 100 个 query 取代 anchor + NMS | | 3D 检测 | [DETR3D](paper_detr3d.md) 把 query 抬到 3D 空间,直接出 3D 框 | | 实例分割 | Mask2Former 用 query 同时出 mask + 类别 | | 多目标跟踪 | MOTR 把 track query 跨帧传递,跟踪与检测合二为一 | -| BEV 感知 | [BEVFormer](paper_li2022bevformer.md) 把 BEV 格点作为稠密 query | +| BEV 感知 | [BEVFormer](../paper_li2022bevformer.md) 把 BEV 格点作为稠密 query | | 多任务驾驶 | [UniAD](paper_2212.10156_uniad.md) 让跟踪、运动、规划共享 query | | 规划 | [PlanT](paper_2210.14222_plant.md) 把场景对象作为 token,规划作为 query | | 蛋白质结构 | AlphaFold 把残基对作为 query,接触图作为集合输出 | @@ -55,7 +55,7 @@ $$\hat{\sigma} = \arg\min_{\sigma \in \mathfrak{S}_K} \sum_{j=1}^M \mathcal{L}_\ ## 这条洞察对自动驾驶的意义 1. **彻底消除 NMS 与 anchor**:NMS 在密集场景下会丢遗近距离的并排车辆,anchor 设计与训练数据耦合导致部署时的领域迁移失败。集合预测把这两类启发式从架构里拔除。 -2. **跟踪、检测、预测共享接口**:每个对象就是一个 query,跟踪即让 query 跨帧传递,预测即在 query 内部展开未来轨迹。这是 [UniAD](paper_2212.10156_uniad.md) 与 [VADv2](paper_vadv2.md) 多任务架构的接口红利。 +2. 
**跟踪、检测、预测共享接口**:每个对象就是一个 query,跟踪即让 query 跨帧传递,预测即在 query 内部展开未来轨迹。这是 [UniAD](paper_2212.10156_uniad.md) 与 [VADv2](../paper_vadv2.md) 多任务架构的接口红利。 3. **规划可以写成 query 写日程**:把每条候选轨迹看作一个 query,匈牙利匹配让它跟真值轨迹对齐。这是 [PlanT](paper_2210.14222_plant.md) 与 VAD 系列的工程动机。 4. **可解释性出口免费**:每个 query 的注意力权重直接告诉你"这个对象是从哪些像素 / 哪些格点 / 哪些其它 query 推出来的"。 5. **跨模态融合自然化**:让相机 query 与 LiDAR query 共享同一组 attention,集合预测的匹配损失同时约束两种模态指向同一对象。 @@ -75,4 +75,4 @@ $$\hat{\sigma} = \arg\min_{\sigma \in \mathfrak{S}_K} \sum_{j=1}^M \mathcal{L}_\ ## 推演链路 -[Attention Is All You Need](paper_vaswani2017.md) → [ViT](paper_vit.md) → [DETR](paper_carion2020.md) (集合预测首次系统化) → [DETR3D](paper_detr3d.md) (3D 推广) → [BEVFormer](paper_li2022bevformer.md) (BEV 稠密 query) → [UniAD](paper_2212.10156_uniad.md) (多任务共享 query) → [PlanT](paper_2210.14222_plant.md) / [VADv2](paper_vadv2.md) (规划也写成 query)。 +[Attention Is All You Need](../paper_vaswani2017.md) → [ViT](../paper_vit.md) → [DETR](../paper_carion2020.md) (集合预测首次系统化) → [DETR3D](paper_detr3d.md) (3D 推广) → [BEVFormer](../paper_li2022bevformer.md) (BEV 稠密 query) → [UniAD](paper_2212.10156_uniad.md) (多任务共享 query) → [PlanT](paper_2210.14222_plant.md) / [VADv2](../paper_vadv2.md) (规划也写成 query)。 diff --git a/docs/data/cards/extended/insight_simulator_realism_is_lower_bound_on_training_value.md b/docs/data/cards/extended/insight_simulator_realism_is_lower_bound_on_training_value.md index d19c2f1..01336c8 100644 --- a/docs/data/cards/extended/insight_simulator_realism_is_lower_bound_on_training_value.md +++ b/docs/data/cards/extended/insight_simulator_realism_is_lower_bound_on_training_value.md @@ -32,7 +32,7 @@ $$J_{p^\star}(\pi^\dagger)\ \ge\ J_{\hat p_\phi}(\pi^\dagger)\ -\ \tfrac{R_\max} ## 什么时候被误用 -最常见的误用是把渲染保真当成整体保真。3D 高斯泼溅或 [GAIA-1](paper_gaia1.md) 风格的视频生成可以做出与照片几乎无差的画面,但 NPC 行为仍然是脚本——规划层的训练价值被 [`problem:realistic_other_agent_behavior_in_simulator`](problem_realistic_other_agent_behavior_in_simulator.md) 直接截断,外观光鲜不代表策略学到了真实多车交互。 +最常见的误用是把渲染保真当成整体保真。3D 高斯泼溅或 
[GAIA-1](../paper_gaia1.md) 风格的视频生成可以做出与照片几乎无差的画面,但 NPC 行为仍然是脚本——规划层的训练价值被 [`problem:realistic_other_agent_behavior_in_simulator`](problem_realistic_other_agent_behavior_in_simulator.md) 直接截断,外观光鲜不代表策略学到了真实多车交互。 第二种是用同一个保真度阈值横扫所有任务。感知微调可能在 $D_\text{TV} \approx 0.3$(保真度约 0.7)就足够,因为下游策略对感知噪声有一定容忍;但规划训练对代理行为分布极敏感,$D_\text{TV} = 0.05$(保真度 0.95)都未必够——同一仿真的训练价值对不同下游任务差异极大。 @@ -44,4 +44,4 @@ $$J_{p^\star}(\pi^\dagger)\ \ge\ J_{\hat p_\phi}(\pi^\dagger)\ -\ \tfrac{R_\max} ## 推演链路 -域随机化 (Tobin 等) → [CARLA](paper_carla_leaderboard.md)(驾驶仿真的第一代基线)→ [MetaDrive](paper_metadrive.md)(程序化场景 + 标准化基准)→ [Trajeglish](paper_trajeglish.md) / [MoST](paper_most_simagents.md)(NPC 数据驱动化)→ [GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md)(生成式视频仿真层)→ [Cosmos](paper_cosmos.md)(把保真度做成可调配的基础设施)→ [CF-VLA](paper_2512.24426_cfvla.md)(在保真度足够时把反事实评估送入闭环)。同步交叉的实验材料见 [`../../../labs/lab09_drivevlm_dual_pipeline.ipynb`](../../../labs/lab09_drivevlm_dual_pipeline.ipynb) 与 [`../../../labs/lab10_cfvla_counterfactual_replanner.ipynb`](../../../labs/lab10_cfvla_counterfactual_replanner.ipynb)。 +域随机化 (Tobin 等) → [CARLA](paper_carla_leaderboard.md)(驾驶仿真的第一代基线)→ [MetaDrive](paper_metadrive.md)(程序化场景 + 标准化基准)→ [Trajeglish](paper_trajeglish.md) / [MoST](paper_most_simagents.md)(NPC 数据驱动化)→ [GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md)(生成式视频仿真层)→ [Cosmos](paper_cosmos.md)(把保真度做成可调配的基础设施)→ [CF-VLA](paper_2512.24426_cfvla.md)(在保真度足够时把反事实评估送入闭环)。同步交叉的实验材料见 [`../../../labs/lab09_drivevlm_dual_pipeline.ipynb`](../../../labs/lab09_drivevlm_dual_pipeline.ipynb) 与 [`../../../labs/lab10_cfvla_counterfactual_replanner.ipynb`](../../../labs/lab10_cfvla_counterfactual_replanner.ipynb)。 diff --git a/docs/data/cards/extended/insight_temporal_aggregation_buys_what_depth_sensor_buys.md b/docs/data/cards/extended/insight_temporal_aggregation_buys_what_depth_sensor_buys.md index 72274f6..3c79389 100644 --- a/docs/data/cards/extended/insight_temporal_aggregation_buys_what_depth_sensor_buys.md 
+++ b/docs/data/cards/extended/insight_temporal_aggregation_buys_what_depth_sensor_buys.md @@ -21,7 +21,7 @@ $$\mathbf{b}_\text{virtual}(t_1, t_2) = \mathbf{p}_\text{ego}(t_2) - \mathbf{p}_ | 经典 SfM | 多帧匹配 + 三角化恢复稀疏点云 | | 视觉 SLAM | 关键帧 + Bundle Adjustment 恢复轨迹与稀疏地图 | | 单目深度估计 | 多帧 MVSNet、自监督深度 (monodepth2) | -| BEV 感知 | [BEVDet4D](paper_bevdet4d.md) / [BEVFormer](paper_li2022bevformer.md) 跨帧 warp BEV 特征 | +| BEV 感知 | [BEVDet4D](paper_bevdet4d.md) / [BEVFormer](../paper_li2022bevformer.md) 跨帧 warp BEV 特征 | | 流式 3D 检测 | [StreamPETR](paper_streampetr.md) 把 query 当循环状态跨帧传递 | | 神经场景重建 | NeRF / Gaussian Splatting 依赖多视角观测约束 3D 场 | | 视频流光场 | NeRV / Plenoxels 等隐式视频表示也基于时序约束 | @@ -51,4 +51,4 @@ $$\mathbf{b}_\text{virtual}(t_1, t_2) = \mathbf{p}_\text{ego}(t_2) - \mathbf{p}_ ## 推演链路 -经典多视几何 (Hartley & Zisserman) → 视觉 SLAM (ORB-SLAM 等) → 自监督单目深度 (monodepth2) → [BEVDet4D](paper_bevdet4d.md) (把时序融合到 BEV 检测) → [BEVFormer](paper_li2022bevformer.md) (用 transformer cross-attention 做时序聚合) → [StreamPETR](paper_streampetr.md) (流式 query 跨帧传递) → [EmerNeRF](paper_emernerf.md) (把这条洞察推到极致:用纯多帧观测重建动态 3D 场)。 +经典多视几何 (Hartley & Zisserman) → 视觉 SLAM (ORB-SLAM 等) → 自监督单目深度 (monodepth2) → [BEVDet4D](paper_bevdet4d.md) (把时序融合到 BEV 检测) → [BEVFormer](../paper_li2022bevformer.md) (用 transformer cross-attention 做时序聚合) → [StreamPETR](paper_streampetr.md) (流式 query 跨帧传递) → [EmerNeRF](paper_emernerf.md) (把这条洞察推到极致:用纯多帧观测重建动态 3D 场)。 diff --git a/docs/data/cards/extended/insight_tokenization_collapses_modality_gap.md b/docs/data/cards/extended/insight_tokenization_collapses_modality_gap.md index 7cb3f25..0bf8409 100644 --- a/docs/data/cards/extended/insight_tokenization_collapses_modality_gap.md +++ b/docs/data/cards/extended/insight_tokenization_collapses_modality_gap.md @@ -31,7 +31,7 @@ $$p_\theta(t_{ 一个 Sora 风格的视频扩散模型,从来没有被显式喂过牛顿第二定律或菲涅尔光照方程,却在足够大的视频数据上自动学到了惯性、碰撞、阴影、物体持久性等近似物理规律。这一观察让 [GAIA-1](paper_gaia1.md)、[DriveDreamer](paper_drivedreamer.md)、[Cosmos](paper_cosmos.md) 被重新定位为机器人与驾驶的通用 
simulator,而不仅仅是内容生成器。 +> 一个 Sora 风格的视频扩散模型,从来没有被显式喂过牛顿第二定律或菲涅尔光照方程,却在足够大的视频数据上自动学到了惯性、碰撞、阴影、物体持久性等近似物理规律。这一观察让 [GAIA-1](../paper_gaia1.md)、[DriveDreamer](../paper_drivedreamer.md)、[Cosmos](paper_cosmos.md) 被重新定位为机器人与驾驶的通用 simulator,而不仅仅是内容生成器。 ## 抽象内核 @@ -17,7 +17,7 @@ $$\mathcal{L}_\text{vid} = \mathbb{E}_{x_0,\,k,\,\epsilon}\!\Big[\|\epsilon - \e | 领域 | 模型 | 学到的隐式规律 | |---|---|---| | 一般视频 | [Sora](paper_sora.md), Veo, Cosmos | 惯性、相对运动、阴影、物体持久性、流体形变 | -| 驾驶 | [GAIA-1](paper_gaia1.md), [DriveDreamer](paper_drivedreamer.md) | 车辆轨迹连续、车道线几何、刹车灯光迹 | +| 驾驶 | [GAIA-1](../paper_gaia1.md), [DriveDreamer](../paper_drivedreamer.md) | 车辆轨迹连续、车道线几何、刹车灯光迹 | | 机器人 | RT-2 video 预测、UniSim | 抓取动力学、接触响应 | | 流体 | Karras 等扩散流体仿真 | 不可压缩 Navier–Stokes 近似 | | 蛋白结构 | RFdiffusion | 几何约束、能量低洼 | @@ -46,4 +46,4 @@ $$\mathcal{L}_\text{vid} = \mathbb{E}_{x_0,\,k,\,\epsilon}\!\Big[\|\epsilon - \e ## 推演链路 -DDPM (Ho 等 2020) → Video Diffusion Models (Ho 等 2022) → Imagen Video / Make-A-Video → [Sora](paper_sora.md)(首份让"视频扩散是物理引擎"的口号进入主流的工作)→ [GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md)(驾驶版本)→ [Cosmos](paper_cosmos.md)(NVIDIA 把它做成物理 AI 平台)→ 物理一致性评测基准(Phenaki-Eval、PhyBench 一类工作)开始量化这种隐式物理。 +DDPM (Ho 等 2020) → Video Diffusion Models (Ho 等 2022) → Imagen Video / Make-A-Video → [Sora](paper_sora.md)(首份让"视频扩散是物理引擎"的口号进入主流的工作)→ [GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md)(驾驶版本)→ [Cosmos](paper_cosmos.md)(NVIDIA 把它做成物理 AI 平台)→ 物理一致性评测基准(Phenaki-Eval、PhyBench 一类工作)开始量化这种隐式物理。 diff --git a/docs/data/cards/extended/insight_world_models_let_planning_be_done_in_imagination.md b/docs/data/cards/extended/insight_world_models_let_planning_be_done_in_imagination.md index a5c6814..6fa55be 100644 --- a/docs/data/cards/extended/insight_world_models_let_planning_be_done_in_imagination.md +++ b/docs/data/cards/extended/insight_world_models_let_planning_be_done_in_imagination.md @@ -41,7 +41,7 @@ $$\max_\pi\ \mathbb{E}_{(o,a)\sim p^\star}\!\big[\sum_t \gamma^t r_t\big]$$ 
$$\max_\pi\ \mathbb{E}_{z_0\sim q_\theta(\cdot\mid s_0)}\!\Big[\sum_{t=0}^{H-1}\gamma^t \hat r_t\Big] \ -\ \lambda \cdot \mathbb E_{(s,a)\sim d_\pi}\!\big[D_\text{KL}(p^\star \,\|\, \hat p_\theta)\big]$$ -第二项是"模型与现实之差"的 KL 项惩罚,只要它有界,imagination 上的最优策略就是真实环境上的近似最优策略,搜索可以延伸到真实交互无法触及的 $H$。这是 Dreamer / MuZero / DriveDreamer 共享的逻辑。MuZero 把这一逻辑推到极致——甚至不需要重建观测,只在隐空间维持足够的预测一致性;Dreamer 把它从离散游戏扩到连续控制;[DriveDreamer](paper_drivedreamer.md) 与 [GAIA-1](paper_gaia1.md) 把视频空间的世界模型移植到驾驶。 +第二项是"模型与现实之差"的 KL 项惩罚,只要它有界,imagination 上的最优策略就是真实环境上的近似最优策略,搜索可以延伸到真实交互无法触及的 $H$。这是 Dreamer / MuZero / DriveDreamer 共享的逻辑。MuZero 把这一逻辑推到极致——甚至不需要重建观测,只在隐空间维持足够的预测一致性;Dreamer 把它从离散游戏扩到连续控制;[DriveDreamer](../paper_drivedreamer.md) 与 [GAIA-1](../paper_gaia1.md) 把视频空间的世界模型移植到驾驶。 ## 在不同领域的具现 @@ -51,7 +51,7 @@ $$\max_\pi\ \mathbb{E}_{z_0\sim q_\theta(\cdot\mid s_0)}\!\Big[\sum_{t=0}^{H-1}\ | 雅达利 / 控制 | 模拟器 | latent dynamics + actor-critic (Dreamer V1–V3) | | 蛋白结构 | 实验测定 | AlphaFold 在序列空间内搜索折叠 | | 视频生成 | 真实拍摄 | Sora / Cosmos 的视频扩散 simulator | -| 自动驾驶 | 真实路采 + 仿真 / 真车里程 | [GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md) 的视频世界模型 / [Cosmos](paper_cosmos.md) | +| 自动驾驶 | 真实路采 + 仿真 / 真车里程 | [GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md) 的视频世界模型 / [Cosmos](paper_cosmos.md) | | LLM 推理 | API 调用 | Chain-of-Thought / Tree-of-Thoughts 在自己生成的中间步骤上搜索 | | 机器人操作 | 真实抓取 | Decision Diffuser 在轨迹空间生成可行规划 | | 神经科学 | 真实试验 | brain emulation models | @@ -80,4 +80,4 @@ $$\max_\pi\ \mathbb{E}_{z_0\sim q_\theta(\cdot\mid s_0)}\!\Big[\sum_{t=0}^{H-1}\ ## 推演链路 -[`paper:world_models`](paper_world_models.md)(VAE + MDN-RNN 在赛车 + Doom 上证明可行)→ [DreamerV3](paper_dreamer_v3.md)(latent imagination 推到 Atari + DMC,统一超参的通用世界模型)→ [MuZero](paper_muzero.md)(去掉显式重建,仍能搜索)→ [Diffuser](paper_diffuser.md) / Decision Diffuser(轨迹作为去噪对象)→ [GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md)(视频生成式世界模型登场驾驶)→ [Cosmos](paper_cosmos.md) / [Sora](paper_sora.md)(视频基础模型被重新解读为通用 simulator)→ 
[CF-VLA](paper_2512.24426_cfvla.md)(VLA 把世界模型作为反事实批评者)。 +[`paper:world_models`](paper_world_models.md)(VAE + MDN-RNN 在赛车 + Doom 上证明可行)→ [DreamerV3](paper_dreamer_v3.md)(latent imagination 推到 Atari + DMC,统一超参的通用世界模型)→ [MuZero](paper_muzero.md)(去掉显式重建,仍能搜索)→ [Diffuser](../paper_diffuser.md) / Decision Diffuser(轨迹作为去噪对象)→ [GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md)(视频生成式世界模型登场驾驶)→ [Cosmos](paper_cosmos.md) / [Sora](paper_sora.md)(视频基础模型被重新解读为通用 simulator)→ [CF-VLA](paper_2512.24426_cfvla.md)(VLA 把世界模型作为反事实批评者)。 diff --git a/docs/data/cards/extended/move_lift_2d_to_3d.md b/docs/data/cards/extended/move_lift_2d_to_3d.md index 36a2ffb..52eb5db 100644 --- a/docs/data/cards/extended/move_lift_2d_to_3d.md +++ b/docs/data/cards/extended/move_lift_2d_to_3d.md @@ -25,7 +25,7 @@ - **+ 自监督预训练** → 在百万小时无标注视频上学几何先验,再把 lift-splat 接到下游 (近似 DINOv3 + BEVFormer 路线)。 - **+ 占据预测** → 用 3D voxel grid 直接预测每个体素的占据概率,得到统一的 [可行驶区域 + 障碍物 + 语义] 表征 (Tesla Occupancy Network 路线)。 - **+ 时间 recurrent** → 在 BEVFormer 上叠 streaming 模式,得到流式 BEV (StreamPETR)。 -- **+ 端到端规划** → 把升维结果直接喂给共享 query 头,得到 [UniAD](paper_2212.10156_uniad.md)、[VADv2](paper_vadv2.md)。 +- **+ 端到端规划** → 把升维结果直接喂给共享 query 头,得到 [UniAD](paper_2212.10156_uniad.md)、[VADv2](../paper_vadv2.md)。 - **+ 神经辐射场** → 把升维替换为可渲染的 3D 场,得到 [EmerNeRF / DrivingGaussian](paper_emernerf.md) 路线。 ## 物理直觉 / Physical intuition @@ -43,7 +43,7 @@ ## 推演溯源 / Components needed to invent this move from scratch 1. **可微数学** —— 把分布的"抬升"操作写成可反向传播的算子。 -2. **集合预测接口** —— 由 [DETR](paper_carion2020.md) 提供:让模型可以在 BEV 上输出一组互不重复的对象。 +2. **集合预测接口** —— 由 [DETR](../paper_carion2020.md) 提供:让模型可以在 BEV 上输出一组互不重复的对象。 3. **多视几何先验** —— 经典视觉里的相机模型 + 三角化的几何直觉。 4. **跨视角共享 BEV 的需求** —— 由 [模块化感知到规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 的痛点 (跨相机融合) 直接推出。 5. 
**可微深度分布的可行性证据** —— LSS 2020 给出了第一份。 diff --git a/docs/data/cards/extended/paper_alvinn.md b/docs/data/cards/extended/paper_alvinn.md index fd195e2..d15e056 100644 --- a/docs/data/cards/extended/paper_alvinn.md +++ b/docs/data/cards/extended/paper_alvinn.md @@ -5,8 +5,8 @@ title_zh: "ALVINN(最早的神经网络端到端驾驶)" kind: paper tier: S authors: [Pomerleau, D. A.] -venue: "NeurIPS 1989" -year: 1989 +venue: "NeurIPS 1988" +year: 1988 topic: e2e_ad phase: prereq deep_links: @@ -15,7 +15,7 @@ deep_links: # ALVINN(最早的神经网络端到端驾驶) -> 1989 年,Pomerleau 用一个三层全连接网络,直接把 30×32 的相机图像(外加一路激光测距)映射到方向盘转角,让卡车在真实道路上自主行驶。这是端到端驾驶范式的起点:不分模块、不写规则,让一个可训练函数吞掉从像素到控制的全部映射。 +> 1988 年,Pomerleau 用一个三层全连接网络,直接把 30×32 的相机图像(外加一路激光测距)映射到方向盘转角,让卡车在真实道路上自主行驶。这是端到端驾驶范式的起点:不分模块、不写规则,让一个可训练函数吞掉从像素到控制的全部映射。 ## 一个最小公式 / Math anchor $$ @@ -25,19 +25,19 @@ $$ $I\in\mathbb{R}^{30\times 32}$ 是降采样后的灰度图像,$a$ 是离散化为 45 个方向单元的转向输出,$f_\theta$ 是单隐层(29 个隐单元)的全连接网络。训练就是在人类驾驶演示上做行为克隆——一个最朴素的监督回归。这正是 [`paradigm:imitation_learning`](paradigm_imitation_learning.md) 的最小可执行形式。 ## 它在图谱里的位置 -ALVINN 是整条端到端驾驶谱系的源头:它 manifest 了 [`paradigm:imitation_learning`](paradigm_imitation_learning.md),并直接 motivate 了三十年后的 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md)。它与 [模块化感知-规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 形成根本对照:后者把驾驶切成感知/预测/规划逐段手工设计,而 ALVINN 在 1989 年就赌"让一个网络端到端学完"。下游的 [Learning by Cheating](paper_lbc.md)、[TCP](paper_tcp_carla.md)、[TransFuser](../paper_transfuser.md) 都是这条赌注在算力与数据成熟后的兑现。 +ALVINN 是整条端到端驾驶谱系的源头:它 manifest 了 [`paradigm:imitation_learning`](paradigm_imitation_learning.md),并直接 motivate 了三十年后的 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md)。它与 [模块化感知-规划流水线](paradigm_modular_perception_to_planning_pipeline.md) 形成根本对照:后者把驾驶切成感知/预测/规划逐段手工设计,而 ALVINN 在 1988 年就赌"让一个网络端到端学完"。下游的 [Learning by Cheating](paper_lbc.md)、[TCP](paper_tcp_carla.md)、[TransFuser](../paper_transfuser.md) 都是这条赌注在算力与数据成熟后的兑现。 ## 架构 / 方法直觉 
-backbone 是 960 输入 → 29 隐 → 45 输出的全连接网络,外加一个 8×32 的"道路强度反馈"输入回路。最关键的工程贡献不是网络本身,而是 Pomerleau 意识到纯演示数据的覆盖太窄——人开车几乎不会偏离车道,于是模型从没见过"如何从偏离中纠回"。他的解法是用图像变换(geometric transformation)人工合成大量"车处于偏移位姿"的样本及对应的纠偏转角,把单条专家轨迹扩成一个覆盖偏移状态的训练分布。这正是后来 [DAgger](paper_ross2011_dagger.md) 与影子模式数据飞轮要解决的同一问题的早期手工版。 +backbone 是一个三层全连接网络:输入层由 30×32 的视频视网膜(960 单元)、一片 8×32 的激光测距视网膜与 1 个道路强度反馈单元拼成,连到 29 个隐单元,再到 46 个输出单元(45 路转角候选 + 1 路道路强度反馈)。最关键的工程贡献不是网络本身,而是 Pomerleau 意识到纯演示数据的覆盖太窄——人开车几乎不会偏离车道,于是模型从没见过"如何从偏离中纠回"。他的解法是用图像变换(geometric transformation)人工合成大量"车处于偏移位姿"的样本及对应的纠偏转角,把单条专家轨迹扩成一个覆盖偏移状态的训练分布。这正是后来 [DAgger](paper_ross2011_dagger.md) 与影子模式数据飞轮要解决的同一问题的早期手工版。 ## 工程上真正要注意什么 -- 1989 年的算力下,整个网络在 Sun-3/160 上以约 2 Hz 推理,车速被压到步行级——端到端不是"快",而是"可行性证明"。 +- 1988 年的算力下,整个网络在 Sun-3/160 上以约 2 Hz 推理,车速被压到步行级——端到端不是"快",而是"可行性证明"。 - 合成偏移样本是成败关键:去掉它,模型一旦轻微偏离就发散,复合误差迅速放大(见 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md))。 - 输入分辨率 30×32 不是性能瓶颈而是算力妥协,提示"端到端"早期受限于硬件而非思想。 - 单一道路类型上训练的网络无法泛化到新路况,泛化问题从第一天起就存在。 ## Bitter-Lesson 视角 -ALVINN 是 Bitter Lesson 在驾驶上提前三十年的预演:它放弃车道线检测、放弃显式几何,用一个端到端学习的函数取代手工管线。它在 1989 年"过早正确"——思想对了,但缺数据、缺算力、缺仿真闭环。后来主流转向模块化感知,直到深度学习与大规模数据回归,[NVIDIA PilotNet](../paper_transfuser.md) 一类工作才让 ALVINN 的赌注真正兑现。教训是:方法的对错与时代的算力供给耦合,"对的太早"和"错"在结果上长期难以区分。 +ALVINN 是 Bitter Lesson 在驾驶上提前三十年的预演:它放弃车道线检测、放弃显式几何,用一个端到端学习的函数取代手工管线。它在 1988 年"过早正确"——思想对了,但缺数据、缺算力、缺仿真闭环。后来主流转向模块化感知,直到深度学习与大规模数据回归,从 NVIDIA PilotNet 到 [TransFuser](../paper_transfuser.md) 一类端到端工作才让 ALVINN 的赌注真正兑现。教训是:方法的对错与时代的算力供给耦合,"对的太早"和"错"在结果上长期难以区分。 ## 接下来读什么 - [Learning by Cheating](paper_lbc.md) — 端到端在仿真里的现代复兴,用特权教师解决数据覆盖 diff --git a/docs/data/cards/extended/paper_chinchilla.md b/docs/data/cards/extended/paper_chinchilla.md index 0ffab74..f9be0e1 100644 --- a/docs/data/cards/extended/paper_chinchilla.md +++ b/docs/data/cards/extended/paper_chinchilla.md @@ -30,7 +30,7 @@ $$ ## 在图谱里的位置 -Chinchilla 是 
[`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的标尺——把"scaling 应该往哪走"从直觉变成可量化的最优比例。它扩展 [GPT-3](paper_gpt3.md) 的 scaling laws 工作(Kaplan 2020),并直接喂养 [`insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models`](insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md) 与 [`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) 在驾驶领域的部署判断。它影响所有后续开源大模型的训练配方——[LLaMA](paper_llama.md) 1 部分尊重 Chinchilla 比例,LLaMA 3 进一步把 token 推到远超 Chinchilla 比例(15T token / 70B 参数),是"后 Chinchilla 时代"训练数据驱动的工业新规范。 +Chinchilla 是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的标尺——把"scaling 应该往哪走"从直觉变成可量化的最优比例。它扩展 [GPT-3](../paper_gpt3.md) 的 scaling laws 工作(Kaplan 2020),并直接喂养 [`insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models`](insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md) 与 [`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) 在驾驶领域的部署判断。它影响所有后续开源大模型的训练配方——[LLaMA](paper_llama.md) 1 部分尊重 Chinchilla 比例,LLaMA 3 进一步把 token 推到远超 Chinchilla 比例(15T token / 70B 参数),是"后 Chinchilla 时代"训练数据驱动的工业新规范。 ## 工程上真正要注意什么 @@ -46,7 +46,7 @@ Chinchilla 是 Bitter Lesson 在 LLM 训练上的精细化标尺——它显示" ## 接下来读什么 -- [GPT-3](paper_gpt3.md) — Chinchilla 修正的对象 +- [GPT-3](../paper_gpt3.md) — Chinchilla 修正的对象 - [LLaMA](paper_llama.md) — 第一份在开源上部分遵循 Chinchilla 的工作 - [`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) - [`insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models`](insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md) diff --git a/docs/data/cards/extended/paper_ddpm.md b/docs/data/cards/extended/paper_ddpm.md index aa95e18..8f4c3ac 100644 --- a/docs/data/cards/extended/paper_ddpm.md +++ b/docs/data/cards/extended/paper_ddpm.md @@ -36,7 +36,7 @@ $$ ## 在图谱里的位置 -DDPM 是 
[`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 中"生成式建模"分支的母版。它通过预测噪声残差直接落在 [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) 的方法学谱系——每一步去噪都是残差学习的具现。它喂养 [Diffusion Policy](paper_diffusion_policy_chi2023.md)(动作扩散)、[Diffuser](paper_diffuser.md)(规划扩散)、[Cosmos](paper_cosmos.md) / [DriveDreamer](paper_drivedreamer.md)(视频世界模型)等多条驾驶相关支线。它实现 [`move:add_noise_then_denoise_for_score_based_generation`](move_add_noise_then_denoise_for_score_based_generation.md) 这条 move。 +DDPM 是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 中"生成式建模"分支的母版。它通过预测噪声残差直接落在 [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) 的方法学谱系——每一步去噪都是残差学习的具现。它喂养 [Diffusion Policy](paper_diffusion_policy_chi2023.md)(动作扩散)、[Diffuser](../paper_diffuser.md)(规划扩散)、[Cosmos](paper_cosmos.md) / [DriveDreamer](../paper_drivedreamer.md)(视频世界模型)等多条驾驶相关支线。它实现 [`move:add_noise_then_denoise_for_score_based_generation`](move_add_noise_then_denoise_for_score_based_generation.md) 这条 move。 ## 工程上真正要注意什么 @@ -54,7 +54,7 @@ DDPM 是 Bitter Lesson 在生成建模上的标志胜利——把图像生成的 ## 接下来读什么 - [Diffusion Policy](paper_diffusion_policy_chi2023.md) — 动作扩散 -- [Diffuser](paper_diffuser.md) — 规划扩散 +- [Diffuser](../paper_diffuser.md) — 规划扩散 - [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) — 残差学习的方法学 - [Cosmos](paper_cosmos.md) — 视频扩散世界模型 - [LoRA](paper_lora.md) — 用残差思想做高效微调 diff --git a/docs/data/cards/extended/paper_depth_anything.md b/docs/data/cards/extended/paper_depth_anything.md index 3dae3f2..f9ae7c2 100644 --- a/docs/data/cards/extended/paper_depth_anything.md +++ b/docs/data/cards/extended/paper_depth_anything.md @@ -38,7 +38,7 @@ Depth Anything 是 [`paradigm:scaling_data_with_self_supervision`](paradigm_scal - 仓库 LiheYoung/Depth-Anything 提供 ViT-S / ViT-B / ViT-L 三档权重,ONNX / TensorRT 部署友好。 - V2 强化了细节锐度(边缘、薄结构)并降低了对训练数据光照偏差的依赖。 - 
输出是相对深度(无绝对量纲),下游使用前需用稀疏 LiDAR、立体匹配或已知物体尺寸做尺度对齐。 -- 与 [DINOv2](paper_dinov2.md) 的骨干共享,在自监督预训练 + 半监督蒸馏的组合上和 [DINOv3](paper_2508.10104_dinov3.md) 是兄弟工作。 +- 与 [DINOv2](../paper_dinov2.md) 的骨干共享,在自监督预训练 + 半监督蒸馏的组合上和 [DINOv3](paper_2508.10104_dinov3.md) 是兄弟工作。 - License:Apache-2.0(V1)/ Apache-2.0(V2)。 ## Bitter-Lesson 视角 diff --git a/docs/data/cards/extended/paper_he2015_resnet.md b/docs/data/cards/extended/paper_he2015_resnet.md index 382c4e3..cd04ad8 100644 --- a/docs/data/cards/extended/paper_he2015_resnet.md +++ b/docs/data/cards/extended/paper_he2015_resnet.md @@ -29,7 +29,7 @@ $$ ## 在图谱里的位置 -ResNet 是 [`paradigm:scaling_data_with_self_supervision`](paradigm_scaling_data_with_self_supervision.md) 与 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的隐式技术地基——没有残差就没有可堆叠到百层的视觉骨干。它直接喂养 [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) 这一跨学科洞察,并作为 [`move:residual_connection`](move_residual_connection.md) 的方法学源头。本图谱里所有 Transformer 块都默认带残差——那是 ResNet 的遗产;[UniAD](paper_2212.10156_uniad.md) / [PlanT](paper_2210.14222_plant.md) 早期的图像 encoder 常用 ResNet,被 [ViT](paper_vit.md) / [DINOv3](paper_2508.10104_dinov3.md) 替换;[Spike-driven Transformer](paper_2307.01694_spike_driven_transformer.md) 专门重排了残差位置以维持脉冲二值性。 +ResNet 是 [`paradigm:scaling_data_with_self_supervision`](paradigm_scaling_data_with_self_supervision.md) 与 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 的隐式技术地基——没有残差就没有可堆叠到百层的视觉骨干。它直接喂养 [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) 这一跨学科洞察,并作为 [`move:residual_connection`](move_residual_connection.md) 的方法学源头。本图谱里所有 Transformer 块都默认带残差——那是 ResNet 的遗产;[UniAD](paper_2212.10156_uniad.md) / [PlanT](paper_2210.14222_plant.md) 早期的图像 encoder 常用 ResNet,被 [ViT](../paper_vit.md) / [DINOv3](paper_2508.10104_dinov3.md) 替换;[Spike-driven Transformer](paper_2307.01694_spike_driven_transformer.md) 专门重排了残差位置以维持脉冲二值性。 ## 工程上真正要注意什么 
@@ -47,6 +47,6 @@ ResNet 是"用算法 trick 让算力能继续 scale 下去"的典型——它不 - [`move:residual_connection`](move_residual_connection.md) — 残差作为方法学原语 - [`insight:residual_learning_unlocks_arbitrary_depth`](insight_residual_learning_unlocks_arbitrary_depth.md) — 跨领域复用的统一视角 -- [Vaswani 2017 Transformer](paper_vaswani2017.md) — 在序列模型里复用残差子层 +- [Vaswani 2017 Transformer](../paper_vaswani2017.md) — 在序列模型里复用残差子层 - [DINOv3](paper_2508.10104_dinov3.md) — 自监督替换 ResNet 作为视觉骨干 - [Spike-driven Transformer](paper_2307.01694_spike_driven_transformer.md) — 把残差搬到脉冲网络 diff --git a/docs/data/cards/extended/paper_lbc.md b/docs/data/cards/extended/paper_lbc.md index be323b5..ca2344e 100644 --- a/docs/data/cards/extended/paper_lbc.md +++ b/docs/data/cards/extended/paper_lbc.md @@ -27,7 +27,7 @@ $$ $\phi(s)$ 是仿真器内的特权状态(鸟瞰真值布局、他车位姿),$I(s)$ 是相机像素。关键在阶段二的期望取在**学生自己访问的状态分布** $d_{\pi^{\text{sens}}}$ 上:学生在任意状态都能向"在场"的教师索取监督(教师可在 off-policy 状态上即时给出动作),这等价于一次取之不尽的在线 [DAgger](paper_ross2011_dagger.md),从而把协变量偏移压住。教师还输出**所有高层指令(直行/左转/右转/跟随)下的动作**,学生因此在一帧里就拿到多分支监督。 ## 它在图谱里的位置 -LBC 是 [`paradigm:imitation_learning`](paradigm_imitation_learning.md) 在仿真闭环里的现代复兴,也是 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md) 的一个里程碑实现。它把 [ALVINN](paper_alvinn.md) 三十年前手工合成偏移样本要解决的"覆盖与纠偏"问题,换成"特权教师任意状态可标注"的优雅工程解。它直接 validate 了 [`paper:carla_leaderboard`](paper_carla_leaderboard.md) 作为闭环评测台,并与 [TransFuser](../paper_transfuser.md)、[TCP](paper_tcp_carla.md) 同属 CARLA 强 baseline 谱系。蒸馏思想上它与 [GameFormer](paper_gameformer.md) 的层级师生回路遥相呼应。 +LBC 是 [`paradigm:imitation_learning`](paradigm_imitation_learning.md) 在仿真闭环里的现代复兴,也是 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md) 的一个里程碑实现。它把 [ALVINN](paper_alvinn.md) 三十年前手工合成偏移样本要解决的"覆盖与纠偏"问题,换成"特权教师任意状态可标注"的优雅工程解。它在 CARLA 原始 benchmark 与 NoCrash 协议上做闭环评测([CARLA Leaderboard](paper_carla_leaderboard.md) 是这条闭环评测脉络后来的标准化延续),并与 [TransFuser](../paper_transfuser.md)、[TCP](paper_tcp_carla.md) 同属 CARLA 强 baseline 
谱系。蒸馏思想上它与 [GameFormer](paper_gameformer.md) 的层级师生回路遥相呼应。 ## 架构 / 方法直觉 两个 agent 共享一套高层指令接口。**教师**输入鸟瞰真值栅格(道路、车道、他车、红绿灯),输出未来若干 waypoint,因为没有感知噪声,它能学得极稳。**学生**是相机 CNN,输出同样的 waypoint,再交给一个简单的 PID/横纵向控制器跟踪。蒸馏不是只对最终选定动作做监督,而是对教师在**每个可能指令分支**上的输出都做监督——这把"白盒教师"当成一个可在任意状态、任意条件下查询的标注器,信息密度远高于一条 on-policy 专家轨迹。学生因此既见到了正常行驶,也见到了自己偏离后教师给出的纠偏动作。 diff --git a/docs/data/cards/extended/paper_llama.md b/docs/data/cards/extended/paper_llama.md index 05ceb92..b0667ee 100644 --- a/docs/data/cards/extended/paper_llama.md +++ b/docs/data/cards/extended/paper_llama.md @@ -31,7 +31,7 @@ $N$ 是参数量,$D$ 是 token 数,$C$ 是固定算力预算。Chinchilla ## 在图谱里的位置 -LLaMA 系列是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 在开源侧的支柱,把 [`insight:open_weight_release_compounds_research_velocity`](insight_open_weight_release_compounds_research_velocity.md) 推到极致。它和 [GPT-3](paper_gpt3.md) / [GPT-4](paper_gpt4.md) 形成"闭源 SOTA vs 开源基线"的对照;和 [Mistral](paper_mistral.md) / [Qwen](paper_qwen.md) 是同时代的开源同侪;为 [LLaVA](paper_llava.md)、[Agent-Driver](paper_2311.10813_agent_driver.md)、[OpenVLA](paper_openvla.md) 提供文本主干。它直接催生 [`move:gather_diverse_pretraining_data_then_filter_by_quality`](move_gather_diverse_pretraining_data_then_filter_by_quality.md) 的工程范式。 +LLaMA 系列是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 在开源侧的支柱,把 [`insight:open_weight_release_compounds_research_velocity`](insight_open_weight_release_compounds_research_velocity.md) 推到极致。它和 [GPT-3](../paper_gpt3.md) / [GPT-4](paper_gpt4.md) 形成"闭源 SOTA vs 开源基线"的对照;和 [Mistral](paper_mistral.md) / [Qwen](paper_qwen.md) 是同时代的开源同侪;为 [LLaVA](../paper_llava.md)、[Agent-Driver](paper_2311.10813_agent_driver.md)、[OpenVLA](paper_openvla.md) 提供文本主干。它直接催生 [`move:gather_diverse_pretraining_data_then_filter_by_quality`](move_gather_diverse_pretraining_data_then_filter_by_quality.md) 的工程范式。 ## 工程上真正要注意什么 @@ -47,7 +47,7 @@ LLaMA 是 Bitter Lesson 在 NLP 上的标志性胜利——架构创新很少( ## 接下来读什么 -- [GPT-3](paper_gpt3.md) — 闭源母版 +- [GPT-3](../paper_gpt3.md) — 闭源母版 
- [Mistral / Mixtral](paper_mistral.md) — 同时代的稀疏专家路线 - [Qwen](paper_qwen.md) — 同时代的中文开源主力 - [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) diff --git a/docs/data/cards/extended/paper_qwen.md b/docs/data/cards/extended/paper_qwen.md index 91caf4a..dfdf751 100644 --- a/docs/data/cards/extended/paper_qwen.md +++ b/docs/data/cards/extended/paper_qwen.md @@ -35,7 +35,7 @@ $$ ## 在图谱里的位置 -Qwen 系列是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 中"中文 + 多模态开源"的代表,把 [`insight:open_weight_release_compounds_research_velocity`](insight_open_weight_release_compounds_research_velocity.md) 推到中文驾驶 VLM 上。它和 [LLaMA](paper_llama.md) 形成"中文 vs 英文"开源对照;和 [LLaVA](paper_llava.md) 形成"统一 LM + 视觉 token vs 投影桥接"的架构对照;并为国内 [Agent-Driver](paper_2311.10813_agent_driver.md) 类工作提供文本主干。它响应 [`insight:tokenization_collapses_modality_gap`](insight_tokenization_collapses_modality_gap.md) 在中文多模态上的具现。 +Qwen 系列是 [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 中"中文 + 多模态开源"的代表,把 [`insight:open_weight_release_compounds_research_velocity`](insight_open_weight_release_compounds_research_velocity.md) 推到中文驾驶 VLM 上。它和 [LLaMA](paper_llama.md) 形成"中文 vs 英文"开源对照;和 [LLaVA](../paper_llava.md) 形成"统一 LM + 视觉 token vs 投影桥接"的架构对照;并为国内 [Agent-Driver](paper_2311.10813_agent_driver.md) 类工作提供文本主干。它响应 [`insight:tokenization_collapses_modality_gap`](insight_tokenization_collapses_modality_gap.md) 在中文多模态上的具现。 ## 工程上真正要注意什么 @@ -53,6 +53,6 @@ Qwen 是 Bitter Lesson 在中文 NLP 上的副本:用通用 Transformer + 海 - [LLaMA](paper_llama.md) — 英文开源对照 - [Mistral / Mixtral](paper_mistral.md) — 稀疏专家路线 -- [LLaVA](paper_llava.md) — 视觉投影桥接的对照架构 +- [LLaVA](../paper_llava.md) — 视觉投影桥接的对照架构 - [Agent-Driver](paper_2311.10813_agent_driver.md) — 国内 LLM 驾驶 agent 的代表 - [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) diff --git a/docs/data/cards/extended/paper_schulman2016_gae.md b/docs/data/cards/extended/paper_schulman2016_gae.md index bce4420..2e2b186 100644 --- 
a/docs/data/cards/extended/paper_schulman2016_gae.md +++ b/docs/data/cards/extended/paper_schulman2016_gae.md @@ -26,7 +26,7 @@ $$ $\delta_t$ 是单步 TD 残差(用学到的价值函数 $V$ 做 baseline)。GAE 把未来一系列 TD 残差按 $(\gamma\lambda)^l$ 指数加权累加。两个极端值揭示它的本质:$\lambda=0$ 时 $\hat{A}_t=\delta_t$,退化为单步 TD,偏差最大、方差最小;$\lambda=1$ 时 $\hat{A}_t=\sum_l \gamma^l r_{t+l}-V(s_t)$,退化为蒙特卡洛优势,无偏但方差随 horizon 爆炸。中间的 $\lambda$(实践中常取 0.95–0.98)在这条谱上取一个甜点。 ## 它在图谱里的位置 -GAE 是 on-policy 策略梯度的"优势估计标准件",直接喂给 [TRPO](paper_schulman2015_trpo.md) 与 [PPO](paper_schulman2017_ppo.md)——这两篇论文报告的实验全部默认开启 GAE。它与 [DQN](paper_mnih2015_dqn.md) 那条 value-based / off-policy 路线形成对照:GAE 假设数据来自当前策略(on-policy),而 DQN 用 replay buffer 复用旧数据。它也是任何"actor-critic + 自举价值函数"框架的母版,[SAC](paper_sac.md) 虽走 off-policy 路线但同样依赖"用 critic 当 baseline 降方差"这一核心思想。 +GAE 是 on-policy 策略梯度的"优势估计标准件":GAE 论文本身就用 [TRPO](paper_schulman2015_trpo.md) 作为优化器做实验,此后它成为 [PPO](paper_schulman2017_ppo.md) 等 on-policy 方法的默认优势估计(原始 TRPO 在 2015 年尚用 single-path / vine 估计,2016 年的 GAE 才补上这一环)。它与 [DQN](paper_mnih2015_dqn.md) 那条 value-based / off-policy 路线形成对照:GAE 假设数据来自当前策略(on-policy),而 DQN 用 replay buffer 复用旧数据。它也是任何"actor-critic + 自举价值函数"框架的母版,[SAC](paper_sac.md) 虽走 off-policy 路线但同样依赖"用 critic 当 baseline 降方差"这一核心思想。 ## 架构 / 方法直觉 GAE 不是一个新网络,而是一个估计量。它把两个独立的近似误差解耦:价值函数 $V$ 的偏差,与采样回报的方差。$\gamma$ 是 MDP 自带的折扣因子(决定"看多远"),$\lambda$ 是 GAE 额外引入的"信用分配长度"旋钮(决定"信任 bootstrap 多深")。关键洞察是:如果 $V$ 已经相当准,就该多信任它(小 $\lambda$,少累加真实回报);如果 $V$ 很糙,就该多用真实回报纠偏(大 $\lambda$)。实现上它等价于对 TD($\lambda$) 的 eligibility-trace 做反向递推,一行 `adv = delta + gamma*lam*adv` 从轨迹末尾往前扫即可,几乎零额外开销。 diff --git a/docs/data/cards/extended/paper_tesla_ai_day.md b/docs/data/cards/extended/paper_tesla_ai_day.md index b34d040..be7b318 100644 --- a/docs/data/cards/extended/paper_tesla_ai_day.md +++ b/docs/data/cards/extended/paper_tesla_ai_day.md @@ -20,7 +20,7 @@ deep_links: ## 在图谱里的位置 -Tesla AI Day 是 [`paradigm:camera_first_autonomy`](paradigm_camera_first_autonomy.md) 与 
[`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 在工业上最完整的实例——纯相机量产证据 + 数据引擎闭环。它直接喂养 [`paper:occupancy_networks_tesla`](paper_occupancy_networks_tesla.md)(AI Day 公开的占用网络);为 [`insight:data_engine_loop_is_more_valuable_than_static_dataset`](insight_data_engine_loop_is_more_valuable_than_static_dataset.md) 提供唯一公开的工业级证据;并被 [UniAD](paper_2212.10156_uniad.md)、[BEVFormer](paper_li2022bevformer.md) 等学术栈作为"端到端可能性"的存在性证明。 +Tesla AI Day 是 [`paradigm:camera_first_autonomy`](paradigm_camera_first_autonomy.md) 与 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 在工业上最完整的实例——纯相机量产证据 + 数据引擎闭环。它直接喂养 [`paper:occupancy_networks_tesla`](paper_occupancy_networks_tesla.md)(AI Day 公开的占用网络);为 [`insight:data_engine_loop_is_more_valuable_than_static_dataset`](insight_data_engine_loop_is_more_valuable_than_static_dataset.md) 提供唯一公开的工业级证据;并被 [UniAD](paper_2212.10156_uniad.md)、[BEVFormer](../paper_li2022bevformer.md) 等学术栈作为"端到端可能性"的存在性证明。 ## 一个最小公式 / Math anchor diff --git a/docs/data/cards/extended/paradigm_camera_first_autonomy.md b/docs/data/cards/extended/paradigm_camera_first_autonomy.md index d2137c8..c752091 100644 --- a/docs/data/cards/extended/paradigm_camera_first_autonomy.md +++ b/docs/data/cards/extended/paradigm_camera_first_autonomy.md @@ -45,7 +45,7 @@ flowchart LR | 工作 | 在范式里的角色 | |---|---| | [Lift-Splat-Shoot](paper_lift_splat_shoot.md) | 第一份可微的"像素到 BEV"提升算子 | -| [BEVFormer](paper_li2022bevformer.md) | 把 BEV 升维写成时空 cross-attention | +| [BEVFormer](../paper_li2022bevformer.md) | 把 BEV 升维写成时空 cross-attention | | [BEVFusion](paper_bevfusion.md) | 在共享 BEV 空间里融合相机与 LiDAR | | [DETR3D](paper_detr3d.md) | 用稀疏对象 query 直接出 3D 检测,跳过显式 BEV | | [StreamPETR](paper_streampetr.md) | 把 query 作为循环状态跨帧传递,流式长时序感知 | @@ -57,8 +57,8 @@ flowchart LR - **多相机标定与同步**:八路相机的厘米级外参误差会被 2D→3D 升维算子放大成分米级 BEV 
错位,直接压缩规划器的安全裕度;微秒级硬同步缺失会让高速运动时不同视角的同一目标被解读为多个目标,出现"时间混叠"的虚假距离。这两件事是相机优先方案的隐性硬约束。 - **海量驾驶视频**:百万小时未标注视频是训练时序网络与自监督主干的燃料,数量级低于这条线时, [`insight:temporal_aggregation_buys_what_depth_sensor_buys`](insight_temporal_aggregation_buys_what_depth_sensor_buys.md) 描述的"用时序换深度"就不成立。 - **可微的 2D→3D 提升算子**:见 [把 2D 图像特征抬升到 3D](move_lift_2d_to_3d.md)。LSS 一族的关键不是几何精确,而是让深度分布作为可学习张量进入梯度回路。 -- **强主干**:[ViT](paper_vit.md) 提供可扩展架构,[DINOv3](paper_2508.10104_dinov3.md) 提供自监督特征。 -- **集合预测接口**:由 [DETR](paper_carion2020.md) 提供,让 BEV 上的对象输出可微。 +- **强主干**:[ViT](../paper_vit.md) 提供可扩展架构,[DINOv3](paper_2508.10104_dinov3.md) 提供自监督特征。 +- **集合预测接口**:由 [DETR](../paper_carion2020.md) 提供,让 BEV 上的对象输出可微。 - **占用真值的自动产线**:用离线大模型 + LiDAR 把占用栅格自动标出,再让车端纯相机网络去拟合。这条产线决定了"占用替代检测框"能否摆脱人工标注的瓶颈。 ## 未解决的痛点 @@ -78,9 +78,9 @@ flowchart LR ## 一条研究路径建议 -1. 把 [Lift-Splat-Shoot](paper_lift_splat_shoot.md) 与 [BEVFormer](paper_li2022bevformer.md) 两篇逐行读完,把"升维"这一算子在自己的代码里写一遍。 +1. 把 [Lift-Splat-Shoot](paper_lift_splat_shoot.md) 与 [BEVFormer](../paper_li2022bevformer.md) 两篇逐行读完,把"升维"这一算子在自己的代码里写一遍。 2. 跑 [`labs/lab05_dinov3_features_minidata`](../../../labs/lab05_dinov3_features_minidata.ipynb),感受冻结自监督主干给下游 BEV 头的实际增益。 -3. 在 nuScenes 上把 [BEVFormer](paper_li2022bevformer.md) baseline 跑出 mAP / NDS,然后逐步替换主干、加时序、加占用头。 +3. 在 nuScenes 上把 [BEVFormer](../paper_li2022bevformer.md) baseline 跑出 mAP / NDS,然后逐步替换主干、加时序、加占用头。 4. 阅读 [Tesla AI Day](paper_tesla_ai_day.md) 的占用网络部分,理解工业部署的真实瓶颈是延迟与误检率,不是 mAP。 5. 跟 [BEV 融合模块化感知流水线再发现](validation_trace_modular_perception_pipeline_with_bev_fusion.md) 对照,把工业落地的接口约束补回设计里。 6. 
走向开放问题:把 [神经场景重建](paradigm_neural_scene_reconstruction_as_engine.md) 与相机优先合流,试验"重建即仿真即数据增广"。 diff --git a/docs/data/cards/extended/paradigm_counterfactual_data_centric_safety.md b/docs/data/cards/extended/paradigm_counterfactual_data_centric_safety.md index 6158f3b..b5221ab 100644 --- a/docs/data/cards/extended/paradigm_counterfactual_data_centric_safety.md +++ b/docs/data/cards/extended/paradigm_counterfactual_data_centric_safety.md @@ -17,11 +17,11 @@ | [SHIFT](paper_shift_dataset.md) | 早期的 sim-to-real 中跨域偏移数据集 | | [V2X-Sim](paper_v2x_sim.md) | 多代理协同的合成数据 | | Tesla 数据引擎 | 工业届的工程化先例 (auto-labelling + scenario mining) | -| [DriveDreamer](paper_drivedreamer.md) | 提供条件化生成的视频底座 | +| [DriveDreamer](../paper_drivedreamer.md) | 提供条件化生成的视频底座 | ## 必备组件 / Required building blocks -* 一个高保真世界/视频生成模型:[GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md) / [Cosmos](paper_cosmos.md)。 +* 一个高保真世界/视频生成模型:[GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md) / [Cosmos](paper_cosmos.md)。 * 反事实编辑算子:能在场景里"插入、删除、移位"对象。`move:augment_via_counterfactual_object_insertion`。 * 闭环评估器:在合成场景里能跑完整规划→控制 loop,看模型怎样反应。 * 故障案例本体:建立一个"危险情形"的形式化字典,用于条件化生成。 @@ -49,6 +49,6 @@ ## 推荐起步路径 1. [CF-VLA 卡片](paper_2512.24426_cfvla.md):把这条范式的当代代表读熟。 -2. [GAIA-1 / DriveDreamer](paper_drivedreamer.md):理解世界模型作为合成器。 +2. [GAIA-1 / DriveDreamer](../paper_drivedreamer.md):理解世界模型作为合成器。 3. 读 [`insight:long_tail_solved_by_synthesis_not_data_alone`](insight_long_tail_solved_by_synthesis_not_data_alone.md)。 4. 
跑 [`labs/lab10_cfvla_counterfactual_replanner`](../../../labs/lab10_cfvla_counterfactual_replanner.ipynb)。 diff --git a/docs/data/cards/extended/paradigm_differentiable_end_to_end_imitation.md b/docs/data/cards/extended/paradigm_differentiable_end_to_end_imitation.md index c535a43..6e8f547 100644 --- a/docs/data/cards/extended/paradigm_differentiable_end_to_end_imitation.md +++ b/docs/data/cards/extended/paradigm_differentiable_end_to_end_imitation.md @@ -42,16 +42,16 @@ flowchart LR |---|---|---| | [UniAD](paper_2212.10156_uniad.md) | dense BEV + 共享 query | 显式 5 个任务头共用 BEV | | [PlanT](paper_2210.14222_plant.md) | 对象级稀疏 token | 赌"司机本来只看少数对象" | -| [VADv2](paper_vadv2.md) | 向量化 + 概率规划 | 用向量化替代 BEV,规划改成概率分布 | -| [TransFuser](paper_transfuser.md) | 多模态融合 BC | 早期奠基;融合 LiDAR-camera | +| [VADv2](../paper_vadv2.md) | 向量化 + 概率规划 | 用向量化替代 BEV,规划改成概率分布 | +| [TransFuser](../paper_transfuser.md) | 多模态融合 BC | 早期奠基;融合 LiDAR-camera | | [InterFuser](paper_interfuser.md) | 显式中间监督 | 在 CARLA 上加强可解释性 | | [DriveVLM](paper_2402.12289_drivevlm.md) | 在 UniAD 之上接 VLM | 把语言推理叠在端到端骨架之上 | ## 这条范式靠什么活下来 / Conditions that make it work * 大规模真实驾驶日志(nuScenes、Waymo Open Motion、Argoverse 2、内部车队) -* 可微 BEV 表征([BEVFormer](paper_li2022bevformer.md) 提供事实标准) -* 可微集合预测 query([DETR](paper_carion2020.md) 起源) +* 可微 BEV 表征([BEVFormer](../paper_li2022bevformer.md) 提供事实标准) +* 可微集合预测 query([DETR](../paper_carion2020.md) 起源) * 充足算力(≥ 8×A100 训一次) * 离线指标 + 闭环仿真协同评估([nuPlan](paper_nuplan.md)、[NAVSIM](paper_navsim.md)、[CARLA Leaderboard](paper_carla_leaderboard.md)) @@ -71,7 +71,7 @@ flowchart LR ## 一条研究路径建议 / A starter trail -1. 把 [BEVFormer](paper_li2022bevformer.md) 与 [DETR](paper_carion2020.md) 的卡片读熟,理解共享 query 与集合预测。 +1. 把 [BEVFormer](../paper_li2022bevformer.md) 与 [DETR](../paper_carion2020.md) 的卡片读熟,理解共享 query 与集合预测。 2. 跑 [`lab03_uniad_query_intuition`](../../../labs/lab03_uniad_query_intuition.ipynb),亲手观察"是否共用 query"对联合性能的影响。 3. 阅读 [PlanT 卡片](paper_2210.14222_plant.md),理解"稀疏对象级"作为对照实验。 4. 
把视线投向 [`validation:trace_unified_planning_oriented_e2e_driving`](validation_trace_unified_planning_oriented_e2e_driving.md),看看从更小的零件如何把这一切再推演一次。 diff --git a/docs/data/cards/extended/paradigm_foundation_model_axis.md b/docs/data/cards/extended/paradigm_foundation_model_axis.md index b6dec67..71552b5 100644 --- a/docs/data/cards/extended/paradigm_foundation_model_axis.md +++ b/docs/data/cards/extended/paradigm_foundation_model_axis.md @@ -4,7 +4,7 @@ ## 这条范式押注什么 -1. **同一套底层架构能横跨所有模态**:transformer 主干 + tokenizer 适配层 + 多模态 cross-attention,足以覆盖文本、图像、视频、动作。这条押注由 Vaswani 等人最初的 attention 论文打开,被 [GPT-3](paper_gpt3.md)、ViT、CLIP、LLaVA 沿同一架构反复验证。 +1. **同一套底层架构能横跨所有模态**:transformer 主干 + tokenizer 适配层 + 多模态 cross-attention,足以覆盖文本、图像、视频、动作。这条押注由 Vaswani 等人最初的 attention 论文打开,被 [GPT-3](../paper_gpt3.md)、ViT、CLIP、LLaVA 沿同一架构反复验证。 2. **预训练胜过任务定制**:通用基础模型在下游任务上的能力,往往超过同算力的任务专用模型,参见 [`insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models`](insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md)。 3. **下游模型是底座的轻量微调**:DriveVLM、Agent-Driver、CF-VLA 在驾驶领域的成功都依附于通用底座,证明驾驶不该再单独训练巨型主干。 4. 
**跨模态对齐是免费的入场券**:一旦视觉与语言在 CLIP 阶段对齐,下游 VLA 可以省去重新对齐的成本。 @@ -14,12 +14,12 @@ | 轴上节点 | 角色 | |---|---| -| [GPT-3](paper_gpt3.md) / [GPT-4](paper_gpt4.md) | 语言侧的封闭旗舰 | +| [GPT-3](../paper_gpt3.md) / [GPT-4](paper_gpt4.md) | 语言侧的封闭旗舰 | | [LLaMA 系列](paper_llama.md) | 语言侧的开源主力 | | [CLIP](paper_clip.md) | 视觉—语言对齐底座 | -| [LLaVA / Qwen-VL](paper_llava.md) | 开源 VLM 样板 | -| [SAM](paper_sam.md) | 视觉分割侧的基础模型 | -| [Cosmos](paper_cosmos.md) / [GAIA-1](paper_gaia1.md) | 视频与世界模型侧基础模型 | +| [LLaVA / Qwen-VL](../paper_llava.md) | 开源 VLM 样板 | +| [SAM](../paper_sam.md) | 视觉分割侧的基础模型 | +| [Cosmos](paper_cosmos.md) / [GAIA-1](../paper_gaia1.md) | 视频与世界模型侧基础模型 | | [DriveVLM](paper_2402.12289_drivevlm.md) / [EMMA](paper_emma.md) | 驾驶侧 VLA | | [CF-VLA](paper_2512.24426_cfvla.md) | 反事实增强的 VLA | | [Agent-Driver](paper_2311.10813_agent_driver.md) / [DiLu](paper_2309.16292_dilu.md) | 把基础模型包成驾驶 agent | @@ -49,7 +49,7 @@ ## 一条研究路径建议 1. 读 [`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) 与 [`validation:trace_few_shot_in_context_learning_at_scale`](validation_trace_few_shot_in_context_learning_at_scale.md),建立 scaling 的直觉。 -2. 沿 [GPT-3](paper_gpt3.md) → [CLIP](paper_clip.md) → [LLaVA](paper_llava.md) 顺序读三份经典,对齐 scaling 与多模态的演化。 +2. 沿 [GPT-3](../paper_gpt3.md) → [CLIP](paper_clip.md) → [LLaVA](../paper_llava.md) 顺序读三份经典,对齐 scaling 与多模态的演化。 3. 读 [DriveVLM 卡片](paper_2402.12289_drivevlm.md) 与 [`validation:trace_vision_language_action_dual_loop`](validation_trace_vision_language_action_dual_loop.md),看基础模型如何被嫁接到驾驶。 4. 跑一次开源 VLM 的下游微调,体会"通用 → 专用"的劳动量分布。 5. 
对比 [`paradigm:vla_paradigm`](paradigm_vla_paradigm.md) 与 [`paradigm:llm_agent_paradigm`](paradigm_llm_agent_paradigm.md),理解动作直接生成与工具循环两种落地路径。 diff --git a/docs/data/cards/extended/paradigm_foundation_model_zero_shot_driving_agent.md b/docs/data/cards/extended/paradigm_foundation_model_zero_shot_driving_agent.md index 5c57ad6..577bffe 100644 --- a/docs/data/cards/extended/paradigm_foundation_model_zero_shot_driving_agent.md +++ b/docs/data/cards/extended/paradigm_foundation_model_zero_shot_driving_agent.md @@ -13,8 +13,8 @@ | 工作 | 角色 | |---|---| -| [GPT-3](paper_gpt3.md) | scaling laws 与 in-context learning 的奠基 | -| [LLaVA / Qwen-VL](paper_llava.md) | 把视觉接到语言模型的开源样板 | +| [GPT-3](../paper_gpt3.md) | scaling laws 与 in-context learning 的奠基 | +| [LLaVA / Qwen-VL](../paper_llava.md) | 把视觉接到语言模型的开源样板 | | [Agent-Driver](paper_2311.10813_agent_driver.md) | LLM 作为驾驶决策核心的第一份完整工作 | | [DiLu](paper_2309.16292_dilu.md) | 知识驱动 + 反思循环 | | [DriveVLM / DriveVLM-Dual](paper_2402.12289_drivevlm.md) | dual-system 范式的代表 | @@ -55,6 +55,6 @@ ## 推荐起步 -1. [GPT-3 卡片](paper_gpt3.md) → [LLaVA 卡片](paper_llava.md) → [DriveVLM 卡片](paper_2402.12289_drivevlm.md)。 +1. [GPT-3 卡片](../paper_gpt3.md) → [LLaVA 卡片](../paper_llava.md) → [DriveVLM 卡片](paper_2402.12289_drivevlm.md)。 2. 跑 [`labs/lab07_dilu_llm_decision_loop`](../../../labs/lab07_dilu_llm_decision_loop.ipynb) 与 [`labs/lab08_agent_driver_tool_calling`](../../../labs/lab08_agent_driver_tool_calling.ipynb)。 3. 
读 [`validation:trace_vision_language_action_dual_loop`](validation_trace_vision_language_action_dual_loop.md) 与 [`validation:trace_counterfactual_vla_replanner`](validation_trace_counterfactual_vla_replanner.md)。 diff --git a/docs/data/cards/extended/paradigm_imitation_learning.md b/docs/data/cards/extended/paradigm_imitation_learning.md index 5281318..1f16ae3 100644 --- a/docs/data/cards/extended/paradigm_imitation_learning.md +++ b/docs/data/cards/extended/paradigm_imitation_learning.md @@ -16,7 +16,7 @@ |---|---|---| | [DAgger](paper_ross2011_dagger.md) | 在线 BC | 用专家迭代标注当前策略下的状态,治协变量偏移 | | [Diffusion Policy](paper_diffusion_policy_chi2023.md) | 生成式 BC | 用扩散模型生成动作序列,自然表达多模态 | -| [TransFuser](paper_transfuser.md) | 多模态融合 BC | 在 CARLA 上融合 LiDAR-camera,端到端模仿 | +| [TransFuser](../paper_transfuser.md) | 多模态融合 BC | 在 CARLA 上融合 LiDAR-camera,端到端模仿 | | [InterFuser](paper_interfuser.md) | 显式监督 + Transformer | 加中间监督头提升 CARLA 可解释性 | ## 靠什么活下来 @@ -48,5 +48,5 @@ 1. 在 CartPole 上手写 BC,故意让训练集只覆盖正常区间,亲眼看测试时漂移如何累积。 2. 实现 [DAgger](paper_ross2011_dagger.md),对比 BC 在长 horizon 任务上的差距,对应读 [再发现:DAgger](validation_trace_dataset_aggregation_for_imitation.md)。 3. 跑 [Diffusion Policy](paper_diffusion_policy_chi2023.md),理解多模态损失的工程价值。 -4. 阅读 [TransFuser](paper_transfuser.md) 与 [InterFuser](paper_interfuser.md) 两份 CARLA 上的模仿基线。 +4. 阅读 [TransFuser](../paper_transfuser.md) 与 [InterFuser](paper_interfuser.md) 两份 CARLA 上的模仿基线。 5. 
把视线投到 [洞察:模仿学习无法从复合误差中自我恢复](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md),理解模仿学习的原则性边界。 diff --git a/docs/data/cards/extended/paradigm_llm_agent_paradigm.md b/docs/data/cards/extended/paradigm_llm_agent_paradigm.md index 8d8f817..e18dc23 100644 --- a/docs/data/cards/extended/paradigm_llm_agent_paradigm.md +++ b/docs/data/cards/extended/paradigm_llm_agent_paradigm.md @@ -28,7 +28,7 @@ - **结构化工具接口**:每个工具都需要明确的 input / output schema,否则 LM 解析失败率激增。 - **场景与经验库**:驾驶 agent 离不开案例库与交规库,参见 [DiLu](paper_2309.16292_dilu.md) 的设计。 - **检索与记忆系统**:向量检索 + 长期事件日志构成 agent 的"海马体"。 -- **CoT 推理的稳定性**:参考 [GPT-3](paper_gpt3.md) 后续工作给出的 CoT prompting 经验,链式推理需要严格的格式约束。 +- **CoT 推理的稳定性**:参考 [GPT-3](../paper_gpt3.md) 后续工作给出的 CoT prompting 经验,链式推理需要严格的格式约束。 - **延迟预算**:每次循环至少几秒,因此 agent 通常作为慢回路而非主回路,参见 [`insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control`](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md)。 ## 未解决的痛点 diff --git a/docs/data/cards/extended/paradigm_model_based_world_imagination_planning.md b/docs/data/cards/extended/paradigm_model_based_world_imagination_planning.md index aa0d6a9..a765b15 100644 --- a/docs/data/cards/extended/paradigm_model_based_world_imagination_planning.md +++ b/docs/data/cards/extended/paradigm_model_based_world_imagination_planning.md @@ -16,8 +16,8 @@ | [World Models (Ha & Schmidhuber 2018)](paper_world_models.md) | 把这一思路在像素级别上证伪 | | [Dreamer V1–V3](paper_dreamer_v3.md) | latent imagination 训练,证明从图像观测可以恢复出可用世界模型 | | [MuZero](paper_muzero.md) | 在没有显式环境模型时也能蒸馏出一个隐式模型 + 搜索 | -| [GAIA-1](paper_gaia1.md) | 视频生成世界模型应用到驾驶 | -| [DriveDreamer](paper_drivedreamer.md) | 控制条件化的驾驶视频扩散世界模型 | +| [GAIA-1](../paper_gaia1.md) | 视频生成世界模型应用到驾驶 | +| [DriveDreamer](../paper_drivedreamer.md) | 控制条件化的驾驶视频扩散世界模型 | | [Cosmos](paper_cosmos.md) | NVIDIA 的物理世界基础模型 | | [CF-VLA](paper_2512.24426_cfvla.md) | 把世界模型作为反事实重规划的批评者 | @@ -53,7 +53,7 @@ ## 推荐一条起步路径 / Starter trail 1. 
[World Models 1.0 卡片](paper_world_models.md):奠基直觉。 -2. [GAIA-1 卡片](paper_gaia1.md):第一份大规模驾驶视频世界模型证据。 +2. [GAIA-1 卡片](../paper_gaia1.md):第一份大规模驾驶视频世界模型证据。 3. 跑一个最小 Dreamer 复现,验证 imagination rollout 可用。 4. 读 [CF-VLA](paper_2512.24426_cfvla.md) 看世界模型作为评估器的现实应用。 5. 把它跟 [`paradigm:differentiable_end_to_end_imitation`](paradigm_differentiable_end_to_end_imitation.md) 拼成 hybrid。 diff --git a/docs/data/cards/extended/paradigm_neural_scene_reconstruction_as_engine.md b/docs/data/cards/extended/paradigm_neural_scene_reconstruction_as_engine.md index c40529f..5283fea 100644 --- a/docs/data/cards/extended/paradigm_neural_scene_reconstruction_as_engine.md +++ b/docs/data/cards/extended/paradigm_neural_scene_reconstruction_as_engine.md @@ -20,8 +20,8 @@ | [3D Gaussian Splatting](paper_3dgs.md) | 用显式高斯椭球替换 MLP,渲染速度提升两个数量级 | | [EmerNeRF](paper_emernerf.md) | 驾驶场景的自监督动静解耦神经辐射场 | | [DrivingGaussian](paper_drivinggaussian.md) | 把高斯泼溅扩展到动态驾驶场景 | -| [GAIA-1](paper_gaia1.md) | 不走重建路径,而用生成式视频世界模型;构成对位 | -| [DriveDreamer](paper_drivedreamer.md) | 控制条件化的视频扩散世界模型,与重建路线互补 | +| [GAIA-1](../paper_gaia1.md) | 不走重建路径,而用生成式视频世界模型;构成对位 | +| [DriveDreamer](../paper_drivedreamer.md) | 控制条件化的视频扩散世界模型,与重建路线互补 | | [Cosmos](paper_cosmos.md) | NVIDIA 把物理与重建合流的基础模型尝试 | ## 靠什么活下来 @@ -29,7 +29,7 @@ - **多传感器同步驾驶日志**:相机、LiDAR、IMU、轮速、GPS 在毫秒级对齐。 - **快速可微渲染器**:CUDA 实现的栅格化或 ray marching 加速。 - **动静分割与建图工具链**:基于 LiDAR 流的运动物体剔除、长时序静态地图融合。 -- **强 2D 主干提供初始化特征**:[DINOv3](paper_2508.10104_dinov3.md) 与 [SAM](paper_sam.md) 等给重建提供语义先验。 +- **强 2D 主干提供初始化特征**:[DINOv3](paper_2508.10104_dinov3.md) 与 [SAM](../paper_sam.md) 等给重建提供语义先验。 - **跨场景的相机轨迹规划**:为了仿真使用,需要能在重建出的场景里采样新轨迹。 - **大算力**:一个城市路段的高斯重建动辄数千个高斯椭球,十几秒视频要数小时优化。 @@ -43,7 +43,7 @@ ## 与其它范式的关系 -- 与 [把规划放到世界模型的想象里](paradigm_model_based_world_imagination_planning.md) 是孪生:前者用显式重建,后者用隐式视频生成;两条线在 [GAIA-1](paper_gaia1.md) 与 [DriveDreamer](paper_drivedreamer.md) 短暂合流。 +- 与 [把规划放到世界模型的想象里](paradigm_model_based_world_imagination_planning.md) 是孪生:前者用显式重建,后者用隐式视频生成;两条线在 
[GAIA-1](../paper_gaia1.md) 与 [DriveDreamer](../paper_drivedreamer.md) 短暂合流。 - 与 [闭环数据引擎中心化开发](paradigm_closed_loop_data_engine_centric_development.md) 互补:神经重建是数据引擎里"长尾合成"模块的最强工具。 - 是 [相机优先的自动驾驶](paradigm_camera_first_autonomy.md) 的训练侧支撑:重建出的场景给纯相机网络提供合成多视角数据。 - 与 [反事实数据中心化安全](paradigm_counterfactual_data_centric_safety.md) 紧密配合:数字孪生天然是反事实扰动的载体。 diff --git a/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md b/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md index 6e9e2d6..580ba9f 100644 --- a/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md +++ b/docs/data/cards/extended/paradigm_scaling_data_with_self_supervision.md @@ -17,7 +17,7 @@ | [BYOL / SimSiam](paper_byol.md) | 非对比的 siamese 自监督 | | [DINO / DINOv2 / DINOv3](paper_2508.10104_dinov3.md) | 自蒸馏 + 多视图,得到强大零样本特征 | | [MAE / BEiT](paper_mae.md) | 掩码图像建模 | -| [BERT / GPT-3](paper_gpt3.md) | 把同一配方做到语言上 | +| [BERT / GPT-3](../paper_gpt3.md) | 把同一配方做到语言上 | | [CLIP](paper_clip.md) | 跨模态对比 | | [VICReg / Barlow Twins](paper_vicreg.md) | 信息论视角的非对比 | @@ -56,6 +56,6 @@ ## 推荐起步 -1. [DINOv3 卡片](paper_2508.10104_dinov3.md) → [DINOv2 卡片](paper_dinov2.md)。 +1. [DINOv3 卡片](paper_2508.10104_dinov3.md) → [DINOv2 卡片](../paper_dinov2.md)。 2. 跑 [`labs/lab05_dinov3_features_minidata`](../../../labs/lab05_dinov3_features_minidata.ipynb)。 3. 
读 [`insight:masked_prediction_yields_self_supervised_signal`](insight_masked_prediction_yields_self_supervised_signal.md)。 diff --git a/docs/data/cards/extended/paradigm_simulator_first_synthetic_data_centric.md b/docs/data/cards/extended/paradigm_simulator_first_synthetic_data_centric.md index 6475fc3..efa0c96 100644 --- a/docs/data/cards/extended/paradigm_simulator_first_synthetic_data_centric.md +++ b/docs/data/cards/extended/paradigm_simulator_first_synthetic_data_centric.md @@ -19,7 +19,7 @@ | [SHIFT](paper_shift_dataset.md) | 连续域偏移合成数据集,强迫研究对照天气与日夜 | | [V2X-Sim](paper_v2x_sim.md) | 车路云协同仿真,提供单车视角难以获得的协作信号 | | [3D 高斯泼溅用于驾驶(StreetGaussians 等)](paper_gs_for_ad.md) | 从真实日志重建可编辑的 3D 场景,作为"半合成"层 | -| [DriveDreamer](paper_drivedreamer.md) / [GAIA-1](paper_gaia1.md) | 视频扩散世界模型作为合成器 | +| [DriveDreamer](../paper_drivedreamer.md) / [GAIA-1](../paper_gaia1.md) | 视频扩散世界模型作为合成器 | | Tesla 数据引擎、Waymo Carcraft | 工业级合成 + 影子回放的实践参考 | ## 靠什么活下来 @@ -45,8 +45,8 @@ ## 一条研究路径建议 1. 读 [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) 与 [MetaDrive](paper_metadrive.md),理解"闭环任务"的形式定义。 -2. 在 [Bench2Drive / NAVSIM](benchmarks_ad.md) 上跑一个最小端到端基线,建立 sim-to-real 直觉。 +2. 在 [Bench2Drive / NAVSIM](../benchmarks_ad.md) 上跑一个最小端到端基线,建立 sim-to-real 直觉。 3. 复读 [`insight:simulator_realism_is_lower_bound_on_training_value`](insight_simulator_realism_is_lower_bound_on_training_value.md),把"保真度"分解为感知、几何、他车行为三个轴。 -4. 复现 [DriveDreamer](paper_drivedreamer.md) 的最小条件化生成,把它接入一个 BEV planner,对比离线/闭环分数。 +4. 复现 [DriveDreamer](../paper_drivedreamer.md) 的最小条件化生成,把它接入一个 BEV planner,对比离线/闭环分数。 5. 接 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md),把合成器升级为反事实编辑器。 6. 
阅读 [`problem:closed_loop_simulation_fidelity_gap`](problem_closed_loop_simulation_fidelity_gap.md),给自己一份对当前局限的清醒认识。 diff --git a/docs/data/cards/extended/paradigm_vla_paradigm.md b/docs/data/cards/extended/paradigm_vla_paradigm.md index 3734c77..d7270b1 100644 --- a/docs/data/cards/extended/paradigm_vla_paradigm.md +++ b/docs/data/cards/extended/paradigm_vla_paradigm.md @@ -43,9 +43,9 @@ flowchart LR | [RT-2](paper_rt2.md) | PaLI-X / PaLM-E + 动作 token | 把互联网 VLM 直接微调为机器人控制器 | | [OpenVLA](paper_openvla.md) | LLaMA + DINOv2 + SigLIP | 开源 7B 主干,跨多机器人本体 | | [EMMA](paper_emma.md) | Gemini + 驾驶轨迹 token | Waymo 把多任务驾驶塞进单一 VLA | -| [DriveVLM / DriveVLM-Dual](paper_2402.12289_drivevlm.md) | [LLaVA](paper_llava.md) + meta-action + dual system | 把 dual-system 与 VLA 接合 | +| [DriveVLM / DriveVLM-Dual](paper_2402.12289_drivevlm.md) | [LLaVA](../paper_llava.md) + meta-action + dual system | 把 dual-system 与 VLA 接合 | | [CF-VLA](paper_2512.24426_cfvla.md) | DriveVLM + 反事实生成 | 反事实数据闭环接进 VLA | -| [LINGO-2](paper_lingo2.md) | Wayve 的语言条件驾驶 | 自然语言指令直接驱动车控 | +| [LINGO-2](../paper_lingo2.md) | Wayve 的语言条件驾驶 | 自然语言指令直接驱动车控 | ## 靠什么活下来 diff --git a/docs/data/cards/extended/paradigm_world_model_paradigm.md b/docs/data/cards/extended/paradigm_world_model_paradigm.md index 4cfd8a9..cf33024 100644 --- a/docs/data/cards/extended/paradigm_world_model_paradigm.md +++ b/docs/data/cards/extended/paradigm_world_model_paradigm.md @@ -50,8 +50,8 @@ policy 与 value 在此之上以 latent rollout 训练,$\beta$ 控制表示空 | [World Models (Ha & Schmidhuber)](paper_world_models.md) | VAE + MDN-RNN + controller 的祖型 | | Dreamer V1–V3 | latent imagination + actor-critic,证明 RL 可在 latent 跑 | | MuZero | 没有显式重建也能蒸馏出隐式 transition + 搜索 | -| [GAIA-1](paper_gaia1.md) | 9B 视频 + 文本 + action 联合生成世界模型 | -| [DriveDreamer](paper_drivedreamer.md) | BEV layout / action / text 条件的视频扩散世界模型 | +| [GAIA-1](../paper_gaia1.md) | 9B 视频 + 文本 + action 联合生成世界模型 | +| [DriveDreamer](../paper_drivedreamer.md) | BEV layout / action / text 条件的视频扩散世界模型 | | 
[Cosmos](paper_cosmos.md) | NVIDIA 推出的物理 AI 通用世界基础模型 | | [Sora](paper_sora.md) | 视频扩散基础模型,被重新解读为通用 simulator | | [CF-VLA](paper_2512.24426_cfvla.md) | 把世界模型用作反事实推演的批评者 | @@ -80,7 +80,7 @@ policy 与 value 在此之上以 latent rollout 训练,$\beta$ 控制表示空 ## 一条研究路径建议 1. [World Models 卡片](paper_world_models.md):奠基直觉,理解 VAE + MDN-RNN + controller。 -2. [GAIA-1 卡片](paper_gaia1.md) 与 [DriveDreamer 卡片](paper_drivedreamer.md):理解视频生成世界模型怎样接驾驶任务。 +2. [GAIA-1 卡片](../paper_gaia1.md) 与 [DriveDreamer 卡片](../paper_drivedreamer.md):理解视频生成世界模型怎样接驾驶任务。 3. 跑一个最小 Dreamer 复现 (例如 CartPole + visual obs),验证 latent imagination 收敛。 4. 复读 [`insight:world_models_let_planning_be_done_in_imagination`](insight_world_models_let_planning_be_done_in_imagination.md) 与 [`validation:trace_world_model_in_latent_imagination`](validation_trace_world_model_in_latent_imagination.md)。 5. 读 [CF-VLA](paper_2512.24426_cfvla.md) 看世界模型作为评估器的现实应用。 diff --git a/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md b/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md index 3330cc6..8153a32 100644 --- a/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md +++ b/docs/data/cards/extended/problem_behavior_cloning_compounds_errors_over_time.md @@ -50,7 +50,7 @@ Ross & Bagnell 在 2011 年用一个简洁的 bound 把现象写清楚:若单 | [DAgger](paper_ross2011_dagger.md) | 让学生跑、专家纠正、聚合 | 实车专家成本不可控 | | 对抗模仿 ([GAIL / AIRL](paper_gail.md)) | 判别器逼近占据测度的差距 | 训练不稳定、对超参敏感 | | 特权教师蒸馏 ([Roach](paper_roach.md)) | 用 RL 教师在仿真里产出修正样本 | 教师本身的策略限定 | -| [TransFuser](paper_transfuser.md) | 多模态融合 + 闭环 finetune | 仍在仿真域,sim-to-real 未解 | +| [TransFuser](../paper_transfuser.md) | 多模态融合 + 闭环 finetune | 仍在仿真域,sim-to-real 未解 | | 模型预测控制 + BC | 用规则 MPC 兜底,BC 输出参考轨迹 | 失去端到端可微优势 | | 反事实增广 ([CF-VLA](paper_2512.24426_cfvla.md)) | 在偏离分支上合成修正动作 | 受合成器保真度限制 | | [`move:warm_start_rl_with_imitation_then_anneal`](move_warm_start_rl_with_imitation_then_anneal.md) | 模仿预训 + RL 继续优化 | RL 的安全约束仍是瓶颈 | @@ -63,4 +63,4 @@ Ross 
& Bagnell 在 2011 年用一个简洁的 bound 把现象写清楚:若单 ## 与之相关的研究路径 -它和 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 互为镜像,是这一洞察的现象层证据。它和 [`insight:human_demonstrations_compress_implicit_reward_function`](insight_human_demonstrations_compress_implicit_reward_function.md) 形成张力——模仿能压缩奖励、却不能压缩纠错,因此需要 RL / 对齐补足。它的工程出口指向 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。论文谱系上 [Ross 2011 DAgger](paper_ross2011_dagger.md)、[Roach](paper_roach.md)、[TransFuser](paper_transfuser.md)、[Diffusion Policy](paper_diffusion_policy_chi2023.md) 都把这一问题列作主要动机;动手验证可见 [`../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb`](../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb)。 +它和 [`insight:imitation_learning_alone_cannot_recover_from_compounding_errors`](insight_imitation_learning_alone_cannot_recover_from_compounding_errors.md) 互为镜像,是这一洞察的现象层证据。它和 [`insight:human_demonstrations_compress_implicit_reward_function`](insight_human_demonstrations_compress_implicit_reward_function.md) 形成张力——模仿能压缩奖励、却不能压缩纠错,因此需要 RL / 对齐补足。它的工程出口指向 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。论文谱系上 [Ross 2011 DAgger](paper_ross2011_dagger.md)、[Roach](paper_roach.md)、[TransFuser](../paper_transfuser.md)、[Diffusion Policy](paper_diffusion_policy_chi2023.md) 都把这一问题列作主要动机;动手验证可见 [`../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb`](../../../labs/lab02_cs285_bc_vs_dagger_minicar.ipynb)。 diff --git a/docs/data/cards/extended/problem_closed_loop_simulation_fidelity_gap.md b/docs/data/cards/extended/problem_closed_loop_simulation_fidelity_gap.md index 11c2b41..bfa036c 100644 --- 
a/docs/data/cards/extended/problem_closed_loop_simulation_fidelity_gap.md +++ b/docs/data/cards/extended/problem_closed_loop_simulation_fidelity_gap.md @@ -34,7 +34,7 @@ flowchart TD ## 现象 -闭环仿真要同时模拟感知输入、其它交通参与者的反应、动力学积分与控制延迟。任何一个环节失真,都让仿真里训练或评估的策略在真实路况下表现迥异。最直接的证据是:在 [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) 上做到顶尖路线完成率的策略,迁到真车上经常出现 NPC 没有反应过的几何,导致行为异常;nuPlan 的离线分数与 [Bench2Drive](benchmarks_ad.md) 的闭环成绩在同一模型上经常给出相互矛盾的排名。Wayve 在 GAIA-1 论文里也专门讨论过"想象空间的真实度天花板"。 +闭环仿真要同时模拟感知输入、其它交通参与者的反应、动力学积分与控制延迟。任何一个环节失真,都让仿真里训练或评估的策略在真实路况下表现迥异。最直接的证据是:在 [CARLA Leaderboard 2.0](paper_carla_leaderboard.md) 上做到顶尖路线完成率的策略,迁到真车上经常出现 NPC 没有反应过的几何,导致行为异常;nuPlan 的离线分数与 [Bench2Drive](../benchmarks_ad.md) 的闭环成绩在同一模型上经常给出相互矛盾的排名。Wayve 在 GAIA-1 论文里也专门讨论过"想象空间的真实度天花板"。 闭环差距通常被分成三层:传感器渲染差距 (sensor sim-to-real)、单车动力学差距 (dynamics sim-to-real)、社交行为差距 (agent sim-to-real)。前两层近年因为 NeRF、3D 高斯泼溅、视频扩散等技术大幅缩小,社交行为差距仍是结构性瓶颈。 @@ -51,7 +51,7 @@ flowchart TD | 半解 | 怎样接近 | 它没解决什么 | |---|---|---| | 高保真感知仿真 ([3D 高斯泼溅 / StreetGaussians](paper_gs_for_ad.md)) | 从真实日志重建可编辑场景 | 几何渲染逼真,他车策略依旧脚本化 | -| 视频扩散世界模型 ([GAIA-1](paper_gaia1.md) / [DriveDreamer](paper_drivedreamer.md)) | 用生成模型直接产出未来视频 | 长时一致性 + 物体身份漂移,[`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md) 的边界条件 | +| 视频扩散世界模型 ([GAIA-1](../paper_gaia1.md) / [DriveDreamer](../paper_drivedreamer.md)) | 用生成模型直接产出未来视频 | 长时一致性 + 物体身份漂移,[`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md) 的边界条件 | | 代理行为生成 ([Trajeglish](paper_trajeglish.md), [MoST](paper_most_simagents.md)) | 让 NPC 用 token 语言模型驱动 | 代理之间的多智能体一致性仍未稳定 | | 闭环 + 离线联合评分 ([nuPlan](paper_nuplan.md)) | 把两种评估的相关性作为模型选择信号 | 权重选择仍是经验艺术,[`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md) 提醒它不是终局 | | Sim-to-real 微调 (residual policy) | 在真实数据上微调仿真训练策略 | 真实数据稀缺时仍需仿真,循环依赖 
| @@ -65,4 +65,4 @@ flowchart TD ## 与之相关的研究路径 -它直接喂养 [`paradigm:simulator_first_synthetic_data_centric`](paradigm_simulator_first_synthetic_data_centric.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。它和 [`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md) 互为孪生:闭环差距是离线代理失真的另一面。它和 [`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md) 形成关键张力——扩散学到的物理是不是"够用的物理",是当前最值得正面回答的问题。在论文谱系上,[Trajeglish](paper_trajeglish.md)、[MoST](paper_most_simagents.md)、[GAIA-1](paper_gaia1.md)、[DriveDreamer](paper_drivedreamer.md) 都把这一缺口列为主要动机;[`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 把"用真实闭环回放检验仿真"作为工程化的常规配置。 +它直接喂养 [`paradigm:simulator_first_synthetic_data_centric`](paradigm_simulator_first_synthetic_data_centric.md) 与 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)。它和 [`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md) 互为孪生:闭环差距是离线代理失真的另一面。它和 [`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md) 形成关键张力——扩散学到的物理是不是"够用的物理",是当前最值得正面回答的问题。在论文谱系上,[Trajeglish](paper_trajeglish.md)、[MoST](paper_most_simagents.md)、[GAIA-1](../paper_gaia1.md)、[DriveDreamer](../paper_drivedreamer.md) 都把这一缺口列为主要动机;[`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 把"用真实闭环回放检验仿真"作为工程化的常规配置。 diff --git a/docs/data/cards/extended/problem_label_efficiency_for_3d_annotation.md b/docs/data/cards/extended/problem_label_efficiency_for_3d_annotation.md index 033a587..3b613a1 100644 --- a/docs/data/cards/extended/problem_label_efficiency_for_3d_annotation.md +++ 
b/docs/data/cards/extended/problem_label_efficiency_for_3d_annotation.md @@ -33,4 +33,4 @@ ## 与之相关的研究路径 -它直接喂养 [`insight:foundation_features_transfer_without_finetune`](insight_foundation_features_transfer_without_finetune.md) 与 [`insight:multi_view_geometry_as_free_supervision`](insight_multi_view_geometry_as_free_supervision.md);并和 [`paradigm:scaling_data_with_self_supervision`](paradigm_scaling_data_with_self_supervision.md)、[`paradigm:neural_scene_reconstruction_as_engine`](paradigm_neural_scene_reconstruction_as_engine.md) 形成工程出口。它和 [`problem:annotation_inconsistency_across_datasets`](problem_annotation_inconsistency_across_datasets.md) 互锁——标签效率与标签一致性是同一管线的两面。它和 [`problem:long_tail_object_categories_in_open_world`](problem_long_tail_object_categories_in_open_world.md) 共生:长尾物体单条标注成本极高,使整体覆盖度永远滞后。论文谱系上 [DINOv2](paper_dinov2.md) / [DINOv3](paper_2508.10104_dinov3.md)、[Depth Anything](paper_depth_anything.md)、[SAM](paper_sam.md) 都把"减少人工"列为主要目标;动手验证 [`../../../labs/lab05_dinov3_features_minidata.ipynb`](../../../labs/lab05_dinov3_features_minidata.ipynb)。 +它直接喂养 [`insight:foundation_features_transfer_without_finetune`](insight_foundation_features_transfer_without_finetune.md) 与 [`insight:multi_view_geometry_as_free_supervision`](insight_multi_view_geometry_as_free_supervision.md);并和 [`paradigm:scaling_data_with_self_supervision`](paradigm_scaling_data_with_self_supervision.md)、[`paradigm:neural_scene_reconstruction_as_engine`](paradigm_neural_scene_reconstruction_as_engine.md) 形成工程出口。它和 [`problem:annotation_inconsistency_across_datasets`](problem_annotation_inconsistency_across_datasets.md) 互锁——标签效率与标签一致性是同一管线的两面。它和 [`problem:long_tail_object_categories_in_open_world`](problem_long_tail_object_categories_in_open_world.md) 共生:长尾物体单条标注成本极高,使整体覆盖度永远滞后。论文谱系上 [DINOv2](../paper_dinov2.md) / [DINOv3](paper_2508.10104_dinov3.md)、[Depth Anything](paper_depth_anything.md)、[SAM](../paper_sam.md) 都把"减少人工"列为主要目标;动手验证 
[`../../../labs/lab05_dinov3_features_minidata.ipynb`](../../../labs/lab05_dinov3_features_minidata.ipynb)。 diff --git a/docs/data/cards/extended/problem_long_horizon_credit_assignment_in_driving.md b/docs/data/cards/extended/problem_long_horizon_credit_assignment_in_driving.md index 534ad59..3569945 100644 --- a/docs/data/cards/extended/problem_long_horizon_credit_assignment_in_driving.md +++ b/docs/data/cards/extended/problem_long_horizon_credit_assignment_in_driving.md @@ -49,7 +49,7 @@ flowchart TD |---|---|---| | [MuZero](paper_muzero.md) | 学隐式动力学 + MCTS 把信用分摊到搜索路径 | 仿真保真度限制;驾驶域复杂 NPC 难以建模 | | 世界模型 + actor-critic ([Dreamer V3](paper_dreamer_v3.md)) | 在 latent imagination 中做长 rollout | 需要梦境保真,[`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md) 边界条件 | -| [Diffuser / Decision Diffuser](paper_diffuser.md) | 把规划当条件扩散,端到端 score 整段轨迹 | 长视野时计算成本 + 多模态行为捕捉仍是挑战 | +| [Diffuser / Decision Diffuser](../paper_diffuser.md) | 把规划当条件扩散,端到端 score 整段轨迹 | 长视野时计算成本 + 多模态行为捕捉仍是挑战 | | n-step / GAE | 经典 RL 中减小信用分配方差的手段 | 长视野下仍受奖励稀疏限制 | | Hindsight experience replay | 把每条轨迹重标签为成功 | 驾驶的负事件不能 hindsight 改写 | | 子目标分层 RL | 把长任务拆成中间目标 | 子目标定义本身依赖人工先验 | @@ -63,4 +63,4 @@ flowchart TD ## 与之相关的研究路径 -它直接喂养 [`insight:world_models_let_planning_be_done_in_imagination`](insight_world_models_let_planning_be_done_in_imagination.md) 与 [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md)——把"信号稀"用"搜索深"换。它和 [`problem:exploration_in_safety_critical_systems`](problem_exploration_in_safety_critical_systems.md) 形成孪生约束:探索受限让长视野信号更难获得。它和 [`problem:planning_horizon_vs_compute_budget_tradeoff`](problem_planning_horizon_vs_compute_budget_tradeoff.md) 互补:信用分配解决"哪里值得搜",时域 / 算力权衡解决"能搜多深"。论文谱系上 [MuZero](paper_muzero.md)、[Diffuser](paper_diffuser.md)、[Dreamer V3](paper_dreamer_v3.md) 
都把这一问题列作主要动机;[`paradigm:model_based_world_imagination_planning`](paradigm_model_based_world_imagination_planning.md) 与 [`paradigm:sequence_modeling_for_decision`](paradigm_sequence_modeling_for_decision.md) 提供两条互补工程化路径。 +它直接喂养 [`insight:world_models_let_planning_be_done_in_imagination`](insight_world_models_let_planning_be_done_in_imagination.md) 与 [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md)——把"信号稀"用"搜索深"换。它和 [`problem:exploration_in_safety_critical_systems`](problem_exploration_in_safety_critical_systems.md) 形成孪生约束:探索受限让长视野信号更难获得。它和 [`problem:planning_horizon_vs_compute_budget_tradeoff`](problem_planning_horizon_vs_compute_budget_tradeoff.md) 互补:信用分配解决"哪里值得搜",时域 / 算力权衡解决"能搜多深"。论文谱系上 [MuZero](paper_muzero.md)、[Diffuser](../paper_diffuser.md)、[Dreamer V3](paper_dreamer_v3.md) 都把这一问题列作主要动机;[`paradigm:model_based_world_imagination_planning`](paradigm_model_based_world_imagination_planning.md) 与 [`paradigm:sequence_modeling_for_decision`](paradigm_sequence_modeling_for_decision.md) 提供两条互补工程化路径。 diff --git a/docs/data/cards/extended/problem_long_horizon_reasoning_with_finite_context_window.md b/docs/data/cards/extended/problem_long_horizon_reasoning_with_finite_context_window.md index 53ac10f..2e48188 100644 --- a/docs/data/cards/extended/problem_long_horizon_reasoning_with_finite_context_window.md +++ b/docs/data/cards/extended/problem_long_horizon_reasoning_with_finite_context_window.md @@ -20,7 +20,7 @@ | 半解 | 怎样接近 | 它没解决什么 | |---|---|---| | 大窗口 LLM ([Gemini 1.5 Pro](paper_gemini.md), [Claude](paper_claude.md)) | 把窗口拉到 1 M+ | 推理延迟与显存仍随长度增长 | -| Sparse / Linear / Mamba attention ([Flash](paper_flashattention.md), [Mamba](paper_mamba.md), [Linear](paper_linear_attention.md)) | 把复杂度降到亚二次 | 长程精度退化,尤其在中段 token | +| Sparse / Linear / Mamba attention ([Flash](paper_flashattention.md), [Mamba](../paper_mamba.md), [Linear](paper_linear_attention.md)) | 把复杂度降到亚二次 | 长程精度退化,尤其在中段 token | | 
Retrieval-augmented memory ([`move:use_retrieval_augmented_memory_to_extend_context`](move_use_retrieval_augmented_memory_to_extend_context.md)) | 把过去存外部向量库按需取 | retrieval 召回不完整时模型失忆 | | 层次化子目标 ([`move:long_horizon_via_hierarchical_subgoal`](move_long_horizon_via_hierarchical_subgoal.md)) | 把长时任务切成短时段 | 子目标划分本身仍需人工设计 | | 循环隐状态 ([`move:carry_recurrent_hidden_state_across_long_videos`](move_carry_recurrent_hidden_state_across_long_videos.md)) | 用 RNN / SSM 维持常数状态 | 状态压缩信息损失,深远过去模糊 | @@ -35,4 +35,4 @@ ## 与之相关的研究路径 -它的直接对策路径是 [`move:carry_recurrent_hidden_state_across_long_videos`](move_carry_recurrent_hidden_state_across_long_videos.md) (状态压缩) + [`move:use_retrieval_augmented_memory_to_extend_context`](move_use_retrieval_augmented_memory_to_extend_context.md) (外部检索) + [`move:long_horizon_via_hierarchical_subgoal`](move_long_horizon_via_hierarchical_subgoal.md) (层次化分解)。它和 [`paradigm:llm_agent_paradigm`](paradigm_llm_agent_paradigm.md)、[`paradigm:vla_paradigm`](paradigm_vla_paradigm.md) 互为约束——agent 范式假设模型能跨步追踪 task progress,本问题是这一假设的现实瓶颈。它和 [`insight:agent_loop_is_just_iterated_conditional_generation`](insight_agent_loop_is_just_iterated_conditional_generation.md) 共生:迭代生成在窗口爆炸时退化为只看最近一步,长时一致性消失。在论文链上 [Gemini](paper_gemini.md)、[Mamba](paper_mamba.md)、[FlashAttention](paper_flashattention.md) 是主线,[`problem:planning_horizon_vs_compute_budget_tradeoff`](problem_planning_horizon_vs_compute_budget_tradeoff.md) 是其在计算预算维度的孪生。 +它的直接对策路径是 [`move:carry_recurrent_hidden_state_across_long_videos`](move_carry_recurrent_hidden_state_across_long_videos.md) (状态压缩) + [`move:use_retrieval_augmented_memory_to_extend_context`](move_use_retrieval_augmented_memory_to_extend_context.md) (外部检索) + [`move:long_horizon_via_hierarchical_subgoal`](move_long_horizon_via_hierarchical_subgoal.md) (层次化分解)。它和 [`paradigm:llm_agent_paradigm`](paradigm_llm_agent_paradigm.md)、[`paradigm:vla_paradigm`](paradigm_vla_paradigm.md) 互为约束——agent 范式假设模型能跨步追踪 task progress,本问题是这一假设的现实瓶颈。它和 
[`insight:agent_loop_is_just_iterated_conditional_generation`](insight_agent_loop_is_just_iterated_conditional_generation.md) 共生:迭代生成在窗口爆炸时退化为只看最近一步,长时一致性消失。在论文链上 [Gemini](paper_gemini.md)、[Mamba](../paper_mamba.md)、[FlashAttention](paper_flashattention.md) 是主线,[`problem:planning_horizon_vs_compute_budget_tradeoff`](problem_planning_horizon_vs_compute_budget_tradeoff.md) 是其在计算预算维度的孪生。 diff --git a/docs/data/cards/extended/problem_occlusion_reasoning_without_dense_lidar.md b/docs/data/cards/extended/problem_occlusion_reasoning_without_dense_lidar.md index 4992ad4..8615c4a 100644 --- a/docs/data/cards/extended/problem_occlusion_reasoning_without_dense_lidar.md +++ b/docs/data/cards/extended/problem_occlusion_reasoning_without_dense_lidar.md @@ -20,7 +20,7 @@ |---|---|---| | 类别无关占用 ([`move:replace_class_specific_box_with_class_agnostic_occupancy`](move_replace_class_specific_box_with_class_agnostic_occupancy.md)) | 用 free / occupied / unknown 三态体素 | unknown 的概率分布并未量化 | | 隐空间预测运动后再解码 ([`move:learn_motion_in_latent_space_then_decode`](move_learn_motion_in_latent_space_then_decode.md)) | 把"还看不见的人"作为隐变量预测 | 多模态下采样易塌缩 | -| 视频世界模型 ([DriveDreamer](paper_drivedreamer.md), [GAIA-1](paper_gaia1.md)) | 直接生成被遮挡区域的潜在内容 | 物体身份持久性差 | +| 视频世界模型 ([DriveDreamer](../paper_drivedreamer.md), [GAIA-1](../paper_gaia1.md)) | 直接生成被遮挡区域的潜在内容 | 物体身份持久性差 | | 雷达 + 相机融合 | 雷达穿透 + 相机识别 | 雷达分辨率有限,对低速 / 静止行人易漏 | | 多视图时序聚合 (BEV + temporal) | 用前几帧未遮挡的观测填补当前帧 | 长时遮挡仍然是盲区 | | 几何 + 类别先验 (前车后必有空间) | 用驾驶常识做兜底 | 在真正复杂场景里失效 | @@ -33,4 +33,4 @@ ## 与之相关的研究路径 -它的孪生洞察是 [`insight:occupancy_unifies_static_and_dynamic_scene`](insight_occupancy_unifies_static_and_dynamic_scene.md)(提供表征结构)与 [`insight:uncertainty_calibration_is_prerequisite_for_safe_delegation`](insight_uncertainty_calibration_is_prerequisite_for_safe_delegation.md)(提供不确定性条件)。它和 [`problem:long_tail_object_categories_in_open_world`](problem_long_tail_object_categories_in_open_world.md) 互补——前者关心"在视野里但不认识",本条关心"不在视野里但可能存在"。它的工程出口是 
[`paradigm:camera_first_autonomy`](paradigm_camera_first_autonomy.md) 与 [`paradigm:world_model_paradigm`](paradigm_world_model_paradigm.md) 的占用 / 生成路线;论文谱系上 [DETR3D](paper_detr3d.md)、[SurroundOcc](paper_surroundocc.md)、[DriveDreamer](paper_drivedreamer.md)、[GAIA-1](paper_gaia1.md) 都把它作为关键失败模式之一。 +它的孪生洞察是 [`insight:occupancy_unifies_static_and_dynamic_scene`](insight_occupancy_unifies_static_and_dynamic_scene.md)(提供表征结构)与 [`insight:uncertainty_calibration_is_prerequisite_for_safe_delegation`](insight_uncertainty_calibration_is_prerequisite_for_safe_delegation.md)(提供不确定性条件)。它和 [`problem:long_tail_object_categories_in_open_world`](problem_long_tail_object_categories_in_open_world.md) 互补——前者关心"在视野里但不认识",本条关心"不在视野里但可能存在"。它的工程出口是 [`paradigm:camera_first_autonomy`](paradigm_camera_first_autonomy.md) 与 [`paradigm:world_model_paradigm`](paradigm_world_model_paradigm.md) 的占用 / 生成路线;论文谱系上 [DETR3D](paper_detr3d.md)、[SurroundOcc](paper_surroundocc.md)、[DriveDreamer](../paper_drivedreamer.md)、[GAIA-1](../paper_gaia1.md) 都把它作为关键失败模式之一。 diff --git a/docs/data/cards/extended/problem_offline_metric_does_not_predict_closed_loop_performance.md b/docs/data/cards/extended/problem_offline_metric_does_not_predict_closed_loop_performance.md index 88eeddd..9bfe292 100644 --- a/docs/data/cards/extended/problem_offline_metric_does_not_predict_closed_loop_performance.md +++ b/docs/data/cards/extended/problem_offline_metric_does_not_predict_closed_loop_performance.md @@ -43,13 +43,13 @@ $$\text{闭环表现}(\pi)\ =\ \mathbb{E}_{s \sim p_\pi}\!\big[\,\text{event}(s) 下面这几条 *研究方向* 都把这一问题当作起点: 1. **可证可信的闭环代理**:找到一类闭环安全的可证性度量,使其与离线代理之间有形式化的 lower-bound 关系。 -2. **世界模型作为代理评估器**:用 [GAIA-1](paper_gaia1.md)/[DriveDreamer](paper_drivedreamer.md) 一类的视频世界模型生成闭环 rollout,对比真车跑分。 +2. **世界模型作为代理评估器**:用 [GAIA-1](../paper_gaia1.md)/[DriveDreamer](../paper_drivedreamer.md) 一类的视频世界模型生成闭环 rollout,对比真车跑分。 3. **反事实压力测试**:在 [CF-VLA](paper_2512.24426_cfvla.md) 思路下,用合成对抗场景定向探查模型边界。 4. 
**离线分数的多目标剖分**:把 L2/Collision 拆成"专家分布上的拟合"与"自我分布上的安全"两个独立目标,并显式约束它们的差距。 5. **闭环回放正则化**:在训练时直接惩罚模型在自我状态分布上的失败。 ## 它跟其它节点的连接 -- 反向:被多份开创性工作 [UniAD](paper_2212.10156_uniad.md)、[VADv2](paper_vadv2.md)、[CF-VLA](paper_2512.24426_cfvla.md) 选作主要痛点。 +- 反向:被多份开创性工作 [UniAD](paper_2212.10156_uniad.md)、[VADv2](../paper_vadv2.md)、[CF-VLA](paper_2512.24426_cfvla.md) 选作主要痛点。 - 正向:动机驱动 `move:design_closed_loop_metric_correlated_with_real_world_safety`,进而推出 [闭环数据引擎为中心的开发范式](paradigm_closed_loop_data_engine_centric_development.md)。 - 平行:跟 [`problem:rare_safety_critical_events_dominate_real_risk_but_are_under_represented`](problem_rare_safety_critical_events_dominate_real_risk_but_are_under_represented.md) 互为孪生:罕见性是离线指标失真的最主要源。 diff --git a/docs/data/cards/extended/problem_open_world_corner_case_synthesis_for_training.md b/docs/data/cards/extended/problem_open_world_corner_case_synthesis_for_training.md index 5da8143..cc52423 100644 --- a/docs/data/cards/extended/problem_open_world_corner_case_synthesis_for_training.md +++ b/docs/data/cards/extended/problem_open_world_corner_case_synthesis_for_training.md @@ -4,7 +4,7 @@ ## 现象 -主流驾驶数据集(nuScenes、Waymo Open、Argoverse 2)合计约 $10^4$ 小时驾驶时长,事故率级别的危险事件几乎全部缺席。视频世界模型路线([GAIA-1](paper_gaia1.md)、[DriveDreamer](paper_drivedreamer.md)、[Cosmos](paper_cosmos.md))已能生成 5–20 秒的高保真驾驶视频,外加可控场景属性(天气、密度、行人)。但即便合成 10 万条 corner case 视频喂进训练,下游闭环成绩的提升常常不到 5%;同时模型在合成场景上的指标显著优于真实场景上的对应指标,提示"在合成数据上 overfit"已经发生。CF-VLA、PRISM-1 等工作把这一痛点列为合成数据中心化范式的主要瓶颈。 +主流驾驶数据集(nuScenes、Waymo Open、Argoverse 2)合计约 $10^4$ 小时驾驶时长,事故率级别的危险事件几乎全部缺席。视频世界模型路线([GAIA-1](../paper_gaia1.md)、[DriveDreamer](../paper_drivedreamer.md)、[Cosmos](paper_cosmos.md))已能生成 5–20 秒的高保真驾驶视频,外加可控场景属性(天气、密度、行人)。但即便合成 10 万条 corner case 视频喂进训练,下游闭环成绩的提升常常不到 5%;同时模型在合成场景上的指标显著优于真实场景上的对应指标,提示"在合成数据上 overfit"已经发生。CF-VLA、PRISM-1 等工作把这一痛点列为合成数据中心化范式的主要瓶颈。 ## 为什么难 @@ -19,7 +19,7 @@ | 半解 | 怎样接近 | 它没解决什么 | |---|---|---| -| 视频扩散世界模型 ([GAIA-1](paper_gaia1.md), [Cosmos](paper_cosmos.md)) | 大规模视频预训练 + 动作条件 | 
生成的危险仍是训练分布的内插,未知盲区不可达 | +| 视频扩散世界模型 ([GAIA-1](../paper_gaia1.md), [Cosmos](paper_cosmos.md)) | 大规模视频预训练 + 动作条件 | 生成的危险仍是训练分布的内插,未知盲区不可达 | | 反事实生成 ([CF-VLA](paper_2512.24426_cfvla.md), [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md)) | 给已知日志做"如果他车这样"的扰动 | 扰动参数化由人决定,无法逼出真正未见几何 | | 离线场景扰动 ([`move:augment_dataset_via_offline_scenario_perturbation`](move_augment_dataset_via_offline_scenario_perturbation.md)) | 在真实日志上改其它代理速度 / 朝向 | 改不了视觉外观,覆盖几何而非语义 | | 程序化场景库 (SMARTS / MetaDrive scenario zoo) | 把"危险" 枚举成参数化合约 | 本体依赖人类对"危险" 的当前理解 | diff --git a/docs/data/cards/extended/problem_planning_horizon_vs_compute_budget_tradeoff.md b/docs/data/cards/extended/problem_planning_horizon_vs_compute_budget_tradeoff.md index 5813d03..cb58f7e 100644 --- a/docs/data/cards/extended/problem_planning_horizon_vs_compute_budget_tradeoff.md +++ b/docs/data/cards/extended/problem_planning_horizon_vs_compute_budget_tradeoff.md @@ -4,7 +4,7 @@ ## 现象 -车载规划栈的硬约束:planner 必须在每个控制周期内完成。典型工程预算是 30–100 ms 内输出 5–10 秒后的轨迹。MPC 树搜索的状态数随分支因子 $b$ 与深度 $h$ 呈 $O(b^h)$ 增长,连续动作空间的 sampling-based planner (LQR-tree, RRT*) 则在收敛速率与轨迹质量之间留下灰带。一旦把 RL 类长视野规划接入栈尾,[MuZero](paper_muzero.md) 风格的 MCTS 在 simulation budget 不足时直接退化为短视策略;ChauffeurNet / [Diffuser](paper_diffuser.md) 选择把整段轨迹建为生成对象,把时域并行化,但代价是放弃细粒度反馈。工业界 (Waymo, Cruise, Mobileye) 公开过的方案都用"快慢分层规划"折中,但分层边界的设计仍然是经验艺术。 +车载规划栈的硬约束:planner 必须在每个控制周期内完成。典型工程预算是 30–100 ms 内输出 5–10 秒后的轨迹。MPC 树搜索的状态数随分支因子 $b$ 与深度 $h$ 呈 $O(b^h)$ 增长,连续动作空间的 sampling-based planner (LQR-tree, RRT*) 则在收敛速率与轨迹质量之间留下灰带。一旦把 RL 类长视野规划接入栈尾,[MuZero](paper_muzero.md) 风格的 MCTS 在 simulation budget 不足时直接退化为短视策略;ChauffeurNet / [Diffuser](../paper_diffuser.md) 选择把整段轨迹建为生成对象,把时域并行化,但代价是放弃细粒度反馈。工业界 (Waymo, Cruise, Mobileye) 公开过的方案都用"快慢分层规划"折中,但分层边界的设计仍然是经验艺术。 ## 为什么难 @@ -34,4 +34,4 @@ ## 与之相关的研究路径 -它和 
[`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md)、[`insight:dual_system_handles_latency_quality_tradeoff`](insight_dual_system_handles_latency_quality_tradeoff.md) 共同构成"用更多算力换更深视野"的方法学三角。它和 [`problem:long_horizon_credit_assignment_in_driving`](problem_long_horizon_credit_assignment_in_driving.md) 互补——前者关心"能搜多深",后者关心"哪里值得搜"。它和 [`problem:latency_budget_for_large_model_in_realtime_control`](problem_latency_budget_for_large_model_in_realtime_control.md) 互锁:大模型介入会直接撞延迟天花板,[`insight:event_sparse_compute_matches_energy_budget`](insight_event_sparse_compute_matches_energy_budget.md) 与神经形态思路是一条可能的出口。它和 [`paradigm:optimal_control`](paradigm_optimal_control.md)、[`paradigm:model_based_world_imagination_planning`](paradigm_model_based_world_imagination_planning.md) 在工程化层面合流。论文谱系上 [CILQR](paper_cilqr.md)、[MPC 教科书](paper_mpc_book.md)、[Diffuser](paper_diffuser.md)、[MuZero](paper_muzero.md) 都把这一权衡列作主要设计变量;动手验证 [`../../../labs/lab04_plant_object_level_planner.ipynb`](../../../labs/lab04_plant_object_level_planner.ipynb)。 +它和 [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md)、[`insight:dual_system_handles_latency_quality_tradeoff`](insight_dual_system_handles_latency_quality_tradeoff.md) 共同构成"用更多算力换更深视野"的方法学三角。它和 [`problem:long_horizon_credit_assignment_in_driving`](problem_long_horizon_credit_assignment_in_driving.md) 互补——前者关心"能搜多深",后者关心"哪里值得搜"。它和 [`problem:latency_budget_for_large_model_in_realtime_control`](problem_latency_budget_for_large_model_in_realtime_control.md) 互锁:大模型介入会直接撞延迟天花板,[`insight:event_sparse_compute_matches_energy_budget`](insight_event_sparse_compute_matches_energy_budget.md) 与神经形态思路是一条可能的出口。它和 [`paradigm:optimal_control`](paradigm_optimal_control.md)、[`paradigm:model_based_world_imagination_planning`](paradigm_model_based_world_imagination_planning.md) 在工程化层面合流。论文谱系上 [CILQR](paper_cilqr.md)、[MPC 
教科书](paper_mpc_book.md)、[Diffuser](../paper_diffuser.md)、[MuZero](paper_muzero.md) 都把这一权衡列作主要设计变量;动手验证 [`../../../labs/lab04_plant_object_level_planner.ipynb`](../../../labs/lab04_plant_object_level_planner.ipynb)。 diff --git a/docs/data/cards/extended/problem_rare_safety_critical_events_dominate_real_risk_but_are_under_represented.md b/docs/data/cards/extended/problem_rare_safety_critical_events_dominate_real_risk_but_are_under_represented.md index 0470659..89ceb27 100644 --- a/docs/data/cards/extended/problem_rare_safety_critical_events_dominate_real_risk_but_are_under_represented.md +++ b/docs/data/cards/extended/problem_rare_safety_critical_events_dominate_real_risk_but_are_under_represented.md @@ -64,4 +64,4 @@ flowchart TD ## 与之相关的研究路径 -它直接喂养 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md) 与 [`paradigm:simulator_first_synthetic_data_centric`](paradigm_simulator_first_synthetic_data_centric.md)。它和 [`insight:long_tail_solved_by_synthesis_not_data_alone`](insight_long_tail_solved_by_synthesis_not_data_alone.md) 共生,提供了后者的核心论据。它的孪生问题是 [`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md):罕见性是离线指标失真的最主要源。它和 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 互为表里——闭环引擎的核心工程动机就是把罕见事件的发现速度从月级压到日级。在论文谱系上,[CF-VLA](paper_2512.24426_cfvla.md)、[DriveDreamer](paper_drivedreamer.md)、[GAIA-1](paper_gaia1.md)、[Trajeglish](paper_trajeglish.md) 都把这一痛点列为主要动机。 +它直接喂养 [`paradigm:counterfactual_data_centric_safety`](paradigm_counterfactual_data_centric_safety.md) 与 [`paradigm:simulator_first_synthetic_data_centric`](paradigm_simulator_first_synthetic_data_centric.md)。它和 [`insight:long_tail_solved_by_synthesis_not_data_alone`](insight_long_tail_solved_by_synthesis_not_data_alone.md) 共生,提供了后者的核心论据。它的孪生问题是 
[`problem:offline_metric_does_not_predict_closed_loop_performance`](problem_offline_metric_does_not_predict_closed_loop_performance.md):罕见性是离线指标失真的最主要源。它和 [`paradigm:closed_loop_data_engine_centric_development`](paradigm_closed_loop_data_engine_centric_development.md) 互为表里——闭环引擎的核心工程动机就是把罕见事件的发现速度从月级压到日级。在论文谱系上,[CF-VLA](paper_2512.24426_cfvla.md)、[DriveDreamer](../paper_drivedreamer.md)、[GAIA-1](../paper_gaia1.md)、[Trajeglish](paper_trajeglish.md) 都把这一痛点列为主要动机。 diff --git a/docs/data/cards/extended/validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md b/docs/data/cards/extended/validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md index 9aa07ca..7de4a90 100644 --- a/docs/data/cards/extended/validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md +++ b/docs/data/cards/extended/validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md @@ -35,9 +35,9 @@ graph TB | 类别 | 节点 | 在再发明里的角色 | |---|---|---| -| 论文 | [Attention Is All You Need](paper_vaswani2017.md) | 提供 cross-attention 的基础数学 | -| 论文 | [ViT](paper_vit.md) | 提供 patch token 化的图像主干 | -| 论文 | [DETR](paper_carion2020.md) | 提供"对象 query 作为可微输出接口"的范式 | +| 论文 | [Attention Is All You Need](../paper_vaswani2017.md) | 提供 cross-attention 的基础数学 | +| 论文 | [ViT](../paper_vit.md) | 提供 patch token 化的图像主干 | +| 论文 | [DETR](../paper_carion2020.md) | 提供"对象 query 作为可微输出接口"的范式 | | 概念 | Transformer 架构 | 让 BEV 格点也能写成序列 | | 概念 | DETR object query | 让"我关心的实体"显式可学习 | | 概念 | BEV 感知 | 提供鸟瞰图作为统一坐标系的需求 | @@ -56,7 +56,7 @@ graph TB $$\text{BEV}(u,v) = \sum_{i=1}^{N_\text{cam}} \sum_{j=1}^{N_\text{pillar}} \text{DefAttn}\big(q_{u,v},\; \mathcal{P}_i(u,v,z_j),\; F_i\big)$$ 其中 $\mathcal{P}_i$ 是从 3D 点到第 $i$ 路相机的投影函数。 6. **加入时间维度**:由 [时序聚合能换取深度传感器所能换取的](insight_temporal_aggregation_buys_what_depth_sensor_buys.md) 给出动机。让当前帧 BEV query 先跟上一帧 warped BEV 做 self-attention,再跟当前帧多相机做 cross-attention。 -7. **沿用 DETR 的检测头**:由 [DETR](paper_carion2020.md) 提供;在 BEV 特征上接一个稀疏 query 集合预测 3D 框。 +7. 
**沿用 DETR 的检测头**:由 [DETR](../paper_carion2020.md) 提供;在 BEV 特征上接一个稀疏 query 集合预测 3D 框。 → 必然得到 BEVFormer:稠密 BEV query + 空间 deformable cross-attention + 时间 self-attention + DETR3D 风格的检测头。 @@ -67,7 +67,7 @@ BEVFormer 一旦成立,它就成了下游一连串工作的脚手架: - → [BEVFusion](paper_bevfusion.md) 在 BEV 空间里融合 LiDAR 特征。 - → [StreamPETR](paper_streampetr.md) 把时间 self-attention 进一步流式化,query 跨帧循环。 - → [UniAD](paper_2212.10156_uniad.md) 沿 BEVFormer 路径把跟踪、运动、规划共享同一组 BEV query。 -- → [VADv2](paper_vadv2.md) 把 BEV 进一步向量化,跳过显式格点。 +- → [VADv2](../paper_vadv2.md) 把 BEV 进一步向量化,跳过显式格点。 - → 占用预测家族(Tesla Occupancy Network、SurroundOcc 等)沿用 BEV query 思想,只是把 2D 格点换成 3D 体素。 ## 一条值得注意的反例 @@ -76,6 +76,6 @@ BEVFormer 的稠密 BEV query 在长距离稀疏场景下计算浪费严重— ## 推荐起步 -1. 读 [BEVFormer 卡片](paper_li2022bevformer.md) 与 [DETR 卡片](paper_carion2020.md),并把 [集合预测再发现](validation_trace_set_prediction_with_object_queries.md) 与 [ViT 再发现](validation_trace_image_transformer_via_patch_tokenization.md) 一并对照。 +1. 读 [BEVFormer 卡片](../paper_li2022bevformer.md) 与 [DETR 卡片](../paper_carion2020.md),并把 [集合预测再发现](validation_trace_set_prediction_with_object_queries.md) 与 [ViT 再发现](validation_trace_image_transformer_via_patch_tokenization.md) 一并对照。 2. 在 nuScenes mini 上复现一个最小 BEVFormer,只用单帧 + 单尺度,观察 BEV query 的空间分布。 3. 
把时间 self-attention 加回去,在同样的 mini 数据上看 mAP / NDS 的边际变化,体会 [时序聚合能换取深度传感器所能换取的](insight_temporal_aggregation_buys_what_depth_sensor_buys.md) 在数字上的具体含义。 diff --git a/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md b/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md index c87b503..9d518db 100644 --- a/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md +++ b/docs/data/cards/extended/validation_trace_counterfactual_vla_replanner.md @@ -46,10 +46,10 @@ flowchart TD | 洞察 | [`insight:test_time_compute_substitutes_train_time_via_search`](insight_test_time_compute_substitutes_train_time_via_search.md) | 给出搜索式决策的方法论 | | 洞察 | [`insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control`](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md) | 让反事实重规划仅在罕见场景触发 | | 论文 | [`paper:2402.12289_drivevlm`](paper_2402.12289_drivevlm.md) | 提供稳定的 VLA 主干 | -| 论文 | [`paper:llava`](paper_llava.md) | 提供 VLM 底座 | +| 论文 | [`paper:llava`](../paper_llava.md) | 提供 VLM 底座 | | 论文 | [`paper:world_models`](paper_world_models.md) | 提供"在潜空间想象"的早期证据 | -| 论文 | [`paper:gaia1`](paper_gaia1.md) | 提供驾驶世界模型样板 | -| 论文 | [`paper:drivedreamer`](paper_drivedreamer.md) | 提供条件化视频生成器 | +| 论文 | [`paper:gaia1`](../paper_gaia1.md) | 提供驾驶世界模型样板 | +| 论文 | [`paper:drivedreamer`](../paper_drivedreamer.md) | 提供条件化视频生成器 | | 论文 | [`paper:rlhf_dpo`](paper_rlhf_dpo.md) | 提供偏好对齐工具 | ## 推演逻辑 @@ -77,7 +77,7 @@ CF-VLA 是数条范式的交汇点,因此其延伸方向众多: ## 一条值得注意的反例 -CF-VLA 推演链解释了"反事实搜索能提升长尾决策",但**没能预言生成器质量的天花板**。当世界模型对罕见物理交互(如冰面摩擦、紧急避让动力学)的生成不准时,反事实分支本身就是错误样本,反而误导 VLA。这一痛点直接驱动 [Cosmos](paper_cosmos.md) 与 [GAIA-2](paper_gaia1.md) 一类更高保真度世界模型的研究。另一个推演缺口是偏好数据的稀缺:罕见场景下的专家偏好标注既稀少又昂贵,难以支撑 DPO 训练。 +CF-VLA 推演链解释了"反事实搜索能提升长尾决策",但**没能预言生成器质量的天花板**。当世界模型对罕见物理交互(如冰面摩擦、紧急避让动力学)的生成不准时,反事实分支本身就是错误样本,反而误导 VLA。这一痛点直接驱动 [Cosmos](paper_cosmos.md) 与 [GAIA-2](../paper_gaia1.md) 一类更高保真度世界模型的研究。另一个推演缺口是偏好数据的稀缺:罕见场景下的专家偏好标注既稀少又昂贵,难以支撑 DPO 训练。 ## 推荐起步 diff 
--git a/docs/data/cards/extended/validation_trace_decision_transformer_offline_rl_via_sequence_modeling.md b/docs/data/cards/extended/validation_trace_decision_transformer_offline_rl_via_sequence_modeling.md index 3ad940f..46cffdf 100644 --- a/docs/data/cards/extended/validation_trace_decision_transformer_offline_rl_via_sequence_modeling.md +++ b/docs/data/cards/extended/validation_trace_decision_transformer_offline_rl_via_sequence_modeling.md @@ -6,8 +6,8 @@ | 类别 | 节点 | 角色 | |---|---|---| -| 论文 | [Attention Is All You Need](paper_vaswani2017.md) | 提供 self-attention 与因果掩码 | -| 论文 | [GPT-3](paper_gpt3.md) | 证明自回归 Transformer 在大语料下的 few-shot 能力 | +| 论文 | [Attention Is All You Need](../paper_vaswani2017.md) | 提供 self-attention 与因果掩码 | +| 论文 | [GPT-3](../paper_gpt3.md) | 证明自回归 Transformer 在大语料下的 few-shot 能力 | | 概念 | [Transformer 架构](../../concepts.md#transformer) | 序列建模骨架 | | 概念 | [模仿学习 / BC](../../concepts.md#imitation_learning) | 给出"用监督学习训策略"的基础范式 | | 移动 | [`move:tokenize_modalities`](move_tokenize_modalities.md) | 把状态、动作、奖励统一映射成 token | @@ -24,7 +24,7 @@ $$\mathcal{L}(\theta) = -\mathbb{E}_{(s, a, \hat{R}) \sim \mathcal{D}}\big[\log \pi_\theta(a_t \mid s_{1:t}, a_{1:t-1}, \hat{R}_{1:t})\big]$$ 由 [`insight:human_demonstrations_compress_implicit_reward_function`](insight_human_demonstrations_compress_implicit_reward_function.md) 提供合理性:示教里的奖励已被压缩,给定一个目标 return 即可"反查"对应的动作分布。 -4. **承认 GPT 已经给出现成的自回归 backbone**:由 [GPT-3](paper_gpt3.md) 与 [Vaswani 2017](paper_vaswani2017.md) 提供,把 (return, state, action) 三元组按时间拼接成 token 序列即可。 +4. **承认 GPT 已经给出现成的自回归 backbone**:由 [GPT-3](../paper_gpt3.md) 与 [Vaswani 2017](../paper_vaswani2017.md) 提供,把 (return, state, action) 三元组按时间拼接成 token 序列即可。 5. **用因果掩码做下一个 token 预测**:训练只在动作 token 上算交叉熵或回归损失,因果掩码保证因果性。 6. **测试时条件采样**:给定一个目标 return(通常取数据集 max return),让模型自回归地生成动作。 7. 
**统一 tokenize 状态、动作、奖励**:由 [`move:tokenize_modalities`](move_tokenize_modalities.md) 提供。连续动作可以直接当作向量 token,也可以离散化。 @@ -36,7 +36,7 @@ $$\mathcal{L}(\theta) = -\mathbb{E}_{(s, a, \hat{R}) \sim \mathcal{D}}\big[\log * [Trajeglish](paper_trajeglish.md) 与 [MoST](paper_most_simagents.md) 把 Decision Transformer 的思路推广到多 agent 驾驶场景。 * [CodeTrajectory](paper_codetraj.md) 把"动作 token"换成可执行代码片段。 -* [Diffuser](paper_diffuser.md) 用扩散模型替代自回归 Transformer,从离散 token 切到连续扩散。 +* [Diffuser](../paper_diffuser.md) 用扩散模型替代自回归 Transformer,从离散 token 切到连续扩散。 * 驾驶基础模型([基础模型零样本驾驶 agent](paradigm_foundation_model_zero_shot_driving_agent.md))的 backbone 选择基本沿用这条路线。 ## 一条值得注意的反例 @@ -47,4 +47,4 @@ Decision Transformer 在 D4RL antmaze 这样的稀疏奖励长视野任务上表 1. 读 [Chen 等 2021 Decision Transformer 论文](paper_decision_transformer.md)。 2. 实现一个最小 DT:D4RL halfcheetah-medium 数据集 + 4 层 GPT + return-to-go token,对比 BC 与 CQL。 -3. 阅读 [Trajectory Transformer](paper_trajectory_transformer.md) 与 [Diffuser](paper_diffuser.md),理解自回归与扩散两条具体路径。 +3. 阅读 [Trajectory Transformer](paper_trajectory_transformer.md) 与 [Diffuser](../paper_diffuser.md),理解自回归与扩散两条具体路径。 diff --git a/docs/data/cards/extended/validation_trace_diffusion_policy_as_score_based_action_sampler.md b/docs/data/cards/extended/validation_trace_diffusion_policy_as_score_based_action_sampler.md index 3625c26..c8da57e 100644 --- a/docs/data/cards/extended/validation_trace_diffusion_policy_as_score_based_action_sampler.md +++ b/docs/data/cards/extended/validation_trace_diffusion_policy_as_score_based_action_sampler.md @@ -6,7 +6,7 @@ | 类别 | 节点 | 角色 | |---|---|---| -| 论文 | [`paper:diffuser`](paper_diffuser.md) | 提供"轨迹本身就是被去噪的对象"这一关键例证 | +| 论文 | [`paper:diffuser`](../paper_diffuser.md) | 提供"轨迹本身就是被去噪的对象"这一关键例证 | | 概念 | DDPM / 反向去噪扩散 | 提供从 $x_T \sim \mathcal{N}(0,I)$ 反推 $x_0$ 的生成范式 | | 概念 | [`concept:imitation_learning`](../../concepts.md#模仿学习) | 提供状态—动作配对数据来源 | | 移动 | [`move:diffusion_denoise_sampling`](move_diffusion_denoise_sampling.md) | 把生成动作改写成迭代去噪 | @@ -20,7 +20,7 @@ 
1. 起点:模仿学习常见的高斯或确定性策略 $\pi_\theta(a\mid o) = \mathcal{N}(\mu_\theta(o), \Sigma)$ 在多模态数据上失败,因为"左变道"和"右变道"被平均成"直行偏左"。 2. 由 `problem:multimodal_behavior_collapses_under_unimodal_policy` 推出:需要一个能表达任意分布的 conditional sampler。 3. 由 [`insight:diffusion_unifies_generation_and_decision`](insight_diffusion_unifies_generation_and_decision.md) 推出:图像生成里的扩散已经是 conditional sampler,那么动作也可以这样建模。 -4. 由 [`paper:diffuser`](paper_diffuser.md) 提供存在性证明:扩散模型可以学会生成可行的未来轨迹序列,那它也能生成短时动作。 +4. 由 [`paper:diffuser`](../paper_diffuser.md) 提供存在性证明:扩散模型可以学会生成可行的未来轨迹序列,那它也能生成短时动作。 5. 由 [`move:diffusion_denoise_sampling`](move_diffusion_denoise_sampling.md) 给出具体形式:训练一个去噪网络 $\epsilon_\theta(a^{(k)}, o, k)$,迭代地把噪声 $a^{(K)}$ 去噪成动作 $a^{(0)}$。损失即标准 DDPM: $$\mathcal{L}_\text{DP} = \mathbb{E}_{(o,a)\sim D,\,k,\,\epsilon}\Big[\|\epsilon - \epsilon_\theta\big(\sqrt{\bar\alpha_k}\,a + \sqrt{1-\bar\alpha_k}\,\epsilon,\,o,\,k\big)\|^2\Big]$$ @@ -38,7 +38,7 @@ $$\mathcal{L}_\text{DP} = \mathbb{E}_{(o,a)\sim D,\,k,\,\epsilon}\Big[\|\epsilon - 在测试时通过 classifier-free guidance 调节"避碰倾向"或"激进程度"。 - 与世界模型耦合后,每一步去噪可以在 [`paradigm:model_based_world_imagination_planning`](paradigm_model_based_world_imagination_planning.md) 的 latent rollout 上做评估,把硬约束做成 guidance score。 -下游可直接通往 [VADv2](paper_vadv2.md) 的概率式轨迹分布、[CF-VLA](paper_2512.24426_cfvla.md) 的反事实重规划,以及把扩散作为 [Sora](paper_sora.md) 一样的物理引擎使用的 [`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md)。 +下游可直接通往 [VADv2](../paper_vadv2.md) 的概率式轨迹分布、[CF-VLA](paper_2512.24426_cfvla.md) 的反事实重规划,以及把扩散作为 [Sora](paper_sora.md) 一样的物理引擎使用的 [`insight:world_model_video_diffusion_is_implicit_physics_engine`](insight_world_model_video_diffusion_is_implicit_physics_engine.md)。 ## 一条值得注意的反例 @@ -49,5 +49,5 @@ Diffusion Policy 在实时控制下的最大隐患是"多模态优势 vs 模式 ## 推荐起步 1. 复现一个最小 Diffusion Policy:玩具 2D push-T 任务 + 128 维去噪网络 + 10 步采样,跑出 BC 高斯策略打不过的多模态行为。 -2. 
读 [Diffuser](paper_diffuser.md) 与 [`insight:diffusion_unifies_generation_and_decision`](insight_diffusion_unifies_generation_and_decision.md),对照"轨迹去噪"与"动作去噪"。 -3. 把 Diffusion Policy 套到 [Bench2Drive](benchmarks_ad.md) 的最小闭环,验证它在多模态车道选择上是否真的优于回归 baseline。 +2. 读 [Diffuser](../paper_diffuser.md) 与 [`insight:diffusion_unifies_generation_and_decision`](insight_diffusion_unifies_generation_and_decision.md),对照"轨迹去噪"与"动作去噪"。 +3. 把 Diffusion Policy 套到 [Bench2Drive](../benchmarks_ad.md) 的最小闭环,验证它在多模态车道选择上是否真的优于回归 baseline。 diff --git a/docs/data/cards/extended/validation_trace_few_shot_in_context_learning_at_scale.md b/docs/data/cards/extended/validation_trace_few_shot_in_context_learning_at_scale.md index 79b1026..6696297 100644 --- a/docs/data/cards/extended/validation_trace_few_shot_in_context_learning_at_scale.md +++ b/docs/data/cards/extended/validation_trace_few_shot_in_context_learning_at_scale.md @@ -14,7 +14,7 @@ | 移动 | `move:remove_finetuning_use_in_context_only` | 关键飞跃:直接用 prompt 而不更新参数 | | 洞察 | [`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) | 给出投资算力的依据 | | 洞察 | [`insight:scaling_data_unlocks_capabilities_not_present_in_smaller_models`](insight_scaling_data_unlocks_capabilities_not_present_in_smaller_models.md) | 解释为什么 175B 比 1.5B 不只是"更准" | -| 论文 | [`paper:vaswani2017`](paper_vaswani2017.md) | 提供 transformer 主干 | +| 论文 | [`paper:vaswani2017`](../paper_vaswani2017.md) | 提供 transformer 主干 | | 论文 | `paper:bert` | 验证大规模预训练的可行性 | | 论文 | `paper:gpt2` | 证明零样本能力初现 | | 文章 | [`essay:bitter_lesson`](essay_bitter_lesson.md) | 提供"放大算力而非堆叠先验"的方法论 | @@ -22,7 +22,7 @@ ## 推演逻辑 -1. **接受 transformer + 自回归 LM 是稳定配方**:由 [`paper:vaswani2017`](paper_vaswani2017.md) 与 GPT-2 共同验证,无需再质疑架构。 +1. **接受 transformer + 自回归 LM 是稳定配方**:由 [`paper:vaswani2017`](../paper_vaswani2017.md) 与 GPT-2 共同验证,无需再质疑架构。 2. **认定预训练是免费午餐**:BERT 把"预训练 + 微调"打成行业默认,GPT-2 进一步暗示零样本能力随规模上升。 3. 
**从 scaling laws 读出投资曲线**:[`insight:scaling_laws_predict_capability_emergence`](insight_scaling_laws_predict_capability_emergence.md) 告诉你损失对 (D, P, C) 呈幂律下降,且当前曲线远未触底。 4. **决定扩展规模而非堆叠先验**:[`essay:bitter_lesson`](essay_bitter_lesson.md) 给出方法论的勇气,扩到 175B 不再是疯狂赌博。 @@ -48,6 +48,6 @@ GPT-3 推演链揭示了 scaling 的力量,但**没能预言它对几何精度 ## 推荐起步 -1. 读 [GPT-3 卡片](paper_gpt3.md) 与原论文表 3.2,亲眼看一次 few-shot 在不同规模下的能力跃升。 +1. 读 [GPT-3 卡片](../paper_gpt3.md) 与原论文表 3.2,亲眼看一次 few-shot 在不同规模下的能力跃升。 2. 在开源 [LLaMA](paper_llama.md) 7B / 13B / 70B 三档上复现一个最小 in-context 学习实验,体会规模门槛。 3. 顺着 [`insight:in_context_learning_emerges_at_scale`](insight_in_context_learning_emerges_at_scale.md) 与 [`validation:trace_llm_decision_agent_for_driving`](validation_trace_llm_decision_agent_for_driving.md) 走完一条"GPT-3 → Agent-Driver"的推演链。 diff --git a/docs/data/cards/extended/validation_trace_image_transformer_via_patch_tokenization.md b/docs/data/cards/extended/validation_trace_image_transformer_via_patch_tokenization.md index 6916e2e..fc096ce 100644 --- a/docs/data/cards/extended/validation_trace_image_transformer_via_patch_tokenization.md +++ b/docs/data/cards/extended/validation_trace_image_transformer_via_patch_tokenization.md @@ -6,7 +6,7 @@ | 类别 | 节点 | 在再发明里的角色 | |---|---|---| -| 论文 | [Attention Is All You Need](paper_vaswani2017.md) | 提供 self-attention 与 multi-head 的标准实现 | +| 论文 | [Attention Is All You Need](../paper_vaswani2017.md) | 提供 self-attention 与 multi-head 的标准实现 | | 概念 | Transformer 架构 | 把序列建模写成层叠 attention + FFN 的范式 | | 概念 | Self-attention | 让序列任意两位置 $O(1)$ 距离通信 | | 移动 | [Patch 分块与 token 化](move_patchify_tokenization.md) | 把 2D 像素网格压成 1D token 序列 | @@ -32,10 +32,10 @@ ViT 不只是一个 backbone,而是把"任何视觉任务都可以走 transformer 路线"这一普适性带进自动驾驶: -- → [DETR](paper_carion2020.md) 把 ViT 思想接到检测头上,用 query 输出对象集合。 -- → [BEVFormer](paper_li2022bevformer.md) 把 patch 替换成 BEV 格点,得到时空 BEV transformer。 +- → [DETR](../paper_carion2020.md) 把 ViT 思想接到检测头上,用 query 输出对象集合。 +- → [BEVFormer](../paper_li2022bevformer.md) 把 patch 替换成 BEV 
格点,得到时空 BEV transformer。 - → [DINOv2 / DINOv3](paper_2508.10104_dinov3.md) 把 ViT 作为自监督的对象,得到通用视觉特征。 -- → [SAM](paper_sam.md) 用 ViT 主干做提示式分割,直接给自动驾驶提供开放词汇感知。 +- → [SAM](../paper_sam.md) 用 ViT 主干做提示式分割,直接给自动驾驶提供开放词汇感知。 - → [UniAD](paper_2212.10156_uniad.md) 沿 ViT → DETR → BEVFormer 链条把整个驾驶栈端到端化。 ## 一条值得注意的反例 @@ -44,6 +44,6 @@ ViT 在 ImageNet 直接训练时,精度长期不如同等参数量的 ResNet。T ## 推荐起步 -1. 读 [ViT 卡片](paper_vit.md) 与 [Transformer 卡片](paper_vaswani2017.md),把 patch embedding 实现写一遍。 +1. 读 [ViT 卡片](../paper_vit.md) 与 [Transformer 卡片](../paper_vaswani2017.md),把 patch embedding 实现写一遍。 2. 跑 [`labs/lab05_dinov3_features_minidata`](../../../labs/lab05_dinov3_features_minidata.ipynb),用 DINOv3 ViT 主干在小数据上看 linear probing 效果。 -3. 把 ViT 主干接到 [BEVFormer](paper_li2022bevformer.md) 上,观察换主干对 mAP / NDS 的边际收益。 +3. 把 ViT 主干接到 [BEVFormer](../paper_li2022bevformer.md) 上,观察换主干对 mAP / NDS 的边际收益。 diff --git a/docs/data/cards/extended/validation_trace_llm_decision_agent_for_driving.md b/docs/data/cards/extended/validation_trace_llm_decision_agent_for_driving.md index ebeca05..7557106 100644 --- a/docs/data/cards/extended/validation_trace_llm_decision_agent_for_driving.md +++ b/docs/data/cards/extended/validation_trace_llm_decision_agent_for_driving.md @@ -15,7 +15,7 @@ | 洞察 | [`insight:in_context_learning_emerges_at_scale`](insight_in_context_learning_emerges_at_scale.md) | 让 LM 无需重新训练即可学新场景 | | 洞察 | [`insight:agent_loop_is_just_iterated_conditional_generation`](insight_agent_loop_is_just_iterated_conditional_generation.md) | 提供 agent 框架的最小心智模型 | | 洞察 | `insight:symbolic_intermediate_enables_interpretability_and_transfer` | 解释为何符号中间层值得保留 | -| 论文 | [`paper:gpt3`](paper_gpt3.md) | 提供主干 LM 与 few-shot 能力 | +| 论文 | [`paper:gpt3`](../paper_gpt3.md) | 提供主干 LM 与 few-shot 能力 | | 论文 | [`paper:react`](paper_react.md) | 提供 reason-act-observe 循环 | | 论文 | [`paper:2210.14222_plant`](paper_2210.14222_plant.md) | 提供对象级规划接口的前驱 | | 论文 | [`paper:rlhf_dpo`](paper_rlhf_dpo.md) | 提供把 LM 偏好对齐到驾驶习惯的方法 | diff --git 
a/docs/data/cards/extended/validation_trace_modular_perception_pipeline_with_bev_fusion.md b/docs/data/cards/extended/validation_trace_modular_perception_pipeline_with_bev_fusion.md index 3dc5321..7c25aa1 100644 --- a/docs/data/cards/extended/validation_trace_modular_perception_pipeline_with_bev_fusion.md +++ b/docs/data/cards/extended/validation_trace_modular_perception_pipeline_with_bev_fusion.md @@ -6,9 +6,9 @@ | 类别 | 节点 | 在再发明里的角色 | |---|---|---| -| 论文 | [ViT](paper_vit.md) | 提供可扩展的图像编码主干 | -| 论文 | [DETR](paper_carion2020.md) | 提供 BEV 上的稀疏对象输出接口 | -| 论文 | [BEVFormer](paper_li2022bevformer.md) | 提供 BEV query 的时空 cross-attention 模式(作为家族成员) | +| 论文 | [ViT](../paper_vit.md) | 提供可扩展的图像编码主干 | +| 论文 | [DETR](../paper_carion2020.md) | 提供 BEV 上的稀疏对象输出接口 | +| 论文 | [BEVFormer](../paper_li2022bevformer.md) | 提供 BEV query 的时空 cross-attention 模式(作为家族成员) | | 论文 | [Tesla AI Day](paper_tesla_ai_day.md) | 提供工业级 BEV 融合的工程证据 | | 概念 | BEV 感知 | 提供鸟瞰图作为统一坐标系的需求 | | 概念 | DETR object query | 让对象输出可微 | @@ -28,7 +28,7 @@ 其中 $G$ 是 BEV 特征,$\mathcal{P}_i^{-1}$ 是第 $i$ 路相机的反向投影。 5. **把 BEV 特征作为模块化接口公开**:下游检测器、分割头、占用头、规划头都从 $G$ 出发。这一动作让整个流水线既保留模块化优势(可独立测试、可独立替换),又保留端到端可微的潜力(损失可一路反传到主干)。 6. **加入多模态融合**:LiDAR 点云通过 PointPillars 之类的栅格编码也变成 BEV 特征,与相机 BEV 直接 concat 或 cross-attend,见 [BEVFusion](paper_bevfusion.md)。这是 [在共享中间表示空间中融合多种模态](move_fuse_modalities_in_shared_intermediate_space.md) 的具体落地。 -7. **设计 BEV 检测头**:沿用 [DETR](paper_carion2020.md) 的 query + 匈牙利匹配,或沿用 CenterPoint 的 anchor-free 中心点回归。两条路径互为家族。 +7. **设计 BEV 检测头**:沿用 [DETR](../paper_carion2020.md) 的 query + 匈牙利匹配,或沿用 CenterPoint 的 anchor-free 中心点回归。两条路径互为家族。 8. **加显式深度监督加速收敛**:由 BEVDepth、BEVDet 提供。LiDAR 提供的稀疏深度真值用来监督 $p_i(d \mid u, v)$,把可微升维从"随机初始化"提速到"几个 epoch 就有合理深度估计"。 9. 
**接到工业流水线里**:用 [Tesla AI Day](paper_tesla_ai_day.md) 提供的占用栅格头作为最后一步,把 BEV 特征翻译为下游规划可直接消费的格式。 @@ -39,7 +39,7 @@ LSS / BEVFormer 家族是当下绝大多数纯相机或相机为主的工业栈的事实标准: - → [UniAD](paper_2212.10156_uniad.md) 把这条流水线进一步端到端化,让 BEV 特征不仅出检测,还直接出规划。 -- → [VADv2](paper_vadv2.md) 用向量化 BEV 跳过显式格点。 +- → [VADv2](../paper_vadv2.md) 用向量化 BEV 跳过显式格点。 - → [StreamPETR](paper_streampetr.md) 在 BEV 上做流式时序聚合。 - → 占用网络家族(Tesla / SurroundOcc / OccWorld)把 BEV 进一步堆成 3D 体素。 - → [DrivingGaussian](paper_drivinggaussian.md) 把 BEV 特征作为初始化,再去优化 3D 高斯。 @@ -50,6 +50,6 @@ BEV 融合的工业部署里,**逐像素深度分布是最脆弱的环节**。 ## 推荐起步 -1. 读 [LSS](paper_lift_splat_shoot.md) 与 [BEVFormer](paper_li2022bevformer.md) 两篇,把"升维"这一动作的两种实现并列阅读。 +1. 读 [LSS](paper_lift_splat_shoot.md) 与 [BEVFormer](../paper_li2022bevformer.md) 两篇,把"升维"这一动作的两种实现并列阅读。 2. 在 nuScenes mini 上复现 LSS 的 splat 步骤,可视化 BEV 特征图,理解深度分布对结果的敏感性。 -3. 把这一流水线接到 [DETR](paper_carion2020.md) 检测头与 [DINOv3](paper_2508.10104_dinov3.md) 主干上,跟 [BEV transformer 再发现](validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md) 的形状做对照。 +3. 
把这一流水线接到 [DETR](../paper_carion2020.md) 检测头与 [DINOv3](paper_2508.10104_dinov3.md) 主干上,跟 [BEV transformer 再发现](validation_trace_bird_eye_view_transformer_with_temporal_aggregation.md) 的形状做对照。 diff --git a/docs/data/cards/extended/validation_trace_neural_field_for_dynamic_driving_scene.md b/docs/data/cards/extended/validation_trace_neural_field_for_dynamic_driving_scene.md index b2e553a..2cb8f59 100644 --- a/docs/data/cards/extended/validation_trace_neural_field_for_dynamic_driving_scene.md +++ b/docs/data/cards/extended/validation_trace_neural_field_for_dynamic_driving_scene.md @@ -8,10 +8,10 @@ |---|---|---| | 论文 | [NeRF](paper_nerf.md) | 提供把场景写成隐式辐射场的基本数学 | | 论文 | [3D Gaussian Splatting](paper_3dgs.md) | 提供显式可微表示与 CUDA 栅格化加速 | -| 论文 | [ViT](paper_vit.md) | 提供给重建用的强 2D 主干 | -| 论文 | [SAM](paper_sam.md) | 提供动静分割的零样本能力 | -| 论文 | [GAIA-1](paper_gaia1.md) | 给出生成式视频世界模型的对位路径 | -| 论文 | [DriveDreamer](paper_drivedreamer.md) | 给出条件化视频扩散的工程证据 | +| 论文 | [ViT](../paper_vit.md) | 提供给重建用的强 2D 主干 | +| 论文 | [SAM](../paper_sam.md) | 提供动静分割的零样本能力 | +| 论文 | [GAIA-1](../paper_gaia1.md) | 给出生成式视频世界模型的对位路径 | +| 论文 | [DriveDreamer](../paper_drivedreamer.md) | 给出条件化视频扩散的工程证据 | | 论文 | [World Models](paper_world_models.md) | 提供"压缩世界 - 在压缩里规划"这一研究取向 | | 移动 | [潜空间想象 rollout](move_latent_imagination_rollout.md) | 给出仿真引擎使用神经场的接口模式 | | 洞察 | [可微渲染是一种通用的反问题求解器](insight_differentiable_rendering_is_universal_inverse_solver.md) | 解释为什么可以从图像反推场景 | @@ -30,7 +30,7 @@ 5. **用 3D Gaussian Splatting 替换隐式 MLP**:由 [3DGS](paper_3dgs.md) 提供的显式椭球表示让渲染速度提升两个数量级,这对驾驶场景的大空间尺度至关重要。DrivingGaussian 选了这条。 6. **配套 LiDAR 深度作为强先验**:稀疏 LiDAR 给出每条射线的部分深度真值,直接监督 NeRF 的 depth-rendering 或 GS 的 z-buffer,极大缩短优化时间。 7. **设计仿真用接口**:重建出来的场不应只能在原采样轨迹上渲染,而应能在新轨迹、新光照、新物体配置下产出训练或测试图像。这一动作把神经重建从"展品"升级为"仿真引擎",符合 [长尾问题靠合成而非单纯增加数据解决](insight_long_tail_solved_by_synthesis_not_data_alone.md) 的承诺。 -8. 
**接到世界模型的接口上**:渲染输出可以作为 [世界模型让规划在想象中进行](insight_world_models_let_planning_be_done_in_imagination.md) 路径里的"环境",也可以作为生成式 [GAIA-1](paper_gaia1.md) 的训练数据来源。 +8. **接到世界模型的接口上**:渲染输出可以作为 [世界模型让规划在想象中进行](insight_world_models_let_planning_be_done_in_imagination.md) 路径里的"环境",也可以作为生成式 [GAIA-1](../paper_gaia1.md) 的训练数据来源。 → 必然得到 EmerNeRF / DrivingGaussian:动静解耦 + 可微渲染 + LiDAR 深度监督 + 可重新采样的轨迹接口。 @@ -45,7 +45,7 @@ ## 一条值得注意的反例 -动态驾驶场重建的成功在路径外推上有硬墙。一旦渲染相机偏离原采集轨迹两到三米,渲染质量急剧下降,远端建筑出现"果冻"形变。这并非工程实现问题,而是观测覆盖不足的根本约束:重建只能在被观测过的视锥附近高保真。这意味着把神经场作为"完全可漫游的仿真器"是不现实的,它更适合作为"原轨迹邻域的高保真扰动器"。这一限制把它跟 [GAIA-1](paper_gaia1.md) 这种生成式世界模型分到了互补而非替代的位置。 +动态驾驶场重建的成功在路径外推上有硬墙。一旦渲染相机偏离原采集轨迹两到三米,渲染质量急剧下降,远端建筑出现"果冻"形变。这并非工程实现问题,而是观测覆盖不足的根本约束:重建只能在被观测过的视锥附近高保真。这意味着把神经场作为"完全可漫游的仿真器"是不现实的,它更适合作为"原轨迹邻域的高保真扰动器"。这一限制把它跟 [GAIA-1](../paper_gaia1.md) 这种生成式世界模型分到了互补而非替代的位置。 ## 推荐起步 diff --git a/docs/data/cards/extended/validation_trace_scalable_self_supervised_vision_backbone.md b/docs/data/cards/extended/validation_trace_scalable_self_supervised_vision_backbone.md index 8211312..2f945a7 100644 --- a/docs/data/cards/extended/validation_trace_scalable_self_supervised_vision_backbone.md +++ b/docs/data/cards/extended/validation_trace_scalable_self_supervised_vision_backbone.md @@ -6,8 +6,8 @@ | 类别 | 节点 | 在再发明里的角色 | |---|---|---| -| 论文 | [DINOv2](paper_dinov2.md) | 提供自蒸馏 + 多裁剪 + 多视图的稳定配方 | -| 论文 | [ViT](paper_vit.md) | 提供可扩展架构,参数量与算力线性挂钩 | +| 论文 | [DINOv2](../paper_dinov2.md) | 提供自蒸馏 + 多裁剪 + 多视图的稳定配方 | +| 论文 | [ViT](../paper_vit.md) | 提供可扩展架构,参数量与算力线性挂钩 | | 论文 | [MAE](paper_mae.md) | 提供掩码图像建模作为辅助监督的范式 | | 概念 | 自监督学习 | 把"结构本身就是监督"作为研究取向 | | 移动 | [掩码预测自监督任务](move_masking_for_pretext.md) | 让无标注图像直接给出训练信号 | @@ -36,8 +36,8 @@ DINOv3 的真正价值不在 ImageNet 精度,而在于它作为可冻结基座给驾驶感知的边际收益: - **冻结特征 + 轻量 head** → 在 nuScenes 检测、Cityscapes 分割上做线性 probing 即接近 SOTA。 -- **半监督 BEV finetune** → 用 1/10 标签量微调 [BEVFormer](paper_li2022bevformer.md),mAP 接近全量监督。 -- **零样本开放词汇感知** → 配合 [SAM](paper_sam.md) 的 mask + [CLIP](paper_clip.md) 
的文本嵌入,可在不重训的情况下识别长尾物体。 +- **半监督 BEV finetune** → 用 1/10 标签量微调 [BEVFormer](../paper_li2022bevformer.md),mAP 接近全量监督。 +- **零样本开放词汇感知** → 配合 [SAM](../paper_sam.md) 的 mask + [CLIP](paper_clip.md) 的文本嵌入,可在不重训的情况下识别长尾物体。 - **驾驶领域适配** → 在百万小时未标注驾驶视频上继续 DINOv3 风格的自监督训练,得到驾驶专用的基础视觉主干。 ## 一条值得注意的反例 @@ -46,6 +46,6 @@ DINOv3 这类全局自监督特征在"小物体精细定位"上仍弱于专门 ## 推荐起步 -1. 读 [DINOv3 卡片](paper_2508.10104_dinov3.md) 与 [DINOv2 卡片](paper_dinov2.md),并对照 [MAE 卡片](paper_mae.md)。 +1. 读 [DINOv3 卡片](paper_2508.10104_dinov3.md) 与 [DINOv2 卡片](../paper_dinov2.md),并对照 [MAE 卡片](paper_mae.md)。 2. 跑 [`labs/lab05_dinov3_features_minidata`](../../../labs/lab05_dinov3_features_minidata.ipynb),把冻结 DINOv3 特征在小数据上做 linear probing。 3. 把同一份特征接到一个最小 BEV 头上,跟从头训练的同等参数量 ResNet 主干做对照,亲手验证 [基础模型预训练把数据与任务解耦](insight_foundation_pretraining_decouples_data_from_task.md) 的具体数字。 diff --git a/docs/data/cards/extended/validation_trace_self_attention_replaces_recurrence.md b/docs/data/cards/extended/validation_trace_self_attention_replaces_recurrence.md index e05d13e..092a4e3 100644 --- a/docs/data/cards/extended/validation_trace_self_attention_replaces_recurrence.md +++ b/docs/data/cards/extended/validation_trace_self_attention_replaces_recurrence.md @@ -38,7 +38,7 @@ - 注意力 = 跨格点的查询 - 残差 = 跨任务头的残差注入 -→ 自然得到 [BEVFormer](paper_li2022bevformer.md) / [DETR3D](paper_detr3d.md) / [UniAD](paper_2212.10156_uniad.md) 这条线。 +→ 自然得到 [BEVFormer](../paper_li2022bevformer.md) / [DETR3D](paper_detr3d.md) / [UniAD](paper_2212.10156_uniad.md) 这条线。 ## 它没能预测到什么 @@ -46,6 +46,6 @@ Transformer 的 in-context learning 能力、scaling laws、emergent capabilitie ## 推荐起步 -1. [Transformer 卡片](paper_vaswani2017.md) → [ViT 卡片](paper_vit.md) → [DETR 卡片](paper_carion2020.md)。 +1. [Transformer 卡片](../paper_vaswani2017.md) → [ViT 卡片](../paper_vit.md) → [DETR 卡片](../paper_carion2020.md)。 2. 跑一个最小 attention 实现,对比一个最小 LSTM 实现,体会两条路线的内在差异。 -3. 把这条推演投到 BEV 感知上,看你怎样独立再发明 [BEVFormer](paper_li2022bevformer.md)。 +3. 
把这条推演投到 BEV 感知上,看你怎样独立再发明 [BEVFormer](../paper_li2022bevformer.md)。 diff --git a/docs/data/cards/extended/validation_trace_set_prediction_with_object_queries.md b/docs/data/cards/extended/validation_trace_set_prediction_with_object_queries.md index 64a0349..6bd741f 100644 --- a/docs/data/cards/extended/validation_trace_set_prediction_with_object_queries.md +++ b/docs/data/cards/extended/validation_trace_set_prediction_with_object_queries.md @@ -35,7 +35,7 @@ graph TB | 类别 | 节点 | 角色 | |---|---|---| -| 论文 | [`paper:vaswani2017`](paper_vaswani2017.md) | 提供 self-attention 与 cross-attention | +| 论文 | [`paper:vaswani2017`](../paper_vaswani2017.md) | 提供 self-attention 与 cross-attention | | 概念 | [`concept:transformer`](../../concepts.md#transformer) | 序列范式 | | 移动 | `move:treat_detection_as_set_prediction_with_learnable_queries` | 核心推演动作 | | 移动 | `move:set_prediction_with_hungarian` | 二分图匹配损失,去掉 NMS | @@ -51,16 +51,16 @@ graph TB 3. **重新定义任务**:把检测改写为 *从图像产生一个固定大小的对象集合*。 4. **采用 Hungarian 匹配作为损失**:由 `move:set_prediction_with_hungarian` 提供。 5. **用 query 作为"我关心的实体"**:由 `move:treat_detection_as_set_prediction_with_learnable_queries` 给出。 -6. **用 cross-attention 把 query 与图像特征连接**:由 [`paper:vaswani2017`](paper_vaswani2017.md) 提供基础。 +6. **用 cross-attention 把 query 与图像特征连接**:由 [`paper:vaswani2017`](../paper_vaswani2017.md) 提供基础。 → 必然得到 DETR:一个固定数量的 query 通过 cross-attention 与 CNN 特征对话,输出对象集合,匹配损失代替 NMS。 ## 这条 trace 在自动驾驶里的延伸 DETR 提供了"对象 query 作为可微输出接口"这一基本积木: -- → [BEVFormer](paper_li2022bevformer.md) 的 BEV query +- → [BEVFormer](../paper_li2022bevformer.md) 的 BEV query - → [UniAD](paper_2212.10156_uniad.md) 的多任务共享 query -- → [VADv2](paper_vadv2.md) 的向量化 query +- → [VADv2](../paper_vadv2.md) 的向量化 query - → [PlanT](paper_2210.14222_plant.md) 的对象 token 如果没有 DETR 这一支路,端到端自动驾驶很可能还停在"anchor + NMS + 多任务串联"的老结构里。 @@ -71,6 +71,6 @@ DETR 的训练效率长期不如 anchor-based 方法,这也是为什么后来 ## 推荐起步 -1. [DETR 卡片](paper_carion2020.md) 与 [`concept:detr_query`](../../concepts.md#detr-query)。 +1. 
[DETR 卡片](../paper_carion2020.md) 与 [`concept:detr_query`](../../concepts.md#detr-query)。 2. 写一个 50 行的简化 DETR:一组 16 个 query + 一个小 transformer + Hungarian loss。 3. 跑 [`labs/lab03_uniad_query_intuition`](../../../labs/lab03_uniad_query_intuition.ipynb) 看 query 如何在 BEV 上工作。 diff --git a/docs/data/cards/extended/validation_trace_unified_planning_oriented_e2e_driving.md b/docs/data/cards/extended/validation_trace_unified_planning_oriented_e2e_driving.md index 7cd3de2..012ee46 100644 --- a/docs/data/cards/extended/validation_trace_unified_planning_oriented_e2e_driving.md +++ b/docs/data/cards/extended/validation_trace_unified_planning_oriented_e2e_driving.md @@ -70,8 +70,8 @@ BC + 协变量偏移──prereq──┤ 下面这张清单是用图谱里其它节点来表达的判据,研究者可以用它做自查: -1. **BEV 几何投影** — 由 [`paper:li2022bevformer`](paper_li2022bevformer.md) 与 [`concept:bev`](../../concepts.md#bev感知) 描述,知道如何把六路图像 lift 到 BEV。 -2. **可微的输出接口** — 由 [`paper:carion2020`](paper_carion2020.md)、[`concept:detr_query`](../../concepts.md#detr-query) 与 `move:set_prediction_with_hungarian` 提供。 +1. **BEV 几何投影** — 由 [`paper:li2022bevformer`](../paper_li2022bevformer.md) 与 [`concept:bev`](../../concepts.md#bev感知) 描述,知道如何把六路图像 lift 到 BEV。 +2. **可微的输出接口** — 由 [`paper:carion2020`](../paper_carion2020.md)、[`concept:detr_query`](../../concepts.md#detr-query) 与 `move:set_prediction_with_hungarian` 提供。 3. **query 之间的横向通信** — 由 `move:cross_attention_query` 提供,配套 `insight:attention_is_typed_entity_communication`。 4. **多任务共享中介** — 由 `move:make_pipeline_differentiable_via_shared_latent` 提供。 5. 
**规划损失作为顶层指挥** — 由 `move:set_planning_loss_as_top_objective` 提供。 @@ -82,7 +82,7 @@ BC + 协变量偏移──prereq──┤ * **找空白**:当一个新方向的"推演溯源"链中缺少某个 move 或 insight,往往说明那是一个公认未被破解的开放问题。 * **测信号**:研究者把自家方法投到这份清单上,可以直接读出"我用上了哪些通用 move、躲开了哪些 insight"。 -* **找邻居**:UniAD 的同型替换链 → [PlanT](paper_2210.14222_plant.md)(保留 query 但去掉 BEV)、[VADv2](paper_vadv2.md)(保留端到端但用向量化)、[DriveVLM](paper_2402.12289_drivevlm.md)(把语言模型接到 UniAD 的接口之上)。 +* **找邻居**:UniAD 的同型替换链 → [PlanT](paper_2210.14222_plant.md)(保留 query 但去掉 BEV)、[VADv2](../paper_vadv2.md)(保留端到端但用向量化)、[DriveVLM](paper_2402.12289_drivevlm.md)(把语言模型接到 UniAD 的接口之上)。 ## 没出现在清单里、但很容易以为该出现的东西 diff --git a/docs/data/cards/extended/validation_trace_vision_language_action_dual_loop.md b/docs/data/cards/extended/validation_trace_vision_language_action_dual_loop.md index 6cb466b..232f36d 100644 --- a/docs/data/cards/extended/validation_trace_vision_language_action_dual_loop.md +++ b/docs/data/cards/extended/validation_trace_vision_language_action_dual_loop.md @@ -43,8 +43,8 @@ flowchart TD | 洞察 | [`insight:dual_system_fast_slow_loop_marries_reactive_and_deliberative_control`](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md) | 给出双回路的元设计 | | 洞察 | `insight:symbolic_intermediate_enables_interpretability_and_transfer` | 解释 meta-action 接口的必要性 | | 洞察 | [`insight:attention_is_typed_entity_communication`](insight_attention_is_typed_entity_communication.md) | 解释为何 BEV / agent query 可被 VLM 读取 | -| 论文 | [`paper:llava`](paper_llava.md) | 提供 VLM 主干 | -| 论文 | [`paper:li2022bevformer`](paper_li2022bevformer.md) | 提供快回路的 BEV 几何 | +| 论文 | [`paper:llava`](../paper_llava.md) | 提供 VLM 主干 | +| 论文 | [`paper:li2022bevformer`](../paper_li2022bevformer.md) | 提供快回路的 BEV 几何 | | 论文 | [`paper:2212.10156_uniad`](paper_2212.10156_uniad.md) | 提供快回路的端到端规划 | | 论文 | [`paper:2311.10813_agent_driver`](paper_2311.10813_agent_driver.md) | 提供 agent 工具循环的前驱 | @@ -66,7 +66,7 @@ flowchart TD DriveVLM-Dual 的双系统是后续大量工作的母体: - [CF-VLA](paper_2512.24426_cfvla.md) 
在慢回路里加入反事实推理与世界模型,让 VLA 闭环到反事实数据生成上。 -- [EMMA](paper_emma.md) 与 [LINGO-2](paper_lingo2.md) 进一步用单一 VLA 主干承担更多任务,挑战双系统的必要性。 +- [EMMA](paper_emma.md) 与 [LINGO-2](../paper_lingo2.md) 进一步用单一 VLA 主干承担更多任务,挑战双系统的必要性。 - [`paradigm:vla_paradigm`](paradigm_vla_paradigm.md) 与 [`paradigm:foundation_model_zero_shot_driving_agent`](paradigm_foundation_model_zero_shot_driving_agent.md) 都把 DriveVLM-Dual 作为代表性家族成员。 - 蒸馏路径:慢回路的 CoT trace 提供高质量监督信号,可用于训练专用快回路头。 diff --git a/docs/data/cards/extended/validation_trace_vision_language_pretrained_dual_encoder.md b/docs/data/cards/extended/validation_trace_vision_language_pretrained_dual_encoder.md index b1edb94..2c02e0c 100644 --- a/docs/data/cards/extended/validation_trace_vision_language_pretrained_dual_encoder.md +++ b/docs/data/cards/extended/validation_trace_vision_language_pretrained_dual_encoder.md @@ -14,8 +14,8 @@ | 移动 | `move:harvest_internet_image_text_pairs` | 互联网图文对作为天然监督 | | 洞察 | `insight:contrastive_alignment_creates_zero_shot_transfer` | 解释为什么对齐空间天生具备零样本能力 | | 洞察 | `insight:foundation_pretraining_decouples_data_from_task` | 解释预训练为何免去任务定制 | -| 论文 | [`paper:vit`](paper_vit.md) | 提供可扩展的图像编码器 | -| 论文 | [`paper:vaswani2017`](paper_vaswani2017.md) | 提供文本编码器 | +| 论文 | [`paper:vit`](../paper_vit.md) | 提供可扩展的图像编码器 | +| 论文 | [`paper:vaswani2017`](../paper_vaswani2017.md) | 提供文本编码器 | | 论文 | `paper:simclr` / `paper:moco` | 验证对比学习在视觉里的稳定性 | | 资源 | 互联网图文配对(>1B 量级) | 让对比目标有足够负样本与多样性 | @@ -36,9 +36,9 @@ CLIP 的对比对齐是后续多模态驾驶模型的隐性前提: -- [LLaVA / Qwen-VL](paper_llava.md) 把 CLIP / SigLIP 视觉塔作为视觉前端,再接到语言模型。 +- [LLaVA / Qwen-VL](../paper_llava.md) 把 CLIP / SigLIP 视觉塔作为视觉前端,再接到语言模型。 - [DriveVLM](paper_2402.12289_drivevlm.md) 主干离不开 CLIP 风格的视觉—语言对齐预训练。 -- [SAM](paper_sam.md) 虽然走分割路线,但其文本提示能力同样依赖 CLIP 一类的对齐空间。 +- [SAM](../paper_sam.md) 虽然走分割路线,但其文本提示能力同样依赖 CLIP 一类的对齐空间。 - 驾驶场景描述、罕见物体检索、长尾标注挖掘都可借 CLIP 嵌入做相似度搜索。 - [`paradigm:foundation_model_axis`](paradigm_foundation_model_axis.md) 把 CLIP 列为视觉—语言两条轴的连接点。 @@ -50,4 +50,4 @@ CLIP 
推演链解释了零样本分类与跨模态检索为何成立,但**它 1. 读 [CLIP 卡片](paper_clip.md),重点看零样本分类那一节的表格。 2. 在 OpenCLIP 上跑一次最小检索实验,把驾驶图片库的相似度查询亲手实现一遍。 -3. 顺着 [`paper:llava`](paper_llava.md) → [`paper:2402.12289_drivevlm`](paper_2402.12289_drivevlm.md) 把对齐空间是怎样被驾驶 VLM 继承的看清。 +3. 顺着 [`paper:llava`](../paper_llava.md) → [`paper:2402.12289_drivevlm`](paper_2402.12289_drivevlm.md) 把对齐空间是怎样被驾驶 VLM 继承的看清。 diff --git a/docs/data/cards/extended/validation_trace_world_model_in_latent_imagination.md b/docs/data/cards/extended/validation_trace_world_model_in_latent_imagination.md index ca0953a..ef03508 100644 --- a/docs/data/cards/extended/validation_trace_world_model_in_latent_imagination.md +++ b/docs/data/cards/extended/validation_trace_world_model_in_latent_imagination.md @@ -60,7 +60,7 @@ sequenceDiagram - 模型 = 视频扩散或 latent dynamics transformer - policy = 在 latent 上跑的 RL / planning -结果即 [GAIA-1](paper_gaia1.md) → [DriveDreamer](paper_drivedreamer.md) → [CF-VLA](paper_2512.24426_cfvla.md) 这条线。这一节点 *显式地告诉你*:把图谱里的 RL 经典工具加上 generative video model 的当代成熟工具,你就站到了驾驶世界模型路线的起点。 +结果即 [GAIA-1](../paper_gaia1.md) → [DriveDreamer](../paper_drivedreamer.md) → [CF-VLA](paper_2512.24426_cfvla.md) 这条线。这一节点 *显式地告诉你*:把图谱里的 RL 经典工具加上 generative video model 的当代成熟工具,你就站到了驾驶世界模型路线的起点。 ## 该工作在哪里"还不够" diff --git a/docs/data/generated/round4_driving.json b/docs/data/generated/round4_driving.json index ac448a1..af7494f 100644 --- a/docs/data/generated/round4_driving.json +++ b/docs/data/generated/round4_driving.json @@ -6,7 +6,7 @@ "kind": "paper", "tier": "S", "topic": "e2e_ad", - "year": 1989, + "year": 1988, "label": "ALVINN", "label_zh": "ALVINN(最早的神经网络端到端驾驶)", "summary_zh": "Pomerleau 1989 用单隐层全连接网络把相机图像直接映射到转向,是端到端驾驶与行为克隆的源头;并用合成偏移样本预演了协变量偏移的工程解。" @@ -83,45 +83,170 @@ } ], "edges": [ - {"source": "paper:alvinn", "target": "paradigm:imitation_learning", "rel": "manifests"}, - {"source": "paper:alvinn", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "motivates"}, - {"source": "paper:alvinn", "target": 
"paradigm:modular_perception_to_planning_pipeline", "rel": "contrasts"}, - {"source": "paper:alvinn", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "motivates"}, - - {"source": "paper:lbc", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "manifests"}, - {"source": "paper:lbc", "target": "paper:alvinn", "rel": "extends"}, - {"source": "paper:lbc", "target": "paper:carla_leaderboard", "rel": "validates"}, - {"source": "paper:lbc", "target": "paradigm:imitation_learning", "rel": "manifests"}, - - {"source": "paper:tcp_carla", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "manifests"}, - {"source": "paper:tcp_carla", "target": "paper:lbc", "rel": "extends"}, - {"source": "paper:tcp_carla", "target": "paper:carla_leaderboard", "rel": "validates"}, - {"source": "paper:tcp_carla", "target": "paper:bench2drive", "rel": "feeds"}, - - {"source": "paper:gameformer", "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", "rel": "covers"}, - {"source": "paper:gameformer", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "manifests"}, - {"source": "paper:gameformer", "target": "paradigm:differentiable_end_to_end_imitation", "rel": "extends"}, - {"source": "paper:gameformer", "target": "paper:tcp_carla", "rel": "contrasts"}, - - {"source": "paper:drivegpt", "target": "paradigm:foundation_model_zero_shot_driving_agent", "rel": "manifests"}, - {"source": "paper:drivegpt", "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "rel": "manifests"}, - {"source": "paper:drivegpt", "target": "paper:gpt3", "rel": "extends"}, - {"source": "paper:drivegpt", "target": "paradigm:imitation_learning", "rel": "composes"}, - {"source": "paper:drivegpt", "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "rel": "manifests"}, - - {"source": "paper:codex", "target": "paper:gpt3", "rel": "extends"}, - {"source": "paper:codex", 
"target": "paradigm:llm_agent_paradigm", "rel": "enables"}, - {"source": "paper:codex", "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", "rel": "manifests"}, - {"source": "paper:codex", "target": "paradigm:knowledge_driven_reflective_agent", "rel": "enables"}, - - {"source": "paper:saycan", "target": "paradigm:llm_agent_paradigm", "rel": "manifests"}, - {"source": "paper:saycan", "target": "problem:grounding_language_token_to_continuous_physical_world", "rel": "covers"}, - {"source": "paper:saycan", "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", "rel": "manifests"}, - {"source": "paper:saycan", "target": "paper:rt2", "rel": "parallel"}, - {"source": "paper:saycan", "target": "paper:codex", "rel": "contrasts"}, - - {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "move:add_entropy_bonus_to_encourage_exploration", "rel": "motivates"}, - {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", "rel": "feeds"}, - {"source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", "rel": "feeds"} + { + "source": "paper:alvinn", + "target": "paradigm:imitation_learning", + "rel": "manifests" + }, + { + "source": "paper:alvinn", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "motivates" + }, + { + "source": "paper:alvinn", + "target": "paradigm:modular_perception_to_planning_pipeline", + "rel": "contrasts" + }, + { + "source": "paper:alvinn", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "motivates" + }, + { + "source": "paper:lbc", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": "paper:lbc", + "target": "paper:alvinn", + "rel": "extends" + }, + { + "source": "paper:lbc", + 
"target": "paper:carla_leaderboard", + "rel": "validates" + }, + { + "source": "paper:lbc", + "target": "paradigm:imitation_learning", + "rel": "manifests" + }, + { + "source": "paper:tcp_carla", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "manifests" + }, + { + "source": "paper:tcp_carla", + "target": "paper:lbc", + "rel": "extends" + }, + { + "source": "paper:tcp_carla", + "target": "paper:carla_leaderboard", + "rel": "validates" + }, + { + "source": "paper:tcp_carla", + "target": "paper:bench2drive", + "rel": "feeds" + }, + { + "source": "paper:gameformer", + "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "rel": "covers" + }, + { + "source": "paper:gameformer", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "manifests" + }, + { + "source": "paper:gameformer", + "target": "paradigm:differentiable_end_to_end_imitation", + "rel": "extends" + }, + { + "source": "paper:gameformer", + "target": "paper:tcp_carla", + "rel": "contrasts" + }, + { + "source": "paper:drivegpt", + "target": "paradigm:foundation_model_zero_shot_driving_agent", + "rel": "manifests" + }, + { + "source": "paper:drivegpt", + "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", + "rel": "manifests" + }, + { + "source": "paper:drivegpt", + "target": "paper:gpt3", + "rel": "extends" + }, + { + "source": "paper:drivegpt", + "target": "paradigm:imitation_learning", + "rel": "composes" + }, + { + "source": "paper:drivegpt", + "target": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "rel": "manifests" + }, + { + "source": "paper:codex", + "target": "paper:gpt3", + "rel": "extends" + }, + { + "source": "paper:codex", + "target": "paradigm:llm_agent_paradigm", + "rel": "enables" + }, + { + "source": "paper:codex", + "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "rel": "manifests" + }, + { + "source": "paper:codex", + "target": 
"paradigm:knowledge_driven_reflective_agent", + "rel": "enables" + }, + { + "source": "paper:saycan", + "target": "paradigm:llm_agent_paradigm", + "rel": "manifests" + }, + { + "source": "paper:saycan", + "target": "problem:grounding_language_token_to_continuous_physical_world", + "rel": "covers" + }, + { + "source": "paper:saycan", + "target": "insight:tool_use_extends_language_model_into_environment_grounded_actor", + "rel": "manifests" + }, + { + "source": "paper:saycan", + "target": "paper:rt2", + "rel": "parallel" + }, + { + "source": "paper:saycan", + "target": "paper:codex", + "rel": "contrasts" + }, + { + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "move:add_entropy_bonus_to_encourage_exploration", + "rel": "motivates" + }, + { + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "insight:tokenized_trajectories_let_planning_borrow_from_language_modeling", + "rel": "feeds" + }, + { + "source": "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes", + "target": "problem:multi_agent_interaction_modeling_in_dense_traffic", + "rel": "feeds" + } ] -} +} \ No newline at end of file diff --git a/docs/data/graph_extended.json b/docs/data/graph_extended.json index 0af6151..0aed7c5 100644 --- a/docs/data/graph_extended.json +++ b/docs/data/graph_extended.json @@ -6478,7 +6478,7 @@ "kind": "paper", "tier": "S", "topic": "e2e_ad", - "year": 1989, + "year": 1988, "label": "ALVINN", "label_zh": "ALVINN(最早的神经网络端到端驾驶)", "summary_zh": "Pomerleau 1989 用单隐层全连接网络把相机图像直接映射到转向,是端到端驾驶与行为克隆的源头;并用合成偏移样本预演了协变量偏移的工程解。", diff --git a/docs/data/research_lens.json b/docs/data/research_lens.json index e1122ce..76b0295 100644 --- a/docs/data/research_lens.json +++ b/docs/data/research_lens.json @@ -137,6 +137,90 @@ "experiment": "构造驾驶专用的反事实探针(轻微改动场景看决策是否一致),测基础模型零样本决策对场景扰动的稳定性,与少样本微调对比。可证伪点:若零样本决策已足够稳定且可执行,则驾驶专用对齐数据的必要性被削弱。", "isomorphism": "与 [RT-2](paper_rt2.md)、[VLA 范式](paradigm_vla_paradigm.md) 
把通才基座迁移到具身控制同源;也对应 NLP 里\"预训练-提示\"取代任务专用模型、[洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)。" }, + "insight:policy_improvement_bounded_by_distribution_shift": { + "assumption": "一个策略的真实表现,可以用它在自己访问到的状态分布上的表现来代理;只要新策略与采集数据的策略在状态分布上足够接近(总变差距离小),策略评估与改进的误差就有界。", + "failure": "一步看似不错的更新可能把状态分布推到数据未覆盖处,此时性能下界 $\\eta(\\pi)\\ge \\eta(\\pi_{\\text{old}})-C\\cdot\\mathbb{E}[D_{TV}(\\pi\\|\\pi_{\\text{old}})]$ 里的惩罚项 $C\\cdot D_{TV}$ 爆掉,\"改进\"变成灾难。距离一旦大到界失效,单调改进的保证就消失。", + "experiment": "在驾驶模仿任务里,按策略更新步长扫描诱导的状态分布偏移(用 $D_{TV}$ 或 rollout 状态的 KL 估计),画\"分布偏移-真实闭环回报变化\"曲线,定位界由紧变松的临界点。可证伪点:若大偏移更新仍稳定改进,则该界对实际驾驶过松、不具指导意义。", + "isomorphism": "统一了 [DAgger](paper_ross2011_dagger.md) 的 $\\mathcal{O}(\\varepsilon T)$ 界、[TRPO](paper_schulman2015_trpo.md)/[PPO](paper_schulman2017_ppo.md) 的信赖域、[离线 RL](paradigm_offline_rl.md) 的保守约束——它们都是同一句话\"别让分布跑太远\"在模仿、在线、离线三处的写法。" + }, + "insight:max_entropy_closes_policy_value_duality": { + "assumption": "给目标加一个熵正则 $\\max_\\pi \\mathbb{E}[\\sum_t r_t+\\alpha\\mathcal{H}(\\pi)]$ 后,最优策略恰好是价值的玻尔兹曼分布 $\\pi^\\star(a\\mid s)\\propto e^{Q(s,a)/\\alpha}$;于是\"前向求最优策略\"与\"反向由行为推奖励\"被同一个 softmax/配分函数连起来。", + "failure": "温度 $\\alpha\\to 0$ 时退化为硬 $\\max$、对偶关系塌缩;$\\alpha$ 太大则策略过度随机、丢掉最优性。配分函数 $\\sum_a e^{Q/\\alpha}$ 在连续高维动作上不可解析,需采样近似,近似误差会同时污染前向与反向两侧。", + "experiment": "在同一数据上分别用前向最大熵 RL 学策略、用 [MaxEnt IRL](paper_ziebart_max_ent_irl.md) 反推奖励,检验二者是否对同一 $Q$ 自洽(恢复的奖励重新前向求解应回到原策略)。可证伪点:若两侧无法在任何 $\\alpha$ 下自洽,则该对偶在该问题类上失效。", + "isomorphism": "与统计力学的玻尔兹曼分布/自由能、概率图模型的变分推断、[SAC](paper_sac.md) 的软价值迭代同构;它也是 [软 Q 学习与策略梯度等价](paper_sac.md) 这类结果的根。" + }, + "insight:q_learning_max_is_optimistically_biased": { + "assumption": "对带噪声的 $Q$ 估计取 $\\max_a$ 会系统性高估,因为 $\\mathbb{E}[\\max_a \\hat Q(s,a)]\\ge \\max_a \\mathbb{E}[\\hat Q(s,a)]$(Jensen 不等式作用于凸的 $\\max$)。只要不解耦\"选动作\"与\"估其值\",这个正偏差就会通过自举不断放大。", + "failure": "偏差随动作数与估计噪声增大,在自举里逐层累积,可让 $Q$ 发散;但当噪声极小或动作极少时偏差可忽略,此时强行 double 化反而增加方差、拖慢收敛。它不是\"永远要修\"的,而是噪声-动作数的函数。", + "experiment": "人为注入可控幅度的 $Q$ 估计噪声,测 vanilla 
DQN 与 [Double-Q](move_double_q_to_reduce_overestimation.md) 的高估量随噪声/动作数的增长曲线,定位 double 化真正带来净收益的区域。可证伪点:若某区域 double 化不降高估或反伤性能,则\"总该解耦\"被推翻。", + "isomorphism": "与统计里的\"赢者诅咒\"(winner's curse)、最大值无偏估计难题、A/B 测试里挑最好臂的乐观偏差同构;[Double-Q](move_double_q_to_reduce_overestimation.md) 的解耦正对应交叉验证式的\"选用不同数据估值\"。" + }, + "insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes": { + "assumption": "交通场景的未来本质上是多模态的——同一路口\"先过\"与\"先让\"都是合法解;因此把策略建成单峰分布(或回归一条轨迹)从结构上就拟合不了真实分布,无论数据和参数多大。", + "failure": "单峰模型会取多个模态的均值,落进低密度谷(\"以一半速度开进路口正中\")——既不安全也不可执行。但当场景其实是单模态(直道巡航)时,多模态机制带来的额外采样与训练成本就是纯浪费。", + "experiment": "在分叉/让行场景采集双峰人类示教,对比单峰回归、GMM 头、[扩散策略](paper_diffusion_policy_chi2023.md) 的模态覆盖率与\"均值塌缩到非法动作\"的发生率。可证伪点:若单峰模型在这些场景下也不塌缩,则多模态建模的必要性被推翻。", + "isomorphism": "与语音合成的多模态输出、图像生成里 L2 损失导致的模糊、混合密度网络(MDN)的提出动机同构;补救手段([把规划当条件生成](move_treat_planning_as_conditional_generation.md)、[加熵奖励](move_add_entropy_bonus_to_encourage_exploration.md))都是\"让输出分布容得下多个解\"。" + }, + "paper:airl": { + "assumption": "把判别器结构化成\"奖励项 + 势函数 shaping 项\",就能在对抗模仿里把真正可迁移的奖励 $r_\\theta(s)$ 从环境动力学里解耦出来,从而在动力学改变时仍然有效。相信奖励比策略更可迁移。", + "failure": "奖励的可识别性仍是病态的——shaping 不变性让无穷多奖励解释同一行为;对抗训练不稳定,判别器/生成器易失衡。可迁移性只在\"状态奖励 + 动力学可变\"这一特定假设下成立,状态分布偏移过大时照样崩。", + "experiment": "在训练动力学上学奖励,换一套测试动力学只用学到的奖励重训策略,测迁移成功率,并与直接迁移 [GAIL](paper_gail.md) 策略对比。可证伪点:若 GAIL 策略迁移得一样好,则 AIRL\"解耦可迁移奖励\"的卖点不成立。", + "isomorphism": "与 [MaxEnt IRL](paper_ziebart_max_ent_irl.md) 的能量模型、[GAIL](paper_gail.md) 的占用匹配同源;奖励+势函数的分解对应强化学习里的 reward shaping 不变性定理。" + }, + "paper:voxelnet": { + "assumption": "点云检测不必依赖手工特征:把空间体素化,用 VFE 层端到端学每个体素的特征,再交给 3D 卷积 + RPN,就能让\"特征学习 > 手工设计\"在 LiDAR 上复现。", + "failure": "稠密 3D 卷积在体素网格上计算与显存开销巨大、实时性差(这正是 [PointPillars](paper_pointpillars.md) 用柱状编码绕开的痛点);体素分辨率是精度-算力的硬折中,小目标在粗体素里被抹平。", + "experiment": "扫描体素分辨率,画\"小目标召回-推理延迟\"前沿,并与柱状(PointPillars)、点级(PointRCNN)表示同图对比。可证伪点:若存在一档体素分辨率同时压过柱状的速度与点级的精度,则\"表示选择是硬折中\"被推翻。", + "isomorphism": "体素化是把不规则点云投到规则网格以复用卷积,与图像 patch 化、占据栅格、SLAM 体素地图同构;VFE 则是 PointNet 式集合特征学习的局部版。" + }, + 
"paper:centerpoint": { + "assumption": "把 3D 目标当作 BEV 平面上的\"中心点\"来检测,比拟合带朝向的 3D 框更简单稳健——朝向、尺寸、速度都作为中心点的回归属性,且天然支持基于中心的跨帧跟踪。", + "failure": "中心点假设在密集遮挡、中心被截断或多目标中心重叠时退化;BEV 投影丢高度信息,对悬空/高架结构不友好;速度回归依赖相邻帧质量。", + "experiment": "在密集遮挡场景对比 anchor-based([VoxelNet](paper_voxelnet.md) 系)与 center-based 的漏检率,并测\"中心点跟踪\"在 ID switch 上相对匈牙利匹配跟踪的增益。可证伪点:若遮挡下 center-based 不占优,则其简化假设的价值受限。", + "isomorphism": "把检测从\"框\"简化到\"点\",与 2D 的 CenterNet、把检测当 [集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md) 一样是\"换一种更易学的目标表示\";中心点跟踪对应基于位置先验的数据关联。" + }, + "paper:byol": { + "assumption": "自监督表示学习不一定需要负样本:用在线网络预测目标网络(其权重是在线网络的滑动平均)的表示,配合 predictor 与 stop-gradient,就能避免表示塌缩。相信\"预测一个缓慢移动的自己\"足以塑造语义空间。", + "failure": "为何不塌缩至今无完全共识——predictor、BN、EMA 缺一就可能塌缩,鲁棒区间靠经验调出;对增广策略与 batch 统计敏感。\"不需要负样本\"是工程结论而非理论保证。", + "experiment": "逐一移除 predictor / stop-gradient / EMA,测表示是否塌缩(用特征秩或下游线性探针精度衡量),定位真正阻止塌缩的最小组件集。可证伪点:若移除全部三者仍不塌缩,则现有\"塌缩归因\"被推翻。", + "isomorphism": "与 [VICReg](paper_vicreg.md) 用方差-协方差正则显式防塌缩是同一问题的两种解;EMA 目标网络与 [DQN](paper_mnih2015_dqn.md) 的目标网络、半监督的 mean teacher 同构。" + }, + "paper:vicreg": { + "assumption": "防止自监督表示塌缩可以靠三个显式损失项:保持每维方差(variance)、拉近正对(invariance)、去相关各维(covariance)——无需负样本、无需 EMA、无需 stop-gradient 这些隐式技巧。把\"别塌缩\"写成可解释的正则。", + "failure": "三项的权重需要平衡,调不好则要么塌缩要么欠拟合;方差/协方差约束在 batch 维度估计,受 batch size 影响;显式正则未必比 [BYOL](paper_byol.md) 的隐式机制更省调参。", + "experiment": "把 VICReg 的三项逐一关掉,观测表示秩与下游精度的变化,验证\"方差项\"是否是防塌缩的关键项。可证伪点:若去掉方差项仍不塌缩,则其作为\"显式防塌缩\"的核心解释不成立。", + "isomorphism": "与 [BYOL](paper_byol.md)(隐式防塌缩)、Barlow Twins(去冗余)、信息最大化原则同源;去相关项对应经典的白化(whitening)与 ICA。" + }, + "paper:bahdanau2014_attention": { + "assumption": "序列到序列不必把整句压进一个定长向量:让解码每一步用一个学习到的对齐权重,对编码器各位置做加权求和(软对齐),就能突破定长瓶颈、按需取信息。", + "failure": "加性注意力对长序列仍是逐步 RNN 解码,无法并行、长程仍受循环限制(这正是 [Transformer](paper_vaswani2017.md) 用全自注意力 + 并行解决的);对齐是软的、可解释性有限。", + "experiment": "在不同源句长度上对比有无注意力的 seq2seq 翻译质量,定位定长瓶颈开始显著伤害性能的句长阈值。可证伪点:若长句下无注意力模型不退化,则\"定长瓶颈\"不是主要瓶颈。", + "isomorphism": "是 [Transformer 自注意力](paper_vaswani2017.md) 
的直接前身,也是 [DETR](paper_carion2020.md) query、记忆网络寻址的雏形;软对齐本质是按内容寻址的可微查表。" + }, + "paper:preference_learning": { + "assumption": "当奖励难以手工指定时,人类的成对偏好比绝对评分更可靠、更易提供;用 Bradley-Terry 模型 $P(a\\succ b)=\\sigma(r_\\psi(a)-r_\\psi(b))$ 从比较里学一个奖励模型,再用 RL 优化它,就能对齐难以言说的目标。", + "failure": "学到的奖励模型一旦被策略过度优化就会被\"刷分\"(reward hacking / Goodhart);人类偏好有噪声、有偏、标注昂贵;奖励模型在策略探索到的新分布上外推不可靠。", + "experiment": "固定偏好数据训练奖励模型,让策略持续优化它,监测真实目标随\"代理奖励\"上升到何处开始背离(Goodhart 拐点),并测加 KL 正则能把拐点推后多少。可证伪点:若代理奖励与真实目标始终同向,则 over-optimization 风险被高估。", + "isomorphism": "是 RLHF / [DPO](../paper_rlhf_dpo.md) 的直接源头;Bradley-Terry 与计量经济学的离散选择模型、排序学习同构;奖励过优化对应经济学的 Goodhart 定律。" + }, + "paper:tcp_carla": { + "assumption": "轨迹规划与直接控制各有所长:轨迹分支稳、控制分支在紧急时反应快;把两者在同一网络里融合并按情境加权,能在 CARLA 闭环里同时拿到平顺与鲁棒。", + "failure": "两分支的加权策略是启发式,融合权重在分布外场景可能选错;端到端控制分支仍受模仿学习的因果混淆与协变量偏移之困。", + "experiment": "做分支消融:分别只用轨迹分支、只用控制分支、用融合,测平顺度与碰撞率,并构造紧急场景看控制分支的边际价值。可证伪点:若单分支在所有场景都不弱于融合,则双分支设计冗余。", + "isomorphism": "与 [快慢双系统](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md)、模型预测控制的\"长程参考轨迹 + 短程反馈跟踪\"分层同构;融合权重对应控制里的增益调度(gain scheduling)。" + }, + "paper:gameformer": { + "assumption": "密集交通里各车的未来相互依赖,不能各算各的;用 level-k 博弈的层级推理(我预测你、你预测我……逐层细化)配合 Transformer 解码,能显式建模交互式多智能体未来。", + "failure": "level-k 的层数是超参,真实驾驶员的理性层级不一且异质;博弈假设(共同理性、已知收益)在现实里常被违反;层级越深算力越大、收益递减。", + "experiment": "扫描推理层级 $k$,测密集交互场景(汇入、无保护左转)的联合预测精度随 $k$ 的增益,定位收益饱和的 $k^\\star$。可证伪点:若 $k=1$(不互相推理)已接近最优,则博弈式层级推理的必要性被推翻。", + "isomorphism": "与博弈论的迭代最优反应、行为经济学的 level-k 思维、多智能体 RL 的对手建模同构;它把 [多智能体交互建模问题](problem_multi_agent_interaction_modeling_in_dense_traffic.md) 写成可学习的层级解码。" + }, + "paper:codex": { + "assumption": "在海量代码上微调的语言模型能把自然语言意图翻译成可执行程序;于是\"动作\"可以是一段代码,决策被提升到可组合、可调用 API、可验证的符号层面。", + "failure": "生成的代码会语法对但语义错(幻觉 API、边界条件漏判),且对提示极敏感;功能正确性只能靠执行验证(pass@k 衡量的是\"采样 k 次至少一对\",非单次可靠)。在安全攸关回路里,一段错代码的代价远高于一句错话。", + "experiment": "把\"代码即动作\"接进一个可执行的驾驶决策沙盒,测单次执行成功率与 pass@k 的差距,并统计运行时错误类型分布。可证伪点:若单次成功率已接近 pass@k,则\"需多次采样 + 验证\"的工程必要性下降。", + "isomorphism": 
"与把规划写成可执行代码的 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) 同源;代码作为动作空间对应程序合成与神经符号方法。" + }, "paper:alvinn": { "assumption": "人类驾驶演示 $\\mathcal{D}_\\text{expert}$ 覆盖了部署时会遇到的状态分布,于是把每帧 $(I,a)$ 当独立样本做监督回归 $\\min_\\theta\\mathbb{E}\\,\\|f_\\theta(I)-a\\|^2$ 就足以学会开车——感知与决策可以被一个端到端函数一次性吞掉,无需车道线检测等手工模块。", "failure": "纯演示几乎只含正常行驶,模型从没见过\"已偏离车道该如何回正\",一旦轻微出错就进入演示未覆盖区、误差以 $\\mathcal{O}(\\varepsilon T^2)$ 复合发散(见 [DAgger](paper_ross2011_dagger.md))。Pomerleau 的补救是用几何变换合成偏移位姿样本——这正暴露了\"端到端模仿不会自带纠偏\"这一原则性缺口。", diff --git a/tools/build_research_lens.py b/tools/build_research_lens.py index fccf1d4..4546caa 100644 --- a/tools/build_research_lens.py +++ b/tools/build_research_lens.py @@ -182,6 +182,104 @@ "isomorphism": r"""与 [RT-2](paper_rt2.md)、[VLA 范式](paradigm_vla_paradigm.md) 把通才基座迁移到具身控制同源;也对应 NLP 里"预训练-提示"取代任务专用模型、[洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)。""", }, +"insight:policy_improvement_bounded_by_distribution_shift": { + "assumption": r"""一个策略的真实表现,可以用它在自己访问到的状态分布上的表现来代理;只要新策略与采集数据的策略在状态分布上足够接近(总变差距离小),策略评估与改进的误差就有界。""", + "failure": r"""一步看似不错的更新可能把状态分布推到数据未覆盖处,此时性能下界 $\eta(\pi)\ge \eta(\pi_{\text{old}})-C\cdot\mathbb{E}[D_{TV}(\pi\|\pi_{\text{old}})]$ 里的惩罚项 $C\cdot D_{TV}$ 爆掉,"改进"变成灾难。距离一旦大到界失效,单调改进的保证就消失。""", + "experiment": r"""在驾驶模仿任务里,按策略更新步长扫描诱导的状态分布偏移(用 $D_{TV}$ 或 rollout 状态的 KL 估计),画"分布偏移-真实闭环回报变化"曲线,定位界由紧变松的临界点。可证伪点:若大偏移更新仍稳定改进,则该界对实际驾驶过松、不具指导意义。""", + "isomorphism": r"""统一了 [DAgger](paper_ross2011_dagger.md) 的 $\mathcal{O}(\varepsilon T)$ 界、[TRPO](paper_schulman2015_trpo.md)/[PPO](paper_schulman2017_ppo.md) 的信赖域、[离线 RL](paradigm_offline_rl.md) 的保守约束——它们都是同一句话"别让分布跑太远"在模仿、在线、离线三处的写法。""", +}, + +"insight:max_entropy_closes_policy_value_duality": { + "assumption": r"""给目标加一个熵正则 $\max_\pi \mathbb{E}[\sum_t r_t+\alpha\mathcal{H}(\pi)]$ 后,最优策略恰好是价值的玻尔兹曼分布 $\pi^\star(a\mid s)\propto e^{Q(s,a)/\alpha}$;于是"前向求最优策略"与"反向由行为推奖励"被同一个 
softmax/配分函数连起来。""", + "failure": r"""温度 $\alpha\to 0$ 时退化为硬 $\max$、对偶关系塌缩;$\alpha$ 太大则策略过度随机、丢掉最优性。配分函数 $\sum_a e^{Q/\alpha}$ 在连续高维动作上不可解析,需采样近似,近似误差会同时污染前向与反向两侧。""", + "experiment": r"""在同一数据上分别用前向最大熵 RL 学策略、用 [MaxEnt IRL](paper_ziebart_max_ent_irl.md) 反推奖励,检验二者是否对同一 $Q$ 自洽(恢复的奖励重新前向求解应回到原策略)。可证伪点:若两侧无法在任何 $\alpha$ 下自洽,则该对偶在该问题类上失效。""", + "isomorphism": r"""与统计力学的玻尔兹曼分布/自由能、概率图模型的变分推断、[SAC](paper_sac.md) 的软价值迭代同构;它也是 [软 Q 学习与策略梯度等价](paper_sac.md) 这类结果的根。""", +}, + +"insight:q_learning_max_is_optimistically_biased": { + "assumption": r"""对带噪声的 $Q$ 估计取 $\max_a$ 会系统性高估,因为 $\mathbb{E}[\max_a \hat Q(s,a)]\ge \max_a \mathbb{E}[\hat Q(s,a)]$(Jensen 不等式作用于凸的 $\max$)。只要不解耦"选动作"与"估其值",这个正偏差就会通过自举不断放大。""", + "failure": r"""偏差随动作数与估计噪声增大,在自举里逐层累积,可让 $Q$ 发散;但当噪声极小或动作极少时偏差可忽略,此时强行 double 化反而增加方差、拖慢收敛。它不是"永远要修"的,而是噪声-动作数的函数。""", + "experiment": r"""人为注入可控幅度的 $Q$ 估计噪声,测 vanilla DQN 与 [Double-Q](move_double_q_to_reduce_overestimation.md) 的高估量随噪声/动作数的增长曲线,定位 double 化真正带来净收益的区域。可证伪点:若某区域 double 化不降高估或反伤性能,则"总该解耦"被推翻。""", + "isomorphism": r"""与统计里的"赢者诅咒"(winner's curse)、最大值无偏估计难题、A/B 测试里挑最好臂的乐观偏差同构;[Double-Q](move_double_q_to_reduce_overestimation.md) 的解耦正对应交叉验证式的"选用不同数据估值"。""", +}, + +"insight:multi_modal_behavior_is_intrinsic_to_traffic_scenes": { + "assumption": r"""交通场景的未来本质上是多模态的——同一路口"先过"与"先让"都是合法解;因此把策略建成单峰分布(或回归一条轨迹)从结构上就拟合不了真实分布,无论数据和参数多大。""", + "failure": r"""单峰模型会取多个模态的均值,落进低密度谷("以一半速度开进路口正中")——既不安全也不可执行。但当场景其实是单模态(直道巡航)时,多模态机制带来的额外采样与训练成本就是纯浪费。""", + "experiment": r"""在分叉/让行场景采集双峰人类示教,对比单峰回归、GMM 头、[扩散策略](paper_diffusion_policy_chi2023.md) 的模态覆盖率与"均值塌缩到非法动作"的发生率。可证伪点:若单峰模型在这些场景下也不塌缩,则多模态建模的必要性被推翻。""", + "isomorphism": r"""与语音合成的多模态输出、图像生成里 L2 损失导致的模糊、混合密度网络(MDN)的提出动机同构;补救手段([把规划当条件生成](move_treat_planning_as_conditional_generation.md)、[加熵奖励](move_add_entropy_bonus_to_encourage_exploration.md))都是"让输出分布容得下多个解"。""", +}, + +"paper:airl": { + "assumption": r"""把判别器结构化成"奖励项 + 势函数 shaping 项",就能在对抗模仿里把真正可迁移的奖励 $r_\theta(s)$ 从环境动力学里解耦出来,从而在动力学改变时仍然有效。相信奖励比策略更可迁移。""", + "failure": 
r"""奖励的可识别性仍是病态的——shaping 不变性让无穷多奖励解释同一行为;对抗训练不稳定,判别器/生成器易失衡。可迁移性只在"状态奖励 + 动力学可变"这一特定假设下成立,状态分布偏移过大时照样崩。""", + "experiment": r"""在训练动力学上学奖励,换一套测试动力学只用学到的奖励重训策略,测迁移成功率,并与直接迁移 [GAIL](paper_gail.md) 策略对比。可证伪点:若 GAIL 策略迁移得一样好,则 AIRL"解耦可迁移奖励"的卖点不成立。""", + "isomorphism": r"""与 [MaxEnt IRL](paper_ziebart_max_ent_irl.md) 的能量模型、[GAIL](paper_gail.md) 的占用匹配同源;奖励+势函数的分解对应强化学习里的 reward shaping 不变性定理。""", +}, + +"paper:voxelnet": { + "assumption": r"""点云检测不必依赖手工特征:把空间体素化,用 VFE 层端到端学每个体素的特征,再交给 3D 卷积 + RPN,就能让"特征学习 > 手工设计"在 LiDAR 上复现。""", + "failure": r"""稠密 3D 卷积在体素网格上计算与显存开销巨大、实时性差(这正是 [PointPillars](paper_pointpillars.md) 用柱状编码绕开的痛点);体素分辨率是精度-算力的硬折中,小目标在粗体素里被抹平。""", + "experiment": r"""扫描体素分辨率,画"小目标召回-推理延迟"前沿,并与柱状(PointPillars)、点级(PointRCNN)表示同图对比。可证伪点:若存在一档体素分辨率同时压过柱状的速度与点级的精度,则"表示选择是硬折中"被推翻。""", + "isomorphism": r"""体素化是把不规则点云投到规则网格以复用卷积,与图像 patch 化、占据栅格、SLAM 体素地图同构;VFE 则是 PointNet 式集合特征学习的局部版。""", +}, + +"paper:centerpoint": { + "assumption": r"""把 3D 目标当作 BEV 平面上的"中心点"来检测,比拟合带朝向的 3D 框更简单稳健——朝向、尺寸、速度都作为中心点的回归属性,且天然支持基于中心的跨帧跟踪。""", + "failure": r"""中心点假设在密集遮挡、中心被截断或多目标中心重叠时退化;BEV 投影丢高度信息,对悬空/高架结构不友好;速度回归依赖相邻帧质量。""", + "experiment": r"""在密集遮挡场景对比 anchor-based([VoxelNet](paper_voxelnet.md) 系)与 center-based 的漏检率,并测"中心点跟踪"在 ID switch 上相对匈牙利匹配跟踪的增益。可证伪点:若遮挡下 center-based 不占优,则其简化假设的价值受限。""", + "isomorphism": r"""把检测从"框"简化到"点",与 2D 的 CenterNet、把检测当 [集合预测](move_treat_detection_as_set_prediction_with_learnable_queries.md) 一样是"换一种更易学的目标表示";中心点跟踪对应基于位置先验的数据关联。""", +}, + +"paper:byol": { + "assumption": r"""自监督表示学习不一定需要负样本:用在线网络预测目标网络(其权重是在线网络的滑动平均)的表示,配合 predictor 与 stop-gradient,就能避免表示塌缩。相信"预测一个缓慢移动的自己"足以塑造语义空间。""", + "failure": r"""为何不塌缩至今无完全共识——predictor、BN、EMA 缺一就可能塌缩,鲁棒区间靠经验调出;对增广策略与 batch 统计敏感。"不需要负样本"是工程结论而非理论保证。""", + "experiment": r"""逐一移除 predictor / stop-gradient / EMA,测表示是否塌缩(用特征秩或下游线性探针精度衡量),定位真正阻止塌缩的最小组件集。可证伪点:若移除全部三者仍不塌缩,则现有"塌缩归因"被推翻。""", + "isomorphism": r"""与 [VICReg](paper_vicreg.md) 用方差-协方差正则显式防塌缩是同一问题的两种解;EMA 目标网络与 [DQN](paper_mnih2015_dqn.md) 的目标网络、半监督的 mean 
teacher 同构。""", +}, + +"paper:vicreg": { + "assumption": r"""防止自监督表示塌缩可以靠三个显式损失项:保持每维方差(variance)、拉近正对(invariance)、去相关各维(covariance)——无需负样本、无需 EMA、无需 stop-gradient 这些隐式技巧。把"别塌缩"写成可解释的正则。""", + "failure": r"""三项的权重需要平衡,调不好则要么塌缩要么欠拟合;方差/协方差约束在 batch 维度估计,受 batch size 影响;显式正则未必比 [BYOL](paper_byol.md) 的隐式机制更省调参。""", + "experiment": r"""把 VICReg 的三项逐一关掉,观测表示秩与下游精度的变化,验证"方差项"是否是防塌缩的关键项。可证伪点:若去掉方差项仍不塌缩,则其作为"显式防塌缩"的核心解释不成立。""", + "isomorphism": r"""与 [BYOL](paper_byol.md)(隐式防塌缩)、Barlow Twins(去冗余)、信息最大化原则同源;去相关项对应经典的白化(whitening)与 ICA。""", +}, + +"paper:bahdanau2014_attention": { + "assumption": r"""序列到序列不必把整句压进一个定长向量:让解码每一步用一个学习到的对齐权重,对编码器各位置做加权求和(软对齐),就能突破定长瓶颈、按需取信息。""", + "failure": r"""加性注意力对长序列仍是逐步 RNN 解码,无法并行、长程仍受循环限制(这正是 [Transformer](paper_vaswani2017.md) 用全自注意力 + 并行解决的);对齐是软的、可解释性有限。""", + "experiment": r"""在不同源句长度上对比有无注意力的 seq2seq 翻译质量,定位定长瓶颈开始显著伤害性能的句长阈值。可证伪点:若长句下无注意力模型不退化,则"定长瓶颈"不是主要瓶颈。""", + "isomorphism": r"""是 [Transformer 自注意力](paper_vaswani2017.md) 的直接前身,也是 [DETR](paper_carion2020.md) query、记忆网络寻址的雏形;软对齐本质是按内容寻址的可微查表。""", +}, + +"paper:preference_learning": { + "assumption": r"""当奖励难以手工指定时,人类的成对偏好比绝对评分更可靠、更易提供;用 Bradley-Terry 模型 $P(a\succ b)=\sigma(r_\psi(a)-r_\psi(b))$ 从比较里学一个奖励模型,再用 RL 优化它,就能对齐难以言说的目标。""", + "failure": r"""学到的奖励模型一旦被策略过度优化就会被"刷分"(reward hacking / Goodhart);人类偏好有噪声、有偏、标注昂贵;奖励模型在策略探索到的新分布上外推不可靠。""", + "experiment": r"""固定偏好数据训练奖励模型,让策略持续优化它,监测真实目标随"代理奖励"上升到何处开始背离(Goodhart 拐点),并测加 KL 正则能把拐点推后多少。可证伪点:若代理奖励与真实目标始终同向,则 over-optimization 风险被高估。""", + "isomorphism": r"""是 RLHF / [DPO](../paper_rlhf_dpo.md) 的直接源头;Bradley-Terry 与计量经济学的离散选择模型、排序学习同构;奖励过优化对应经济学的 Goodhart 定律。""", +}, + +"paper:tcp_carla": { + "assumption": r"""轨迹规划与直接控制各有所长:轨迹分支稳、控制分支在紧急时反应快;把两者在同一网络里融合并按情境加权,能在 CARLA 闭环里同时拿到平顺与鲁棒。""", + "failure": r"""两分支的加权策略是启发式,融合权重在分布外场景可能选错;端到端控制分支仍受模仿学习的因果混淆与协变量偏移之困。""", + "experiment": r"""做分支消融:分别只用轨迹分支、只用控制分支、用融合,测平顺度与碰撞率,并构造紧急场景看控制分支的边际价值。可证伪点:若单分支在所有场景都不弱于融合,则双分支设计冗余。""", + "isomorphism": r"""与 
[快慢双系统](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md)、模型预测控制的"长程参考轨迹 + 短程反馈跟踪"分层同构;融合权重对应控制里的增益调度(gain scheduling)。""", +}, + +"paper:gameformer": { + "assumption": r"""密集交通里各车的未来相互依赖,不能各算各的;用 level-k 博弈的层级推理(我预测你、你预测我……逐层细化)配合 Transformer 解码,能显式建模交互式多智能体未来。""", + "failure": r"""level-k 的层数是超参,真实驾驶员的理性层级不一且异质;博弈假设(共同理性、已知收益)在现实里常被违反;层级越深算力越大、收益递减。""", + "experiment": r"""扫描推理层级 $k$,测密集交互场景(汇入、无保护左转)的联合预测精度随 $k$ 的增益,定位收益饱和的 $k^\star$。可证伪点:若 $k=1$(不互相推理)已接近最优,则博弈式层级推理的必要性被推翻。""", + "isomorphism": r"""与博弈论的迭代最优反应、行为经济学的 level-k 思维、多智能体 RL 的对手建模同构;它把 [多智能体交互建模问题](problem_multi_agent_interaction_modeling_in_dense_traffic.md) 写成可学习的层级解码。""", +}, + +"paper:codex": { + "assumption": r"""在海量代码上微调的语言模型能把自然语言意图翻译成可执行程序;于是"动作"可以是一段代码,决策被提升到可组合、可调用 API、可验证的符号层面。""", + "failure": r"""生成的代码会语法对但语义错(幻觉 API、边界条件漏判),且对提示极敏感;功能正确性只能靠执行验证(pass@k 衡量的是"采样 k 次至少一对",非单次可靠)。在安全攸关回路里,一段错代码的代价远高于一句错话。""", + "experiment": r"""把"代码即动作"接进一个可执行的驾驶决策沙盒,测单次执行成功率与 pass@k 的差距,并统计运行时错误类型分布。可证伪点:若单次成功率已接近 pass@k,则"需多次采样 + 验证"的工程必要性下降。""", + "isomorphism": r"""与把规划写成可执行代码的 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) 同源;代码作为动作空间对应程序合成与神经符号方法。""", +}, + } From e2d0852b3c810d542d3577b89c4f165e1abf3785 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 29 May 2026 04:14:33 +0000 Subject: [PATCH 8/8] Round 6: research lenses for all 8 spine papers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spine papers are the user-supplied core materials; only UniAD had a lens. Add lenses for the other seven — PlanT, DriveVLM, Agent-Driver, DiLu, Spike-driven Transformer, DINOv3, CF-VLA — each grounded in the card's own framing (object-level planning, fast/slow dual system, LLM tool-agent, reflective memory, event-sparse spiking compute, frozen SSL features, counterfactual replanning). 
Every lens states the load-bearing assumption, the failure boundary, a concrete falsifiable experiment, and the cross-domain isomorphism. Research lenses 49 -> 56; all 8/8 spine papers covered. QC: 0 dead links, 0/231 lint findings, math test green, every lens link resolves. --- docs/data/research_lens.json | 42 +++++++++++++++++++++++++++++++ tools/build_research_lens.py | 49 ++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/docs/data/research_lens.json b/docs/data/research_lens.json index 76b0295..786ce4e 100644 --- a/docs/data/research_lens.json +++ b/docs/data/research_lens.json @@ -221,6 +221,48 @@ "experiment": "把\"代码即动作\"接进一个可执行的驾驶决策沙盒,测单次执行成功率与 pass@k 的差距,并统计运行时错误类型分布。可证伪点:若单次成功率已接近 pass@k,则\"需多次采样 + 验证\"的工程必要性下降。", "isomorphism": "与把规划写成可执行代码的 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) 同源;代码作为动作空间对应程序合成与神经符号方法。" }, + "paper:2210.14222": { + "assumption": "一个紧凑的对象级表示(每个动态体的位置、朝向、速度等属性 token)已是规划的充分状态——只要感知把场景压成\"对象短语\",一个 Transformer 就能用注意力在这句短语里找出真正决定下一步的那个对象,并保持可解释。", + "failure": "上游感知一旦漏检/错估,对象级抽象会把噪声当事实(garbage in);它丢掉了占据、遮挡、细粒度几何这些非对象化线索;当关键约束来自地图拓扑而非某个具体对象时,注意力无处可指。", + "experiment": "喂入真值对象 vs 加噪/漏检对象,画规划碰撞率随感知质量的衰减;再读注意力权重,用反事实删除\"被高度注意的对象\"看决策是否改变。可证伪点:若删除高注意力对象不改变决策,则\"可解释注意力 = 因果归因\"不成立。", + "isomorphism": "对象 token 与 [DETR query](paper_carion2020.md)、场景图、符号化中间表示同构(见 [洞察:符号中间层带来可解释与迁移](insight_symbolic_intermediate_enables_interpretability_and_transfer.md));\"先抽象成对象再推理\"是神经-符号方法的驾驶版。" + }, + "paper:2402.12289": { + "assumption": "视觉-语言模型的场景理解 + 链式思维能产出层级化驾驶决策;把它当\"慢系统\"与一个传统快管线配对,就能既享受语言推理的泛化、又用快系统兜住实时性。", + "failure": "VLM 的延迟与幻觉在安全回路里是硬伤;语言决策到连续轨迹仍隔着 grounding 鸿沟;慢系统单独无法满足实时控制,二者的接管/仲裁逻辑本身是一个新的失效面。", + "experiment": "统计慢系统输出在多大比例上真正改写了快系统轨迹、在什么延迟下发生;再关掉链式思维看决策质量降多少。可证伪点:若慢系统极少改写快系统、或关掉 CoT 几乎不降,则\"VLM 慢思考\"的边际价值存疑。", + "isomorphism": "正是 Kahneman 快/慢双系统(System 1/2)在驾驶里的实例(见 
[洞察:快慢双系统融合反应式与审议式控制](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md));也对应级联推理里\"廉价模型先行、昂贵模型按需介入\"。" + }, + "paper:2311.10813": { + "assumption": "把驾驶建成\"有手册(memory)、有工具(perception/prediction API)、有项目经理(LLM)的施工队\"——LLM 用工具调用取结构化信息、用记忆复用经验,就能做出类人且可解释的决策。", + "failure": "工具调用会选错/调错、LLM 推理会幻觉、长链推理延迟高;记忆检索到不相关案例会误导;整条链的可靠性是各环可靠性的乘积,环越多越脆。", + "experiment": "逐一消融记忆库/工具库,测决策质量与延迟;再向工具返回注入噪声,看 LLM 是否盲信。可证伪点:若去掉记忆与工具后决策几乎不降,则\"agent 化\"的复杂度不被收益证明。", + "isomorphism": "是 [LLM agent 范式](paradigm_llm_agent_paradigm.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md)、[ReAct](paper_react.md) 在驾驶上的落地;\"施工队\"隐喻对应经典的黑板系统(blackboard architecture)。" + }, + "paper:2309.16292": { + "assumption": "LLM 里的常识 + \"记日记\"式的反思记忆,能让闭环驾驶以少量经验泛化到未见场景——把经验回放做成可读、可改、可迁移的自然语言形态,胜过纯数据驱动的黑箱策略。", + "failure": "推理会幻觉、对 prompt 敏感;记忆库的检索质量决定上限,一条错误\"教训\"会被反复强化;实时性与数据驱动方法仍有差距。", + "experiment": "在未见场景上对比 DiLu 与 RL/IL 基线的零样本成功率,并消融反思/记忆模块;再故意写入一条错误\"日记\"看是否污染后续决策。可证伪点:若关掉记忆与反思后泛化不降,则知识驱动相对数据驱动的优势不成立。", + "isomorphism": "与 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、案例推理(case-based reasoning)、把经验回放语义化同构;\"记日记再翻日记\"对应 episodic memory 与检索增强生成。" + }, + "paper:2307.01694": { + "assumption": "把 Transformer 的浮点乘加阵列换成脉冲驱动(二值、事件稀疏)的 AND/ADD 算子,能在匹配精度的同时把能耗降一个量级——尤其在事件相机这种天然事件流输入下。", + "failure": "脉冲网络靠代理梯度训练、收敛与精度仍逊于稠密 ViT;能耗优势只有在神经形态芯片上才真正兑现,跑在 GPU 上反而更慢;时间步(time step)是精度-延迟的硬折中。", + "experiment": "画精度 vs 突触操作数(synops,能耗代理)前沿,对比稠密 ViT,并指明在何种硬件上能耗优势才出现。可证伪点:若在通用硬件上脉冲版既不更省也不更准,则其价值绑死在专用芯片上。", + "isomorphism": "与事件相机、[类脑神经形态协同设计](paradigm_brain_inspired_neuromorphic_co_design.md)、[洞察:事件稀疏计算匹配能耗预算](insight_event_sparse_compute_matches_energy_budget.md) 同构;事件稀疏激活也与 MoE 的条件计算异曲同工。" + }, + "paper:2508.10104": { + "assumption": "足够大规模的自监督预训练能产出通用的稠密视觉特征,冻结即可迁移到众多下游任务(Gram anchoring 保证\"语义相近则几何相近\"不走样),无需任务专用标注。", + "failure": "web 图像与驾驶域存在分布差,冻结特征未必覆盖驾驶长尾(夜间、恶劣天气、罕见物);无任务自适应时精度有天花板;训练此类模型的算力门槛极高。", + "experiment": "冻结 DINOv3 特征 + 
线性头做驾驶感知,对比有监督预训练,定位仍需微调才能闭合的精度缺口与具体场景。可证伪点:若冻结特征在所有驾驶子任务上都追平微调,则\"免微调迁移\"完全成立;否则即界定了它的边界。", + "isomorphism": "与 [洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)、[BYOL](paper_byol.md) / [VICReg](paper_vicreg.md) 的无负样本/显式正则 SSL 谱系同构;Gram anchoring 与表示几何的白化/对齐正则一脉相承。" + }, + "paper:2512.24426": { + "assumption": "让 VLA 在动手前\"先在脑子里走一遍最坏情况\"——由 LLM 在 prompt-time 即兴提供一个世界模型、生成反事实 rollout,就能在执行前捕捉并改写不安全动作。", + "failure": "反事实的质量取决于 LLM 即兴世界模型的保真度,错误的\"想象\"会带来错误的安全感;prompt-time 推演的算力与延迟代价高;最坏情况若覆盖不全,等于没推演。", + "experiment": "测有/无反事实重规划时安全攸关干预率与碰撞率的差,并人为降级反事实保真度看安全收益如何衰减。可证伪点:若反事实重规划不降低安全攸关事件率,则\"脑内预演\"在该设置下无效。", + "isomorphism": "是 [模型基世界想象规划](paradigm_model_based_world_imagination_planning.md)、MPC 滚动重规划、[反事实数据为中心的安全](paradigm_counterfactual_data_centric_safety.md) 的 LLM 翻版;区别在世界模型由语言模型即兴提供,而非显式学得。" + }, "paper:alvinn": { "assumption": "人类驾驶演示 $\\mathcal{D}_\\text{expert}$ 覆盖了部署时会遇到的状态分布,于是把每帧 $(I,a)$ 当独立样本做监督回归 $\\min_\\theta\\mathbb{E}\\,\\|f_\\theta(I)-a\\|^2$ 就足以学会开车——感知与决策可以被一个端到端函数一次性吞掉,无需车道线检测等手工模块。", "failure": "纯演示几乎只含正常行驶,模型从没见过\"已偏离车道该如何回正\",一旦轻微出错就进入演示未覆盖区、误差以 $\\mathcal{O}(\\varepsilon T^2)$ 复合发散(见 [DAgger](paper_ross2011_dagger.md))。Pomerleau 的补救是用几何变换合成偏移位姿样本——这正暴露了\"端到端模仿不会自带纠偏\"这一原则性缺口。", diff --git a/tools/build_research_lens.py b/tools/build_research_lens.py index 4546caa..6564374 100644 --- a/tools/build_research_lens.py +++ b/tools/build_research_lens.py @@ -280,6 +280,55 @@ "isomorphism": r"""与把规划写成可执行代码的 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md) 同源;代码作为动作空间对应程序合成与神经符号方法。""", }, +"paper:2210.14222": { + "assumption": r"""一个紧凑的对象级表示(每个动态体的位置、朝向、速度等属性 token)已是规划的充分状态——只要感知把场景压成"对象短语",一个 Transformer 就能用注意力在这句短语里找出真正决定下一步的那个对象,并保持可解释。""", + "failure": r"""上游感知一旦漏检/错估,对象级抽象会把噪声当事实(garbage in);它丢掉了占据、遮挡、细粒度几何这些非对象化线索;当关键约束来自地图拓扑而非某个具体对象时,注意力无处可指。""", + "experiment": r"""喂入真值对象 vs 
加噪/漏检对象,画规划碰撞率随感知质量的衰减;再读注意力权重,用反事实删除"被高度注意的对象"看决策是否改变。可证伪点:若删除高注意力对象不改变决策,则"可解释注意力 = 因果归因"不成立。""", + "isomorphism": r"""对象 token 与 [DETR query](paper_carion2020.md)、场景图、符号化中间表示同构(见 [洞察:符号中间层带来可解释与迁移](insight_symbolic_intermediate_enables_interpretability_and_transfer.md));"先抽象成对象再推理"是神经-符号方法的驾驶版。""", +}, + +"paper:2402.12289": { + "assumption": r"""视觉-语言模型的场景理解 + 链式思维能产出层级化驾驶决策;把它当"慢系统"与一个传统快管线配对,就能既享受语言推理的泛化、又用快系统兜住实时性。""", + "failure": r"""VLM 的延迟与幻觉在安全回路里是硬伤;语言决策到连续轨迹仍隔着 grounding 鸿沟;慢系统单独无法满足实时控制,二者的接管/仲裁逻辑本身是一个新的失效面。""", + "experiment": r"""统计慢系统输出在多大比例上真正改写了快系统轨迹、在什么延迟下发生;再关掉链式思维看决策质量降多少。可证伪点:若慢系统极少改写快系统、或关掉 CoT 几乎不降,则"VLM 慢思考"的边际价值存疑。""", + "isomorphism": r"""正是 Kahneman 快/慢双系统(System 1/2)在驾驶里的实例(见 [洞察:快慢双系统融合反应式与审议式控制](insight_dual_system_fast_slow_loop_marries_reactive_and_deliberative_control.md));也对应级联推理里"廉价模型先行、昂贵模型按需介入"。""", +}, + +"paper:2311.10813": { + "assumption": r"""把驾驶建成"有手册(memory)、有工具(perception/prediction API)、有项目经理(LLM)的施工队"——LLM 用工具调用取结构化信息、用记忆复用经验,就能做出类人且可解释的决策。""", + "failure": r"""工具调用会选错/调错、LLM 推理会幻觉、长链推理延迟高;记忆检索到不相关案例会误导;整条链的可靠性是各环可靠性的乘积,环越多越脆。""", + "experiment": r"""逐一消融记忆库/工具库,测决策质量与延迟;再向工具返回注入噪声,看 LLM 是否盲信。可证伪点:若去掉记忆与工具后决策几乎不降,则"agent 化"的复杂度不被收益证明。""", + "isomorphism": r"""是 [LLM agent 范式](paradigm_llm_agent_paradigm.md)、[工具调用扩展语言模型](insight_tool_use_extends_language_model_into_environment_grounded_actor.md)、[ReAct](paper_react.md) 在驾驶上的落地;"施工队"隐喻对应经典的黑板系统(blackboard architecture)。""", +}, + +"paper:2309.16292": { + "assumption": r"""LLM 里的常识 + "记日记"式的反思记忆,能让闭环驾驶以少量经验泛化到未见场景——把经验回放做成可读、可改、可迁移的自然语言形态,胜过纯数据驱动的黑箱策略。""", + "failure": r"""推理会幻觉、对 prompt 敏感;记忆库的检索质量决定上限,一条错误"教训"会被反复强化;实时性与数据驱动方法仍有差距。""", + "experiment": r"""在未见场景上对比 DiLu 与 RL/IL 基线的零样本成功率,并消融反思/记忆模块;再故意写入一条错误"日记"看是否污染后续决策。可证伪点:若关掉记忆与反思后泛化不降,则知识驱动相对数据驱动的优势不成立。""", + "isomorphism": r"""与 [知识驱动反思智能体](paradigm_knowledge_driven_reflective_agent.md)、案例推理(case-based reasoning)、把经验回放语义化同构;"记日记再翻日记"对应 episodic memory 与检索增强生成。""", +}, + +"paper:2307.01694": { + "assumption": 
r"""把 Transformer 的浮点乘加阵列换成脉冲驱动(二值、事件稀疏)的 AND/ADD 算子,能在匹配精度的同时把能耗降一个量级——尤其在事件相机这种天然事件流输入下。""", + "failure": r"""脉冲网络靠代理梯度训练、收敛与精度仍逊于稠密 ViT;能耗优势只有在神经形态芯片上才真正兑现,跑在 GPU 上反而更慢;时间步(time step)是精度-延迟的硬折中。""", + "experiment": r"""画精度 vs 突触操作数(synops,能耗代理)前沿,对比稠密 ViT,并指明在何种硬件上能耗优势才出现。可证伪点:若在通用硬件上脉冲版既不更省也不更准,则其价值绑死在专用芯片上。""", + "isomorphism": r"""与事件相机、[类脑神经形态协同设计](paradigm_brain_inspired_neuromorphic_co_design.md)、[洞察:事件稀疏计算匹配能耗预算](insight_event_sparse_compute_matches_energy_budget.md) 同构;事件稀疏激活也与 MoE 的条件计算异曲同工。""", +}, + +"paper:2508.10104": { + "assumption": r"""足够大规模的自监督预训练能产出通用的稠密视觉特征,冻结即可迁移到众多下游任务(Gram anchoring 保证"语义相近则几何相近"不走样),无需任务专用标注。""", + "failure": r"""web 图像与驾驶域存在分布差,冻结特征未必覆盖驾驶长尾(夜间、恶劣天气、罕见物);无任务自适应时精度有天花板;训练此类模型的算力门槛极高。""", + "experiment": r"""冻结 DINOv3 特征 + 线性头做驾驶感知,对比有监督预训练,定位仍需微调才能闭合的精度缺口与具体场景。可证伪点:若冻结特征在所有驾驶子任务上都追平微调,则"免微调迁移"完全成立;否则即界定了它的边界。""", + "isomorphism": r"""与 [洞察:基础特征免微调迁移](insight_foundation_features_transfer_without_finetune.md)、[BYOL](paper_byol.md) / [VICReg](paper_vicreg.md) 的无负样本/显式正则 SSL 谱系同构;Gram anchoring 与表示几何的白化/对齐正则一脉相承。""", +}, + +"paper:2512.24426": { + "assumption": r"""让 VLA 在动手前"先在脑子里走一遍最坏情况"——由 LLM 在 prompt-time 即兴提供一个世界模型、生成反事实 rollout,就能在执行前捕捉并改写不安全动作。""", + "failure": r"""反事实的质量取决于 LLM 即兴世界模型的保真度,错误的"想象"会带来错误的安全感;prompt-time 推演的算力与延迟代价高;最坏情况若覆盖不全,等于没推演。""", + "experiment": r"""测有/无反事实重规划时安全攸关干预率与碰撞率的差,并人为降级反事实保真度看安全收益如何衰减。可证伪点:若反事实重规划不降低安全攸关事件率,则"脑内预演"在该设置下无效。""", + "isomorphism": r"""是 [模型基世界想象规划](paradigm_model_based_world_imagination_planning.md)、MPC 滚动重规划、[反事实数据为中心的安全](paradigm_counterfactual_data_centric_safety.md) 的 LLM 翻版;区别在世界模型由语言模型即兴提供,而非显式学得。""", +}, + }