future-agi · KarthikAvinashFI · Mar 11, 2026 · Mar 11, 2026 · Mar 12, 2026 · Mar 12, 2026
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/src/components/TableOfContents.astro b/src/components/TableOfContents.astro
@@ -91,6 +91,47 @@ const feedbackUrl = `https://github.com/${GITHUB_REPO}/issues/new?title=${encode
 <script is:inline>
 (function() {
   function setupToc() {
+    // Discover step headings (data-step-heading) in the DOM and add TOC links for them in document order.
+    var firstTocLink = document.querySelector('[data-toc-link]');
+    var tocNav = firstTocLink ? firstTocLink.parentElement : null;
+
+    if (tocNav) {
+      // Loop-invariant: query the heading list once, not once per step heading.
+      var orderedHeadings = Array.from(document.querySelectorAll('h2[id], h3[id]'));
+
+      document.querySelectorAll('[data-step-heading]').forEach(function(stepH) {
+        if (!stepH.id) return;
+        // Skip headings that already have a TOC entry.
+        if (tocNav.querySelector('[data-heading="' + stepH.id + '"]')) return;
+
+        var link = document.createElement('a');
+        link.href = '#' + stepH.id;
+        link.className = 'block text-[13px] leading-relaxed transition-colors duration-200 hover:text-[var(--color-text-secondary)] text-[var(--color-text-muted)] pl-3';
+        link.setAttribute('data-toc-link', '');
+        link.setAttribute('data-heading', stepH.id);
+        link.textContent = stepH.textContent;
+
+        // Nearest preceding h2/h3 that already has a TOC link, if any.
+        var stepIndex = orderedHeadings.indexOf(stepH);
+        var insertAfter = null;
+        for (var j = stepIndex - 1; j >= 0; j--) {
+          var prevLink = tocNav.querySelector('[data-heading="' + orderedHeadings[j].id + '"]');
+          if (prevLink) { insertAfter = prevLink; break; }
+        }
+
+        if (insertAfter) {
+          // insertBefore(node, null) appends, so a trailing anchor needs no special case.
+          insertAfter.parentNode.insertBefore(link, insertAfter.nextSibling);
+        } else if (stepIndex === -1) {
+          // Not among the h2[id]/h3[id] headings, so its position is unknown: append.
+          tocNav.appendChild(link);
+        } else {
+          // Precedes every heading already listed: insert at the top, not the end.
+          tocNav.insertBefore(link, firstTocLink);
+        }
+      });
+    }
+
     var tocLinks = document.querySelectorAll('[data-toc-link]');
     var headings = document.querySelectorAll('h2[id], h3[id]');
 

diff --git a/src/components/docs/Step.astro b/src/components/docs/Step.astro
@@ -4,10 +4,16 @@ interface Props {
 }
 
 const { title } = Astro.props;
+
+// URL-safe anchor id derived from the title; undefined when no [a-z0-9] characters survive, so no empty id is passed on.
+const slug = title
+  .toLowerCase()
+  .replace(/[^a-z0-9]+/g, '-')
+  .replace(/^-|-$/g, '') || undefined;
 ---
 
 <div class="step-item">
-  <h4 class="font-semibold text-[var(--color-text-primary)] mb-2">{title}</h4>
+  <h3 id={slug} class="font-semibold text-[var(--color-text-primary)] mb-2 text-base" data-step-heading>{title}</h3>
   <div class="text-sm text-[var(--color-text-secondary)] [&>p]:mb-2 [&>p:last-child]:mb-0">
     <slot />
   </div>

diff --git a/src/layouts/DocsLayout.astro b/src/layouts/DocsLayout.astro
@@ -154,5 +154,38 @@ if (breadcrumbs.length > 0) {
     </div>
   </main>
 
+<script is:inline>
+(function() {
+  // Adds a copy button to every <pre> that lacks one and is not inside a .code-wrapper (safe to re-run).
+  function addCopyButtons() {
+    // navigator.clipboard is undefined in insecure contexts: add no button rather than one that throws.
+    if (!navigator.clipboard || !navigator.clipboard.writeText) return;
+    var copyIcon = '<svg width="16" height="16" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"/></svg>';
+    var checkIcon = '<svg width="16" height="16" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"/></svg>';
+    document.querySelectorAll('pre').forEach(function(pre) {
+      if (pre.querySelector('.code-copy-btn')) return;
+      var wrapper = pre.parentElement;
+      if (wrapper && wrapper.classList.contains('code-wrapper')) return;
+      pre.style.position = 'relative';
+      var btn = document.createElement('button');
+      btn.className = 'code-copy-btn';
+      btn.setAttribute('aria-label', 'Copy code');
+      btn.innerHTML = copyIcon;
+      btn.addEventListener('click', function() {
+        var code = pre.querySelector('code');
+        navigator.clipboard.writeText((code || pre).textContent || '').then(function() {
+          btn.innerHTML = checkIcon;
+          btn.style.color = 'var(--color-success)';
+          setTimeout(function() { btn.innerHTML = copyIcon; btn.style.color = ''; }, 2000);
+        }).catch(function() { /* write rejected (e.g. permission denied): keep the copy icon */ });
+      });
+      pre.appendChild(btn);
+    });
+  }
+  addCopyButtons();
+  document.addEventListener('astro:page-load', addCopyButtons);
+})();
+</script>
+
   <FastNav />
 </BaseLayout>
diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts
@@ -643,7 +643,6 @@ export const tabNavigation: NavTab[] = [
                   { title: 'Evaluate Customer Agent Conversations', href: '/docs/cookbook/quickstart/conversation-eval' },
                   { title: 'Dataset SDK: Upload, Evaluate, and Download Results', href: '/docs/cookbook/quickstart/batch-eval' },
                   { title: 'Async Evaluations for Large-Scale Testing', href: '/docs/cookbook/quickstart/async-batch-eval' },
-                  { title: 'Text-to-SQL Evaluation', href: '/docs/cookbook/quickstart/text-to-sql-eval' },
                 ]
               },
               {
@@ -718,6 +717,24 @@ export const tabNavigation: NavTab[] = [
               },
             ]
           },
+          {
+            title: 'Use Cases',
+            icon: 'briefcase',
+            items: [
+              { title: 'Test and Fix Your Chat Agent with Simulated Conversations', href: '/docs/cookbook/use-cases/end-to-end-agent-testing' },
+              { title: 'Detect Domain-Specific Hallucinations in Your Chatbot', href: '/docs/cookbook/use-cases/domain-hallucination-detection' },
+              { title: 'A/B Test Prompt Versions and Ship the Winner', href: '/docs/cookbook/use-cases/full-prompt-lifecycle' },
+              { title: 'Stop Your Financial Chatbot From Leaking PII', href: '/docs/cookbook/use-cases/secure-ai-evals-guardrails' },
+              { title: 'Screen Your AI App for HIPAA and GDPR Violations', href: '/docs/cookbook/use-cases/compliance-hipaa-gdpr' },
+              { title: 'Evaluate LLM Translation for Accuracy and Fluency', href: '/docs/cookbook/use-cases/translation-eval' },
+              { title: 'Set Up Quality Gates for Your Support Bot in 20 Minutes', href: '/docs/cookbook/use-cases/auto-eval-pipeline' },
+              { title: 'Monitor LLM Quality in Production and Catch Regressions', href: '/docs/cookbook/use-cases/production-quality-monitoring' },
+              { title: 'Evaluate Your Code Generation Agent\'s Output Quality', href: '/docs/cookbook/use-cases/coding-agent-eval' },
+              { title: 'Red-Team Your LLM Application Before Attackers Do', href: '/docs/cookbook/use-cases/red-teaming-llm' },
+              { title: 'Improve Your LLM Judge with Human Feedback', href: '/docs/cookbook/use-cases/feedback-loop-eval' },
+              { title: 'Text-to-SQL Eval: Catch Logic Errors Before Production', href: '/docs/cookbook/use-cases/text-to-sql-eval' },
+            ]
+          },
           {
             title: 'Getting Started',
             icon: 'zap',

diff --git a/src/pages/docs/cookbook/quickstart/conversation-eval.mdx b/src/pages/docs/cookbook/quickstart/conversation-eval.mdx
@@ -296,7 +296,7 @@ You can run all conversational agent metrics at once from the dashboard using th
 1. Go to [app.futureagi.com](https://app.futureagi.com) → **Dataset**
 2. Open a dataset that has a `conversation` column (JSON array of `role`/`content` messages) and a `system_prompt` column containing the agent's system prompt
 3. Click **Evaluate** → **Add Evaluations**
-4. Under **Groups**, select **Conversational agent evaluation** — this adds all 13 metrics in one click
+4. Under **Groups**, select **Conversational agent evaluation** — this adds all 10 metrics in one click
 5. Map the `conversation` column to the conversation input, and the `system_prompt` column to the system prompt input — this is needed for `customer_agent_prompt_conformance`, which checks whether the agent followed its instructions
 6. Click **Add & Run**
 
@@ -341,7 +341,7 @@ You can now evaluate multi-turn customer support conversations across quality, f
 - Diagnosed specific failure modes: context loss, poor query handling, repetitive loops, and missed escalation
 - Checked whether the agent followed its system prompt with `customer_agent_prompt_conformance`
 - Ran a full scorecard comparing a good conversation against a bad one across 7 metrics
-- Used the Conversational agent evaluation group to run all 13 metrics on a dataset from the dashboard
+- Used the Conversational agent evaluation group to run all 10 metrics on a dataset from the dashboard
 
 ## Next steps