diff --git a/.semversioner/next-release/patch-20260308082306195595.json b/.semversioner/next-release/patch-20260308082306195595.json
new file mode 100644
index 000000000..16cc06617
--- /dev/null
+++ b/.semversioner/next-release/patch-20260308082306195595.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "Add Top-K and min co-occurrence filters to NLP edge extraction to prevent O(N^2) relationship explosion"
+}
diff --git a/packages/graphrag/graphrag/config/defaults.py b/packages/graphrag/graphrag/config/defaults.py
index 640933581..db64e8302 100644
--- a/packages/graphrag/graphrag/config/defaults.py
+++ b/packages/graphrag/graphrag/config/defaults.py
@@ -188,6 +188,8 @@ class ExtractGraphNLPDefaults:
     text_analyzer: TextAnalyzerDefaults = field(default_factory=TextAnalyzerDefaults)
     concurrent_requests: int = 25
     async_mode: AsyncType = AsyncType.Threaded
+    max_entities_per_chunk: int = 0
+    min_co_occurrence: int = 1
 
 
 @dataclass
diff --git a/packages/graphrag/graphrag/config/models/extract_graph_nlp_config.py b/packages/graphrag/graphrag/config/models/extract_graph_nlp_config.py
index 5ab587cf2..bafe99c98 100644
--- a/packages/graphrag/graphrag/config/models/extract_graph_nlp_config.py
+++ b/packages/graphrag/graphrag/config/models/extract_graph_nlp_config.py
@@ -72,3 +72,16 @@ class ExtractGraphNLPConfig(BaseModel):
         description="The async mode to use.",
         default=graphrag_config_defaults.extract_graph_nlp.async_mode,
     )
+    max_entities_per_chunk: int = Field(
+        description="Maximum number of noun-phrase entities to retain per text chunk "
+        "when building co-occurrence edges. Entities are ranked by global frequency "
+        "and only the top-K are paired, reducing edges from O(N^2) to O(K^2). "
+        "Set to 0 to disable (keep all entities).",
+        default=graphrag_config_defaults.extract_graph_nlp.max_entities_per_chunk,
+    )
+    min_co_occurrence: int = Field(
+        description="Minimum number of text units in which an edge must co-occur "
+        "to be retained. Edges appearing in fewer text units are discarded as "
+        "likely coincidental. Set to 1 to disable filtering.",
+        default=graphrag_config_defaults.extract_graph_nlp.min_co_occurrence,
+    )
diff --git a/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py b/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py
index ece890a57..c57840164 100644
--- a/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py
+++ b/packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py
@@ -25,6 +25,8 @@ async def build_noun_graph(
     text_analyzer: BaseNounPhraseExtractor,
     normalize_edge_weights: bool,
     cache: Cache,
+    max_entities_per_chunk: int = 0,
+    min_co_occurrence: int = 1,
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
     """Build a noun graph from text units."""
     title_to_ids = await _extract_nodes(
@@ -49,6 +51,8 @@ async def build_noun_graph(
         title_to_ids,
         nodes_df=nodes_df,
         normalize_edge_weights=normalize_edge_weights,
+        max_entities_per_chunk=max_entities_per_chunk,
+        min_co_occurrence=min_co_occurrence,
     )
     return (nodes_df, edges_df)
 
@@ -100,10 +104,23 @@ def _extract_edges(
     title_to_ids: dict[str, list[str]],
     nodes_df: pd.DataFrame,
     normalize_edge_weights: bool = True,
+    max_entities_per_chunk: int = 0,
+    min_co_occurrence: int = 1,
 ) -> pd.DataFrame:
     """Build co-occurrence edges between noun phrases.
 
     Nodes that appear in the same text unit are connected.
+
+    Two optional filters reduce O(N^2) edge explosion in
+    entity-dense corpora (e.g. scientific/technical text):
+
+    * ``max_entities_per_chunk`` – When > 0, only the K most
+      globally-frequent entities per text unit are paired,
+      capping edges at C(K,2) instead of C(N,2).
+    * ``min_co_occurrence`` – When > 1, edges that appear in
+      fewer than this many text units are discarded, removing
+      coincidental co-occurrences.
+
     Returns edges with schema [source, target, weight, text_unit_ids].
     """
     if not title_to_ids:
@@ -111,6 +128,10 @@ def _extract_edges(
             columns=["source", "target", "weight", "text_unit_ids"],
         )
 
+    entity_freq: dict[str, int] = {
+        t: len(ids) for t, ids in title_to_ids.items()
+    }
+
     text_unit_to_titles: dict[str, list[str]] = defaultdict(list)
     for title, tu_ids in title_to_ids.items():
         for tu_id in tu_ids:
@@ -118,9 +139,17 @@ def _extract_edges(
 
     edge_map: dict[tuple[str, str], list[str]] = defaultdict(list)
     for tu_id, titles in text_unit_to_titles.items():
-        if len(titles) < 2:
+        unique_titles = sorted(set(titles))
+        if len(unique_titles) < 2:
             continue
-        for pair in combinations(sorted(set(titles)), 2):
+        if max_entities_per_chunk > 0 and len(unique_titles) > max_entities_per_chunk:
+            unique_titles = sorted(
+                unique_titles,
+                key=lambda t: entity_freq.get(t, 0),
+                reverse=True,
+            )[:max_entities_per_chunk]
+            unique_titles.sort()
+        for pair in combinations(unique_titles, 2):
             edge_map[pair].append(tu_id)
 
     records = [
@@ -131,7 +160,17 @@ def _extract_edges(
             "text_unit_ids": tu_ids,
         }
         for (src, tgt), tu_ids in edge_map.items()
+        if len(tu_ids) >= min_co_occurrence
     ]
+
+    if len(records) < len(edge_map):
+        logger.info(
+            "Edge co-occurrence filter: %d -> %d edges (min_co_occurrence=%d)",
+            len(edge_map),
+            len(records),
+            min_co_occurrence,
+        )
+
     edges_df = pd.DataFrame(
         records,
         columns=["source", "target", "weight", "text_unit_ids"],
diff --git a/packages/graphrag/graphrag/index/workflows/extract_graph_nlp.py b/packages/graphrag/graphrag/index/workflows/extract_graph_nlp.py
index d4cae458b..8e4724107 100644
--- a/packages/graphrag/graphrag/index/workflows/extract_graph_nlp.py
+++ b/packages/graphrag/graphrag/index/workflows/extract_graph_nlp.py
@@ -52,6 +52,10 @@ async def run_workflow(
             relationships_table=relationships_table,
             text_analyzer=text_analyzer,
             normalize_edge_weights=(config.extract_graph_nlp.normalize_edge_weights),
+            max_entities_per_chunk=(
+                config.extract_graph_nlp.max_entities_per_chunk
+            ),
+            min_co_occurrence=config.extract_graph_nlp.min_co_occurrence,
         )
 
     logger.info("Workflow completed: extract_graph_nlp")
@@ -65,6 +69,8 @@ async def extract_graph_nlp(
     relationships_table: Table,
     text_analyzer: BaseNounPhraseExtractor,
     normalize_edge_weights: bool,
+    max_entities_per_chunk: int = 0,
+    min_co_occurrence: int = 1,
 ) -> dict[str, list[dict[str, Any]]]:
     """Extract noun-phrase graph and stream results to output tables."""
     extracted_nodes, extracted_edges = await build_noun_graph(
@@ -72,6 +78,8 @@ async def extract_graph_nlp(
         text_analyzer=text_analyzer,
         normalize_edge_weights=normalize_edge_weights,
         cache=cache,
+        max_entities_per_chunk=max_entities_per_chunk,
+        min_co_occurrence=min_co_occurrence,
     )
 
     if len(extracted_nodes) == 0:
diff --git a/tests/unit/config/utils.py b/tests/unit/config/utils.py
index ae898d456..8625038b0 100644
--- a/tests/unit/config/utils.py
+++ b/tests/unit/config/utils.py
@@ -213,6 +213,8 @@ def assert_extract_graph_nlp_configs(
     assert actual.normalize_edge_weights == expected.normalize_edge_weights
     assert_text_analyzer_configs(actual.text_analyzer, expected.text_analyzer)
     assert actual.concurrent_requests == expected.concurrent_requests
+    assert actual.max_entities_per_chunk == expected.max_entities_per_chunk
+    assert actual.min_co_occurrence == expected.min_co_occurrence
 
 
 def assert_prune_graph_configs(
diff --git a/tests/unit/indexing/operations/test_build_noun_graph.py b/tests/unit/indexing/operations/test_build_noun_graph.py
new file mode 100644
index 000000000..e416e0128
--- /dev/null
+++ b/tests/unit/indexing/operations/test_build_noun_graph.py
@@ -0,0 +1,266 @@
+# Copyright (C) 2026 Microsoft Corporation.
+# Licensed under the MIT License
+
+"""Tests for build_noun_graph edge extraction with Top-K and co-occurrence filters.
+
+Validates that _extract_edges correctly applies max_entities_per_chunk (Top-K)
+and min_co_occurrence filters to control the O(N^2) co-occurrence edge explosion
+in entity-dense corpora.
+"""
+
+import pandas as pd
+
+from graphrag.index.operations.build_noun_graph.build_noun_graph import (
+    _extract_edges,
+)
+
+
+def _make_nodes_df(title_to_ids: dict[str, list[str]]) -> pd.DataFrame:
+    """Create the nodes frame that matches a title -> text-unit-ids mapping."""
+    # One row per title, in mapping order; frequency is the number of text
+    # unit ids listed for that title.
+    rows = [
+        {"title": title, "frequency": len(tu_ids), "text_unit_ids": tu_ids}
+        for title, tu_ids in title_to_ids.items()
+    ]
+    return pd.DataFrame(rows, columns=["title", "frequency", "text_unit_ids"])
+
+
+class TestExtractEdgesDefaults:
+    """Edge extraction with both filters left at their disabled defaults."""
+
+    def test_empty_input(self):
+        """No titles at all gives an empty frame that keeps the edge schema."""
+        result = _extract_edges({}, pd.DataFrame(), normalize_edge_weights=False)
+        assert result.shape[0] == 0
+        assert list(result.columns) == ["source", "target", "weight", "text_unit_ids"]
+
+    def test_single_entity_no_edges(self):
+        """One entity on its own has nothing to pair with."""
+        mapping = {"alpha": ["tu1", "tu2"]}
+        result = _extract_edges(
+            mapping, _make_nodes_df(mapping), normalize_edge_weights=False
+        )
+        assert result.shape[0] == 0
+
+    def test_two_entities_one_chunk(self):
+        """A chunk holding two entities yields a single edge of weight 1."""
+        mapping = {"alpha": ["tu1"], "beta": ["tu1"]}
+        result = _extract_edges(
+            mapping, _make_nodes_df(mapping), normalize_edge_weights=False
+        )
+        assert result.shape[0] == 1
+        assert result.iloc[0]["weight"] == 1
+
+    def test_co_occurrence_weight(self):
+        """The edge weight counts the text units the two entities share."""
+        shared_units = ["tu1", "tu2", "tu3"]
+        mapping = {"alpha": list(shared_units), "beta": list(shared_units)}
+        result = _extract_edges(
+            mapping, _make_nodes_df(mapping), normalize_edge_weights=False
+        )
+        assert result.shape[0] == 1
+        assert result.iloc[0]["weight"] == 3
+
+    def test_all_pairs_generated(self):
+        """Every pair among N entities of one chunk becomes an edge: C(N,2)."""
+        mapping = {name: ["tu1"] for name in "abcd"}
+        result = _extract_edges(
+            mapping, _make_nodes_df(mapping), normalize_edge_weights=False
+        )
+        # Four entities in the same chunk -> C(4,2) = 6 pairs.
+        assert result.shape[0] == 6
+
+
+class TestMaxEntitiesPerChunk:
+    """Per-text-unit Top-K selection driven by ``max_entities_per_chunk``."""
+
+    @staticmethod
+    def _edges(mapping: dict[str, list[str]], top_k: int) -> pd.DataFrame:
+        """Extract raw-weight edges from *mapping* with a Top-K cap.
+
+        Args:
+            mapping: Entity title -> ids of the text units that contain it.
+            top_k: Value forwarded as ``max_entities_per_chunk``.
+
+        Returns:
+            The edges frame returned by ``_extract_edges``.
+        """
+        return _extract_edges(
+            mapping,
+            _make_nodes_df(mapping),
+            normalize_edge_weights=False,
+            max_entities_per_chunk=top_k,
+        )
+
+    def test_disabled_when_zero(self):
+        """A cap of 0, the default, switches the filter off.
+
+        Five entities share tu1 and every one of them is paired.
+        """
+        mapping = {name: ["tu1"] for name in "abcde"}
+        result = self._edges(mapping, top_k=0)
+        # C(5,2) = 10
+        assert result.shape[0] == 10
+
+    def test_caps_entities_per_chunk(self):
+        """A chunk over the cap pairs only its K most frequent entities.
+
+        All five entities share tu1; a and b also occur in tu2 and tu3, so
+        the global frequencies are a=3, b=3, c=1, d=1, e=1.
+        """
+        frequent_units = ["tu1", "tu2", "tu3"]
+        mapping = {
+            "a": list(frequent_units),
+            "b": list(frequent_units),
+            "c": ["tu1"],
+            "d": ["tu1"],
+            "e": ["tu1"],
+        }
+        result = self._edges(mapping, top_k=2)
+        # Only a and b get through the top-2 cut -> C(2,2) = 1 edge.
+        assert result.shape[0] == 1
+        only_edge = result.iloc[0]
+        assert {only_edge["source"], only_edge["target"]} == {"a", "b"}
+
+    def test_no_effect_when_below_limit(self):
+        """A chunk with fewer entities than K is left untouched.
+
+        tu1 holds two entities while K is 10.
+        """
+        mapping = {"a": ["tu1"], "b": ["tu1"]}
+        result = self._edges(mapping, top_k=10)
+        # Both entities stay, giving the single a-b edge.
+        assert result.shape[0] == 1
+
+    def test_selects_by_global_frequency(self):
+        """Ranking uses corpus-wide frequency, not the count inside the chunk.
+
+        tu1 holds a, b, c and d once each. Globally a=5, b=4, c=1, d=1, so
+        the two entities kept for tu1 are a and b.
+        """
+        mapping = {
+            "a": ["tu1", "tu2", "tu3", "tu4", "tu5"],
+            "b": ["tu1", "tu2", "tu3", "tu4"],
+            "c": ["tu1"],
+            "d": ["tu1"],
+        }
+        result = self._edges(mapping, top_k=2)
+        # a-b is built in tu1..tu4 but is still one edge.
+        assert result.shape[0] == 1
+        only_edge = result.iloc[0]
+        assert {only_edge["source"], only_edge["target"]} == {"a", "b"}
+
+    def test_reduces_quadratic_explosion(self):
+        """Capping a dense chunk cuts the edge count from C(N,2) to C(K,2).
+
+        Twenty entities named A..T all sit in tu1.
+        """
+        letters = [chr(ord("A") + offset) for offset in range(20)]
+        mapping = {letter: ["tu1"] for letter in letters}
+
+        uncapped = self._edges(mapping, top_k=0)
+        capped = self._edges(mapping, top_k=5)
+        # Without a cap: C(20,2) = 190 edges.
+        # With K = 5:    C(5,2)  = 10 edges.
+        assert uncapped.shape[0] == 190
+        assert capped.shape[0] == 10
+
+
+class TestMinCoOccurrence:
+    """Dropping edges seen in fewer than ``min_co_occurrence`` text units."""
+
+    def test_default_keeps_all(self):
+        """A threshold of 1, the default, lets every edge through."""
+        mapping = {"a": ["tu1"], "b": ["tu1"]}
+        nodes_df = _make_nodes_df(mapping)
+        result = _extract_edges(
+            mapping, nodes_df, normalize_edge_weights=False, min_co_occurrence=1
+        )
+        assert result.shape[0] == 1
+
+    def test_filters_low_co_occurrence(self):
+        """Only pairs sharing at least ``min_co_occurrence`` chunks survive."""
+        # a and b share tu1 and tu2; c joins them in tu1 only.
+        shared_units = ["tu1", "tu2"]
+        mapping = {"a": list(shared_units), "b": list(shared_units), "c": ["tu1"]}
+        nodes_df = _make_nodes_df(mapping)
+        result = _extract_edges(
+            mapping, nodes_df, normalize_edge_weights=False, min_co_occurrence=2
+        )
+        # a-b: tu1 and tu2 -> weight 2, kept
+        # a-c: tu1 only    -> weight 1, dropped
+        # b-c: tu1 only    -> weight 1, dropped
+        assert result.shape[0] == 1
+        survivor = result.iloc[0]
+        assert {survivor["source"], survivor["target"]} == {"a", "b"}
+
+    def test_removes_all_when_threshold_too_high(self):
+        """A threshold above every edge weight leaves no edges at all."""
+        mapping = {"a": ["tu1"], "b": ["tu1"]}
+        nodes_df = _make_nodes_df(mapping)
+        result = _extract_edges(
+            mapping, nodes_df, normalize_edge_weights=False, min_co_occurrence=5
+        )
+        # The only edge, a-b, has weight 1, which is below 5.
+        assert result.shape[0] == 0
+
+
+class TestCombinedFilters:
+    """Top-K and co-occurrence filters work together."""
+
+    def test_both_filters_applied(self):
+        """Each filter removes edges that the other one alone would keep."""
+        # Global freq: a=5, b=4, c=3, d=2, e=1, f=1
+        title_to_ids = {
+            "a": ["tu1", "tu2", "tu3", "tu4", "tu5"],
+            "b": ["tu1", "tu2", "tu3", "tu4"],
+            "c": ["tu1", "tu2", "tu3"],
+            "d": ["tu1", "tu2"],
+            "e": ["tu1"],
+            "f": ["tu4"],
+        }
+        nodes = _make_nodes_df(title_to_ids)
+        edges = _extract_edges(
+            title_to_ids,
+            nodes,
+            normalize_edge_weights=False,
+            max_entities_per_chunk=3,
+            min_co_occurrence=2,
+        )
+        # tu1 {a,b,c,d,e} and tu2 {a,b,c,d} exceed K=3 and are cut to a, b, c,
+        # so a-d, b-d and c-d (2 shared chunks each) are removed by Top-K only.
+        # tu4 {a,b,f} is within K=3, so a-f and b-f are built with weight 1
+        # and are removed by min_co_occurrence only.
+        # Survivors: a-b (tu1..tu4), a-c (tu1..tu3), b-c (tu1..tu3).
+        weights = {
+            frozenset((row.source, row.target)): row.weight
+            for row in edges.itertuples()
+        }
+        assert weights == {
+            frozenset(("a", "b")): 4,
+            frozenset(("a", "c")): 3,
+            frozenset(("b", "c")): 3,
+        }
+
+    def test_backward_compatible_defaults(self):
+        """Omitting the filter arguments equals passing the disabled values."""
+        title_to_ids = {"x": ["tu1", "tu2"], "y": ["tu1"], "z": ["tu2"]}
+        nodes = _make_nodes_df(title_to_ids)
+
+        edges_default = _extract_edges(
+            title_to_ids, nodes, normalize_edge_weights=False
+        )
+        edges_explicit = _extract_edges(
+            title_to_ids,
+            nodes,
+            normalize_edge_weights=False,
+            max_entities_per_chunk=0,
+            min_co_occurrence=1,
+        )
+        # Unfiltered: x-y from tu1 and x-z from tu2.
+        expected = {frozenset(("x", "y")), frozenset(("x", "z"))}
+        for frame in (edges_default, edges_explicit):
+            pairs = {frozenset(p) for p in zip(frame["source"], frame["target"])}
+            assert len(frame) == len(expected)
+            assert pairs == expected