Amend unified analysis report: replace co-attendance degree analysis with JSON field degree analysis

stephen-rowan · stephen-rowan · commit 8d9575c1521b · 2025-10-15T09:44:47.000+01:00
diff --git a/Graph Analysis/unified_analysis.py b/Graph Analysis/unified_analysis.py
@@ -39,58 +39,6 @@ def _truncate_label(text: str, max_len: int = 80) -> str:
     return safe if len(safe) <= max_len else (safe[: max_len - 1] + "…")
 
 
-# ---------------- Degree (Co-attendance) ----------------
-
-def extract_participants(record: Dict[str, Any]) -> List[str]:
-    """Extract likely participants from a meeting record.
-    - peoplePresent: comma-separated string under meetingInfo
-    - host, documenter: added if present (deduped)
-    """
-    participants: List[str] = []
-    meeting_info = {}
-    if isinstance(record, dict):
-        meeting_info = record.get("meetingInfo", {}) or {}
-    # peoplePresent as comma-separated string
-    pp = meeting_info.get("peoplePresent", "")
-    if isinstance(pp, str) and pp.strip():
-        participants.extend([p.strip() for p in pp.split(",") if p.strip()])
-    # host/documenter as single names
-    for key in ("host", "documenter"):
-        val = meeting_info.get(key)
-        if isinstance(val, str) and val.strip():
-            participants.append(val.strip())
-    # dedupe while preserving order
-    seen = set()
-    deduped: List[str] = []
-    for p in participants:
-        if p not in seen:
-            seen.add(p)
-            deduped.append(p)
-    return deduped
-
-
-def build_coattendance_graph(records: Iterable[Any]) -> nx.Graph:
-    G = nx.Graph()
-    for rec in records:
-        participants = extract_participants(rec)
-        if len(participants) < 2:
-            continue
-        for p in participants:
-            G.add_node(p)
-        for u, v in combinations(participants, 2):
-            if G.has_edge(u, v):
-                G[u][v]["weight"] += 1
-            else:
-                G.add_edge(u, v, weight=1)
-    return G
-
-
-def degree_analysis(G: nx.Graph) -> Tuple[Dict[str, int], Counter]:
-    degree_dict = dict(G.degree())
-    degree_counts = Counter(degree_dict.values())
-    return degree_dict, degree_counts
-
-
 # ---------------- JSON Path Structure ----------------
 
 def extract_json_paths(obj: Any, prefix: str = "") -> List[str]:
@@ -137,7 +85,7 @@ def build_path_graph(paths: List[str]) -> nx.DiGraph:
     return G
 
 
-# ---------------- Field Co-occurrence (Centrality, Clustering, Components) ----------------
+# ---------------- Field Co-occurrence (Degree, Centrality, Clustering, Components) ----------------
 
 def find_field_combinations(obj: Any) -> List[set]:
     results: List[set] = []
@@ -167,6 +115,12 @@ def build_field_graph(data: Any) -> nx.Graph:
     return G
 
 
+def field_degree(G: nx.Graph) -> Tuple[Dict[str, int], Counter]:
+    degree_dict = dict(G.degree())
+    degree_counts = Counter(degree_dict.values())
+    return degree_dict, degree_counts
+
+
 def compute_centrality_measures(G: nx.Graph) -> Dict[str, Dict[str, float]]:
     degree = nx.degree_centrality(G) if G.number_of_nodes() else {}
     betweenness = nx.betweenness_centrality(G) if G.number_of_nodes() else {}
@@ -207,9 +161,9 @@ def connected_components_info(G: nx.Graph, top: int) -> Dict[str, Any]:
 def write_report(
     output_file: str,
     summary: Dict[str, Any],
-    degree: Tuple[Dict[str, int], Counter],
-    degree_top: List[Tuple[str, int]],
-    degree_dist: List[Tuple[int, int]],
+    field_deg: Tuple[Dict[str, int], Counter],
+    field_top: List[Tuple[str, int]],
+    field_dist: List[Tuple[int, int]],
     path_info: Dict[str, Any],
     parent_top: List[Tuple[str, int]],
     centrality: Dict[str, Dict[str, float]],
@@ -228,17 +182,17 @@ def write_report(
             f.write(f"- {k}: {v}\n")
         f.write("\n")
 
-        # Degree Analysis
-        f.write("## Degree (Co-attendance) Analysis\n")
-        f.write("### Top Nodes by Degree\n")
-        f.write("| Rank | Node | Degree |\n|------|------|--------|\n")
-        for i, (node, deg) in enumerate(degree_top, 1):
+        # JSON Field Degree Analysis
+        f.write("## JSON Field Degree Analysis\n")
+        f.write("### Top Fields by Degree\n")
+        f.write("| Rank | Field | Degree |\n|------|-------|--------|\n")
+        for i, (node, deg) in enumerate(field_top, 1):
             label = _truncate_label(node, 80)
             f.write(f"| {i} | {label} | {deg} |\n")
         f.write("\n")
         f.write("### Degree Distribution\n")
-        f.write("| Degree | Count of Nodes |\n|--------|-----------------|\n")
-        for d, c in degree_dist:
+        f.write("| Degree | Count of Fields |\n|--------|------------------|\n")
+        for d, c in field_dist:
             f.write(f"| {d} | {c} |\n")
         f.write("\n")
 
@@ -322,13 +276,6 @@ def main() -> None:
     args = parser.parse_args()
 
     data = load_json(args.input)
-    records = ensure_iterable_records(data)
-
-    # Degree / co-attendance graph (participants-only)
-    G_attend = build_coattendance_graph(records)
-    degree_dict, degree_counts = degree_analysis(G_attend)
-    degree_top = sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[: args.limit_top]
-    degree_dist = sorted(degree_counts.items(), key=lambda x: x[0])
 
     # Path analysis
     all_paths = extract_json_paths(data)
@@ -338,15 +285,20 @@ def main() -> None:
 
     # Field co-occurrence graph
     G_fields = build_field_graph(data)
+
+    # Field degree (JSON Field Degree Analysis)
+    fdeg_dict, fdeg_counts = field_degree(G_fields)
+    field_top = sorted(fdeg_dict.items(), key=lambda x: x[1], reverse=True)[: args.limit_top]
+    field_dist = sorted(fdeg_counts.items(), key=lambda x: x[0])
+
+    # Centrality on field graph
     centrality = compute_centrality_measures(G_fields)
 
     # Clustering & components on field graph
     avg_clust, top_clust_nodes = clustering_metrics(G_fields, args.limit_top)
     components = connected_components_info(G_fields, args.limit_top)
 
     summary = {
-        "Co-attendance graph (nodes)": len(G_attend.nodes),
-        "Co-attendance graph (edges)": len(G_attend.edges),
         "Path graph (nodes)": len(G_paths.nodes),
         "Path graph (edges)": len(G_paths.edges),
         "Field graph (nodes)": len(G_fields.nodes),
@@ -356,9 +308,9 @@ def main() -> None:
     write_report(
         output_file=args.output,
         summary=summary,
-        degree=(degree_dict, degree_counts),
-        degree_top=degree_top,
-        degree_dist=degree_dist,
+        field_deg=(fdeg_dict, fdeg_counts),
+        field_top=field_top,
+        field_dist=field_dist,
         path_info=pmetrics,
         parent_top=parent_top,
         centrality=centrality,
diff --git a/reports/unified_analysis_report_fields.md b/reports/unified_analysis_report_fields.md
@@ -0,0 +1,115 @@
+# Unified Graph Analysis Report
+**Generated on:** 2025-10-15 09:44:01
+
+## Summary
+- Path graph (nodes): 6833
+- Path graph (edges): 6832
+- Field graph (nodes): 44
+- Field graph (edges): 149
+
+## JSON Field Degree Analysis
+### Top Fields by Degree
+| Rank | Field | Degree |
+|------|-------|--------|
+| 1 | host | 11 |
+| 2 | typeOfMeeting | 11 |
+| 3 | date | 11 |
+| 4 | documenter | 11 |
+| 5 | workingDocs | 11 |
+| 6 | purpose | 11 |
+| 7 | peoplePresent | 11 |
+| 8 | status | 11 |
+| 9 | meetingVideoLink | 10 |
+| 10 | tags | 9 |
+
+### Degree Distribution
+| Degree | Count of Fields |
+|--------|------------------|
+| 1 | 2 |
+| 2 | 2 |
+| 3 | 9 |
+| 4 | 4 |
+| 5 | 1 |
+| 7 | 2 |
+| 8 | 3 |
+| 9 | 12 |
+| 10 | 1 |
+| 11 | 8 |
+
+## JSON Path Structure Analysis
+- Total Unique Paths: 6832
+- Maximum Depth: 6
+- Average Depth: 4.20
+
+### Deepest JSON Paths (sample)
+- `[0].agendaItems[0].actionItems[0].text`
+- `[0].agendaItems[0].actionItems[0].assignee`
+- `[0].agendaItems[0].actionItems[0].dueDate`
+- `[0].agendaItems[0].actionItems[0].status`
+- `[0].agendaItems[0].decisionItems[0].decision`
+- `[0].agendaItems[0].decisionItems[0].effect`
+- `[0].agendaItems[0].decisionItems[1].decision`
+- `[0].agendaItems[0].decisionItems[1].rationale`
+- `[0].agendaItems[0].decisionItems[1].effect`
+- `[0].agendaItems[0].decisionItems[2].decision`
+
+### Most Common Parent Paths
+| Rank | Parent Path | Count |
+|------|-------------|-------|
+| 1 | `[12].agendaItems[0]` | 26 |
+| 2 | `[2].agendaItems[0]` | 21 |
+| 3 | `[10].agendaItems[0]` | 21 |
+| 4 | `[7].agendaItems[0]` | 19 |
+| 5 | `[17].agendaItems[0]` | 19 |
+| 6 | `[22].meetingInfo` | 19 |
+| 7 | `[23].meetingInfo` | 19 |
+| 8 | `[101].agendaItems[0]` | 19 |
+| 9 | `[11].agendaItems[0]` | 18 |
+| 10 | `[37].agendaItems[0]` | 18 |
+
+## Field Centrality (Co-occurrence)
+| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |
+|------|-------|--------|-------------|-----------|------------|
+| 1 | host | 0.256 | 0.001 | 0.256 | 0.309 |
+| 2 | typeOfMeeting | 0.256 | 0.001 | 0.256 | 0.309 |
+| 3 | date | 0.256 | 0.001 | 0.256 | 0.309 |
+| 4 | documenter | 0.256 | 0.001 | 0.256 | 0.309 |
+| 5 | workingDocs | 0.256 | 0.001 | 0.256 | 0.309 |
+| 6 | purpose | 0.256 | 0.001 | 0.256 | 0.309 |
+| 7 | peoplePresent | 0.256 | 0.001 | 0.256 | 0.309 |
+| 8 | status | 0.256 | 0.030 | 0.256 | 0.000 |
+| 9 | meetingVideoLink | 0.233 | 0.000 | 0.234 | 0.290 |
+| 10 | tags | 0.209 | 0.000 | 0.209 | 0.000 |
+
+## Clustering (Field Co-occurrence Graph)
+- Average Clustering Coefficient: 0.882
+
+### Top Fields by Clustering Coefficient
+| Rank | Field | Clustering |
+|------|-------|------------|
+| 1 | tags | 1.000 |
+| 2 | workgroup_id | 1.000 |
+| 3 | meetingInfo | 1.000 |
+| 4 | workgroup | 1.000 |
+| 5 | noSummaryGiven | 1.000 |
+| 6 | canceledSummary | 1.000 |
+| 7 | type | 1.000 |
+| 8 | agendaItems | 1.000 |
+| 9 | timestampedVideo | 1.000 |
+| 10 | assignee | 1.000 |
+
+## Connected Components (Field Co-occurrence Graph)
+- Number of Components: 6
+- Component Sizes (top 10): [12, 12, 10, 4, 4, 2]
+- Sample of Largest Component Nodes (top 10):
+  - typeOfMeeting
+  - host
+  - mediaLink
+  - documenter
+  - workingDocs
+  - purpose
+  - peoplePresent
+  - miroBoardLink
+  - otherMediaLink
+  - date
+