Update degree analysis script to report JSON field co-occurrence degrees

stephen-rowan · stephen-rowan · commit 671c50f61065 · 2025-10-14T13:25:30.000+01:00
diff --git a/Graph Analysis/degree_analysis_to_md.py b/Graph Analysis/degree_analysis_to_md.py
@@ -5,110 +5,116 @@
 from collections import Counter
 from datetime import datetime
 
+
 def load_json_remote(url):
     """Load JSON data from a remote URL."""
     response = requests.get(url)
     response.raise_for_status()
     return response.json()
 
-def find_participant_lists(obj):
+
+def find_field_combinations(obj):
     """
-    Recursively find all lists of strings in a JSON-like object.
-    Returns a list of lists (each list being a potential participant group).
+    Recursively extract all field (key) names and record their co-occurrence within objects.
+    Returns a list of sets — each set contains field names that appear together.
     """
-    results = []
+    cooccurrences = []
 
     if isinstance(obj, dict):
+        keys = set(obj.keys())
+        if len(keys) > 1:
+            cooccurrences.append(keys)
+
         for value in obj.values():
-            results.extend(find_participant_lists(value))
+            cooccurrences.extend(find_field_combinations(value))
+
     elif isinstance(obj, list):
-        # Check if this list looks like a participant list (all strings)
-        if all(isinstance(x, str) for x in obj) and len(obj) > 1:
-            results.append(obj)
-        else:
-            for item in obj:
-                results.extend(find_participant_lists(item))
+        for item in obj:
+            cooccurrences.extend(find_field_combinations(item))
 
-    return results
+    return cooccurrences
 
-def build_coattendance_graph(meetings):
-    """Build an undirected co-attendance graph from all detected participant lists."""
-    G = nx.Graph()
 
-    for meeting in meetings:
-        participant_lists = find_participant_lists(meeting)
-        # Merge all string lists found in this record
-        participants = set()
-        for lst in participant_lists:
-            participants.update(lst)
+def build_field_graph(meetings):
+    """Build a co-occurrence graph where nodes are JSON field names."""
+    G = nx.Graph()
 
-        if len(participants) < 2:
-            continue
+    cooccurrence_sets = find_field_combinations(meetings)
 
-        for p in participants:
-            G.add_node(p)
-        for u, v in combinations(participants, 2):
+    for field_set in cooccurrence_sets:
+        for field in field_set:
+            G.add_node(field)
+        for u, v in combinations(field_set, 2):
             if G.has_edge(u, v):
-                G[u][v]['weight'] += 1
+                G[u][v]["weight"] += 1
             else:
                 G.add_edge(u, v, weight=1)
 
     return G
 
+
 def degree_analysis(G):
     """Compute degree metrics for the graph."""
     degree_dict = dict(G.degree())
     degree_counts = Counter(degree_dict.values())
     return degree_dict, degree_counts
 
+
 def write_markdown_report(degree_dict, degree_counts, output_file):
     """Write results to a Markdown file."""
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     with open(output_file, "w", encoding="utf-8") as f:
-        f.write(f"# Degree Analysis Report\n")
+        f.write(f"# JSON Field Degree Analysis Report\n")
         f.write(f"**Generated on:** {timestamp}\n\n")
 
         # Summary
         f.write("## Summary Statistics\n")
-        f.write(f"- Total Nodes: {len(degree_dict)}\n")
+        f.write(f"- Total Unique Fields: {len(degree_dict)}\n")
         f.write(f"- Maximum Degree: {max(degree_dict.values()) if degree_dict else 0}\n")
         f.write(f"- Minimum Degree: {min(degree_dict.values()) if degree_dict else 0}\n\n")
 
-        # Top 10 nodes
-        f.write("## Top 10 Nodes by Degree\n")
-        f.write("| Rank | Node | Degree |\n|------|-------|---------|\n")
-        for i, (node, deg) in enumerate(sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[:10], 1):
-            f.write(f"| {i} | {node} | {deg} |\n")
+        # Top 15 fields by degree
+        f.write("## Top 15 JSON Fields by Degree\n")
+        f.write("| Rank | Field Name | Degree |\n|------|-------------|---------|\n")
+        for i, (field, deg) in enumerate(sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[:15], 1):
+            f.write(f"| {i} | {field} | {deg} |\n")
         f.write("\n")
 
         # Degree distribution
         f.write("## Degree Distribution\n")
-        f.write("| Degree | Count of Nodes |\n|---------|----------------|\n")
+        f.write("| Degree | Count of Fields |\n|---------|-----------------|\n")
         for degree, count in sorted(degree_counts.items()):
             f.write(f"| {degree} | {count} |\n")
 
     print(f"✅ Markdown report saved to: {output_file}")
 
-def main():
-    url = "https://raw.githubusercontent.com/SingularityNET-Archive/SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/Meeting-Summaries/2025/meeting-summaries-array.json"
-    output_file = "degree_analysis_report.md"
 
-    print("📡 Fetching data from remote source...")
+def main():
+    url = (
+        "https://raw.githubusercontent.com/SingularityNET-Archive/"
+        "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
+        "Meeting-Summaries/2025/meeting-summaries-array.json"
+    )
+    output_file = "degree_analysis_by_field.md"
+
+    print("📡 Fetching JSON from remote source...")
     data = load_json_remote(url)
-    print(f"✅ Downloaded {len(data)} meeting records.")
+    print("✅ JSON file downloaded.")
 
-    print("🔍 Detecting participant lists recursively...")
-    G = build_coattendance_graph(data)
+    print("🔍 Building field co-occurrence graph...")
+    G = build_field_graph(data)
 
     if len(G.nodes) == 0:
-        print("⚠️ No participant lists found — please check JSON structure manually.")
+        print("⚠️ No JSON field structure detected.")
         return
 
-    print(f"📊 Built graph with {len(G.nodes)} nodes and {len(G.edges)} edges.")
+    print(f"📊 Built graph with {len(G.nodes)} fields and {len(G.edges)} relationships.")
 
     degree_dict, degree_counts = degree_analysis(G)
     write_markdown_report(degree_dict, degree_counts, output_file)
 
+
 if __name__ == "__main__":
     main()
 
+
diff --git a/degree_analysis_by_field.md b/degree_analysis_by_field.md
@@ -0,0 +1,40 @@
+# JSON Field Degree Analysis Report
+**Generated on:** 2025-10-14 13:24:26
+
+## Summary Statistics
+- Total Unique Fields: 44
+- Maximum Degree: 11
+- Minimum Degree: 1
+
+## Top 15 JSON Fields by Degree
+| Rank | Field Name | Degree |
+|------|-------------|---------|
+| 1 | host | 11 |
+| 2 | date | 11 |
+| 3 | workingDocs | 11 |
+| 4 | purpose | 11 |
+| 5 | typeOfMeeting | 11 |
+| 6 | documenter | 11 |
+| 7 | peoplePresent | 11 |
+| 8 | status | 11 |
+| 9 | meetingVideoLink | 10 |
+| 10 | agendaItems | 9 |
+| 11 | type | 9 |
+| 12 | workgroup_id | 9 |
+| 13 | tags | 9 |
+| 14 | workgroup | 9 |
+| 15 | canceledSummary | 9 |
+
+## Degree Distribution
+| Degree | Count of Fields |
+|---------|-----------------|
+| 1 | 2 |
+| 2 | 2 |
+| 3 | 9 |
+| 4 | 4 |
+| 5 | 1 |
+| 7 | 2 |
+| 8 | 3 |
+| 9 | 12 |
+| 10 | 1 |
+| 11 | 8 |
diff --git a/degree_analysis_by_name.md b/degree_analysis_by_name.md
@@ -0,0 +1,38 @@
+# Degree Analysis Report (Grouped by Node Name)
+**Generated on:** 2025-10-14 13:21:08
+
+## Summary Statistics
+- Total Unique Names: 418
+- Maximum Degree: 18
+- Minimum Degree: 1
+
+## Top 10 Nodes by Degree (Grouped by Name)
+| Rank | Node Name | Aggregated Degree |
+|------|------------|------------------|
+| 1 | reviewed gitbook, shared update on the [getting started page] (https://ambassadorss-organization.gitbook.io/knowledge-base) | 18 |
+| 2 | all categories are updated with new information | 18 |
+| 3 | takes effort to find relevant information | 18 |
+| 4 | we went over the progress report, line by line | 18 |
+| 5 | [insight](https://docs.google.com/document/d/1x1jc3op5i4dgkthxhsyzwf-msakn8lzarh5zvpxg_1a/) into how odin will test the establishing proposal this summer and how they do the minimal requirements proposal process to create collaboration environments | 18 |
+| 6 | we established the initial q3 monthly roadmap and base expectations of what we want to achieve in sessions. | 18 |
+| 7 | it's time-consuming to learn about the document context and understand its current relevance for gitbook | 18 |
+| 8 | [knowledge base wg q2 2025 report](https://docs.google.com/document/d/1zjfx7x31xmxr0fzk41zyerx95lifuauew50cwbfkjkw) | 18 |
+| 9 | both advance and tevo liked the organising process | 18 |
+| 10 | should we reward new feedback? | 18 |
+
+## Degree Distribution
+| Degree | Count of Nodes |
+|---------|----------------|
+| 1 | 14 |
+| 2 | 30 |
+| 3 | 56 |
+| 4 | 70 |
+| 5 | 61 |
+| 6 | 48 |
+| 7 | 32 |
+| 9 | 12 |
+| 10 | 22 |
+| 11 | 13 |
+| 12 | 13 |
+| 13 | 28 |
+| 18 | 19 |