Skip to content

Commit 1e24f93

Browse files
committed
Create Centrality Analysis script
1 parent a4de7d1 commit 1e24f93

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import json
2+
import requests
3+
import networkx as nx
4+
from itertools import combinations
5+
from datetime import datetime
6+
import statistics
7+
import os
8+
9+
10+
def load_json_remote(url, timeout=30):
    """Download *url* and return its body decoded as JSON.

    Args:
        url: Address of the JSON document to fetch.
        timeout: Seconds to wait on the server before giving up. Requests
            applies no timeout unless one is passed, so without this a
            stalled connection would block the script indefinitely.

    Returns:
        The decoded JSON payload, as produced by ``Response.json()``.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status()`` when the server
            answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
15+
16+
17+
def find_field_combinations(obj):
    """Collect the key sets of every multi-key dict nested inside *obj*.

    Dicts and lists are walked depth-first; any other value contributes
    nothing. A dict's own key set is reported before those found in its
    values, and only when it holds more than one key.

    Returns:
        A list of ``set`` objects, one per qualifying dict, in traversal order.
    """
    if isinstance(obj, dict):
        field_names = set(obj.keys())
        # A single key has nothing to co-occur with, so it is not reported.
        found = [field_names] if len(field_names) > 1 else []
        children = obj.values()
    elif isinstance(obj, list):
        found = []
        children = obj
    else:
        return []

    for child in children:
        found += find_field_combinations(child)
    return found
33+
34+
35+
def build_field_graph(data):
    """Build an undirected, weighted co-occurrence graph of JSON field names.

    Every key set returned by ``find_field_combinations(data)`` contributes
    its fields as nodes and every pair of its fields as an edge. The edge's
    ``weight`` attribute counts how many key sets contained both fields.

    Args:
        data: Decoded JSON structure to analyse.

    Returns:
        An ``nx.Graph`` with one node per field name.
    """
    G = nx.Graph()
    cooccurrence_sets = find_field_combinations(data)

    for field_set in cooccurrence_sets:
        # Set iteration order for strings changes between interpreter runs
        # (hash randomisation). Node insertion order later decides how ties in
        # centrality scores are ranked, so a fixed order keeps the generated
        # report reproducible. key=str keeps mixed key types sortable.
        ordered_fields = sorted(field_set, key=str)

        for field in ordered_fields:
            G.add_node(field)
        for u, v in combinations(ordered_fields, 2):
            if G.has_edge(u, v):
                G[u][v]["weight"] += 1
            else:
                G.add_edge(u, v, weight=1)
    return G
49+
50+
51+
def compute_centrality_measures(G):
    """Compute degree, betweenness, closeness and eigenvector centrality.

    No weight or distance argument is passed to NetworkX, so each measure
    uses the library's default treatment of edge attributes.

    Args:
        G: Graph to analyse.

    Returns:
        A dict with the keys "degree", "betweenness", "closeness" and
        "eigenvector", each mapping node -> score. All four are empty when
        the graph has no nodes. Eigenvector scores are all 0 when the power
        iteration does not converge within 1000 iterations.
    """
    # eigenvector_centrality raises NetworkXPointlessConcept on a graph with
    # no nodes, which happens when the input has no multi-key objects.
    # Return empty results so the report can still be written.
    if len(G.nodes) == 0:
        return {
            "degree": {},
            "betweenness": {},
            "closeness": {},
            "eigenvector": {},
        }

    degree = nx.degree_centrality(G)
    betweenness = nx.betweenness_centrality(G)
    closeness = nx.closeness_centrality(G)
    try:
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # Best-effort fallback: keep the other three measures usable.
        eigenvector = {n: 0 for n in G.nodes()}

    return {
        "degree": degree,
        "betweenness": betweenness,
        "closeness": closeness,
        "eigenvector": eigenvector,
    }
67+
68+
69+
def interpret_centrality(centrality):
    """Render the narrative "Interpretation" section of the report.

    Args:
        centrality: Mapping with the keys "degree", "betweenness",
            "closeness" and "eigenvector", each a dict of field -> score.

    Returns:
        Markdown text: a heading, an introduction, the five highest-scoring
        fields for each measure, and a closing explanation.
    """

    def leaders(scores, limit=5):
        # Highest score first; the sort is stable, so tied fields keep the
        # order in which they appear in the score dict.
        ranked = list(scores.items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ", ".join(f"{name} ({score:.3f})" for name, score in ranked[:limit])

    headings = (
        ("Most Connected Fields (Degree Centrality)", "degree"),
        ("Key Bridge Fields (Betweenness Centrality)", "betweenness"),
        ("Most Accessible Fields (Closeness Centrality)", "closeness"),
        ("Most Influential Fields (Eigenvector Centrality)", "eigenvector"),
    )
    # Look up all four measures before formatting anything, so a missing key
    # is reported up front.
    sections = [(title, centrality[key]) for title, key in headings]

    introduction = (
        "Centrality measures identify the most structurally influential fields within the JSON schema. "
        "These indicate which fields are most central (high degree), which act as connectors (high betweenness), "
        "which can quickly reach others (high closeness), and which connect to other influential fields (high eigenvector).\n\n"
    )
    closing = (
        "_Interpretation:_\n"
        "Fields with **high degree** appear alongside many others — they are structurally core. "
        "Fields with **high betweenness** link otherwise separate parts of the schema and may represent connectors "
        "(e.g., ‘meeting’, ‘summary’, or ‘participants’). "
        "Fields with **high closeness** are well-distributed across the structure, suggesting they can reach or influence many others easily. "
        "Finally, fields with **high eigenvector centrality** are not only connected but connected to other important fields — "
        "these represent high-level schema hubs or critical integration points."
    )

    parts = ["## Interpretation of Centrality Results\n", introduction]
    parts.extend(f"### {title}\n{leaders(scores)}\n\n" for title, scores in sections)
    parts.append(closing)
    return "\n".join(parts)
98+
99+
100+
def write_markdown_report(G, centrality, output_file):
    """Write the centrality table and its interpretation to a Markdown file.

    The report contains a generation timestamp, the node and edge counts of
    *G*, a table of the ten fields with the highest degree centrality (with
    all four scores), and the text from ``interpret_centrality``.

    Args:
        G: Graph whose ``nodes`` and ``edges`` are counted for the summary.
        centrality: Mapping with the keys "degree", "betweenness",
            "closeness" and "eigenvector", each a dict of field -> score.
        output_file: Destination path. A missing parent directory is created.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    degree = centrality["degree"]

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("# JSON Field Centrality Analysis Report\n")
        f.write(f"**Generated on:** {timestamp}\n\n")
        f.write(f"- Total Fields: {len(G.nodes)}\n")
        f.write(f"- Total Relationships: {len(G.edges)}\n\n")

        f.write("## Top 10 Fields by Centrality\n")
        f.write("| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |\n")
        f.write("|------|--------|----------|--------------|-------------|-------------|\n")

        # Rank by degree centrality; the stable sort leaves tied fields in
        # the order they appear in the degree dict.
        top_fields = sorted(degree, key=degree.get, reverse=True)[:10]
        for i, node in enumerate(top_fields, 1):
            f.write(
                f"| {i} | {node} | "
                f"{degree[node]:.3f} | "
                f"{centrality['betweenness'][node]:.3f} | "
                f"{centrality['closeness'][node]:.3f} | "
                f"{centrality['eigenvector'][node]:.3f} |\n"
            )
        f.write("\n")

        interpretation = interpret_centrality(centrality)
        f.write(interpretation)
        f.write("\n")

    print(f"✅ Centrality analysis report saved to: {output_file}")
132+
133+
134+
def main():
    """Run the full pipeline: download, build the graph, score, report.

    Fetches the 2025 meeting-summaries JSON array, builds the field
    co-occurrence graph, computes the centrality measures, and writes the
    Markdown report to ``reports/centrality_analysis_report.md``. Progress
    messages are printed along the way.
    """
    source_url = "/".join(
        (
            "https://raw.githubusercontent.com",
            "SingularityNET-Archive",
            "SingularityNET-Archive",
            "refs/heads/main",
            "Data",
            "Snet-Ambassador-Program",
            "Meeting-Summaries",
            "2025",
            "meeting-summaries-array.json",
        )
    )
    report_dir = "reports"
    os.makedirs(report_dir, exist_ok=True)
    report_path = os.path.join(report_dir, "centrality_analysis_report.md")

    print("📡 Fetching JSON data...")
    payload = load_json_remote(source_url)
    print("✅ JSON file downloaded.")

    print("🔍 Building field co-occurrence graph...")
    graph = build_field_graph(payload)
    field_count, link_count = len(graph.nodes), len(graph.edges)
    print(f"📊 Graph contains {field_count} fields and {link_count} relationships.")

    print("📈 Computing centrality measures...")
    scores = compute_centrality_measures(graph)

    write_markdown_report(graph, scores, report_path)


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# JSON Field Centrality Analysis Report
2+
**Generated on:** 2025-10-15 08:28:29
3+
4+
- Total Fields: 44
5+
- Total Relationships: 149
6+
7+
## Top 10 Fields by Centrality
8+
| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |
9+
|------|--------|----------|--------------|-------------|-------------|
10+
| 1 | documenter | 0.256 | 0.001 | 0.256 | 0.309 |
11+
| 2 | date | 0.256 | 0.001 | 0.256 | 0.309 |
12+
| 3 | peoplePresent | 0.256 | 0.001 | 0.256 | 0.309 |
13+
| 4 | host | 0.256 | 0.001 | 0.256 | 0.309 |
14+
| 5 | purpose | 0.256 | 0.001 | 0.256 | 0.309 |
15+
| 6 | workingDocs | 0.256 | 0.001 | 0.256 | 0.309 |
16+
| 7 | typeOfMeeting | 0.256 | 0.001 | 0.256 | 0.309 |
17+
| 8 | status | 0.256 | 0.030 | 0.256 | 0.000 |
18+
| 9 | meetingVideoLink | 0.233 | 0.000 | 0.234 | 0.290 |
19+
| 10 | type | 0.209 | 0.000 | 0.209 | 0.000 |
20+
21+
## Interpretation of Centrality Results
22+
23+
Centrality measures identify the most structurally influential fields within the JSON schema. These indicate which fields are most central (high degree), which act as connectors (high betweenness), which can quickly reach others (high closeness), and which connect to other influential fields (high eigenvector).
24+
25+
26+
### Most Connected Fields (Degree Centrality)
27+
documenter (0.256), date (0.256), peoplePresent (0.256), host (0.256), purpose (0.256)
28+
29+
30+
### Key Bridge Fields (Betweenness Centrality)
31+
status (0.030), actionItems (0.003), decisionItems (0.003), discussionPoints (0.002), documenter (0.001)
32+
33+
34+
### Most Accessible Fields (Closeness Centrality)
35+
documenter (0.256), date (0.256), peoplePresent (0.256), host (0.256), purpose (0.256)
36+
37+
38+
### Most Influential Fields (Eigenvector Centrality)
39+
documenter (0.309), date (0.309), peoplePresent (0.309), host (0.309), purpose (0.309)
40+
41+
42+
_Interpretation:_
43+
Fields with **high degree** appear alongside many others — they are structurally core. Fields with **high betweenness** link otherwise separate parts of the schema and may represent connectors (e.g., ‘meeting’, ‘summary’, or ‘participants’). Fields with **high closeness** are well-distributed across the structure, suggesting they can reach or influence many others easily. Finally, fields with **high eigenvector centrality** are not only connected but connected to other important fields — these represent high-level schema hubs or critical integration points.

0 commit comments

Comments
 (0)