Skip to content

Commit 671c50f

Browse files
committed
Update with JSON Field Degree Analysis Report Script
1 parent 4b2658e commit 671c50f

File tree

3 files changed

+128
-44
lines changed

3 files changed

+128
-44
lines changed

Graph Analysis/degree_analysis_to_md.py

Lines changed: 50 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,110 +5,116 @@
55
from collections import Counter
66
from datetime import datetime
77

8+
89
def load_json_remote(url, timeout=30):
    """Load JSON data from a remote URL.

    Args:
        url: Address of the JSON document to download.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, so a
            stalled connection would hang the whole script.

    Returns:
        The decoded JSON payload, as produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
1314

14-
def find_field_combinations(obj):
    """Collect the sets of field (key) names that occur together in objects.

    The JSON-like structure is walked depth-first in document order. Every
    dict with at least two keys contributes one set holding its key names.
    Dicts with fewer than two keys contribute nothing themselves, but their
    values are still traversed. Scalars are ignored.

    An explicit stack is used instead of recursion, so deeply nested input
    cannot hit the interpreter's recursion limit and partial result lists
    are not re-copied at every nesting level.

    Args:
        obj: Any combination of dicts, lists and scalar values.

    Returns:
        A list of sets, one per multi-key dict, in pre-order.
    """
    cooccurrences = []
    pending = [obj]

    while pending:
        current = pending.pop()

        if isinstance(current, dict):
            if len(current) > 1:
                cooccurrences.append(set(current))
            children = list(current.values())
        elif isinstance(current, list):
            children = current
        else:
            continue

        # Push in reverse so the first child is popped (visited) first,
        # which keeps the output in the same order as a recursive walk.
        pending.extend(reversed(children))

    return cooccurrences
3336

34-
def build_field_graph(meetings):
    """Build a co-occurrence graph where nodes are JSON field names.

    Two fields are connected when they appear as keys of the same object.
    The edge attribute ``weight`` counts how many objects contain both.

    Args:
        meetings: Decoded JSON data (any nesting of dicts and lists) that is
            passed to ``find_field_combinations``.

    Returns:
        An undirected ``networkx.Graph`` of field names.
    """
    G = nx.Graph()

    for field_set in find_field_combinations(meetings):
        # Sets of strings iterate in a hash-seed-dependent order. Sorting
        # makes node and edge insertion order, and anything derived from it
        # such as tie order in reports, reproducible between runs.
        fields = sorted(field_set)

        for field in fields:
            G.add_node(field)

        for u, v in combinations(fields, 2):
            if G.has_edge(u, v):
                G[u][v]["weight"] += 1
            else:
                G.add_edge(u, v, weight=1)

    return G
5754

55+
5856
def degree_analysis(G, weight=None):
    """Compute degree metrics for the graph.

    Args:
        G: Graph object exposing ``degree()``, which yields
            ``(node, degree)`` pairs.
        weight: Optional edge-attribute name forwarded to ``G.degree``.
            The default ``None`` gives the plain neighbour count, exactly
            as before. Passing e.g. ``"weight"`` sums that edge attribute
            instead.

    Returns:
        A ``(degree_dict, degree_counts)`` tuple. ``degree_dict`` maps each
        node to its degree. ``degree_counts`` is a ``Counter`` mapping each
        degree value to the number of nodes that have it.
    """
    degree_dict = dict(G.degree(weight=weight))
    degree_counts = Counter(degree_dict.values())
    return degree_dict, degree_counts
6361

62+
6463
def write_markdown_report(degree_dict, degree_counts, output_file, top_n=15):
    """Write results to a Markdown file.

    The report has a title with a generation timestamp, summary statistics,
    a ranking table of the highest-degree fields and a degree-distribution
    table.

    Args:
        degree_dict: Mapping of field name to degree.
        degree_counts: Mapping of degree value to number of fields.
        output_file: Path of the Markdown file to create or overwrite.
        top_n: Number of rows in the ranking table (default 15).
    """

    def _cell(value):
        # A raw "|" would end the table cell early and a newline would end
        # the row, so neutralise both before the value goes into a table.
        return str(value).replace("|", "\\|").replace("\n", " ")

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    degrees = degree_dict.values()

    # Highest degree first. Ties are ordered by name so the ranking does not
    # depend on the insertion order of the mapping.
    ranked = sorted(
        degree_dict.items(),
        key=lambda item: (-item[1], str(item[0])),
    )[:top_n]

    lines = [
        "# JSON Field Degree Analysis Report\n",
        f"**Generated on:** {timestamp}\n\n",
        # Summary
        "## Summary Statistics\n",
        f"- Total Unique Fields: {len(degree_dict)}\n",
        f"- Maximum Degree: {max(degrees, default=0)}\n",
        f"- Minimum Degree: {min(degrees, default=0)}\n\n",
        # Top fields by degree
        f"## Top {top_n} JSON Fields by Degree\n",
        "| Rank | Field Name | Degree |\n|------|-------------|---------|\n",
    ]
    lines.extend(
        f"| {rank} | {_cell(field)} | {deg} |\n"
        for rank, (field, deg) in enumerate(ranked, 1)
    )
    lines.append("\n")

    # Degree distribution
    lines.append("## Degree Distribution\n")
    lines.append("| Degree | Count of Fields |\n|---------|-----------------|\n")
    lines.extend(
        f"| {degree} | {count} |\n"
        for degree, count in sorted(degree_counts.items())
    )

    # One batched write instead of many small ones.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(lines)

    print(f"✅ Markdown report saved to: {output_file}")
9190

92-
# Default data source: the 2025 meeting-summaries array in the archive repo.
DEFAULT_SOURCE_URL = (
    "https://raw.githubusercontent.com/SingularityNET-Archive/"
    "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
    "Meeting-Summaries/2025/meeting-summaries-array.json"
)
# Default location of the generated Markdown report.
DEFAULT_OUTPUT_FILE = "degree_analysis_by_field.md"


def main(url=DEFAULT_SOURCE_URL, output_file=DEFAULT_OUTPUT_FILE):
    """Download the JSON data, build the field graph and write the report.

    Args:
        url: Location of the JSON document to analyse. Defaults to the
            2025 meeting-summaries array.
        output_file: Path of the Markdown report to write.
    """
    print("📡 Fetching JSON from remote source...")
    data = load_json_remote(url)
    print("✅ JSON file downloaded.")

    print("🔍 Building field co-occurrence graph...")
    G = build_field_graph(data)

    # Nothing to report when no object had two or more keys.
    if len(G.nodes) == 0:
        print("⚠️ No JSON field structure detected.")
        return

    print(f"📊 Built graph with {len(G.nodes)} fields and {len(G.edges)} relationships.")

    degree_dict, degree_counts = degree_analysis(G)
    write_markdown_report(degree_dict, degree_counts, output_file)


if __name__ == "__main__":
    main()
114119

120+

degree_analysis_by_field.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# JSON Field Degree Analysis Report
2+
**Generated on:** 2025-10-14 13:24:26
3+
4+
## Summary Statistics
5+
- Total Unique Fields: 44
6+
- Maximum Degree: 11
7+
- Minimum Degree: 1
8+
9+
## Top 15 JSON Fields by Degree
10+
| Rank | Field Name | Degree |
11+
|------|-------------|---------|
12+
| 1 | host | 11 |
13+
| 2 | date | 11 |
14+
| 3 | workingDocs | 11 |
15+
| 4 | purpose | 11 |
16+
| 5 | typeOfMeeting | 11 |
17+
| 6 | documenter | 11 |
18+
| 7 | peoplePresent | 11 |
19+
| 8 | status | 11 |
20+
| 9 | meetingVideoLink | 10 |
21+
| 10 | agendaItems | 9 |
22+
| 11 | type | 9 |
23+
| 12 | workgroup_id | 9 |
24+
| 13 | tags | 9 |
25+
| 14 | workgroup | 9 |
26+
| 15 | canceledSummary | 9 |
27+
28+
## Degree Distribution
29+
| Degree | Count of Fields |
30+
|---------|-----------------|
31+
| 1 | 2 |
32+
| 2 | 2 |
33+
| 3 | 9 |
34+
| 4 | 4 |
35+
| 5 | 1 |
36+
| 7 | 2 |
37+
| 8 | 3 |
38+
| 9 | 12 |
39+
| 10 | 1 |
40+
| 11 | 8 |

degree_analysis_by_name.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Degree Analysis Report (Grouped by Node Name)
2+
**Generated on:** 2025-10-14 13:21:08
3+
4+
## Summary Statistics
5+
- Total Unique Names: 418
6+
- Maximum Degree: 18
7+
- Minimum Degree: 1
8+
9+
## Top 10 Nodes by Degree (Grouped by Name)
10+
| Rank | Node Name | Aggregated Degree |
11+
|------|------------|------------------|
12+
| 1 | reviewed gitbook, shared update on the [getting started page] (https://ambassadorss-organization.gitbook.io/knowledge-base) | 18 |
13+
| 2 | all categories are updated with new information | 18 |
14+
| 3 | takes effort to find relevant information | 18 |
15+
| 4 | we went over the progress report, line by line | 18 |
16+
| 5 | [insight](https://docs.google.com/document/d/1x1jc3op5i4dgkthxhsyzwf-msakn8lzarh5zvpxg_1a/) into how odin will test the establishing proposal this summer and how they do the minimal requirements proposal process to create collaboration environments | 18 |
17+
| 6 | we established the initial q3 monthly roadmap and base expectations of what we want to achieve in sessions. | 18 |
18+
| 7 | it's time-consuming to learn about the document context and understand its current relevance for gitbook | 18 |
19+
| 8 | [knowledge base wg q2 2025 report](https://docs.google.com/document/d/1zjfx7x31xmxr0fzk41zyerx95lifuauew50cwbfkjkw) | 18 |
20+
| 9 | both advance and tevo liked the organising process | 18 |
21+
| 10 | should we reward new feedback? | 18 |
22+
23+
## Degree Distribution
24+
| Degree | Count of Nodes |
25+
|---------|----------------|
26+
| 1 | 14 |
27+
| 2 | 30 |
28+
| 3 | 56 |
29+
| 4 | 70 |
30+
| 5 | 61 |
31+
| 6 | 48 |
32+
| 7 | 32 |
33+
| 9 | 12 |
34+
| 10 | 22 |
35+
| 11 | 13 |
36+
| 12 | 13 |
37+
| 13 | 28 |
38+
| 18 | 19 |

0 commit comments

Comments
 (0)