Skip to content

Commit 899c9bd

Browse files
committed
Altered Degree Analysis script
1 parent 7faab6b commit 899c9bd

File tree

2 files changed: 80 additions and 84 deletions.

Graph Analysis/Degree_Analysis/degree_analysis_to_md.py

Lines changed: 25 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
import requests
33
import networkx as nx
44
from itertools import combinations
5-
from collections import Counter
65
from datetime import datetime
7-
import statistics
86

97

108
def load_json_remote(url):
@@ -33,7 +31,7 @@ def find_field_combinations(obj):
3331

3432

3533
def build_field_graph(data):
36-
"""Build a graph where nodes = field names and edges = co-occurrence in same object."""
34+
"""Build a graph where nodes = field names and edges = co-occurrence in the same object."""
3735
G = nx.Graph()
3836
cooccurrence_sets = find_field_combinations(data)
3937

@@ -49,85 +47,31 @@ def build_field_graph(data):
4947

5048

5149
def degree_analysis(G):
52-
"""Compute degree metrics for the graph."""
53-
degree_dict = dict(G.degree())
54-
degree_counts = Counter(degree_dict.values())
55-
return degree_dict, degree_counts
56-
57-
58-
def interpret_degree_results(degree_dict):
59-
"""Generate a narrative interpretation of the degree analysis."""
60-
if not degree_dict:
61-
return "No fields found to analyze."
62-
63-
degrees = list(degree_dict.values())
64-
avg_deg = statistics.mean(degrees)
65-
max_deg = max(degrees)
66-
min_deg = min(degrees)
67-
68-
sorted_fields = sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)
69-
n = len(sorted_fields)
70-
top_20 = sorted_fields[: max(1, n // 5)]
71-
bottom_20 = sorted_fields[-max(1, n // 5) :]
72-
73-
core_fields = [f"{name} ({deg})" for name, deg in top_20]
74-
peripheral_fields = [f"{name} ({deg})" for name, deg in bottom_20]
75-
76-
text = [
77-
"## Interpretation of Degree Results\n",
78-
"The **degree** of a field represents how many other fields it co-occurs with "
79-
"across the dataset. Fields with high degree are more central — they tend to appear "
80-
"together with many other fields, indicating they may be *core schema components*. "
81-
"Fields with low degree are more isolated or specialized.\n",
82-
f"- **Average degree:** {avg_deg:.2f}\n"
83-
f"- **Maximum degree:** {max_deg}\n"
84-
f"- **Minimum degree:** {min_deg}\n\n",
85-
"### Core (Highly Connected) Fields\n",
86-
", ".join(core_fields) + "\n\n",
87-
"### Peripheral (Low-Connectivity) Fields\n",
88-
", ".join(peripheral_fields) + "\n\n",
89-
"_Interpretation:_\n",
90-
"The core fields likely represent the fundamental metadata elements that occur in nearly every record "
91-
"(e.g., identifiers, titles, timestamps). The peripheral fields may represent optional or contextual data "
92-
"used only in specific cases or submodules of the schema."
93-
]
94-
95-
return "\n".join(text)
96-
97-
98-
def write_markdown_report(degree_dict, degree_counts, output_file):
99-
"""Write results to a Markdown file, including interpretation."""
50+
"""Compute degree of each field in the graph."""
51+
return dict(G.degree())
52+
53+
54+
def write_markdown_report(degree_dict, output_file):
55+
"""Write the degree results to a Markdown report."""
10056
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
57+
10158
with open(output_file, "w", encoding="utf-8") as f:
102-
f.write(f"# JSON Field Degree Analysis Report\n")
59+
f.write(f"# JSON Field Degree Report\n")
10360
f.write(f"**Generated on:** {timestamp}\n\n")
10461

105-
# Summary
106-
f.write("## Summary Statistics\n")
107-
f.write(f"- Total Unique Fields: {len(degree_dict)}\n")
108-
f.write(f"- Maximum Degree: {max(degree_dict.values()) if degree_dict else 0}\n")
109-
f.write(f"- Minimum Degree: {min(degree_dict.values()) if degree_dict else 0}\n\n")
110-
111-
# Top fields
112-
f.write("## Top 15 JSON Fields by Degree\n")
113-
f.write("| Rank | Field Name | Degree |\n|------|-------------|---------|\n")
114-
for i, (field, deg) in enumerate(sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[:15], 1):
115-
f.write(f"| {i} | {field} | {deg} |\n")
116-
f.write("\n")
62+
f.write(f"## Summary\n")
63+
f.write(f"- Total unique fields: {len(degree_dict)}\n")
64+
f.write(f"- Maximum degree: {max(degree_dict.values()) if degree_dict else 0}\n")
65+
f.write(f"- Minimum degree: {min(degree_dict.values()) if degree_dict else 0}\n\n")
11766

118-
# Degree distribution
119-
f.write("## Degree Distribution\n")
120-
f.write("| Degree | Count of Fields |\n|---------|-----------------|\n")
121-
for degree, count in sorted(degree_counts.items()):
122-
f.write(f"| {degree} | {count} |\n")
123-
f.write("\n")
67+
f.write("## Field Degrees (sorted by degree)\n")
68+
f.write("| Rank | Field Name | Degree |\n")
69+
f.write("|------|-------------|---------|\n")
12470

125-
# Interpretation
126-
interpretation = interpret_degree_results(degree_dict)
127-
f.write(interpretation)
128-
f.write("\n")
71+
for i, (field, deg) in enumerate(sorted(degree_dict.items(), key=lambda x: x[1], reverse=True), start=1):
72+
f.write(f"| {i} | {field} | {deg} |\n")
12973

130-
print(f"✅ Markdown report with interpretation saved to: {output_file}")
74+
print(f"✅ Markdown report saved to: {output_file}")
13175

13276

13377
def main():
@@ -136,27 +80,24 @@ def main():
13680
"SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
13781
"Meeting-Summaries/2025/meeting-summaries-array.json"
13882
)
139-
output_file = "degree_analysis_by_field_with_interpretation.md"
83+
output_file = "json_field_degree_report.md"
14084

14185
print("📡 Fetching JSON from remote source...")
14286
data = load_json_remote(url)
14387
print("✅ JSON file downloaded.")
14488

14589
print("🔍 Building field co-occurrence graph...")
14690
G = build_field_graph(data)
91+
print(f"📊 Built graph with {len(G.nodes)} fields and {len(G.edges)} relationships.\n")
14792

148-
if len(G.nodes) == 0:
149-
print("⚠️ No JSON field structure detected.")
150-
return
151-
152-
print(f"📊 Built graph with {len(G.nodes)} fields and {len(G.edges)} relationships.")
153-
154-
degree_dict, degree_counts = degree_analysis(G)
155-
write_markdown_report(degree_dict, degree_counts, output_file)
93+
degree_dict = degree_analysis(G)
94+
write_markdown_report(degree_dict, output_file)
15695

15796

15897
if __name__ == "__main__":
15998
main()
16099

161100

162101

102+
103+
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# JSON Field Degree Report
2+
**Generated on:** 2025-10-15 09:38:46
3+
4+
## Summary
5+
- Total unique fields: 44
6+
- Maximum degree: 11
7+
- Minimum degree: 1
8+
9+
## Field Degrees (sorted by degree)
10+
| Rank | Field Name | Degree |
11+
|------|-------------|---------|
12+
| 1 | host | 11 |
13+
| 2 | purpose | 11 |
14+
| 3 | typeOfMeeting | 11 |
15+
| 4 | documenter | 11 |
16+
| 5 | date | 11 |
17+
| 6 | peoplePresent | 11 |
18+
| 7 | workingDocs | 11 |
19+
| 8 | status | 11 |
20+
| 9 | meetingVideoLink | 10 |
21+
| 10 | agendaItems | 9 |
22+
| 11 | workgroup_id | 9 |
23+
| 12 | meetingInfo | 9 |
24+
| 13 | canceledSummary | 9 |
25+
| 14 | tags | 9 |
26+
| 15 | type | 9 |
27+
| 16 | workgroup | 9 |
28+
| 17 | noSummaryGiven | 9 |
29+
| 18 | canceledSummaryText | 9 |
30+
| 19 | noSummaryGivenText | 9 |
31+
| 20 | otherMediaLink | 9 |
32+
| 21 | miroBoardLink | 9 |
33+
| 22 | timestampedVideo | 8 |
34+
| 23 | decisionItems | 8 |
35+
| 24 | actionItems | 8 |
36+
| 25 | discussionPoints | 7 |
37+
| 26 | mediaLink | 7 |
38+
| 27 | meetingTopics | 5 |
39+
| 28 | narrative | 4 |
40+
| 29 | discussion | 4 |
41+
| 30 | agenda | 4 |
42+
| 31 | gameRules | 4 |
43+
| 32 | assignee | 3 |
44+
| 33 | text | 3 |
45+
| 34 | dueDate | 3 |
46+
| 35 | effect | 3 |
47+
| 36 | decision | 3 |
48+
| 37 | rationale | 3 |
49+
| 38 | topicsCovered | 3 |
50+
| 39 | emotions | 3 |
51+
| 40 | opposing | 3 |
52+
| 41 | other | 2 |
53+
| 42 | gamesPlayed | 2 |
54+
| 43 | title | 1 |
55+
| 44 | link | 1 |

0 commit comments

Comments
 (0)