Skip to content

Commit 17432ae

Browse files
committed
Add interpretation
1 parent 671c50f commit 17432ae

File tree

4 files changed

+119
-14
lines changed

4 files changed

+119
-14
lines changed
File renamed without changes.
File renamed without changes.

Graph Analysis/degree_analysis_to_md.py

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from itertools import combinations
55
from collections import Counter
66
from datetime import datetime
7+
import statistics
78

89

910
def load_json_remote(url):
@@ -14,17 +15,13 @@ def load_json_remote(url):
1415

1516

1617
def find_field_combinations(obj):
17-
"""
18-
Recursively extract all field (key) names and record their co-occurrence within objects.
19-
Returns a list of sets — each set contains field names that appear together.
20-
"""
18+
"""Recursively extract co-occurring field names (keys)."""
2119
cooccurrences = []
2220

2321
if isinstance(obj, dict):
2422
keys = set(obj.keys())
2523
if len(keys) > 1:
2624
cooccurrences.append(keys)
27-
2825
for value in obj.values():
2926
cooccurrences.extend(find_field_combinations(value))
3027

@@ -35,11 +32,10 @@ def find_field_combinations(obj):
3532
return cooccurrences
3633

3734

38-
def build_field_graph(meetings):
39-
"""Build a co-occurrence graph where nodes are JSON field names."""
35+
def build_field_graph(data):
36+
"""Build a graph where nodes = field names and edges = co-occurrence in same object."""
4037
G = nx.Graph()
41-
42-
cooccurrence_sets = find_field_combinations(meetings)
38+
cooccurrence_sets = find_field_combinations(data)
4339

4440
for field_set in cooccurrence_sets:
4541
for field in field_set:
@@ -49,7 +45,6 @@ def build_field_graph(meetings):
4945
G[u][v]["weight"] += 1
5046
else:
5147
G.add_edge(u, v, weight=1)
52-
5348
return G
5449

5550

@@ -60,8 +55,48 @@ def degree_analysis(G):
6055
return degree_dict, degree_counts
6156

6257

58+
def interpret_degree_results(degree_dict):
    """Generate a narrative Markdown interpretation of the degree analysis.

    Args:
        degree_dict: Mapping of field name to its degree (a number).

    Returns:
        A Markdown string containing the average/maximum/minimum degree,
        the most connected ("core") fields, the least connected
        ("peripheral") fields and a short explanatory paragraph.  When
        ``degree_dict`` is empty, the plain sentence
        ``"No fields found to analyze."`` is returned instead.
    """
    if not degree_dict:
        return "No fields found to analyze."

    degrees = list(degree_dict.values())
    avg_deg = statistics.mean(degrees)
    max_deg = max(degrees)
    min_deg = min(degrees)

    # Highest degree first.  sorted() is stable even with reverse=True, so
    # fields with equal degree keep the order they have in degree_dict.
    sorted_fields = sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)
    n = len(sorted_fields)

    # Each group holds one fifth of the fields, but never fewer than one.
    group_size = max(1, n // 5)
    top_20 = sorted_fields[:group_size]
    # Start the bottom group no earlier than where the top group ends, so a
    # field is never listed as both core and peripheral (this only matters
    # when there is a single field; for n >= 2 it equals the last
    # ``group_size`` entries).
    bottom_20 = sorted_fields[max(group_size, n - group_size):]

    core_fields = [f"{name} ({deg})" for name, deg in top_20]
    peripheral_fields = [f"{name} ({deg})" for name, deg in bottom_20]

    text = [
        "## Interpretation of Degree Results\n",
        "The **degree** of a field represents how many other fields it co-occurs with "
        "across the dataset. Fields with high degree are more central — they tend to appear "
        "together with many other fields, indicating they may be *core schema components*. "
        "Fields with low degree are more isolated or specialized.\n",
        # The three bullets are one list element on purpose: adjacent string
        # literals concatenate, which keeps the bullets on consecutive lines.
        f"- **Average degree:** {avg_deg:.2f}\n"
        f"- **Maximum degree:** {max_deg}\n"
        f"- **Minimum degree:** {min_deg}\n\n",
        "### Core (Highly Connected) Fields\n",
        ", ".join(core_fields) + "\n\n",
        "### Peripheral (Low-Connectivity) Fields\n",
        # Placeholder keeps the section from being silently empty when no
        # field is left over for the peripheral group.
        (", ".join(peripheral_fields) or "(none)") + "\n\n",
        "_Interpretation:_\n",
        "The core fields likely represent the fundamental metadata elements that occur in nearly every record "
        "(e.g., identifiers, titles, timestamps). The peripheral fields may represent optional or contextual data "
        "used only in specific cases or submodules of the schema.",
    ]

    return "\n".join(text)
96+
97+
6398
def write_markdown_report(degree_dict, degree_counts, output_file):
64-
"""Write results to a Markdown file."""
99+
"""Write results to a Markdown file, including interpretation."""
65100
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
66101
with open(output_file, "w", encoding="utf-8") as f:
67102
f.write(f"# JSON Field Degree Analysis Report\n")
@@ -73,7 +108,7 @@ def write_markdown_report(degree_dict, degree_counts, output_file):
73108
f.write(f"- Maximum Degree: {max(degree_dict.values()) if degree_dict else 0}\n")
74109
f.write(f"- Minimum Degree: {min(degree_dict.values()) if degree_dict else 0}\n\n")
75110

76-
# Top 15 fields by degree
111+
# Top fields
77112
f.write("## Top 15 JSON Fields by Degree\n")
78113
f.write("| Rank | Field Name | Degree |\n|------|-------------|---------|\n")
79114
for i, (field, deg) in enumerate(sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[:15], 1):
@@ -85,8 +120,14 @@ def write_markdown_report(degree_dict, degree_counts, output_file):
85120
f.write("| Degree | Count of Fields |\n|---------|-----------------|\n")
86121
for degree, count in sorted(degree_counts.items()):
87122
f.write(f"| {degree} | {count} |\n")
123+
f.write("\n")
124+
125+
# Interpretation
126+
interpretation = interpret_degree_results(degree_dict)
127+
f.write(interpretation)
128+
f.write("\n")
88129

89-
print(f"✅ Markdown report saved to: {output_file}")
130+
print(f"✅ Markdown report with interpretation saved to: {output_file}")
90131

91132

92133
def main():
@@ -95,7 +136,7 @@ def main():
95136
"SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
96137
"Meeting-Summaries/2025/meeting-summaries-array.json"
97138
)
98-
output_file = "degree_analysis_by_field.md"
139+
output_file = "degree_analysis_by_field_with_interpretation.md"
99140

100141
print("📡 Fetching JSON from remote source...")
101142
data = load_json_remote(url)
@@ -118,3 +159,4 @@ def main():
118159
main()
119160

120161

162+
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# JSON Field Degree Analysis Report
2+
**Generated on:** 2025-10-14 13:29:12
3+
4+
## Summary Statistics
5+
- Total Unique Fields: 44
6+
- Maximum Degree: 11
7+
- Minimum Degree: 1
8+
9+
## Top 15 JSON Fields by Degree
10+
| Rank | Field Name | Degree |
11+
|------|-------------|---------|
12+
| 1 | typeOfMeeting | 11 |
13+
| 2 | host | 11 |
14+
| 3 | peoplePresent | 11 |
15+
| 4 | date | 11 |
16+
| 5 | purpose | 11 |
17+
| 6 | workingDocs | 11 |
18+
| 7 | documenter | 11 |
19+
| 8 | status | 11 |
20+
| 9 | meetingVideoLink | 10 |
21+
| 10 | workgroup | 9 |
22+
| 11 | workgroup_id | 9 |
23+
| 12 | meetingInfo | 9 |
24+
| 13 | noSummaryGiven | 9 |
25+
| 14 | type | 9 |
26+
| 15 | canceledSummary | 9 |
27+
28+
## Degree Distribution
29+
| Degree | Count of Fields |
30+
|---------|-----------------|
31+
| 1 | 2 |
32+
| 2 | 2 |
33+
| 3 | 9 |
34+
| 4 | 4 |
35+
| 5 | 1 |
36+
| 7 | 2 |
37+
| 8 | 3 |
38+
| 9 | 12 |
39+
| 10 | 1 |
40+
| 11 | 8 |
41+
42+
## Interpretation of Degree Results
43+
44+
The **degree** of a field represents how many other fields it co-occurs with across the dataset. Fields with high degree are more central — they tend to appear together with many other fields, indicating they may be *core schema components*. Fields with low degree are more isolated or specialized.
45+
46+
- **Average degree:** 6.77
47+
- **Maximum degree:** 11
48+
- **Minimum degree:** 1
49+
50+
51+
### Core (Highly Connected) Fields
52+
53+
typeOfMeeting (11), host (11), peoplePresent (11), date (11), purpose (11), workingDocs (11), documenter (11), status (11)
54+
55+
56+
### Peripheral (Low-Connectivity) Fields
57+
58+
rationale (3), topicsCovered (3), emotions (3), opposing (3), other (2), gamesPlayed (2), link (1), title (1)
59+
60+
61+
_Interpretation:_
62+
63+
The core fields likely represent the fundamental metadata elements that occur in nearly every record (e.g., identifiers, titles, timestamps). The peripheral fields may represent optional or contextual data used only in specific cases or submodules of the schema.

0 commit comments

Comments
 (0)