22import requests
33import networkx as nx
44from itertools import combinations
5- from collections import Counter
65from datetime import datetime
7- import statistics
86
97
108def load_json_remote (url ):
@@ -33,7 +31,7 @@ def find_field_combinations(obj):
3331
3432
3533def build_field_graph (data ):
36- """Build a graph where nodes = field names and edges = co-occurrence in same object."""
34+ """Build a graph where nodes = field names and edges = co-occurrence in the same object."""
3735 G = nx .Graph ()
3836 cooccurrence_sets = find_field_combinations (data )
3937
@@ -49,85 +47,31 @@ def build_field_graph(data):
4947
5048
5149def degree_analysis (G ):
52- """Compute degree metrics for the graph."""
53- degree_dict = dict (G .degree ())
54- degree_counts = Counter (degree_dict .values ())
55- return degree_dict , degree_counts
56-
57-
58- def interpret_degree_results (degree_dict ):
59- """Generate a narrative interpretation of the degree analysis."""
60- if not degree_dict :
61- return "No fields found to analyze."
62-
63- degrees = list (degree_dict .values ())
64- avg_deg = statistics .mean (degrees )
65- max_deg = max (degrees )
66- min_deg = min (degrees )
67-
68- sorted_fields = sorted (degree_dict .items (), key = lambda x : x [1 ], reverse = True )
69- n = len (sorted_fields )
70- top_20 = sorted_fields [: max (1 , n // 5 )]
71- bottom_20 = sorted_fields [- max (1 , n // 5 ) :]
72-
73- core_fields = [f"{ name } ({ deg } )" for name , deg in top_20 ]
74- peripheral_fields = [f"{ name } ({ deg } )" for name , deg in bottom_20 ]
75-
76- text = [
77- "## Interpretation of Degree Results\n " ,
78- "The **degree** of a field represents how many other fields it co-occurs with "
79- "across the dataset. Fields with high degree are more central — they tend to appear "
80- "together with many other fields, indicating they may be *core schema components*. "
81- "Fields with low degree are more isolated or specialized.\n " ,
82- f"- **Average degree:** { avg_deg :.2f} \n "
83- f"- **Maximum degree:** { max_deg } \n "
84- f"- **Minimum degree:** { min_deg } \n \n " ,
85- "### Core (Highly Connected) Fields\n " ,
86- ", " .join (core_fields ) + "\n \n " ,
87- "### Peripheral (Low-Connectivity) Fields\n " ,
88- ", " .join (peripheral_fields ) + "\n \n " ,
89- "_Interpretation:_\n " ,
90- "The core fields likely represent the fundamental metadata elements that occur in nearly every record "
91- "(e.g., identifiers, titles, timestamps). The peripheral fields may represent optional or contextual data "
92- "used only in specific cases or submodules of the schema."
93- ]
94-
95- return "\n " .join (text )
96-
97-
98- def write_markdown_report (degree_dict , degree_counts , output_file ):
99- """Write results to a Markdown file, including interpretation."""
50+ """Compute degree of each field in the graph."""
51+ return dict (G .degree ())
52+
53+
54+ def write_markdown_report (degree_dict , output_file ):
55+ """Write the degree results to a Markdown report."""
10056 timestamp = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" )
57+
10158 with open (output_file , "w" , encoding = "utf-8" ) as f :
102- f .write (f"# JSON Field Degree Analysis Report\n " )
59+ f .write (f"# JSON Field Degree Report\n " )
10360 f .write (f"**Generated on:** { timestamp } \n \n " )
10461
105- # Summary
106- f .write ("## Summary Statistics\n " )
107- f .write (f"- Total Unique Fields: { len (degree_dict )} \n " )
108- f .write (f"- Maximum Degree: { max (degree_dict .values ()) if degree_dict else 0 } \n " )
109- f .write (f"- Minimum Degree: { min (degree_dict .values ()) if degree_dict else 0 } \n \n " )
110-
111- # Top fields
112- f .write ("## Top 15 JSON Fields by Degree\n " )
113- f .write ("| Rank | Field Name | Degree |\n |------|-------------|---------|\n " )
114- for i , (field , deg ) in enumerate (sorted (degree_dict .items (), key = lambda x : x [1 ], reverse = True )[:15 ], 1 ):
115- f .write (f"| { i } | { field } | { deg } |\n " )
116- f .write ("\n " )
62+ f .write (f"## Summary\n " )
63+ f .write (f"- Total unique fields: { len (degree_dict )} \n " )
64+ f .write (f"- Maximum degree: { max (degree_dict .values ()) if degree_dict else 0 } \n " )
65+ f .write (f"- Minimum degree: { min (degree_dict .values ()) if degree_dict else 0 } \n \n " )
11766
118- # Degree distribution
119- f .write ("## Degree Distribution\n " )
120- f .write ("| Degree | Count of Fields |\n |---------|-----------------|\n " )
121- for degree , count in sorted (degree_counts .items ()):
122- f .write (f"| { degree } | { count } |\n " )
123- f .write ("\n " )
67+ f .write ("## Field Degrees (sorted by degree)\n " )
68+ f .write ("| Rank | Field Name | Degree |\n " )
69+ f .write ("|------|-------------|---------|\n " )
12470
125- # Interpretation
126- interpretation = interpret_degree_results (degree_dict )
127- f .write (interpretation )
128- f .write ("\n " )
71+ for i , (field , deg ) in enumerate (sorted (degree_dict .items (), key = lambda x : x [1 ], reverse = True ), start = 1 ):
72+ f .write (f"| { i } | { field } | { deg } |\n " )
12973
130- print (f"✅ Markdown report with interpretation saved to: { output_file } " )
74+ print (f"✅ Markdown report saved to: { output_file } " )
13175
13276
13377def main ():
@@ -136,27 +80,24 @@ def main():
13680 "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
13781 "Meeting-Summaries/2025/meeting-summaries-array.json"
13882 )
139- output_file = "degree_analysis_by_field_with_interpretation .md"
83+ output_file = "json_field_degree_report .md"
14084
14185 print ("📡 Fetching JSON from remote source..." )
14286 data = load_json_remote (url )
14387 print ("✅ JSON file downloaded." )
14488
14589 print ("🔍 Building field co-occurrence graph..." )
14690 G = build_field_graph (data )
91+ print (f"📊 Built graph with { len (G .nodes )} fields and { len (G .edges )} relationships.\n " )
14792
148- if len (G .nodes ) == 0 :
149- print ("⚠️ No JSON field structure detected." )
150- return
151-
152- print (f"📊 Built graph with { len (G .nodes )} fields and { len (G .edges )} relationships." )
153-
154- degree_dict , degree_counts = degree_analysis (G )
155- write_markdown_report (degree_dict , degree_counts , output_file )
93+ degree_dict = degree_analysis (G )
94+ write_markdown_report (degree_dict , output_file )
15695
15796
15897if __name__ == "__main__" :
15998 main ()
16099
161100
162101
102+
103+
0 commit comments