Skip to content

Commit 4c39ce4

Browse files
committed
Create Path Analysis script
1 parent bd07b18 commit 4c39ce4

File tree

2 files changed

+173
-0
lines changed

2 files changed

+173
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import json
2+
import requests
3+
import networkx as nx
4+
from collections import Counter
5+
from datetime import datetime
6+
import os
7+
8+
9+
def load_json_remote(url, timeout=30):
    """Load JSON data from a remote URL.

    Args:
        url: Address of the JSON document to download.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without a timeout a stalled
            connection would block the script indefinitely.

    Returns:
        The decoded JSON payload, as produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()
14+
15+
16+
def extract_json_paths(obj, prefix=""):
    """Recursively extract all JSON paths in dot/bracket notation.

    Dictionary keys are joined with ``.`` and list positions are appended
    as ``[i]``. Every container *and* leaf gets its own path, in pre-order
    (a parent always precedes its children).

    Example: {"a": {"b": 1}} -> ["a", "a.b"]

    Args:
        obj: Decoded JSON value (dict, list, or scalar).
        prefix: Path of ``obj`` itself. An empty prefix means ``obj`` is the
            document root, so its keys are emitted without a leading dot.

    Returns:
        A list of path strings. Scalars yield an empty list.
    """
    paths = []

    def _walk(node, base, at_root):
        # ``at_root`` is tracked explicitly rather than inferred from an
        # empty ``base``: a nested empty-string key also produces an empty
        # path, and its children must still be written as ".child".
        if isinstance(node, dict):
            for key, value in node.items():
                path = key if at_root else f"{base}.{key}"
                paths.append(path)
                _walk(value, path, False)
        elif isinstance(node, list):
            for index, item in enumerate(node):
                path = f"{base}[{index}]"
                paths.append(path)
                _walk(item, path, False)

    _walk(obj, prefix, not prefix)
    return paths
33+
34+
35+
def build_path_graph(paths):
    """Build a directed graph from JSON paths (parent → child relationships).

    The parent of a path is obtained by stripping its final segment:
    a trailing ``[i]`` for array elements, otherwise the text after the
    last ``.``. Paths with neither are added as isolated nodes.

    Args:
        paths: Iterable of path strings such as ``"[0].items[2].name"``.

    Returns:
        An ``nx.DiGraph`` with one edge per (parent, path) pair. A top-level
        array element like ``"[0]"`` is attached to the empty-string node,
        which stands for the document root.
    """
    G = nx.DiGraph()
    for path in paths:
        head, bracket, tail = path.rpartition("[")
        if bracket and tail.endswith("]") and tail[:-1].isdigit():
            # Path ends in "[i]": the parent is the array itself. This must be
            # checked before the dot rule, otherwise "a.items[0]" would be
            # attached to "a" and skip the "a.items" node entirely.
            G.add_edge(head, path)
        elif "." in path:
            G.add_edge(path.rsplit(".", 1)[0], path)
        else:
            G.add_node(path)
    return G
48+
49+
50+
def path_analysis(paths):
    """Compute path metrics and structural statistics.

    Args:
        paths: List of path strings in dot/bracket notation.

    Returns:
        A dict with:
            total_paths: number of paths received.
            max_depth: largest depth, where depth is the number of ``.`` and
                ``[`` separators in a path (0 for an empty input).
            avg_depth: mean depth (0 for an empty input).
            deepest_paths: up to 10 paths whose depth equals ``max_depth``.
            parent_counts: up to 10 ``(parent_path, child_count)`` tuples,
                most frequent first. Top-level paths are not counted as
                children, and the empty document-root parent is omitted.
    """

    def _parent_of(path):
        # Strip the final segment: a trailing "[i]" for array elements,
        # otherwise the text after the last ".". Checking the index first
        # keeps "a.items[0]" under "a.items" rather than under "a".
        head, bracket, tail = path.rpartition("[")
        if bracket and tail.endswith("]") and tail[:-1].isdigit():
            return head
        if "." in path:
            return path.rsplit(".", 1)[0]
        return ""

    depths = [p.count(".") + p.count("[") for p in paths]
    max_depth = max(depths, default=0)
    avg_depth = sum(depths) / len(depths) if depths else 0

    # Find deepest paths
    deepest_paths = [p for p, d in zip(paths, depths) if d == max_depth]

    # Count how many direct children each parent has. Paths without a real
    # parent are skipped so they do not show up as parents of themselves.
    parent_counts = Counter(
        parent for parent in map(_parent_of, paths) if parent
    )

    return {
        "total_paths": len(paths),
        "max_depth": max_depth,
        "avg_depth": avg_depth,
        "deepest_paths": deepest_paths[:10],
        "parent_counts": parent_counts.most_common(10),
    }
69+
70+
71+
def write_markdown_report(analysis, output_file):
    """Generate Markdown report summarizing JSON path analysis.

    Args:
        analysis: Dict with the keys ``total_paths``, ``max_depth``,
            ``avg_depth``, ``deepest_paths`` (iterable of path strings) and
            ``parent_counts`` (iterable of ``(parent, count)`` pairs).
        output_file: Destination path. Missing parent directories are
            created; a bare filename is written to the current directory.

    Side effects:
        Overwrites ``output_file`` (UTF-8) and prints a confirmation line.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("# JSON Path Analysis Report\n")
        f.write(f"**Generated on:** {timestamp}\n\n")

        f.write("## Summary Statistics\n")
        f.write(f"- Total Unique Paths: {analysis['total_paths']}\n")
        f.write(f"- Maximum Depth: {analysis['max_depth']}\n")
        f.write(f"- Average Depth: {analysis['avg_depth']:.2f}\n\n")

        f.write("## Deepest JSON Paths\n")
        for p in analysis["deepest_paths"]:
            f.write(f"- `{p}`\n")
        f.write("\n")

        f.write("## Most Common Parent Paths\n")
        f.write("| Rank | Parent Path | Count |\n|------|--------------|--------|\n")
        # Ranks are 1-based for human readers.
        for i, (parent, count) in enumerate(analysis["parent_counts"], 1):
            f.write(f"| {i} | `{parent}` | {count} |\n")
        f.write("\n")

        f.write("## Interpretation\n")
        f.write(
            "This report analyzes the structural complexity of the JSON file. "
            "Each path represents a unique traversal route through nested keys and arrays. "
            "The **maximum depth** indicates how deeply nested certain fields are, "
            "while the **most common parent paths** reveal recurring structural patterns.\n"
        )

    print(f"✅ Path analysis report saved to: {output_file}")
106+
107+
108+
def main(url=None, output_dir="reports"):
    """Download a JSON document, analyze its paths, and write a report.

    Args:
        url: Location of the JSON document. Defaults to the 2025
            SingularityNET Ambassador Program meeting-summaries archive.
        output_dir: Directory that receives ``path_analysis_report.md``.
            It is created if it does not exist.
    """
    if url is None:
        url = (
            "https://raw.githubusercontent.com/SingularityNET-Archive/"
            "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
            "Meeting-Summaries/2025/meeting-summaries-array.json"
        )
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "path_analysis_report.md")

    print("📡 Fetching JSON data from remote source...")
    data = load_json_remote(url)
    print("✅ JSON file downloaded.")

    print("🔍 Extracting all JSON paths...")
    all_paths = extract_json_paths(data)
    print(f"📊 Extracted {len(all_paths)} unique paths.")

    print("🔧 Performing path analysis...")
    analysis = path_analysis(all_paths)

    # The graph is only used for the size summary printed below; it is not
    # part of the written report.
    print("🧩 Building path graph...")
    G = build_path_graph(all_paths)
    print(f"✅ Graph built with {len(G.nodes)} nodes and {len(G.edges)} edges.")

    write_markdown_report(analysis, output_file)


if __name__ == "__main__":
    main()

reports/path_analysis_report.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# JSON Path Analysis Report
2+
**Generated on:** 2025-10-15 08:22:28
3+
4+
## Summary Statistics
5+
- Total Unique Paths: 6832
6+
- Maximum Depth: 6
7+
- Average Depth: 4.20
8+
9+
## Deepest JSON Paths
10+
- `[0].agendaItems[0].actionItems[0].text`
11+
- `[0].agendaItems[0].actionItems[0].assignee`
12+
- `[0].agendaItems[0].actionItems[0].dueDate`
13+
- `[0].agendaItems[0].actionItems[0].status`
14+
- `[0].agendaItems[0].decisionItems[0].decision`
15+
- `[0].agendaItems[0].decisionItems[0].effect`
16+
- `[0].agendaItems[0].decisionItems[1].decision`
17+
- `[0].agendaItems[0].decisionItems[1].rationale`
18+
- `[0].agendaItems[0].decisionItems[1].effect`
19+
- `[0].agendaItems[0].decisionItems[2].decision`
20+
21+
## Most Common Parent Paths
22+
| Rank | Parent Path | Count |
23+
|------|--------------|--------|
24+
| 1 | `[12].agendaItems[0]` | 26 |
25+
| 2 | `[2].agendaItems[0]` | 21 |
26+
| 3 | `[10].agendaItems[0]` | 21 |
27+
| 4 | `[7].agendaItems[0]` | 19 |
28+
| 5 | `[17].agendaItems[0]` | 19 |
29+
| 6 | `[22].meetingInfo` | 19 |
30+
| 7 | `[23].meetingInfo` | 19 |
31+
| 8 | `[101].agendaItems[0]` | 19 |
32+
| 9 | `[11].agendaItems[0]` | 18 |
33+
| 10 | `[37].agendaItems[0]` | 18 |
34+
35+
## Interpretation
36+
This report analyzes the structural complexity of the JSON file. Each path represents a unique traversal route through nested keys and arrays. The **maximum depth** indicates how deeply nested certain fields are, while the **most common parent paths** reveal recurring structural patterns.

0 commit comments

Comments
 (0)