Skip to content

Commit 7faab6b

Browse files
committed
Create unified report
1 parent ac6e3b6 commit 7faab6b

File tree

7 files changed

+785
-0
lines changed

7 files changed

+785
-0
lines changed

Graph Analysis/unified_analysis.py

Lines changed: 372 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,372 @@
1+
import argparse
2+
import json
3+
import os
4+
from datetime import datetime
5+
from itertools import combinations
6+
from typing import Any, Dict, Iterable, List, Tuple
7+
8+
import networkx as nx
9+
import requests
10+
from collections import Counter
11+
12+
13+
# Default data source: the 2025 Ambassador Program meeting-summaries array
# served as raw JSON from the SingularityNET Archive GitHub repository.
DEFAULT_INPUT = (
    "https://raw.githubusercontent.com/SingularityNET-Archive/"
    "SingularityNET-Archive/refs/heads/main/Data/Snet-Ambassador-Program/"
    "Meeting-Summaries/2025/meeting-summaries-array.json"
)
18+
19+
20+
def is_url(source: str) -> bool:
    """Return True if *source* looks like an HTTP(S) URL rather than a local path."""
    # str.startswith accepts a tuple of prefixes -- one call instead of an `or` chain.
    return source.startswith(("http://", "https://"))
22+
23+
24+
def load_json(source: str) -> Any:
    """Load and parse JSON from a local file path or an HTTP(S) URL.

    Raises:
        requests.HTTPError: for non-2xx responses from a URL source.
        OSError / json.JSONDecodeError: for local-file problems.
    """
    if is_url(source):
        # A timeout prevents the script from hanging forever on a stalled
        # server; requests.get() without one waits indefinitely.
        resp = requests.get(source, timeout=30)
        resp.raise_for_status()
        return resp.json()
    with open(source, "r", encoding="utf-8") as f:
        return json.load(f)
31+
32+
33+
# ---------------- Utility ----------------
34+
35+
def _truncate_label(text: str, max_len: int = 80) -> str:
36+
if text is None:
37+
return ""
38+
safe = str(text).replace("\n", " ").strip()
39+
return safe if len(safe) <= max_len else (safe[: max_len - 1] + "…")
40+
41+
42+
# ---------------- Degree (Co-attendance) ----------------
43+
44+
def extract_participants(record: Dict[str, Any]) -> List[str]:
    """Extract likely participants from a meeting record.

    - peoplePresent: comma-separated string under meetingInfo
    - host, documenter: added if present (deduped)
    """
    info: Dict[str, Any] = {}
    if isinstance(record, dict):
        info = record.get("meetingInfo", {}) or {}

    names: List[str] = []

    # peoplePresent is one comma-separated string; drop empty fragments.
    present = info.get("peoplePresent", "")
    if isinstance(present, str) and present.strip():
        names += [part.strip() for part in present.split(",") if part.strip()]

    # host/documenter are plain single-name strings.
    for role in ("host", "documenter"):
        name = info.get(role)
        if isinstance(name, str) and name.strip():
            names.append(name.strip())

    # dict preserves insertion order, so this dedupes while keeping first occurrence.
    return list(dict.fromkeys(names))
70+
71+
72+
def build_coattendance_graph(records: Iterable[Any]) -> nx.Graph:
    """Build an undirected graph linking people who attended the same meeting.

    Each edge's ``weight`` counts how many meetings that pair shared.
    """
    graph = nx.Graph()
    for record in records:
        people = extract_participants(record)
        # A meeting with fewer than two attendees contributes no nodes or edges.
        if len(people) < 2:
            continue
        graph.add_nodes_from(people)
        for a, b in combinations(people, 2):
            if graph.has_edge(a, b):
                graph[a][b]["weight"] += 1
            else:
                graph.add_edge(a, b, weight=1)
    return graph
86+
87+
88+
def degree_analysis(G: nx.Graph) -> Tuple[Dict[str, int], Counter]:
    """Return (node -> degree mapping, Counter of degree value -> node count)."""
    per_node = {node: deg for node, deg in G.degree()}
    distribution = Counter(per_node.values())
    return per_node, distribution
92+
93+
94+
# ---------------- JSON Path Structure ----------------
95+
96+
def extract_json_paths(obj: Any, prefix: str = "") -> List[str]:
    """Recursively enumerate every path in a nested JSON value.

    Dict keys append ".key" (no leading dot at the root); list items
    append "[i]". Scalars contribute no paths of their own.
    """
    found: List[str] = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            child = f"{prefix}.{key}" if prefix else key
            found.append(child)
            found += extract_json_paths(value, child)
    elif isinstance(obj, list):
        for idx, value in enumerate(obj):
            child = f"{prefix}[{idx}]"
            found.append(child)
            found += extract_json_paths(value, child)
    return found
109+
110+
111+
def _parent_of(path: str) -> str:
    """Return the structural parent of a JSON path, or the path itself at the root.

    The parent is obtained by stripping the *last* separator, whether it is a
    "." (dict key) or a "[" (list index) -- e.g. "a.b[0]" -> "a.b", "a[0]" -> "a".
    """
    cut = max(path.rfind("."), path.rfind("["))
    return path[:cut] if cut > 0 else path


def path_metrics(paths: List[str]) -> Dict[str, Any]:
    """Summarize a list of JSON paths: counts, depth statistics, and per-parent
    child frequencies.

    Depth is the number of "."/"[" separators in a path (root keys are depth 0).
    """
    depths = [p.count(".") + p.count("[") for p in paths]
    max_depth = max(depths) if depths else 0
    avg_depth = (sum(depths) / len(depths)) if depths else 0.0
    deepest_paths = [p for p, d in zip(paths, depths) if d == max_depth]
    # Count children under each structural parent. The previous version only
    # split on ".", so list-element paths like "a[0]" were miscounted as their
    # own parent and "a.b[0]" was attributed to "a" instead of "a.b".
    parent_counts = Counter(_parent_of(p) for p in paths)
    return {
        "total_paths": len(paths),
        "max_depth": max_depth,
        "avg_depth": avg_depth,
        "deepest_paths": deepest_paths,
        "parent_counts": parent_counts,
    }
124+
125+
126+
def build_path_graph(paths: List[str]) -> nx.DiGraph:
    """Build a directed tree of JSON paths with edges from parent to child.

    The parent is found by stripping the *last* "." or "[" separator. The
    previous version tested "." first and split on the last dot, which
    attached a path like "a.b[0]" to "a" instead of its real parent "a.b".
    Root paths (no separator) become isolated nodes until a child links them.
    """
    G = nx.DiGraph()
    for path in paths:
        # Whichever separator occurs last marks the parent/child boundary.
        cut = max(path.rfind("."), path.rfind("["))
        if cut > 0:
            G.add_edge(path[:cut], path)
        else:
            G.add_node(path)
    return G
138+
139+
140+
# ---------------- Field Co-occurrence (Centrality, Clustering, Components) ----------------
141+
142+
def find_field_combinations(obj: Any) -> List[set]:
    """Collect the key-set of every dict with two or more keys, at any nesting depth."""
    found: List[set] = []
    if isinstance(obj, dict):
        key_set = set(obj)
        # Single-key dicts carry no co-occurrence information.
        if len(key_set) > 1:
            found.append(key_set)
        for child in obj.values():
            found += find_field_combinations(child)
    elif isinstance(obj, list):
        for child in obj:
            found += find_field_combinations(child)
    return found
154+
155+
156+
def build_field_graph(data: Any) -> nx.Graph:
    """Build a weighted undirected graph linking fields that co-occur in the same dict.

    Edge weights count how many dicts contained both fields together.
    """
    graph = nx.Graph()
    for key_set in find_field_combinations(data):
        graph.add_nodes_from(key_set)
        for a, b in combinations(key_set, 2):
            if graph.has_edge(a, b):
                graph[a][b]["weight"] += 1
            else:
                graph.add_edge(a, b, weight=1)
    return graph
168+
169+
170+
def compute_centrality_measures(G: nx.Graph) -> Dict[str, Dict[str, float]]:
    """Compute degree/betweenness/closeness/eigenvector centrality per node.

    An empty graph yields four empty dicts. If eigenvector centrality fails
    to converge, every node falls back to 0.0 instead of raising.
    """
    if not G.number_of_nodes():
        return {"degree": {}, "betweenness": {}, "closeness": {}, "eigenvector": {}}
    try:
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # Power iteration did not converge; report zeros rather than crashing.
        eigenvector = {node: 0.0 for node in G.nodes()}
    return {
        "degree": nx.degree_centrality(G),
        "betweenness": nx.betweenness_centrality(G),
        "closeness": nx.closeness_centrality(G),
        "eigenvector": eigenvector,
    }
184+
185+
186+
def clustering_metrics(G: nx.Graph, top: int) -> Tuple[float, List[Tuple[str, float]]]:
    """Return (average clustering coefficient, top-N (node, coefficient) pairs).

    An empty graph yields (0.0, []).
    """
    if not G.number_of_nodes():
        return 0.0, []
    per_node = nx.clustering(G)
    ranked = sorted(per_node.items(), key=lambda item: item[1], reverse=True)
    return nx.average_clustering(G), ranked[:top]
193+
194+
195+
def connected_components_info(G: nx.Graph, top: int) -> Dict[str, Any]:
    """Summarize connected components: count, sizes (largest first), and a
    sample of up to *top* nodes from the largest component."""
    if not G.number_of_nodes():
        return {"component_count": 0, "component_sizes": [], "largest_component_sample": []}
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    return {
        "component_count": len(components),
        "component_sizes": [len(c) for c in components],
        "largest_component_sample": list(components[0])[:top],
    }
203+
204+
205+
# ---------------- Report Writer ----------------
206+
207+
def write_report(
    output_file: str,
    summary: Dict[str, Any],
    degree: Tuple[Dict[str, int], Counter],
    degree_top: List[Tuple[str, int]],
    degree_dist: List[Tuple[int, int]],
    path_info: Dict[str, Any],
    parent_top: List[Tuple[str, int]],
    centrality: Dict[str, Dict[str, float]],
    clustering: Tuple[float, List[Tuple[str, float]]],
    components: Dict[str, Any],
) -> None:
    """Write the unified Markdown analysis report to *output_file*.

    Args mirror the analysis stages. NOTE: `degree` (the full degree dict plus
    distribution Counter) is accepted for interface stability but not rendered;
    the tables use the pre-ranked `degree_top` / `degree_dist` instead.
    """
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the output path actually has a directory component.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("# Unified Graph Analysis Report\n")
        f.write(f"**Generated on:** {timestamp}\n\n")

        # Summary
        f.write("## Summary\n")
        for k, v in summary.items():
            f.write(f"- {k}: {v}\n")
        f.write("\n")

        # Degree Analysis
        f.write("## Degree (Co-attendance) Analysis\n")
        f.write("### Top Nodes by Degree\n")
        f.write("| Rank | Node | Degree |\n|------|------|--------|\n")
        for i, (node, deg) in enumerate(degree_top, 1):
            label = _truncate_label(node, 80)
            f.write(f"| {i} | {label} | {deg} |\n")
        f.write("\n")
        f.write("### Degree Distribution\n")
        f.write("| Degree | Count of Nodes |\n|--------|-----------------|\n")
        for d, c in degree_dist:
            f.write(f"| {d} | {c} |\n")
        f.write("\n")

        # Path Analysis
        f.write("## JSON Path Structure Analysis\n")
        f.write(f"- Total Unique Paths: {path_info['total_paths']}\n")
        f.write(f"- Maximum Depth: {path_info['max_depth']}\n")
        f.write(f"- Average Depth: {path_info['avg_depth']:.2f}\n\n")
        f.write("### Deepest JSON Paths (sample)\n")
        for p in path_info["deepest_paths"][:10]:
            f.write(f"- `{p}`\n")
        f.write("\n")
        f.write("### Most Common Parent Paths\n")
        f.write("| Rank | Parent Path | Count |\n|------|-------------|-------|\n")
        for i, (parent, cnt) in enumerate(parent_top, 1):
            f.write(f"| {i} | `{parent}` | {cnt} |\n")
        f.write("\n")

        # Centrality
        f.write("## Field Centrality (Co-occurrence)\n")
        metrics = centrality
        top_fields = sorted(metrics["degree"].keys(), key=lambda x: metrics["degree"][x], reverse=True)[:10]
        f.write("| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |\n")
        f.write("|------|-------|--------|-------------|-----------|------------|\n")
        for i, node in enumerate(top_fields, 1):
            f.write(
                f"| {i} | {node} | "
                f"{metrics['degree'].get(node, 0):.3f} | "
                f"{metrics['betweenness'].get(node, 0):.3f} | "
                f"{metrics['closeness'].get(node, 0):.3f} | "
                f"{metrics['eigenvector'].get(node, 0):.3f} |\n"
            )
        f.write("\n")

        # Clustering
        avg_clust, top_clust_nodes = clustering
        f.write("## Clustering (Field Co-occurrence Graph)\n")
        f.write(f"- Average Clustering Coefficient: {avg_clust:.3f}\n\n")
        f.write("### Top Nodes by Clustering Coefficient\n")
        f.write("| Rank | Field | Clustering |\n|------|-------|------------|\n")
        for i, (node, val) in enumerate(top_clust_nodes, 1):
            f.write(f"| {i} | {node} | {val:.3f} |\n")
        f.write("\n")

        # Connected Components
        f.write("## Connected Components (Field Co-occurrence Graph)\n")
        f.write(f"- Number of Components: {components['component_count']}\n")
        f.write(f"- Component Sizes (top 10): {components['component_sizes'][:10]}\n")
        f.write("- Sample of Largest Component Nodes (top 10):\n")
        for n in components["largest_component_sample"][:10]:
            f.write(f" - {n}\n")
        f.write("\n")
294+
295+
296+
def ensure_iterable_records(data: Any) -> List[Any]:
    """Normalize loaded JSON into a list of records.

    A list passes through unchanged, a lone dict is wrapped in a list, and
    any other value yields an empty list.
    """
    if isinstance(data, list):
        return data
    return [data] if isinstance(data, dict) else []
302+
303+
304+
def main() -> None:
    """CLI entry point: load the JSON source, run every analysis, write the report."""
    parser = argparse.ArgumentParser(description="Unified Graph Analysis")
    parser.add_argument(
        "--input",
        default=DEFAULT_INPUT,
        help="Local JSON file path or HTTP(S) URL",
    )
    parser.add_argument(
        "--output",
        default="reports/unified_analysis_report.md",
        help="Markdown report output path",
    )
    parser.add_argument(
        "--limit-top",
        type=int,
        default=10,
        help="Top-N rows to include in tables",
    )
    args = parser.parse_args()
    top_n = args.limit_top

    raw = load_json(args.input)
    records = ensure_iterable_records(raw)

    # Degree / co-attendance graph (participants-only).
    attend_graph = build_coattendance_graph(records)
    node_degrees, degree_distribution = degree_analysis(attend_graph)
    ranked_degrees = sorted(node_degrees.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
    distribution_rows = sorted(degree_distribution.items())

    # JSON path structure.
    paths = extract_json_paths(raw)
    metrics = path_metrics(paths)
    top_parents = metrics["parent_counts"].most_common(top_n)
    path_graph = build_path_graph(paths)

    # Field co-occurrence graph plus its centrality/clustering/component stats.
    field_graph = build_field_graph(raw)
    field_centrality = compute_centrality_measures(field_graph)
    avg_coeff, top_coeff_nodes = clustering_metrics(field_graph, top_n)
    component_info = connected_components_info(field_graph, top_n)

    summary = {
        "Co-attendance graph (nodes)": len(attend_graph.nodes),
        "Co-attendance graph (edges)": len(attend_graph.edges),
        "Path graph (nodes)": len(path_graph.nodes),
        "Path graph (edges)": len(path_graph.edges),
        "Field graph (nodes)": len(field_graph.nodes),
        "Field graph (edges)": len(field_graph.edges),
    }

    write_report(
        output_file=args.output,
        summary=summary,
        degree=(node_degrees, degree_distribution),
        degree_top=ranked_degrees,
        degree_dist=distribution_rows,
        path_info=metrics,
        parent_top=top_parents,
        centrality=field_centrality,
        clustering=(avg_coeff, top_coeff_nodes),
        components=component_info,
    )
    print(f"✅ Unified report written to: {args.output}")
369+
370+
371+
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ path-report:
2222

2323
centrality-report:
2424
$(PY) "Graph Analysis/Path_Analysis/Centrality_Analysis/json_centrality_analysis.py"
25+
26+
# Generate the unified analysis report (degree, path, centrality,
# clustering, components) into reports/unified_analysis_report.md.
unified-report:
	$(PY) "Graph Analysis/unified_analysis.py" --output reports/unified_analysis_report.md

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ python Scripts/GEXF-export.py
2828
```bash
2929
python Scripts/Import-JSON.py
3030
```
31+
- Unified analysis report (degree, path, centrality, clustering, components) → writes `reports/unified_analysis_report.md`:
32+
```bash
33+
python "Graph Analysis/unified_analysis.py" --output reports/unified_analysis_report.md
34+
```
3135
- Degree (co-attendance) analysis → writes `Graph Analysis/Degree_Analysis/degree_analysis_report.md`:
3236
```bash
3337
python "Graph Analysis/Degree_Analysis/degree_analysis_to_md.py"

0 commit comments

Comments
 (0)