Skip to content

Commit 8d9575c

Browse files
committed
Amended unified analysis report
1 parent 899c9bd commit 8d9575c

File tree

2 files changed

+142
-75
lines changed

2 files changed

+142
-75
lines changed

Graph Analysis/unified_analysis.py

Lines changed: 27 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -39,58 +39,6 @@ def _truncate_label(text: str, max_len: int = 80) -> str:
3939
return safe if len(safe) <= max_len else (safe[: max_len - 1] + "…")
4040

4141

42-
# ---------------- Degree (Co-attendance) ----------------
43-
44-
def extract_participants(record: Dict[str, Any]) -> List[str]:
    """Extract likely participants from a meeting record.

    Names are collected from ``record["meetingInfo"]`` in this order:

    - ``peoplePresent``: a comma-separated string; each piece is stripped
      and empty pieces are dropped.
    - ``host`` and ``documenter``: single names, added when they are
      non-blank strings.

    Args:
        record: A meeting record. Anything that is not a dict, or whose
            ``meetingInfo`` value is not a dict, yields no participants.

    Returns:
        The participant names with duplicates removed, keeping the order of
        first appearance.
    """
    if not isinstance(record, dict):
        return []

    meeting_info = record.get("meetingInfo")
    # A malformed record may carry a string/list/None here; calling .get on
    # it would raise AttributeError, so treat it as "no participants".
    if not isinstance(meeting_info, dict):
        return []

    participants: List[str] = []

    # peoplePresent as comma-separated string
    people_present = meeting_info.get("peoplePresent", "")
    if isinstance(people_present, str):
        participants.extend(
            name.strip() for name in people_present.split(",") if name.strip()
        )

    # host/documenter as single names
    for key in ("host", "documenter"):
        value = meeting_info.get(key)
        if isinstance(value, str) and value.strip():
            participants.append(value.strip())

    # dicts keep insertion order, so this dedupes while preserving the order
    # in which each name was first seen.
    return list(dict.fromkeys(participants))
70-
71-
72-
def build_coattendance_graph(records: Iterable[Any]) -> nx.Graph:
    """Build an undirected, weighted graph of who attended meetings together.

    For every record, ``extract_participants`` supplies the attendee names.
    Records with fewer than two attendees are ignored. Each attendee becomes
    a node, and every unordered pair of attendees in the same record gets an
    edge whose ``weight`` attribute counts how many records the pair shares.

    Args:
        records: Iterable of meeting records passed to ``extract_participants``.

    Returns:
        The resulting ``nx.Graph``.
    """
    graph = nx.Graph()
    for record in records:
        attendees = extract_participants(record)
        if len(attendees) >= 2:
            graph.add_nodes_from(attendees)
            for left, right in combinations(attendees, 2):
                if not graph.has_edge(left, right):
                    # First time this pair is seen together.
                    graph.add_edge(left, right, weight=1)
                else:
                    graph[left][right]["weight"] += 1
    return graph
86-
87-
88-
def degree_analysis(G: nx.Graph) -> Tuple[Dict[str, int], Counter]:
    """Summarise node degrees of ``G``.

    Args:
        G: The graph whose ``degree()`` view is read.

    Returns:
        A pair ``(degree_by_node, histogram)``: a dict mapping each node to
        its degree, and a ``Counter`` mapping each degree value to the number
        of nodes that have it.
    """
    degree_by_node = {node: deg for node, deg in G.degree()}
    histogram = Counter(degree_by_node.values())
    return degree_by_node, histogram
92-
93-
9442
# ---------------- JSON Path Structure ----------------
9543

9644
def extract_json_paths(obj: Any, prefix: str = "") -> List[str]:
@@ -137,7 +85,7 @@ def build_path_graph(paths: List[str]) -> nx.DiGraph:
13785
return G
13886

13987

140-
# ---------------- Field Co-occurrence (Centrality, Clustering, Components) ----------------
88+
# ---------------- Field Co-occurrence (Degree, Centrality, Clustering, Components) ----------------
14189

14290
def find_field_combinations(obj: Any) -> List[set]:
14391
results: List[set] = []
@@ -167,6 +115,12 @@ def build_field_graph(data: Any) -> nx.Graph:
167115
return G
168116

169117

118+
def field_degree(G: nx.Graph) -> Tuple[Dict[str, int], Counter]:
    """Compute the degree of every node in ``G`` and the degree distribution.

    Args:
        G: The graph whose ``degree()`` view is read.

    Returns:
        ``(per_node, distribution)`` where ``per_node`` maps node -> degree
        and ``distribution`` is a ``Counter`` mapping degree -> node count.
    """
    per_node: Dict[str, int] = {}
    for node, deg in G.degree():
        per_node[node] = deg
    distribution = Counter(per_node.values())
    return per_node, distribution
122+
123+
170124
def compute_centrality_measures(G: nx.Graph) -> Dict[str, Dict[str, float]]:
171125
degree = nx.degree_centrality(G) if G.number_of_nodes() else {}
172126
betweenness = nx.betweenness_centrality(G) if G.number_of_nodes() else {}
@@ -207,9 +161,9 @@ def connected_components_info(G: nx.Graph, top: int) -> Dict[str, Any]:
207161
def write_report(
208162
output_file: str,
209163
summary: Dict[str, Any],
210-
degree: Tuple[Dict[str, int], Counter],
211-
degree_top: List[Tuple[str, int]],
212-
degree_dist: List[Tuple[int, int]],
164+
field_deg: Tuple[Dict[str, int], Counter],
165+
field_top: List[Tuple[str, int]],
166+
field_dist: List[Tuple[int, int]],
213167
path_info: Dict[str, Any],
214168
parent_top: List[Tuple[str, int]],
215169
centrality: Dict[str, Dict[str, float]],
@@ -228,17 +182,17 @@ def write_report(
228182
f.write(f"- {k}: {v}\n")
229183
f.write("\n")
230184

231-
# Degree Analysis
232-
f.write("## Degree (Co-attendance) Analysis\n")
233-
f.write("### Top Nodes by Degree\n")
234-
f.write("| Rank | Node | Degree |\n|------|------|--------|\n")
235-
for i, (node, deg) in enumerate(degree_top, 1):
185+
# JSON Field Degree Analysis
186+
f.write("## JSON Field Degree Analysis\n")
187+
f.write("### Top Fields by Degree\n")
188+
f.write("| Rank | Field | Degree |\n|------|-------|--------|\n")
189+
for i, (node, deg) in enumerate(field_top, 1):
236190
label = _truncate_label(node, 80)
237191
f.write(f"| {i} | {label} | {deg} |\n")
238192
f.write("\n")
239193
f.write("### Degree Distribution\n")
240-
f.write("| Degree | Count of Nodes |\n|--------|-----------------|\n")
241-
for d, c in degree_dist:
194+
f.write("| Degree | Count of Fields |\n|--------|------------------|\n")
195+
for d, c in field_dist:
242196
f.write(f"| {d} | {c} |\n")
243197
f.write("\n")
244198

@@ -322,13 +276,6 @@ def main() -> None:
322276
args = parser.parse_args()
323277

324278
data = load_json(args.input)
325-
records = ensure_iterable_records(data)
326-
327-
# Degree / co-attendance graph (participants-only)
328-
G_attend = build_coattendance_graph(records)
329-
degree_dict, degree_counts = degree_analysis(G_attend)
330-
degree_top = sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)[: args.limit_top]
331-
degree_dist = sorted(degree_counts.items(), key=lambda x: x[0])
332279

333280
# Path analysis
334281
all_paths = extract_json_paths(data)
@@ -338,15 +285,20 @@ def main() -> None:
338285

339286
# Field co-occurrence graph
340287
G_fields = build_field_graph(data)
288+
289+
# Field degree (JSON Field Degree Analysis)
290+
fdeg_dict, fdeg_counts = field_degree(G_fields)
291+
field_top = sorted(fdeg_dict.items(), key=lambda x: x[1], reverse=True)[: args.limit_top]
292+
field_dist = sorted(fdeg_counts.items(), key=lambda x: x[0])
293+
294+
# Centrality on field graph
341295
centrality = compute_centrality_measures(G_fields)
342296

343297
# Clustering & components on field graph
344298
avg_clust, top_clust_nodes = clustering_metrics(G_fields, args.limit_top)
345299
components = connected_components_info(G_fields, args.limit_top)
346300

347301
summary = {
348-
"Co-attendance graph (nodes)": len(G_attend.nodes),
349-
"Co-attendance graph (edges)": len(G_attend.edges),
350302
"Path graph (nodes)": len(G_paths.nodes),
351303
"Path graph (edges)": len(G_paths.edges),
352304
"Field graph (nodes)": len(G_fields.nodes),
@@ -356,9 +308,9 @@ def main() -> None:
356308
write_report(
357309
output_file=args.output,
358310
summary=summary,
359-
degree=(degree_dict, degree_counts),
360-
degree_top=degree_top,
361-
degree_dist=degree_dist,
311+
field_deg=(fdeg_dict, fdeg_counts),
312+
field_top=field_top,
313+
field_dist=field_dist,
362314
path_info=pmetrics,
363315
parent_top=parent_top,
364316
centrality=centrality,
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Unified Graph Analysis Report
2+
**Generated on:** 2025-10-15 09:44:01
3+
4+
## Summary
5+
- Path graph (nodes): 6833
6+
- Path graph (edges): 6832
7+
- Field graph (nodes): 44
8+
- Field graph (edges): 149
9+
10+
## JSON Field Degree Analysis
11+
### Top Fields by Degree
12+
| Rank | Field | Degree |
13+
|------|-------|--------|
14+
| 1 | host | 11 |
15+
| 2 | typeOfMeeting | 11 |
16+
| 3 | date | 11 |
17+
| 4 | documenter | 11 |
18+
| 5 | workingDocs | 11 |
19+
| 6 | purpose | 11 |
20+
| 7 | peoplePresent | 11 |
21+
| 8 | status | 11 |
22+
| 9 | meetingVideoLink | 10 |
23+
| 10 | tags | 9 |
24+
25+
### Degree Distribution
26+
| Degree | Count of Fields |
27+
|--------|------------------|
28+
| 1 | 2 |
29+
| 2 | 2 |
30+
| 3 | 9 |
31+
| 4 | 4 |
32+
| 5 | 1 |
33+
| 7 | 2 |
34+
| 8 | 3 |
35+
| 9 | 12 |
36+
| 10 | 1 |
37+
| 11 | 8 |
38+
39+
## JSON Path Structure Analysis
40+
- Total Unique Paths: 6832
41+
- Maximum Depth: 6
42+
- Average Depth: 4.20
43+
44+
### Deepest JSON Paths (sample)
45+
- `[0].agendaItems[0].actionItems[0].text`
46+
- `[0].agendaItems[0].actionItems[0].assignee`
47+
- `[0].agendaItems[0].actionItems[0].dueDate`
48+
- `[0].agendaItems[0].actionItems[0].status`
49+
- `[0].agendaItems[0].decisionItems[0].decision`
50+
- `[0].agendaItems[0].decisionItems[0].effect`
51+
- `[0].agendaItems[0].decisionItems[1].decision`
52+
- `[0].agendaItems[0].decisionItems[1].rationale`
53+
- `[0].agendaItems[0].decisionItems[1].effect`
54+
- `[0].agendaItems[0].decisionItems[2].decision`
55+
56+
### Most Common Parent Paths
57+
| Rank | Parent Path | Count |
58+
|------|-------------|-------|
59+
| 1 | `[12].agendaItems[0]` | 26 |
60+
| 2 | `[2].agendaItems[0]` | 21 |
61+
| 3 | `[10].agendaItems[0]` | 21 |
62+
| 4 | `[7].agendaItems[0]` | 19 |
63+
| 5 | `[17].agendaItems[0]` | 19 |
64+
| 6 | `[22].meetingInfo` | 19 |
65+
| 7 | `[23].meetingInfo` | 19 |
66+
| 8 | `[101].agendaItems[0]` | 19 |
67+
| 9 | `[11].agendaItems[0]` | 18 |
68+
| 10 | `[37].agendaItems[0]` | 18 |
69+
70+
## Field Centrality (Co-occurrence)
71+
| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |
72+
|------|-------|--------|-------------|-----------|------------|
73+
| 1 | host | 0.256 | 0.001 | 0.256 | 0.309 |
74+
| 2 | typeOfMeeting | 0.256 | 0.001 | 0.256 | 0.309 |
75+
| 3 | date | 0.256 | 0.001 | 0.256 | 0.309 |
76+
| 4 | documenter | 0.256 | 0.001 | 0.256 | 0.309 |
77+
| 5 | workingDocs | 0.256 | 0.001 | 0.256 | 0.309 |
78+
| 6 | purpose | 0.256 | 0.001 | 0.256 | 0.309 |
79+
| 7 | peoplePresent | 0.256 | 0.001 | 0.256 | 0.309 |
80+
| 8 | status | 0.256 | 0.030 | 0.256 | 0.000 |
81+
| 9 | meetingVideoLink | 0.233 | 0.000 | 0.234 | 0.290 |
82+
| 10 | tags | 0.209 | 0.000 | 0.209 | 0.000 |
83+
84+
## Clustering (Field Co-occurrence Graph)
85+
- Average Clustering Coefficient: 0.882
86+
87+
### Top Nodes by Clustering Coefficient
88+
| Rank | Field | Clustering |
89+
|------|-------|------------|
90+
| 1 | tags | 1.000 |
91+
| 2 | workgroup_id | 1.000 |
92+
| 3 | meetingInfo | 1.000 |
93+
| 4 | workgroup | 1.000 |
94+
| 5 | noSummaryGiven | 1.000 |
95+
| 6 | canceledSummary | 1.000 |
96+
| 7 | type | 1.000 |
97+
| 8 | agendaItems | 1.000 |
98+
| 9 | timestampedVideo | 1.000 |
99+
| 10 | assignee | 1.000 |
100+
101+
## Connected Components (Field Co-occurrence Graph)
102+
- Number of Components: 6
103+
- Component Sizes (top 10): [12, 12, 10, 4, 4, 2]
104+
- Sample of Largest Component Nodes (top 10):
105+
- typeOfMeeting
106+
- host
107+
- mediaLink
108+
- documenter
109+
- workingDocs
110+
- purpose
111+
- peoplePresent
112+
- miroBoardLink
113+
- otherMediaLink
114+
- date
115+

0 commit comments

Comments
 (0)