Skip to content

Commit 745ba3b

Browse files
committed
added copy/cut and paste reducers
1 parent d374e37 commit 745ba3b

3 files changed

Lines changed: 369 additions & 10 deletions

File tree

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
"""
2+
Helpers for extracting copy, cut, and paste signals from writing analytics
3+
events.
4+
"""
5+
6+
from __future__ import annotations
7+
8+
import datetime as dt
9+
import re
10+
11+
12+
# Time windows, in milliseconds, used by the paste/copy/cut state updaters.
# How long after a paste signal a document save is still attributed to it.
PASTE_WAIT_MS = 2500
# How long after a menu-like event a document save may be inferred as a paste.
MENU_FLAG_MS = 1500
# Signals of the same kind arriving within this window are treated as duplicates.
DEDUP_MS = 750
# Inserted-text length (characters) at or above which a paste is counted as "big".
BIG_PASTE_THRESHOLD = 200

# Matches Google Docs document URLs. The dots in the host name are escaped so
# that only the literal host `docs.google.com` matches (an unescaped `.` would
# accept any character in that position).
DOC_URL_RE = re.compile(
    r"^https://docs\.google\.com/document/d/(?P<DOCID>[^/\s]+)/(?P<ACT>[a-zA-Z]+)"
)
18+
19+
20+
def unwrap_event(event):
    """
    Return the client payload of an event.

    When `event` is a dict holding a dict under "client", that inner dict
    is returned. A dict without such a payload is returned as-is, and any
    non-dict input yields an empty dict.
    """
    if not isinstance(event, dict):
        return {}
    inner = event.get("client")
    if isinstance(inner, dict):
        return inner
    return event
24+
25+
26+
def get_doc_id(event):
    """
    Return the document id for an event, or None when it cannot be found.

    An explicit, truthy `doc_id` on the client payload wins. Otherwise the
    payload's `object` must carry a `url` matching DOC_URL_RE, in which
    case `object["id"]` is returned. The URL is only used to confirm the
    event refers to a Google Doc; the id captured by the regex is not used.
    """
    client = unwrap_event(event)
    doc_id = client.get("doc_id")
    if doc_id:
        return doc_id

    # `object` may be missing, None, or some other non-dict value; treat all
    # of those as "no document" instead of raising AttributeError.
    obj = client.get("object")
    if not isinstance(obj, dict):
        return None

    # Only strings can be matched against the regex; anything else (including
    # an empty string) means there is no usable URL.
    url = obj.get("url")
    if not isinstance(url, str) or not DOC_URL_RE.match(url):
        return None

    return obj.get("id")
37+
38+
39+
def event_action(client):
    """
    Return the lower-cased action name of a client payload.

    The first truthy value among the payload's "action", "event", "type"
    and "event_type" fields is used. Failing that, the "action" and then
    "type" fields of a dict-valued "keystroke" entry are tried. An empty
    string is returned when nothing is found.
    """
    for field in ("action", "event", "type", "event_type"):
        candidate = client.get(field)
        if candidate:
            return str(candidate).lower()

    nested = client.get("keystroke")
    if not isinstance(nested, dict):
        return ""

    for field in ("action", "type"):
        candidate = nested.get(field)
        if candidate:
            return str(candidate).lower()
    return ""
45+
46+
47+
def keys_info(client):
    """
    Summarise the keyboard details of a client payload.

    Each field is looked up first in the dict-valued "keystroke" entry (if
    any) and then on the payload itself. The result is a dict with:

    - "key": lower-cased key name, or "" when absent
    - "code": the key code string, or "" when absent
    - "event_type": lower-cased "type" field, or "" when absent
    - "ctrl_or_meta": True when any of ctrl / ctrlKey / metaKey is truthy
    - "key_code": integer from keyCode / which when numeric, else None
    """
    nested = client.get("keystroke")
    if not isinstance(nested, dict):
        nested = {}

    def lookup(*names):
        # First truthy value, checking the keystroke dict before the payload
        # for each name; otherwise the last value that was looked up.
        found = None
        for name in names:
            for source in (nested, client):
                found = source.get(name)
                if found:
                    return found
        return found

    raw_key = lookup("key")
    raw_key_code = lookup("keyCode", "which")
    modifier_held = bool(lookup("ctrl", "ctrlKey")) or bool(lookup("metaKey"))

    if isinstance(raw_key_code, (int, float)):
        numeric_code = int(raw_key_code)
    else:
        numeric_code = None

    return {
        "key": str(raw_key).lower() if raw_key else "",
        "code": str(lookup("code") or ""),
        "event_type": str(lookup("type") or "").lower(),
        "ctrl_or_meta": modifier_held,
        "key_code": numeric_code,
    }
62+
63+
64+
def is_copy(client):
    """
    Tell whether a client payload represents a copy.

    True when the payload's action is one of the known copy action names,
    or when it is a keydown with ctrl/meta held and the key identifies "c"
    (by key name, by code "KeyC", or by numeric key code 67).
    """
    copy_actions = {"copy", "clipboard_copy", "gdocs_copy", "menu_copy", "edit_copy"}
    if event_action(client) in copy_actions:
        return True

    pressed = keys_info(client)
    if pressed["event_type"] != "keydown" or not pressed["ctrl_or_meta"]:
        return False
    return pressed["key"] == "c" or pressed["code"] == "KeyC" or pressed["key_code"] == 67
72+
73+
74+
def is_cut(client):
    """
    Tell whether a client payload represents a cut.

    True when the payload's action is one of the known cut action names,
    or when it is a keydown with ctrl/meta held and the key identifies "x"
    (by key name, by code "KeyX", or by numeric key code 88).
    """
    cut_actions = {"cut", "clipboard_cut", "gdocs_cut", "menu_cut", "edit_cut"}
    if event_action(client) in cut_actions:
        return True

    pressed = keys_info(client)
    if pressed["event_type"] != "keydown" or not pressed["ctrl_or_meta"]:
        return False
    return pressed["key"] == "x" or pressed["code"] == "KeyX" or pressed["key_code"] == 88
82+
83+
84+
def is_paste_keyboard(client):
    """
    Tell whether a client payload is a direct paste signal.

    True when the payload's action is one of the known paste action names,
    or when it is a keydown with ctrl/meta held and the key identifies "v"
    (by key name, by code "KeyV", or by numeric key code 86).
    """
    paste_actions = {"paste", "clipboard_paste", "gdocs_paste", "insert_from_clipboard"}
    if event_action(client) in paste_actions:
        return True

    pressed = keys_info(client)
    if pressed["event_type"] != "keydown" or not pressed["ctrl_or_meta"]:
        return False
    return pressed["key"] == "v" or pressed["code"] == "KeyV" or pressed["key_code"] == 86
92+
93+
94+
def looks_like_menu_paste(client):
    """
    Tell whether a client payload hints at a paste made through a menu.

    True for the explicit menu paste action names and also for a bare
    "contextmenu" action.
    """
    menu_hints = {"menu_paste", "edit_paste", "contextmenu_paste", "contextmenu"}
    return event_action(client) in menu_hints
99+
100+
101+
def timestamp_ms(event, client=None):
    """
    Return a millisecond timestamp for an event.

    Lookup order:

    1. A numeric "timestamp", "ts", "time" or "t" field on the client
       payload, then on the event itself. Values below 10**11 are taken
       to be seconds and scaled to milliseconds; larger values are taken
       to be milliseconds already.
    2. A numeric `event["server"]["time"]`, always scaled by 1000.
    3. The current time.

    `client` defaults to `unwrap_event(event)` when omitted or empty.
    Non-dict events/payloads are skipped rather than raising.
    """
    client = client or unwrap_event(event)
    for source in (client, event):
        # The event may be None or another non-dict value; skip it instead
        # of failing on `.get`.
        if not isinstance(source, dict):
            continue
        for key in ("timestamp", "ts", "time", "t"):
            value = source.get(key)
            if isinstance(value, (int, float)):
                return int(value * 1000) if value < 10**11 else int(value)

    server = event.get("server") if isinstance(event, dict) else None
    value = server.get("time") if isinstance(server, dict) else None
    if isinstance(value, (int, float)):
        return int(value * 1000)

    # Use an aware datetime: calling .timestamp() on the naive result of
    # utcnow() interprets it as local time and skews the value by the
    # host's UTC offset.
    return int(dt.datetime.now(dt.timezone.utc).timestamp() * 1000)
115+
116+
117+
def collect_inserted_text(command, output):
    """
    Append every inserted string found in a command tree to `output`.

    A command contributes its "s" value when its "ty" is "is" and the
    value is a non-empty string. The walk then descends into a dict-valued
    "nmc" entry and into each element of "mts", in that order. Non-dict
    commands are ignored. Nothing is returned; `output` is mutated.
    """
    if isinstance(command, dict):
        text = command.get("s") if command.get("ty") == "is" else None
        if text and isinstance(text, str):
            output.append(text)

        nested = command.get("nmc")
        if isinstance(nested, dict):
            collect_inserted_text(nested, output)

        children = command.get("mts")
        if children:
            for child in children:
                collect_inserted_text(child, output)
131+
132+
133+
def extract_insert_from_gdocs_save(client):
    """
    Return all text inserted by a save payload as one string.

    Walks every command of every entry in the payload's "bundles" and
    concatenates the inserted strings in the order they are encountered.
    Missing or empty bundles/commands produce an empty string.
    """
    pieces = []
    bundles = client.get("bundles") or []
    for bundle in bundles:
        commands = (bundle or {}).get("commands") or []
        for command in commands:
            collect_inserted_text(command, pieces)
    return "".join(pieces)
139+
140+
141+
def paste_length_bin(length):
    """
    Map a paste length to the name of its size bucket.

    Lengths of zero or less map to "none", 1-20 to "short_1_20",
    21-200 to "medium_21_200", and anything larger to "long_201_plus".
    """
    buckets = (
        (0, "none"),
        (20, "short_1_20"),
        (200, "medium_21_200"),
    )
    for upper_bound, label in buckets:
        if length <= upper_bound:
            return label
    return "long_201_plus"
149+
150+
151+
def append_recent(items, entry, limit=10):
    """
    Return a new list with `entry` appended, keeping only the newest items.

    `items` may be None or any iterable; it is copied, never mutated. At
    most `limit` trailing items are kept. A `limit` of zero or less keeps
    nothing and returns an empty list.
    """
    recent = list(items or [])
    recent.append(entry)
    # Guard explicitly: `recent[-0:]` would return the whole list rather
    # than an empty one, and negative limits would slice arbitrarily.
    if limit <= 0:
        return []
    return recent[-limit:]
157+
158+
159+
def default_paste_state():
    """
    Build a fresh, zeroed paste-tracking state.

    Every call returns new dict and list objects, so the result can be
    mutated freely by the caller.
    """
    state = dict.fromkeys(
        (
            "paste_count",
            "pastes_with_length",
            "total_paste_chars",
            "max_paste_len",
            "last_paste_len",
            "big_pastes",
        ),
        0,
    )
    state["length_bins"] = dict.fromkeys(
        ("short_1_20", "medium_21_200", "long_201_plus"), 0
    )
    state["recent_pastes"] = []
    for window_key in ("awaiting_paste_until", "maybe_menu_paste_until", "last_paste_signal_ms"):
        state[window_key] = 0
    return state
177+
178+
179+
def default_copy_cut_state():
    """
    Build a fresh, zeroed copy/cut-tracking state.

    Every call returns new dict and list objects, so the result can be
    mutated freely by the caller.
    """
    state = dict.fromkeys(
        ("copy_count", "cut_count", "last_copy_ts", "last_cut_ts"), 0
    )
    state["recent_events"] = []
    return state
187+
188+
189+
def update_paste_state(event, state):
    """
    Fold one event into the paste-tracking state.

    Returns the updated state (a new top-level dict; the input `state` and
    its nested containers are not mutated), or False when the event does
    not change the state.

    Handling, in order:

    1. A direct paste signal (see `is_paste_keyboard`) increments
       `paste_count`, opens a PASTE_WAIT_MS window in which a later save
       supplies the paste length, and logs a length-less entry. Signals
       within DEDUP_MS of the previous one are dropped.
    2. A menu-like event (see `looks_like_menu_paste`) only opens a
       MENU_FLAG_MS window in which a later save may be inferred as a
       paste.
    3. Any event other than "google_docs_save", or a save that inserted no
       text, is ignored.
    4. A save with inserted text records length statistics when it falls
       inside the awaiting window, or inside the menu window (in which
       case it is also counted as a paste). Both windows are then cleared.
    """
    state = dict(default_paste_state() if state is None else state)
    client = event.get("client", {}) or {}
    ts_ms = timestamp_ms(event, client)

    if is_paste_keyboard(client):
        if ts_ms - state.get("last_paste_signal_ms", 0) <= DEDUP_MS:
            return False
        state["paste_count"] = state.get("paste_count", 0) + 1
        state["last_paste_signal_ms"] = ts_ms
        state["awaiting_paste_until"] = ts_ms + PASTE_WAIT_MS
        state["recent_pastes"] = append_recent(
            state.get("recent_pastes"),
            {"timestamp_ms": ts_ms, "length": None, "source": "keyboard_signal"},
        )
        return state

    if looks_like_menu_paste(client):
        state["maybe_menu_paste_until"] = ts_ms + MENU_FLAG_MS
        return state

    if event_action(client) != "google_docs_save":
        return False

    inserted_text = extract_insert_from_gdocs_save(client)
    if not inserted_text:
        return False

    paste_length = len(inserted_text)
    awaiting_paste_until = state.get("awaiting_paste_until", 0)
    maybe_menu_paste_until = state.get("maybe_menu_paste_until", 0)
    counted_from_save = False

    # No paste signal is pending, but a menu-like event was seen recently:
    # infer that this save carries a menu paste and count it here.
    if ts_ms <= maybe_menu_paste_until and ts_ms > awaiting_paste_until:
        if ts_ms - state.get("last_paste_signal_ms", 0) <= DEDUP_MS:
            return False
        state["paste_count"] = state.get("paste_count", 0) + 1
        state["last_paste_signal_ms"] = ts_ms
        counted_from_save = True

    # Outside both windows: an ordinary save, not attributable to a paste.
    if ts_ms > awaiting_paste_until and not counted_from_save:
        return False

    state["pastes_with_length"] = state.get("pastes_with_length", 0) + 1
    state["total_paste_chars"] = state.get("total_paste_chars", 0) + paste_length
    state["max_paste_len"] = max(state.get("max_paste_len", 0), paste_length)
    state["last_paste_len"] = paste_length
    # NOTE(review): `>=` makes a paste of exactly BIG_PASTE_THRESHOLD chars
    # "big" while paste_length_bin may still file it under a smaller bin;
    # confirm this is intended.
    if paste_length >= BIG_PASTE_THRESHOLD:
        state["big_pastes"] = state.get("big_pastes", 0) + 1

    bin_name = paste_length_bin(paste_length)
    if bin_name != "none":
        # `dict(state)` above is only a shallow copy, so incrementing the
        # nested dict in place would leak into the caller's state (or a
        # shared default). Work on a private copy; also tolerates a
        # missing or None "length_bins".
        length_bins = dict(state.get("length_bins") or {})
        length_bins[bin_name] = length_bins.get(bin_name, 0) + 1
        state["length_bins"] = length_bins

    state["recent_pastes"] = append_recent(
        state.get("recent_pastes"),
        {
            "timestamp_ms": ts_ms,
            "length": paste_length,
            "source": "menu_inferred" if counted_from_save else "google_docs_save",
        },
    )
    # The pending paste has been resolved; close both windows.
    state["awaiting_paste_until"] = 0
    state["maybe_menu_paste_until"] = 0
    return state
255+
256+
257+
def update_copy_cut_state(event, state):
    """
    Fold one event into the copy/cut-tracking state.

    Returns the updated state as a new top-level dict, or False when the
    event is neither a copy nor a cut, or when it arrives within DEDUP_MS
    of the previous event of the same kind. Copy is checked before cut.
    """
    current = dict(default_copy_cut_state() if state is None else state)
    payload = event.get("client", {}) or {}
    now_ms = timestamp_ms(event, payload)

    if is_copy(payload):
        kind, count_key, stamp_key = "copy", "copy_count", "last_copy_ts"
    elif is_cut(payload):
        kind, count_key, stamp_key = "cut", "cut_count", "last_cut_ts"
    else:
        return False

    # Drop repeats of the same kind that arrive too close together.
    if now_ms - current.get(stamp_key, 0) <= DEDUP_MS:
        return False

    current[count_key] = current.get(count_key, 0) + 1
    current[stamp_key] = now_ms
    current["recent_events"] = append_recent(
        current.get("recent_events"),
        {"timestamp_ms": now_ms, "event_type": kind},
    )
    return current

modules/writing_observer/writing_observer/module.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@
9797
'update_docs': update_via_google(runtime=q.parameter("runtime"), doc_ids=q.variable('doc_sources')),
9898
"docs": q.select(q.keys('writing_observer.reconstruct', STUDENTS=q.variable("roster"), STUDENTS_path='user_id', RESOURCES=q.variable("update_docs"), RESOURCES_path='doc_id'), fields={'text': 'text'}),
9999
"docs_combined": q.join(LEFT=q.variable("docs"), RIGHT=q.variable("roster"), LEFT_ON='provenance.provenance.STUDENT.value.user_id', RIGHT_ON='user_id'),
100+
"paste_metrics": q.select(q.keys('writing_observer.lo_paste_reducer', STUDENTS=q.variable("roster"), STUDENTS_path='user_id', RESOURCES=q.variable("doc_sources"), RESOURCES_path='doc_id'), fields='All'),
101+
"copy_cut_metrics": q.select(q.keys('writing_observer.lo_copy_cut_reducer', STUDENTS=q.variable("roster"), STUDENTS_path='user_id', RESOURCES=q.variable("doc_sources"), RESOURCES_path='doc_id'), fields='All'),
100102
'nlp': process_texts(writing_data=q.variable('docs'), options=q.parameter('nlp_options', required=False, default=[])),
101103
'nlp_sep_proc': q.select(q.keys('writing_observer.nlp_components', STUDENTS=q.variable('roster'), STUDENTS_path='user_id', RESOURCES=q.variable("doc_ids"), RESOURCES_path='doc_id'), fields='All'),
102104
'nlp_combined': q.join(LEFT=q.variable(nlp_source), LEFT_ON='provenance.provenance.STUDENT.value.user_id', RIGHT=q.variable('roster'), RIGHT_ON='user_id'),
@@ -162,6 +164,16 @@
162164
"parameters": ["course_id"],
163165
"output": ""
164166
},
167+
"paste_metrics": {
168+
"returns": "paste_metrics",
169+
"parameters": ["course_id"],
170+
"output": ""
171+
},
172+
"copy_cut_metrics": {
173+
"returns": "copy_cut_metrics",
174+
"parameters": ["course_id"],
175+
"output": ""
176+
},
165177
"roster": {
166178
"returns": "roster",
167179
"parameters": ["course_id"],
@@ -258,6 +270,36 @@
258270

259271
# Incoming event APIs
260272
REDUCERS = [
273+
{
274+
'context': "org.mitros.writing_analytics",
275+
'scope': writing_observer.writing_analysis.gdoc_scope,
276+
'function': writing_observer.writing_analysis.lo_paste_reducer,
277+
'default': {
278+
'paste_count': 0,
279+
'pastes_with_length': 0,
280+
'total_paste_chars': 0,
281+
'max_paste_len': 0,
282+
'last_paste_len': 0,
283+
'big_pastes': 0,
284+
'length_bins': {'short_1_20': 0, 'medium_21_200': 0, 'long_201_plus': 0},
285+
'recent_pastes': [],
286+
'awaiting_paste_until': 0,
287+
'maybe_menu_paste_until': 0,
288+
'last_paste_signal_ms': 0
289+
}
290+
},
291+
{
292+
'context': "org.mitros.writing_analytics",
293+
'scope': writing_observer.writing_analysis.gdoc_scope,
294+
'function': writing_observer.writing_analysis.lo_copy_cut_reducer,
295+
'default': {
296+
'copy_count': 0,
297+
'cut_count': 0,
298+
'last_copy_ts': 0,
299+
'last_cut_ts': 0,
300+
'recent_events': []
301+
}
302+
},
261303
{
262304
'context': "org.mitros.writing_analytics",
263305
'scope': writing_observer.writing_analysis.gdoc_scope,
@@ -366,4 +408,4 @@
366408
'name': 'NLP Options',
367409
'suburl': 'nlp-options',
368410
'static_json': INDICATOR_JSONS
369-
}]
411+
}]

0 commit comments

Comments
 (0)