diff --git a/VERSION b/VERSION index fcc4404a..0d5696b3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.0+2026.02.02T16.19.01.249Z.2ae8ac1a.berickson.20260120.comm.protocol.targets +0.1.0+2026.02.02T20.54.51.168Z.c0f2a8f0.berickson.20260113.extension.tab.ids diff --git a/extension/writing-process/src/background.js b/extension/writing-process/src/background.js index d5544d1a..058232fb 100644 --- a/extension/writing-process/src/background.js +++ b/extension/writing-process/src/background.js @@ -3,8 +3,7 @@ Background script. This works across all of Google Chrome. */ import { CONFIG } from "./service_worker_config.js"; -import { googledocs_id_from_url } from './writing_common'; - +import { googledocs_id_from_url, googledocs_tab_id_from_url } from './writing_common'; import * as loEvent from 'lo_event/lo_event/lo_event.js'; import * as loEventDebug from 'lo_event/lo_event/debugLog.js'; import { websocketLogger } from 'lo_event/lo_event/websocketLogger.js'; @@ -203,6 +202,7 @@ chrome.webRequest.onBeforeRequest.addListener( versus GMT. */ event = { 'doc_id': googledocs_id_from_url(request.url), + 'tab_id': googledocs_tab_id_from_url(request.url), 'url': request.url, 'bundles': JSON.parse(formdata.bundles), 'rev': formdata.rev, @@ -216,6 +216,7 @@ chrome.webRequest.onBeforeRequest.addListener( */ event = { 'doc_id': googledocs_id_from_url(request.url), + 'tab_id': googledocs_tab_id_from_url(request.url), 'url': request.url, 'formdata': formdata, 'rev': formdata.rev, diff --git a/extension/writing-process/src/writing.js b/extension/writing-process/src/writing.js index 1e63f9df..084affc5 100644 --- a/extension/writing-process/src/writing.js +++ b/extension/writing-process/src/writing.js @@ -5,7 +5,7 @@ /* For debugging purposes: we know the extension is active */ // document.body.style.border = "1px solid blue"; -import { googledocs_id_from_url, treeget } from './writing_common'; +import { googledocs_id_from_url, googledocs_tab_id_from_url, treeget } from './writing_common'; /* General Utility Functions */ @@ -49,6 +49,7 @@ function log_event(event_type, event) { "type": "http://schema.learning-observer.org/writing-observer/", "title": google_docs_title(), "id": doc_id(), + "tab_id": tab_id(), "url": window.location.href, }; @@ -78,6 +79,18 @@ function doc_id() { } } +function tab_id() { + /* + Extract the Google document's current Tab ID from the window + */ + try { + return googledocs_tab_id_from_url(window.location.href); + } catch(error) { + log_error("Couldn't read document's tab id"); + return null; + } +} + function this_is_a_google_doc() { /* diff --git a/extension/writing-process/src/writing_common.js b/extension/writing-process/src/writing_common.js index 6242b3ab..12fc8107 100644 --- a/extension/writing-process/src/writing_common.js +++ b/extension/writing-process/src/writing_common.js @@ -78,6 +78,30 @@ export function googledocs_id_from_url(url) { return null; } +export function googledocs_tab_id_from_url(url) { + /* + Given a URL like: + https://docs.google.com/document/d//edit?tab=t.95yb7msfl8ul + https://docs.google.com/document/d//edit?tab=t.95yb7msfl8ul#heading=h.abc123 + extract the associated tab ID: + t.95yb7msfl8ul + Return null if not a valid Google Docs URL or tab param. + + Regex explanation: + 1. `/.*:\/\/` - match any protocol (http/https) followed by :// + 2. `docs\.google\.com\/document\/` - match google docs domain + 3. `.*` - match any characters until we find the tab param + 4. `[?&]tab=` - match tab parameter in query string + 5. `([^&#]+)` - capture tab value, stopping at & (next param) or # (hash fragment) + 6. `/i` - case insensitive + */ + var match = url.match(/.*:\/\/docs\.google\.com\/document\/.*[?&]tab=([^&#]+)/i); + if (match) { + return match[1]; + } + return null; +} + var writing_lasthash = ""; function unique_id() { /* diff --git a/modules/writing_observer/VERSION b/modules/writing_observer/VERSION index 32e666db..0d5696b3 100644 --- a/modules/writing_observer/VERSION +++ b/modules/writing_observer/VERSION @@ -1 +1 @@ -0.1.0+2026.01.13T18.33.25.519Z.0984e08f.berickson.20260113.abstract.time.on.task.reducers +0.1.0+2026.02.02T20.54.51.168Z.c0f2a8f0.berickson.20260113.extension.tab.ids diff --git a/modules/writing_observer/writing_observer/module.py b/modules/writing_observer/writing_observer/module.py index 009262ce..b23df832 100644 --- a/modules/writing_observer/writing_observer/module.py +++ b/modules/writing_observer/writing_observer/module.py @@ -261,7 +261,13 @@ { 'context': "org.mitros.writing_analytics", 'scope': writing_observer.writing_analysis.gdoc_scope, - 'function': writing_observer.writing_analysis.time_on_task, + 'function': writing_observer.writing_analysis.gdoc_scope_time_on_task, + 'default': {'saved_ts': 0} + }, + { + 'context': "org.mitros.writing_analytics", + 'scope': writing_observer.writing_analysis.gdoc_tab_scope, + 'function': writing_observer.writing_analysis.gdoc_tab_scope_time_on_task, 'default': {'saved_ts': 0} }, { @@ -286,6 +292,12 @@ 'function': writing_observer.writing_analysis.document_list, 'default': {'docs': []} }, + { + 'context': "org.mitros.writing_analytics", + 'scope': writing_observer.writing_analysis.gdoc_scope, + 'function': writing_observer.writing_analysis.tab_list, + 'default': {'tabs': {}} + }, { 'context': "org.mitros.writing_analytics", 'scope': writing_observer.writing_analysis.student_scope, diff --git a/modules/writing_observer/writing_observer/reconstruct_doc.py b/modules/writing_observer/writing_observer/reconstruct_doc.py index d060c083..39cb5732 100644 --- a/modules/writing_observer/writing_observer/reconstruct_doc.py +++ b/modules/writing_observer/writing_observer/reconstruct_doc.py @@ -40,6 +40,7 @@ def __new__(cls): new_object._text = "" new_object._position = 0 new_object._edit_metadata = {} + new_object._tabs = {} new_object.fix_validity() return new_object @@ -100,6 +101,14 @@ def from_json(json_rep): new_object._text = json_rep.get('text', '') new_object._position = json_rep.get('position', 0) new_object._edit_metadata = json_rep.get('edit_metadata', {}) + + if 'tabs' in json_rep and json_rep['tabs']: + new_object._tabs = {} + for tab_id, tab_data in json_rep['tabs'].items(): + new_object._tabs[tab_id] = google_text.from_json(tab_data) + else: + new_object._tabs = {} + new_object.fix_validity() return new_object @@ -155,11 +164,14 @@ def json(self): ''' This serializes to JSON. ''' - return { + result = { 'text': self._text, 'position': self._position, 'edit_metadata': self._edit_metadata } + if self._tabs: + result['tabs'] = {tab_id: tab.json for tab_id, tab in self._tabs.items()} + return result def get_parsed_text(self): @@ -169,6 +181,15 @@ def get_parsed_text(self): return self._text.replace(PLACEHOLDER, "") +def dispatch_command(doc, cmd): + if cmd['ty'] in dispatch: + doc = dispatch[cmd['ty']](doc, **cmd) + else: + print("Unrecogized Google Docs command: " + repr(cmd['ty'])) + # TODO: Log issue and fix it! + return doc + + def command_list(doc, commands): ''' This will process a list of commands. It is helpful either when @@ -176,11 +197,7 @@ def command_list(doc, commands): new `save` requests. ''' for item in commands: - if item['ty'] in dispatch: - doc = dispatch[item['ty']](doc, **item) - else: - print("Unrecogized Google Docs command: " + repr(item['ty'])) - # TODO: Log issue and fix it! + doc = dispatch_command(doc, item) return doc @@ -301,6 +318,34 @@ def null(doc, **kwargs): return doc +def nm(doc, nmc, nmr, **kwargs): + ''' + Handle named commands for tabs (sub-documents). + + * `nmc` is the command to execute + * `nmr` is the name/reference list, which contains the target tab ID + ''' + # Find the target tab from the nmr list + target_tab = None + for item in reversed(nmr or []): + if isinstance(item, str) and item.startswith("t."): + target_tab = item + break + + if target_tab is None: + # No tab specified, apply to main document + doc = dispatch_command(doc, nmc) + else: + # Ensure the tab exists + if target_tab not in doc._tabs: + doc._tabs[target_tab] = google_text() + + # Apply the command to the sub-document + doc._tabs[target_tab] = dispatch_command(doc._tabs[target_tab], nmc) + + return doc + + # This dictionary maps the `ty` parameter to the function which # handles data of that type. @@ -312,6 +357,7 @@ def null(doc, **kwargs): # these can't be handled like plain 'is' or 'ds' because the include different fields # (e.g., 'sugid', presumably, suggestion id.) dispatch = { + 'ac': null, # new tab title 'ae': null, 'ase': null, # suggestion 'ast': null, # suggestion. Image? @@ -326,8 +372,10 @@ def null(doc, **kwargs): 'is': insert, 'iss': null, # suggested insertion 'mefd': null, # suggestion + 'mkch': null, # name of the first tab 'mlti': multi, 'msfd': null, # suggestion + 'nm': nm, # named command for tabs 'null': null, 'ord': null, 'ras': null, # suggestion. Autospell? @@ -344,6 +392,7 @@ def null(doc, **kwargs): 'sl': null, 'ste': null, # suggestion 'sue': null, # suggestion + 'ucp': null, # updated tab title 'uefd': null, # suggestion 'use': null, # suggestion 'umv': null, diff --git a/modules/writing_observer/writing_observer/writing_analysis.py b/modules/writing_observer/writing_observer/writing_analysis.py index 4d1a836e..85dc8742 100644 --- a/modules/writing_observer/writing_observer/writing_analysis.py +++ b/modules/writing_observer/writing_observer/writing_analysis.py @@ -64,6 +64,8 @@ else: gdoc_scope = student_scope # HACK for backwards-compatibility +gdoc_tab_scope = Scope([KeyField.STUDENT, EventField('doc_id'), EventField('tab_id')]) + @learning_observer.communication_protocol.integration.publish_function('writing_observer.activity_map') def determine_activity_status(last_ts): @@ -71,7 +73,6 @@ def determine_activity_status(last_ts): return {'status': status} -@kvs_pipeline(scope=gdoc_scope) async def time_on_task(event, internal_state): ''' This adds up time intervals between successive timestamps. If the interval @@ -87,6 +88,10 @@ async def time_on_task(event, internal_state): return internal_state, internal_state +gdoc_scope_time_on_task = kvs_pipeline(scope=gdoc_scope)(time_on_task) +gdoc_tab_scope_time_on_task = kvs_pipeline(scope=gdoc_tab_scope)(time_on_task) + + @kvs_pipeline(scope=gdoc_scope) async def binned_time_on_task(event, internal_state): ''' @@ -262,6 +267,159 @@ async def document_list(event, internal_state): return False, False +def _iter_commands_from_client(client): + """Yield command dicts from either bundles (google_docs_save) or history (document_history).""" + event_type = client.get("event") + + if event_type == "google_docs_save": + for bundle in client.get("bundles") or []: + for command in bundle.get("commands") or []: + if isinstance(command, dict): + yield command + + elif event_type == "document_history": + history = client.get("history") or {} + changelog = history.get("changelog") or [] + # Each changelog item is expected to be like: [, ...] + for item in changelog: + if isinstance(item, (list, tuple)) and item and isinstance(item[0], dict): + yield item[0] + + +def _iter_leaf_commands(client): + for cmd in _iter_commands_from_client(client): + if not isinstance(cmd, dict): + continue + + if cmd.get("ty") == "mlti": + for sub in cmd.get("mts") or []: + if isinstance(sub, dict): + yield sub + else: + yield cmd + + +def _get_event_time(event, client): + """Resolve the timestamp once per event, with fallback.""" + server_time = (event.get("server") or {}).get("time") + if server_time is not None: + return server_time + return client.get("timestamp") or (client.get("metadata") or {}).get("ts") + + +def extract_from_ucp(command): + if command.get("ty") != "ucp": + return None, None + d = command.get("d") + try: + return d[0], d[1][1][1] + except (TypeError, IndexError, KeyError): + return None, None + + +def extract_from_mkch(command): + if command.get("ty") != "mkch": + return None, None + + d = command.get("d") + try: + return 't.0', d[0][1] + except (TypeError, IndexError, KeyError, AttributeError): + return None, None + + +def extract_from_ac(command): + if command.get("ty") != "ac": + return None, None + + d = command.get("d") + try: + return d[0], d[1][1] + except (TypeError, IndexError, KeyError, AttributeError): + return None, None + + +TITLE_EXTRACTORS = { + "ucp": extract_from_ucp, + "mkch": extract_from_mkch, + "ac": extract_from_ac, +} + + +def _extract_all_tab_titles(client): + """ + Extract all (tab_id, title) pairs from leaf commands (including those inside mlti). + """ + event_type = client.get("event") + if event_type not in ("google_docs_save", "document_history"): + return [] + + out = [] + for cmd in _iter_leaf_commands(client): + ty = cmd.get("ty") + extractor = TITLE_EXTRACTORS.get(ty) + if not extractor: + continue + tab_id, title = extractor(cmd) + if tab_id is None: + continue + out.append((tab_id, title)) + return out + + +def _extract_tab_id(event): + client = event.get("client", {}) or {} + tab_id = client.get("tab_id") or event.get("tab_id") + if tab_id: + return tab_id + url = client.get("url") or client.get("object", {}).get("url") or event.get("url") + if not url: + return None + match = re.search(r"tab=([^&#]+)", url) + return match.group(1) if match else None + + +@kvs_pipeline(scope=gdoc_scope, null_state={"tabs": {}}) +async def tab_list(event, internal_state): + """ + Track per-document tab metadata (tab_id, title, last_accessed) per student. + + Rules: + - If client.tab_id exists AND is already in state: ONLY update last_accessed for that tab. + - Still add new tabs discovered in commands (and set last_accessed for those new tabs). + - For existing tabs discovered in commands: update title if present, but do NOT touch last_accessed + unless it's the active existing tab (handled first). + """ + internal_state = internal_state or {"tabs": {}} + tabs = internal_state.get("tabs") or {} + + client = event.get("client") or {} + server_time = _get_event_time(event, client) + + active_tab_id = _extract_tab_id(event) + + # 1) Only bump last_accessed for the active tab IF it already exists in state + if active_tab_id is not None and active_tab_id in tabs: + tabs[active_tab_id]["last_accessed"] = server_time + + # 2) Add/update titles for all extracted tabs + for tab_id, title in _extract_all_tab_titles(client): + if tab_id not in tabs: + # New tab: initialize and set last_accessed now + tabs[tab_id] = { + "tab_id": tab_id, + "title": title, + "last_accessed": server_time, + } + else: + # Existing tab: update title if we learned one; do not update last_accessed here + if title is not None: + tabs[tab_id]["title"] = title + + internal_state["tabs"] = tabs + return internal_state, internal_state + + @kvs_pipeline(scope=student_scope) async def last_document(event, internal_state): '''