From e5df216b24df431ba11cf830807d2a4dd37f74a5 Mon Sep 17 00:00:00 2001 From: ab116699 Date: Tue, 10 Feb 2026 16:21:10 -0500 Subject: [PATCH 1/4] feat: test witness first event broadcast page context --- lib/config.ts | 7 +- lib/core/context.ts | 197 ++++++++++++++++++++++++++++++++++++++++++++ lib/edge/witness.ts | 20 ++++- lib/sdk.test.ts | 28 +++++++ lib/sdk.ts | 19 ++++- 5 files changed, 267 insertions(+), 4 deletions(-) create mode 100644 lib/core/context.ts diff --git a/lib/config.ts b/lib/config.ts index 24543fb0..3f466053 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -1,5 +1,6 @@ import { getConsent, inferRegulation } from "./core/regs/consent"; import type { CMPApiConfig, Consent } from "./core/regs/consent"; +import type { PageContextConfig } from "./core/context"; type Experiment = never; @@ -65,6 +66,10 @@ type InitConfig = { // When provided, the server will attempt to answer within the given time limit. // Some APIs like targeting may return partial responses depending at which stage the timeout occurred. timeout?: string; + // Page context configuration for extracting semantic content from the page. + // When enabled, context is sent with the first witness() call after page load. + // Set to true for defaults, or provide a PageContextConfig object for customization. 
+ pageContext?: PageContextConfig | boolean; }; type ResolvedConfig = { @@ -142,5 +147,5 @@ function generateSessionID(): string { .replace(/=+$/g, ""); } -export type { InitConsent, CMPApiConfig, InitConfig, ResolvedConfig, ABTestConfig, MatcherOverride, Experiment }; +export type { InitConsent, CMPApiConfig, InitConfig, ResolvedConfig, ABTestConfig, MatcherOverride, Experiment, PageContextConfig }; export { getConfig, DCN_DEFAULTS, generateSessionID }; diff --git a/lib/core/context.ts b/lib/core/context.ts new file mode 100644 index 00000000..7b79918d --- /dev/null +++ b/lib/core/context.ts @@ -0,0 +1,197 @@ +type SemanticContent = { + title: string; + description?: string; + keywords?: string[]; + canonicalUrl?: string; + ogTags?: Record<string, string>; + headings?: Array<{ level: number; text: string }>; + content?: string; + jsonLd?: object[]; + language?: string; +}; + +type ContextData = { + semantic: SemanticContent; + html?: string; + url: string; + referrer?: string; + extractedAt: number; +}; + +type PageContextConfig = { + includeHtml?: boolean; + contentSelector?: string; + maxContentLength?: number; + maxHtmlLength?: number; +}; + +const DEFAULT_MAX_CONTENT_LENGTH = 5000; +const DEFAULT_MAX_HTML_LENGTH = 50000; +const MAX_HEADINGS = 20; + +function extractSemanticContent(config: PageContextConfig): SemanticContent { + const semantic: SemanticContent = { + title: document.title || "", + }; + + // Extract meta description + const descriptionMeta = document.querySelector('meta[name="description"]'); + if (descriptionMeta) { + const content = descriptionMeta.getAttribute("content"); + if (content) { + semantic.description = content; + } + } + + // Extract meta keywords + const keywordsMeta = document.querySelector('meta[name="keywords"]'); + if (keywordsMeta) { + const content = keywordsMeta.getAttribute("content"); + if (content) { + semantic.keywords = content + .split(",") + .map((k) => k.trim()) + .filter(Boolean); + } + } + + // Extract canonical URL + const 
canonicalLink = document.querySelector('link[rel="canonical"]'); + if (canonicalLink) { + const href = canonicalLink.getAttribute("href"); + if (href) { + semantic.canonicalUrl = href; + } + } + + // Extract Open Graph tags + const ogTags: Record<string, string> = {}; + document.querySelectorAll('meta[property^="og:"]').forEach((meta) => { + const property = meta.getAttribute("property"); + const content = meta.getAttribute("content"); + if (property && content) { + const key = property.replace("og:", ""); + ogTags[key] = content; + } + }); + if (Object.keys(ogTags).length > 0) { + semantic.ogTags = ogTags; + } + + // Extract headings (h1-h3, max 20) + const headings: Array<{ level: number; text: string }> = []; + document.querySelectorAll("h1, h2, h3").forEach((heading) => { + if (headings.length >= MAX_HEADINGS) return; + const level = parseInt(heading.tagName.substring(1), 10); + const text = heading.textContent?.trim(); + if (text) { + headings.push({ level, text }); + } + }); + if (headings.length > 0) { + semantic.headings = headings; + } + + // Extract main content + const maxContentLength = config.maxContentLength ?? 
DEFAULT_MAX_CONTENT_LENGTH; + const contentElement = findContentElement(config.contentSelector); + if (contentElement) { + const text = extractTextContent(contentElement); + if (text) { + semantic.content = text.substring(0, maxContentLength); + } + } + + // Extract JSON-LD + const jsonLdScripts = document.querySelectorAll('script[type="application/ld+json"]'); + const jsonLdData: object[] = []; + jsonLdScripts.forEach((script) => { + try { + const data = JSON.parse(script.textContent || ""); + if (data && typeof data === "object") { + jsonLdData.push(data); + } + } catch { + // Ignore invalid JSON-LD + } + }); + if (jsonLdData.length > 0) { + semantic.jsonLd = jsonLdData; + } + + // Extract language + const lang = document.documentElement.getAttribute("lang"); + if (lang) { + semantic.language = lang; + } + + return semantic; +} + +function findContentElement(selector?: string): Element | null { + // Use provided selector if available + if (selector) { + return document.querySelector(selector); + } + + // Fall back to heuristics: main, article, or first large content block + const candidates = ["main", "article", '[role="main"]', ".content", "#content", ".post", ".article"]; + + for (const candidate of candidates) { + const element = document.querySelector(candidate); + if (element) { + return element; + } + } + + // Last resort: body + return document.body; +} + +function extractTextContent(element: Element): string { + // Clone to avoid modifying the DOM + const clone = element.cloneNode(true) as Element; + + // Remove script and style elements + clone.querySelectorAll("script, style, noscript, iframe, svg").forEach((el) => el.remove()); + + // Get text and normalize whitespace + const text = clone.textContent || ""; + return text.replace(/\s+/g, " ").trim(); +} + +function extractContext(config: PageContextConfig): ContextData { + const contextData: ContextData = { + semantic: extractSemanticContent(config), + url: window.location.href, + extractedAt: 
Date.now(), + }; + + // Include referrer if available + if (document.referrer) { + contextData.referrer = document.referrer; + } + + // Include HTML if configured + if (config.includeHtml) { + const maxHtmlLength = config.maxHtmlLength ?? DEFAULT_MAX_HTML_LENGTH; + contextData.html = document.documentElement.outerHTML.substring(0, maxHtmlLength); + } + + return contextData; +} + +function normalizeContextConfig(config: PageContextConfig | boolean | undefined): PageContextConfig | null { + if (!config) { + return null; + } + + if (config === true) { + return {}; + } + + return config; +} + +export type { SemanticContent, ContextData, PageContextConfig }; +export { extractContext, extractSemanticContent, normalizeContextConfig }; diff --git a/lib/edge/witness.ts b/lib/edge/witness.ts index 21b57744..1fa03ad4 100644 --- a/lib/edge/witness.ts +++ b/lib/edge/witness.ts @@ -1,16 +1,32 @@ import type { ResolvedConfig } from "../config"; +import type { ContextData } from "../core/context"; import { fetch } from "../core/network"; type WitnessProperties = { [key: string]: string | number | boolean | unknown[] | null | { [key: string]: unknown }; }; -function Witness(config: ResolvedConfig, event: string, properties: WitnessProperties): Promise<void> { - const evt = { +type WitnessPayload = { + event: string; + properties: WitnessProperties; + pageContext?: ContextData; +}; + +function Witness( + config: ResolvedConfig, + event: string, + properties: WitnessProperties, + context?: ContextData +): Promise<void> { + const evt: WitnessPayload = { event: event, properties: properties, }; + if (context) { + evt.pageContext = context; + } + return fetch("/witness", config, { method: "POST", headers: { diff --git a/lib/sdk.test.ts b/lib/sdk.test.ts index 0a6fcbe1..ddcbb219 100644 --- a/lib/sdk.test.ts +++ b/lib/sdk.test.ts @@ -347,6 +347,34 @@ describe("behavior testing of", () => { ); }); + test("witness with pageContext sends context on first call only", async () => { + const fetchSpy = 
jest.spyOn(window, "fetch"); + const sdk = new OptableSDK({ + ...defaultConfig, + pageContext: { capture: ["url", "referrer"] }, + }); + + // First call should include pageContext + await sdk.witness("firstEvent", {}); + expect(fetchSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + method: "POST", + _bodyText: expect.stringContaining('"pageContext"'), + url: expect.stringContaining("witness"), + }) + ); + + // Second call should NOT include pageContext (already sent) + await sdk.witness("secondEvent", {}); + expect(fetchSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + method: "POST", + _bodyText: '{"event":"secondEvent","properties":{}}', + url: expect.stringContaining("witness"), + }) + ); + }); + test("config has initTargeting true then constructor sends a targeting request", async () => { const fetchSpy = jest.spyOn(window, "fetch"); const sdk = new OptableSDK({ ...defaultConfig, initPassport: false, initTargeting: true }); diff --git a/lib/sdk.ts b/lib/sdk.ts index 098238ed..a64f864c 100644 --- a/lib/sdk.ts +++ b/lib/sdk.ts @@ -3,6 +3,8 @@ import { default as buildInfo } from "./build.json"; import { getConfig } from "./config"; import type { WitnessProperties } from "./edge/witness"; import type { ProfileTraits } from "./edge/profile"; +import type { PageContextConfig, ContextData } from "./core/context"; +import { extractContext, normalizeContextConfig } from "./core/context"; import { Identify } from "./edge/identify"; import { Uid2Token, Uid2TokenResponse } from "./edge/uid2_token"; import { Resolve, ResolveResponse } from "./edge/resolve"; @@ -28,8 +30,12 @@ class OptableSDK { public dcn: ResolvedConfig; protected init: Promise<void>; + private contextSent: boolean = false; + private contextConfig: PageContextConfig | null = null; + constructor(dcn: InitConfig) { this.dcn = getConfig(dcn); + this.contextConfig = normalizeContextConfig(dcn.pageContext); this.init = this.initialize(); } @@ -99,7 +105,18 @@ class OptableSDK { async 
witness(event: string, properties: WitnessProperties = {}): Promise<void> { await this.init; - return Witness(this.dcn, event, properties); + + let context: ContextData | undefined; + if (this.contextConfig && !this.contextSent) { + context = extractContext(this.contextConfig); + this.contextSent = true; + } + + return Witness(this.dcn, event, properties, context); + } + + resetContext(): void { + this.contextSent = false; } async profile(traits: ProfileTraits, id: string | null = null, neighbors: string[] | null = null): Promise<void> { From f7bb3ab8240c806dcb3f93f0569cb669306809cf Mon Sep 17 00:00:00 2001 From: ab116699 Date: Wed, 11 Feb 2026 11:11:18 -0500 Subject: [PATCH 2/4] feat: includeContext flag per witness call --- lib/sdk.test.ts | 20 +++++++++++++++----- lib/sdk.ts | 8 ++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/sdk.test.ts b/lib/sdk.test.ts index ddcbb219..ce30e0df 100644 --- a/lib/sdk.test.ts +++ b/lib/sdk.test.ts @@ -347,15 +347,25 @@ describe("behavior testing of", () => { ); }); - test("witness with pageContext sends context on first call only", async () => { + test("witness with pageContext requires explicit opt-in", async () => { const fetchSpy = jest.spyOn(window, "fetch"); const sdk = new OptableSDK({ ...defaultConfig, pageContext: { capture: ["url", "referrer"] }, }); - // First call should include pageContext + // Call without includeContext should NOT include pageContext await sdk.witness("firstEvent", {}); + expect(fetchSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + method: "POST", + _bodyText: '{"event":"firstEvent","properties":{}}', + url: expect.stringContaining("witness"), + }) + ); + + // Call with includeContext: true should include pageContext + await sdk.witness("secondEvent", {}, { includeContext: true }); expect(fetchSpy).toHaveBeenLastCalledWith( expect.objectContaining({ method: "POST", @@ -364,12 +374,12 @@ describe("behavior testing of", () => { }) ); - // Second call should NOT include 
pageContext (already sent) - await sdk.witness("secondEvent", {}); + // Subsequent call with includeContext: true should NOT include pageContext (already sent) + await sdk.witness("thirdEvent", {}, { includeContext: true }); expect(fetchSpy).toHaveBeenLastCalledWith( expect.objectContaining({ method: "POST", - _bodyText: '{"event":"secondEvent","properties":{}}', + _bodyText: '{"event":"thirdEvent","properties":{}}', url: expect.stringContaining("witness"), }) ); diff --git a/lib/sdk.ts b/lib/sdk.ts index a64f864c..e98e9294 100644 --- a/lib/sdk.ts +++ b/lib/sdk.ts @@ -103,11 +103,15 @@ class OptableSDK { return TargetingKeyValues(tdata); } - async witness(event: string, properties: WitnessProperties = {}): Promise<void> { + async witness( + event: string, + properties: WitnessProperties = {}, + options: { includeContext?: boolean } = {} + ): Promise<void> { await this.init; let context: ContextData | undefined; - if (this.contextConfig && !this.contextSent) { + if (options.includeContext && this.contextConfig && !this.contextSent) { context = extractContext(this.contextConfig); this.contextSent = true; } From 7254bd5d7968a259a4c99e0f141ca74539fca2e2 Mon Sep 17 00:00:00 2001 From: ab116699 Date: Wed, 11 Feb 2026 11:17:30 -0500 Subject: [PATCH 3/4] fix: eslint fix --- lib/config.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/config.ts b/lib/config.ts index 3f466053..d9a57c06 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -147,5 +147,14 @@ function generateSessionID(): string { .replace(/=+$/g, ""); } -export type { InitConsent, CMPApiConfig, InitConfig, ResolvedConfig, ABTestConfig, MatcherOverride, Experiment, PageContextConfig }; +export type { + InitConsent, + CMPApiConfig, + InitConfig, + ResolvedConfig, + ABTestConfig, + MatcherOverride, + Experiment, + PageContextConfig, +}; export { getConfig, DCN_DEFAULTS, generateSessionID }; From f24cf884b127e5a68ca9ba3f2b4c30fe14fad1eb Mon Sep 17 00:00:00 2001 From: ab116699 Date: Thu, 12 Feb 
2026 12:19:19 -0500 Subject: [PATCH 4/4] fix: address code review comment --- lib/core/context.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/lib/core/context.ts b/lib/core/context.ts index 7b79918d..d4b58d85 100644 --- a/lib/core/context.ts +++ b/lib/core/context.ts @@ -149,15 +149,10 @@ function findContentElement(selector?: string): Element | null { } function extractTextContent(element: Element): string { - // Clone to avoid modifying the DOM - const clone = element.cloneNode(true) as Element; - - // Remove script and style elements - clone.querySelectorAll("script, style, noscript, iframe, svg").forEach((el) => el.remove()); - - // Get text and normalize whitespace - const text = clone.textContent || ""; - return text.replace(/\s+/g, " ").trim(); + if (element instanceof HTMLElement && element.innerText) { + return element.innerText.trim(); + } + return ""; } function extractContext(config: PageContextConfig): ContextData {