Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions app/api/annotations/[slug]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
type AnnotationStatus,
type AnnotationType
} from "@/lib/annotations";
import { isFieldPathSafe } from "@/lib/safe-path";

export const dynamic = "force-dynamic";

Expand Down Expand Up @@ -36,6 +37,16 @@ export async function POST(
if (!field_path) {
return NextResponse.json({ error: "field_path required" }, { status: 400 });
}
// field_path is later fed to lib/pipeline/extraction-paths.ts:setField, which
// walks the extraction.json object graph and writes at the final segment.
// Reject paths whose shape would land a write on Object.prototype — see
// lib/safe-path.ts and the spot-fix accept flow.
if (!isFieldPathSafe(field_path)) {
return NextResponse.json(
{ error: "field_path malformed or contains forbidden segment" },
{ status: 400 }
);
}
if (type !== "flag" && type !== "note") {
return NextResponse.json({ error: "type must be 'flag' or 'note'" }, { status: 400 });
}
Expand Down
94 changes: 70 additions & 24 deletions lib/pipeline/extraction-paths.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,37 @@
* the path resolves to such a wrapped scalar, getField returns the wrapper
* and unwrapValue returns just the value. setField writes to `wrapper.value`
* and replaces `wrapper.evidence` with the supplied evidence (per skill rules).
*
* Security note: field_path strings ultimately come from the /api/annotations
* route and from spotfix_runs rows. Without the guards in lib/safe-path.ts,
* a segment like `__proto__` would let setField write to Object.prototype
* — a process-wide prototype-pollution primitive. Every helper here routes
* through tokenizePathSafe and treats malformed / forbidden paths as
* benign no-ops (read helpers) or typed failure results (setField).
*/
import fs from "node:fs";
import path from "node:path";
import {
FORBIDDEN_SEGMENTS,
tokenizePathSafe,
type Tokens as SafeTokens,
} from "@/lib/safe-path";

export type Tokens = (string | number)[];
export type Tokens = SafeTokens;

/** Parse a field_path like `cpu_skus[3].tdp_w` or `a.b.0.c` into tokens. */
/**
* Parse a field_path like `cpu_skus[3].tdp_w` or `a.b.0.c` into tokens.
*
* Throws on empty input, malformed shape, or a forbidden segment
* (`__proto__` / `prototype` / `constructor`). Callers that need a
* non-throwing form should use `tokenizePathSafe` from `@/lib/safe-path`.
*/
export function tokenizePath(fieldPath: string): Tokens {
const out: Tokens = [];
// Normalize bracket notation `[3]` → `.3` then split on dots.
const normalized = fieldPath.replace(/\[(\d+)\]/g, ".$1");
for (const seg of normalized.split(".")) {
if (seg === "") continue;
if (/^\d+$/.test(seg)) out.push(Number(seg));
else out.push(seg);
const result = tokenizePathSafe(fieldPath);
if (!result.ok) {
throw new Error(result.reason);
}
return out;
return result.tokens;
}

/** Walk to the parent of the final path segment. Returns null if the parent doesn't exist. */
Expand All @@ -38,17 +52,23 @@ export function getParent(root: any, tokens: Tokens): { parent: any; key: string
for (let i = 0; i < tokens.length - 1; i++) {
const k = tokens[i];
if (cur == null) return null;
// Belt-and-suspenders: tokenizePath already rejects forbidden segments,
// but if a caller bypasses tokenizePath and constructs tokens directly
// we still refuse to descend into prototype-shaped keys.
if (typeof k === "string" && FORBIDDEN_SEGMENTS.has(k)) return null;
cur = (cur as any)[k as any];
}
if (cur == null) return null;
return { parent: cur, key: tokens[tokens.length - 1] };
const finalKey = tokens[tokens.length - 1];
if (typeof finalKey === "string" && FORBIDDEN_SEGMENTS.has(finalKey)) return null;
return { parent: cur, key: finalKey };
}

/** Returns the raw value at the path (the wrapper if wrapped); null if unresolved. */
/** Returns the raw value at the path (the wrapper if wrapped); undefined if unresolved or unsafe. */
export function getField(root: any, fieldPath: string): any {
const tokens = tokenizePath(fieldPath);
if (tokens.length === 0) return undefined;
const parent = getParent(root, tokens);
const tokenized = tokenizePathSafe(fieldPath);
if (!tokenized.ok) return undefined;
const parent = getParent(root, tokenized.tokens);
if (!parent) return undefined;
return (parent.parent as any)[parent.key as any];
}
Expand All @@ -61,14 +81,25 @@ export function unwrapValue(field: any): any {
return field;
}

/** Returns true if a field at this path resolves in the extraction object. */
/**
* Returns true if a field at this path resolves in the extraction object.
* Unsafe or malformed paths resolve to false (they don't throw).
*
* Uses `Object.hasOwn` rather than `in` so the check cannot accidentally
* walk into the prototype chain even on a path that escaped tokenizePathSafe.
*/
export function pathResolves(root: any, fieldPath: string): boolean {
const tokens = tokenizePath(fieldPath);
if (tokens.length === 0) return false;
const tokenized = tokenizePathSafe(fieldPath);
if (!tokenized.ok) return false;
let cur: any = root;
for (const k of tokens) {
for (const k of tokenized.tokens) {
if (cur == null) return false;
if (!(typeof cur === "object" && (k as any) in cur)) return false;
if (typeof cur !== "object") return false;
if (typeof k === "number") {
if (!Array.isArray(cur) || k < 0 || k >= cur.length) return false;
} else if (!Object.prototype.hasOwnProperty.call(cur, k)) {
return false;
}
cur = cur[k as any];
}
return true;
Expand All @@ -88,11 +119,17 @@ export function setField(
newValue: any,
options: { evidence?: any; spotFix?: any } = {}
): { ok: true; before: any } | { ok: false; reason: string } {
const tokens = tokenizePath(fieldPath);
if (tokens.length === 0) return { ok: false, reason: "empty field_path" };
const parent = getParent(root, tokens);
const tokenized = tokenizePathSafe(fieldPath);
if (!tokenized.ok) return { ok: false, reason: tokenized.reason };
const parent = getParent(root, tokenized.tokens);
if (!parent) return { ok: false, reason: "parent path does not resolve" };
const key = parent.key;
// getParent already rejects forbidden final keys, but check once more
// before the write — this is the single line that would otherwise be the
// prototype-pollution sink.
if (typeof key === "string" && FORBIDDEN_SEGMENTS.has(key)) {
return { ok: false, reason: `forbidden final segment: ${key}` };
}
const cur = (parent.parent as any)[key as any];
const before = cur === undefined ? null : JSON.parse(JSON.stringify(cur));

Expand All @@ -113,7 +150,16 @@ export function setField(
// Otherwise the whole subtree is replaced. Spot-fix on a whole row is rare;
// skill says "flag on a row means something in this row is wrong", but the
// agent typically targets a sub-cell. Replace wholesale here.
(parent.parent as any)[key as any] = newValue;
//
// Use defineProperty rather than `[key] = ...` so even an unguarded key
// (which shouldn't reach here) cannot replace a prototype slot — an own,
// writable, enumerable data property always shadows any prototype key.
Object.defineProperty(parent.parent, key as PropertyKey, {
value: newValue,
writable: true,
enumerable: true,
configurable: true,
});
return { ok: true, before };
}

Expand Down
98 changes: 98 additions & 0 deletions lib/safe-path.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Safe field-path parsing for object-graph access into extraction.json.
*
* extraction.json field paths follow the spot-fix-extraction skill convention:
* - dotted: a.b.c
* - bracket: a[3].b
* - mixed: a.3.b
* - nested: vendor_extensions.lenovo.machine_types
*
* Field paths flow in from /api/annotations and from spotfix_runs rows into
* lib/pipeline/extraction-paths.ts:setField, which walks the object graph
* and writes at the final segment. Without the guards in this module the
* write can land on Object.prototype (segments `__proto__` / `prototype` /
* `constructor`) — a process-wide prototype-pollution primitive.
*
* Two layers of defense, used by both the API boundary and the path helpers:
*
* 1. FORBIDDEN_SEGMENTS — explicit deny-list of segments that map onto
* Object.prototype's surface. Any path containing one is rejected.
* 2. FIELD_PATH_RE — positive shape regex. Even if a future Node release
* adds a new prototype-shaped key, paths that don't match the strict
* identifier / array-index grammar are still rejected.
*
* The two layers compose; either alone is insufficient.
*/

/**
* Property names that map onto Object.prototype's surface and must never be
* used as field-path segments. Reads and writes through these segments are
* both rejected.
*/
export const FORBIDDEN_SEGMENTS: ReadonlySet<string> = new Set([
"__proto__",
"prototype",
"constructor",
]);

/**
* Strict shape for a field_path:
* - first segment: identifier `[A-Za-z_][A-Za-z0-9_-]*`
* - subsequent: `.identifier` | `[digits]` | `.digits`
*
* Hyphens are allowed inside identifiers because some schemas use them
* (e.g. `dimm-slots`). Digits-only segments index into arrays.
*/
export const FIELD_PATH_RE: RegExp =
/^[A-Za-z_][A-Za-z0-9_-]*(\.[A-Za-z_][A-Za-z0-9_-]*|\[\d+\]|\.\d+)*$/;

export type Tokens = (string | number)[];

export type TokenizeResult =
| { ok: true; tokens: Tokens }
| { ok: false; reason: string };

/**
* Tokenize a field_path safely. Returns `{ ok: false, reason }` for empty
* input, malformed shape, or any forbidden segment. Never throws.
*
* The forbidden-segment check fires on EVERY segment, not just the final
* one — so a path like `a.__proto__.b` is rejected even though the actual
* pollution write would only land if `__proto__` were the parent of the
* final segment. Defense in depth: also blocks read-side leaks via
* getField / pathResolves.
*/
export function tokenizePathSafe(fieldPath: unknown): TokenizeResult {
if (typeof fieldPath !== "string" || fieldPath.length === 0) {
return { ok: false, reason: "field_path must be a non-empty string" };
}
if (!FIELD_PATH_RE.test(fieldPath)) {
return { ok: false, reason: `malformed field_path: ${fieldPath}` };
}
const out: Tokens = [];
const normalized = fieldPath.replace(/\[(\d+)\]/g, ".$1");
for (const seg of normalized.split(".")) {
if (seg === "") continue;
if (FORBIDDEN_SEGMENTS.has(seg)) {
return {
ok: false,
reason: `forbidden path segment: ${seg}`,
};
}
if (/^\d+$/.test(seg)) out.push(Number(seg));
else out.push(seg);
}
if (out.length === 0) {
return { ok: false, reason: "no usable segments" };
}
return { ok: true, tokens: out };
}

/**
* Boolean shortcut for the API boundary — true iff the field_path passes
* the deny-list AND the shape regex. Equivalent to
* `tokenizePathSafe(fieldPath).ok`.
*/
export function isFieldPathSafe(fieldPath: unknown): boolean {
return tokenizePathSafe(fieldPath).ok;
}