From bacdb024167f9ed44a0055e23d50677d96b3ffba Mon Sep 17 00:00:00 2001 From: Artem Niehrieiev Date: Tue, 2 Jun 2026 12:26:26 +0000 Subject: [PATCH] feat: add read-only validation for MongoDB aggregation pipelines and corresponding tests --- backend/src/ai-core/tools/query-validators.ts | 47 +++++++++++++++++ ...est-info-from-table-with-ai-v7.use.case.ts | 13 ++++- .../data-structures/found-user-info.ro.ts | 2 +- .../saas-microservice/saas.module.ts | 1 + .../utils/build-found-user-info-ro.ts | 4 +- ...-mongo-pipeline-readonly-validator.test.ts | 52 +++++++++++++++++++ 6 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 backend/test/ava-tests/non-saas-tests/non-saas-mongo-pipeline-readonly-validator.test.ts diff --git a/backend/src/ai-core/tools/query-validators.ts b/backend/src/ai-core/tools/query-validators.ts index c088144a1..a4870e847 100644 --- a/backend/src/ai-core/tools/query-validators.ts +++ b/backend/src/ai-core/tools/query-validators.ts @@ -28,6 +28,53 @@ export function isValidSQLQuery(query: string): boolean { return true; } +// Aggregation operators that either write to a collection ($out, $merge) or execute +// server-side JavaScript ($function, $accumulator, $where). None of these belong in a +// read-only AI query, and the substring blocklist in `isValidMongoDbCommand` cannot +// detect them, so they must be rejected by walking the parsed pipeline. 
+const FORBIDDEN_MONGO_OPERATORS: ReadonlySet<string> = new Set([ + '$out', + '$merge', + '$function', + '$accumulator', + '$where', +]); + +function pipelineContainsForbiddenOperator(node: unknown): boolean { + if (Array.isArray(node)) { + return node.some((item) => pipelineContainsForbiddenOperator(item)); + } + if (!node || typeof node !== 'object') { + return false; + } + for (const [key, value] of Object.entries(node as Record<string, unknown>)) { + if (FORBIDDEN_MONGO_OPERATORS.has(key)) { + return true; + } + if (pipelineContainsForbiddenOperator(value)) { + return true; + } + } + return false; +} + +/** + * Ensures a MongoDB aggregation pipeline is read-only: it must parse as JSON and contain + * no write stages (`$out`, `$merge`) or server-side JavaScript operators (`$function`, + * `$accumulator`, `$where`) at any nesting depth (including `$lookup` sub-pipelines). This + * AST-level check complements the substring-based `isValidMongoDbCommand`, which cannot + * detect these stages. Returns false for unparseable pipelines (fail-closed). 
+ */ +export function isReadOnlyMongoAggregationPipeline(pipeline: string): boolean { + let parsedPipeline: unknown; + try { + parsedPipeline = JSON.parse(pipeline); + } catch { + return false; + } + return !pipelineContainsForbiddenOperator(parsedPipeline); +} + export function isValidMongoDbCommand(command: string): boolean { const upperCaseCommand = command.toUpperCase(); const forbiddenKeywords = ['DROP', 'REMOVE', 'UPDATE', 'INSERT', 'DELETE']; diff --git a/backend/src/entities/ai/use-cases/request-info-from-table-with-ai-v7.use.case.ts b/backend/src/entities/ai/use-cases/request-info-from-table-with-ai-v7.use.case.ts index 72efeccf6..d1bc6106e 100644 --- a/backend/src/entities/ai/use-cases/request-info-from-table-with-ai-v7.use.case.ts +++ b/backend/src/entities/ai/use-cases/request-info-from-table-with-ai-v7.use.case.ts @@ -21,7 +21,12 @@ import { collectMongoPipelineCollections } from '../../../ai-core/tools/collect- import { createDatabaseTools } from '../../../ai-core/tools/database-tools.js'; import { searchDocumentation } from '../../../ai-core/tools/documentation-search.js'; import { createDatabaseQuerySystemPrompt } from '../../../ai-core/tools/prompts.js'; -import { isValidMongoDbCommand, isValidSQLQuery, wrapQueryWithLimit } from '../../../ai-core/tools/query-validators.js'; +import { + isReadOnlyMongoAggregationPipeline, + isValidMongoDbCommand, + isValidSQLQuery, + wrapQueryWithLimit, +} from '../../../ai-core/tools/query-validators.js'; import { MessageBuilder } from '../../../ai-core/utils/message-builder.js'; import { encodeError, encodeToToon } from '../../../ai-core/utils/toon-encoder.js'; import AbstractUseCase from '../../../common/abstract-use.case.js'; @@ -298,6 +303,12 @@ export class RequestInfoFromTableWithAIUseCaseV7 'Invalid MongoDB command. Please ensure it is a read-only aggregation pipeline without any forbidden keywords.', ); } + if (!isReadOnlyMongoAggregationPipeline(pipeline)) { + throw new Error( + 'Invalid MongoDB command. 
Aggregation stages that write data ($out, $merge) or execute ' + + 'server-side JavaScript ($function, $accumulator, $where) are not allowed.', + ); + } await this.assertUserCanReadPipelineCollections( pipeline, inputTableName, diff --git a/backend/src/microservices/saas-microservice/data-structures/found-user-info.ro.ts b/backend/src/microservices/saas-microservice/data-structures/found-user-info.ro.ts index 886f45093..d8d94173f 100644 --- a/backend/src/microservices/saas-microservice/data-structures/found-user-info.ro.ts +++ b/backend/src/microservices/saas-microservice/data-structures/found-user-info.ro.ts @@ -1,5 +1,5 @@ import { UserEntity } from '../../../entities/user/user.entity.js'; type DataKeys = { [K in keyof T]: T[K] extends (...args: never[]) => unknown ? never : K }[keyof T]; -export type FoundUserInfoRO = Omit>, 'password'>; +export type FoundUserInfoRO = Omit>, 'password' | 'otpSecretKey'>; export type FoundUserInfoWithoutCompanyRO = Omit; diff --git a/backend/src/microservices/saas-microservice/saas.module.ts b/backend/src/microservices/saas-microservice/saas.module.ts index 40d367ff0..c14a12a7c 100644 --- a/backend/src/microservices/saas-microservice/saas.module.ts +++ b/backend/src/microservices/saas-microservice/saas.module.ts @@ -122,6 +122,7 @@ export class SaasModule { .forRoutes( { path: 'saas/company/registered', method: RequestMethod.POST }, { path: 'saas/user/:userId', method: RequestMethod.GET }, + { path: 'saas/users/email/:userEmail', method: RequestMethod.GET }, { path: 'saas/user/register', method: RequestMethod.POST }, { path: 'saas/user/demo/register', method: RequestMethod.POST }, { path: 'saas/user/google/login', method: RequestMethod.POST }, diff --git a/backend/src/microservices/saas-microservice/utils/build-found-user-info-ro.ts b/backend/src/microservices/saas-microservice/utils/build-found-user-info-ro.ts index 46c1eac33..35d2a94c3 100644 --- a/backend/src/microservices/saas-microservice/utils/build-found-user-info-ro.ts 
+++ b/backend/src/microservices/saas-microservice/utils/build-found-user-info-ro.ts @@ -2,11 +2,11 @@ import { UserEntity } from '../../../entities/user/user.entity.js'; import { FoundUserInfoRO, FoundUserInfoWithoutCompanyRO } from '../data-structures/found-user-info.ro.js'; export function buildFoundUserInfoRO(user: UserEntity): FoundUserInfoRO { - const { password: _password, ...userInfo } = user; + const { password: _password, otpSecretKey: _otpSecretKey, ...userInfo } = user; return userInfo; } export function buildFoundUserInfoWithoutCompanyRO(user: UserEntity): FoundUserInfoWithoutCompanyRO { - const { password: _password, company: _company, ...userInfo } = user; + const { password: _password, company: _company, otpSecretKey: _otpSecretKey, ...userInfo } = user; return userInfo; } diff --git a/backend/test/ava-tests/non-saas-tests/non-saas-mongo-pipeline-readonly-validator.test.ts b/backend/test/ava-tests/non-saas-tests/non-saas-mongo-pipeline-readonly-validator.test.ts new file mode 100644 index 000000000..f36236397 --- /dev/null +++ b/backend/test/ava-tests/non-saas-tests/non-saas-mongo-pipeline-readonly-validator.test.ts @@ -0,0 +1,52 @@ +import test from 'ava'; +import { isReadOnlyMongoAggregationPipeline } from '../../../src/ai-core/tools/query-validators.js'; + +test('allows a plain read-only pipeline', (t) => { + t.true(isReadOnlyMongoAggregationPipeline('[{"$match":{"status":"active"}},{"$group":{"_id":"$type"}}]')); +}); + +test('allows a $lookup read pipeline', (t) => { + t.true( + isReadOnlyMongoAggregationPipeline( + '[{"$lookup":{"from":"orders","localField":"id","foreignField":"user_id","as":"o"}},{"$unwind":"$o"}]', + ), + ); +}); + +test('rejects $out (collection overwrite)', (t) => { + t.false(isReadOnlyMongoAggregationPipeline('[{"$match":{}},{"$limit":0},{"$out":"users"}]')); +}); + +test('rejects $merge (collection write)', (t) => { + t.false(isReadOnlyMongoAggregationPipeline('[{"$merge":{"into":"users","whenMatched":"replace"}}]')); 
+}); + +test('rejects $where (server-side JS)', (t) => { + t.false(isReadOnlyMongoAggregationPipeline('[{"$match":{"$where":"sleep(10000) || true"}}]')); +}); + +test('rejects $function (server-side JS)', (t) => { + t.false( + isReadOnlyMongoAggregationPipeline( + '[{"$addFields":{"x":{"$function":{"body":"function(){return 1;}","args":[],"lang":"js"}}}}]', + ), + ); +}); + +test('rejects $accumulator (server-side JS)', (t) => { + t.false( + isReadOnlyMongoAggregationPipeline( + '[{"$group":{"_id":"$k","v":{"$accumulator":{"init":"function(){return 0}","accumulate":"function(){}","accumulateArgs":[],"merge":"function(){}","lang":"js"}}}}]', + ), + ); +}); + +test('rejects a write stage nested inside a $lookup sub-pipeline', (t) => { + t.false( + isReadOnlyMongoAggregationPipeline('[{"$lookup":{"from":"orders","as":"o","pipeline":[{"$out":"stolen"}]}}]'), + ); +}); + +test('returns false (fail-closed) for an unparseable pipeline', (t) => { + t.false(isReadOnlyMongoAggregationPipeline('not valid json {')); +});