Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions scripts/benchmark-search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/usr/bin/env ts-node
/**
* benchmark-search.ts
* ===================
* Benchmarks the course full-text search before and after the GIN-index
* migration by running 200 randomised queries against a real database and
* reporting P50 / P95 / P99 latencies.
*
* Usage (requires a running Postgres with the DB specified in DATABASE_URL):
*
* # Seed 100k courses first (only needed once):
* DATABASE_URL=postgres://... ts-node scripts/benchmark-search.ts --seed
*
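* # Run the benchmark (after migration has been applied):
* DATABASE_URL=postgres://... ts-node scripts/benchmark-search.ts
*
* # Optionally restrict the run to a single search path:
* DATABASE_URL=postgres://... ts-node scripts/benchmark-search.ts --fts-only
* DATABASE_URL=postgres://... ts-node scripts/benchmark-search.ts --ilike-only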
*
* The script will:
* 1. Connect directly via `pg` (no NestJS overhead) to measure raw DB time.
* 2. For each search path (legacy ILIKE, then FTS), run 20 warm-up queries
* followed by 200 timed queries with realistic terms.
* 3. Print Avg / P50 / P95 / P99 timings and flag whether P95 < 50 ms (the
* acceptance criterion).
*
* Prerequisites:
* pnpm add -D ts-node pg @types/pg (already in devDependencies)
*/

import { Client } from 'pg';
import { performance } from 'perf_hooks';

// ─── Configuration ────────────────────────────────────────────────────────────

/** Connection string for the target database; an unset or empty env var falls back to a local `teachlink` DB. */
const DATABASE_URL = process.env.DATABASE_URL || 'postgres://postgres:postgres@localhost:5432/teachlink';

/** Number of synthetic courses inserted when the script runs with `--seed`. */
const SEED_COUNT = 100_000;

/** Untimed queries issued before measurement starts. */
const WARMUP_RUNS = 20;

/** Timed queries per benchmarked search path. */
const BENCHMARK_RUNS = 200;

/** P95 latency budget in milliseconds (the acceptance criterion). */
const P95_BUDGET_MS = 50;

/** Pool of search phrases used both for seeding course text and for querying. */
const SEARCH_TERMS = [
  'javascript',
  'python',
  'web development',
  'machine learning',
  'data science',
  'react hooks',
  'node js',
  'typescript',
  'sql database',
  'api design',
  'docker kubernetes',
  'css flexbox',
  'vue angular',
  'cloud computing',
  'agile scrum',
  'design patterns',
  'functional programming',
  'graphql rest',
  'security testing',
  'devops',
];

// ─── Helpers ──────────────────────────────────────────────────────────────────

/**
 * Reads the p-th percentile from an ascending-sorted sample using the
 * nearest-rank method (rank = ceil(p/100 * n)).
 *
 * @param sorted - Sample values already sorted in ascending order; the array
 *   is neither re-sorted nor mutated here.
 * @param p - Percentile to read, nominally 0–100. Values outside that range
 *   are clamped to the first / last element instead of indexing out of bounds.
 * @returns The selected sample value, or `NaN` when the sample is empty or
 *   `p` is not a number (so callers can still format the result safely).
 */
function percentile(sorted: readonly number[], p: number): number {
  if (sorted.length === 0 || Number.isNaN(p)) {
    return Number.NaN;
  }
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  // Clamp on both sides: p <= 0 would give rank -1, p > 100 would run past the end.
  const idx = Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[idx] ?? Number.NaN;
}

/** Picks one phrase from `SEARCH_TERMS` uniformly at random. */
function randomTerm(): string {
  const pick = Math.floor(Math.random() * SEARCH_TERMS.length);
  return SEARCH_TERMS[pick];
}

/**
 * Bulk-inserts `SEED_COUNT` synthetic, published courses so the benchmark
 * runs against a realistically sized table.
 *
 * Rows are sent in multi-row INSERT statements of up to 1000 rows each, with
 * every value passed as a bind parameter. Titles and descriptions cycle
 * through `SEARCH_TERMS` so every benchmark query has matching rows.
 * Progress is written to stdout on a single, carriage-return-refreshed line.
 *
 * After loading, the table is ANALYZEd: the benchmark may start immediately
 * after seeding, and without fresh statistics the planner would still be
 * costing queries for the pre-seed table size.
 *
 * @param client - A connected `pg` client.
 */
async function seedCourses(client: Client): Promise<void> {
  console.log(`\nSeeding ${SEED_COUNT.toLocaleString()} courses…`);
  const batch = 1000;
  for (let i = 0; i < SEED_COUNT; i += batch) {
    const batchEnd = Math.min(i + batch, SEED_COUNT);
    const values: string[] = [];
    const params: unknown[] = [];
    for (let n = i; n < batchEnd; n++) {
      const term = SEARCH_TERMS[n % SEARCH_TERMS.length];
      // Placeholders are 1-based and each row consumes three of them.
      const p = params.length + 1;
      values.push(`($${p}, $${p + 1}, $${p + 2})`);
      params.push(
        `${term.charAt(0).toUpperCase() + term.slice(1)} Course #${n}`,
        `A comprehensive guide to ${term}. Covers fundamentals through advanced topics. Row ${n}.`,
        'published',
      );
    }
    await client.query(
      `INSERT INTO course (title, description, status)
VALUES ${values.join(', ')}
ON CONFLICT DO NOTHING`,
      params,
    );
    process.stdout.write(`\r ${batchEnd.toLocaleString()} / ${SEED_COUNT.toLocaleString()}`);
  }
  // Refresh planner statistics so the benchmark that follows is not planned
  // against stale row estimates.
  await client.query('ANALYZE course');
  console.log('\n Done.');
}

// ─── Benchmark functions ──────────────────────────────────────────────────────

/**
 * Legacy ILIKE path (before migration).
 *
 * Runs one case-insensitive substring search over title and description and
 * reports how long the round trip took.
 *
 * @param client - A connected `pg` client.
 * @param term - Search phrase; it is wrapped in `%…%` before being bound.
 * @returns Elapsed wall-clock time of the query in milliseconds.
 */
async function benchmarkIlike(client: Client, term: string): Promise<number> {
  const startedAt = performance.now();
  const pattern = `%${term}%`;
  await client.query(
    `SELECT id, title FROM course
WHERE title ILIKE $1 OR description ILIKE $1
LIMIT 20`,
    [pattern],
  );
  const finishedAt = performance.now();
  return finishedAt - startedAt;
}

/**
 * New FTS path (after migration).
 *
 * Runs one full-text search against `search_vector`, ranked by `ts_rank`,
 * and reports how long the round trip took.
 *
 * @param client - A connected `pg` client.
 * @param term - Search phrase, bound as-is and parsed by `plainto_tsquery`.
 * @returns Elapsed wall-clock time of the query in milliseconds.
 */
async function benchmarkFts(client: Client, term: string): Promise<number> {
  const startedAt = performance.now();
  const bindings = [term];
  await client.query(
    `SELECT id, title,
ts_rank(search_vector, plainto_tsquery('english', $1)) AS relevance
FROM course
WHERE search_vector @@ plainto_tsquery('english', $1)
ORDER BY relevance DESC
LIMIT 20`,
    bindings,
  );
  const finishedAt = performance.now();
  return finishedAt - startedAt;
}

/**
 * Drives one benchmark scenario and prints its latency summary.
 *
 * The timed function is first called `WARMUP_RUNS` times with the results
 * discarded, then `BENCHMARK_RUNS` times with each timing recorded. Queries
 * are issued strictly one after another, each with a random search term.
 * The report shows the run count, average, P50, P95 (with a pass/fail marker
 * against `P95_BUDGET_MS`) and P99.
 *
 * @param label - Heading printed above the results.
 * @param fn - Executes a single query and resolves to its duration in ms.
 * @param client - A connected `pg` client handed through to `fn`.
 */
async function runBenchmark(
  label: string,
  fn: (client: Client, term: string) => Promise<number>,
  client: Client,
): Promise<void> {
  console.log(`\n── ${label} ──`);

  // Untimed warm-up calls.
  let warmupsLeft = WARMUP_RUNS;
  while (warmupsLeft > 0) {
    await fn(client, randomTerm());
    warmupsLeft -= 1;
  }

  // Timed calls.
  const samples: number[] = [];
  while (samples.length < BENCHMARK_RUNS) {
    samples.push(await fn(client, randomTerm()));
  }

  // percentile() expects ascending order.
  samples.sort((x, y) => x - y);

  let total = 0;
  for (const ms of samples) {
    total += ms;
  }
  const mean = total / samples.length;
  const median = percentile(samples, 50);
  const p95 = percentile(samples, 95);
  const p99 = percentile(samples, 99);
  const verdict = p95 < P95_BUDGET_MS ? '✅ under budget' : `❌ OVER ${P95_BUDGET_MS}ms budget`;

  console.log(` Runs : ${BENCHMARK_RUNS}`);
  console.log(` Avg : ${mean.toFixed(2)} ms`);
  console.log(` P50 : ${median.toFixed(2)} ms`);
  console.log(` P95 : ${p95.toFixed(2)} ms ${verdict}`);
  console.log(` P99 : ${p99.toFixed(2)} ms`);
}

// ─── Entry point ─────────────────────────────────────────────────────────────

/**
 * Returns a copy of a connection string that is safe to log.
 *
 * The string is parsed as a URL and the password component is replaced, so
 * passwords containing `:` or `@` are masked in full. If the string cannot
 * be parsed as a URL, a best-effort regex mask is applied instead.
 */
function redactConnectionString(connectionString: string): string {
  try {
    const parsed = new URL(connectionString);
    if (parsed.password !== '') {
      parsed.password = '***';
    }
    return parsed.toString();
  } catch {
    return connectionString.replace(/:[^:@]+@/, ':***@');
  }
}

/**
 * Script entry point.
 *
 * Recognised CLI flags:
 *   --seed        insert the synthetic courses before benchmarking
 *   --fts-only    skip the legacy ILIKE benchmark
 *   --ilike-only  skip the FTS benchmark
 *
 * Connects to `DATABASE_URL`, reports the current course count, optionally
 * seeds, checks for the GIN index created by the migration, and runs the
 * selected benchmarks. The FTS benchmark is skipped when the index is absent.
 * The database connection is closed even when a step fails.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const seed = args.includes('--seed');
  const skipIlike = args.includes('--fts-only');
  const skipFts = args.includes('--ilike-only');

  console.log('Connecting to:', redactConnectionString(DATABASE_URL));
  const client = new Client({ connectionString: DATABASE_URL });
  await client.connect();

  try {
    // Check row count (COUNT(*) is returned by pg as a string).
    const { rows: [{ count }] } = await client.query<{ count: string }>('SELECT COUNT(*) FROM course');
    const courseCount = parseInt(count, 10);
    console.log(`Courses in DB: ${courseCount.toLocaleString()}`);

    if (seed) {
      await seedCourses(client);
    }

    // courseCount was read before seeding, so only warn when no seeding happened.
    if (courseCount < 1000 && !seed) {
      console.warn('\n⚠️ Less than 1 000 courses in DB — run with --seed for a realistic benchmark.');
    }

    // Check if GIN index exists (i.e. migration has been applied)
    const { rows: idxRows } = await client.query<{ indexname: string }>(
      `SELECT indexname FROM pg_indexes
WHERE tablename = 'course' AND indexname = 'IDX_course_search_vector'`,
    );
    const ginExists = idxRows.length > 0;
    console.log(`GIN index present: ${ginExists ? '✅ yes' : '❌ no (run migration first)'}`);

    if (!skipIlike) {
      await runBenchmark('BEFORE — ILIKE (sequential scan)', benchmarkIlike, client);
    }

    if (!skipFts) {
      if (!ginExists) {
        console.log('\n⚠️ Skipping FTS benchmark — GIN index not found. Apply the migration first.');
      } else {
        await runBenchmark('AFTER — FTS / GIN index', benchmarkFts, client);
      }
    }
  } finally {
    // Always release the connection, including when a query or benchmark throws.
    await client.end();
  }

  console.log('\nDone.\n');
}

// Start the script. Any rejection escaping main() is printed and turned into
// a non-zero exit status.
const reportFailureAndExit = (failure: unknown): void => {
  console.error(failure);
  process.exit(1);
};

main().catch(reportFailureAndExit);
16 changes: 16 additions & 0 deletions src/courses/entities/course.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,22 @@ export class Course {
@Column({ type: 'text', nullable: true })
submissionNote?: string;

/**
* Pre-built tsvector for full-text search, mapped to the `search_vector`
* database column.
*
* The value is generated by PostgreSQL (migration
* 1751200000000-add-course-fts-tsvector) and indexed with GIN, so the
* application only ever reads it:
* - `insert: false` / `update: false` keep the column out of the INSERT and
* UPDATE statements built for this entity; the database computes the value.
* - `select: false` keeps it out of ordinary entity loads; the SearchService
* adds it explicitly when scoring results.
* NOTE(review): SearchService is not visible from this file — confirm.
*/
@Column({
type: 'tsvector',
nullable: true,
select: false,
insert: false,
update: false,
name: 'search_vector',
})
searchVector?: string;

@CreateDateColumn()
@Index()
createdAt: Date;
Expand Down
51 changes: 51 additions & 0 deletions src/migrations/1751200000000-add-course-fts-tsvector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { MigrationInterface, QueryRunner } from 'typeorm';

/**
* Adds PostgreSQL full-text search support to the `course` table.
*
* Changes:
* 1. Adds `search_vector` — a `tsvector` GENERATED ALWAYS AS STORED column
* that combines the title (weight A) and description (weight B) using the
* English text search configuration. PostgreSQL automatically keeps the
* column in sync whenever a row is inserted or updated, so no triggers are
* required.
*
* 2. Creates a GIN index `IDX_course_search_vector` on the generated column.
* GIN (Generalized Inverted Index) is the canonical index type for
* tsvector columns and enables sub-millisecond @@ lookups even on tables
* with millions of rows.
*
* Requires PostgreSQL ≥ 12 (generated stored columns).
*
* Performance expectation:
* Before: ILIKE '%query%' → sequential scan, O(n) per request.
* After: search_vector @@ plainto_tsquery(...) → GIN bitmap scan, O(log n + k).
*/
export class AddCourseFtsTsvector1751200000000 implements MigrationInterface {
  /**
   * Applies the migration: adds the generated `search_vector` column to
   * `course`, then builds the GIN index over it. Both statements use
   * IF NOT EXISTS, so re-running them is harmless.
   */
  public async up(queryRunner: QueryRunner): Promise<void> {
    // Generated tsvector column:
    //  - title is weighted 'A' and description 'B', so title matches rank higher;
    //  - coalesce() turns NULL text into '' so the expression never yields NULL;
    //  - STORED means the value is computed on write rather than on every read.
    const addSearchVectorColumn = `
ALTER TABLE "course"
ADD COLUMN IF NOT EXISTS "search_vector" tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(description, '')), 'B')
) STORED
`;

    // GIN index over the generated column.
    const createGinIndex = `
CREATE INDEX IF NOT EXISTS "IDX_course_search_vector"
ON "course" USING GIN ("search_vector")
`;

    await queryRunner.query(addSearchVectorColumn);
    await queryRunner.query(createGinIndex);
  }

  /**
   * Reverts the migration: drops the index first, then the column it covers.
   */
  public async down(queryRunner: QueryRunner): Promise<void> {
    const rollbackStatements = [
      'DROP INDEX IF EXISTS "IDX_course_search_vector"',
      'ALTER TABLE "course" DROP COLUMN IF EXISTS "search_vector"',
    ];
    for (const statement of rollbackStatements) {
      await queryRunner.query(statement);
    }
  }
}
Loading
Loading