From 90d2646fa4854f8b85228ef37bb562b276eda767 Mon Sep 17 00:00:00 2001 From: Ciocanel Razvan Date: Tue, 7 Apr 2026 15:24:52 +0300 Subject: [PATCH] fix: robust pdfjs-dist worker resolution + Docker deployment support - Fix pdfjs-dist worker resolution using createRequire() instead of relative imports, which break in bundled/Docker environments - Add explicit worker options (useWorkerFetch, isEvalSupported, useSystemFonts) for reliable PDF parsing - Add web app Dockerfile with multi-stage build (alpine, standalone output) - Add ingestion-worker Dockerfile improvements - Add .dockerignore and docker-compose.prod.yml for self-hosted deployment --- .dockerignore | 22 +++++++ apps/web/Dockerfile | 58 ++++++++++++++++ apps/web/next.config.ts | 15 +++-- docker-compose.prod.yml | 84 ++++++++++++++++++++++++ services/ingestion-worker/Dockerfile | 1 + services/ingestion-worker/src/lib/pdf.ts | 32 +++++---- 6 files changed, 194 insertions(+), 18 deletions(-) create mode 100644 .dockerignore create mode 100644 apps/web/Dockerfile create mode 100644 docker-compose.prod.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1c41626 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,22 @@ +node_modules +.pnpm-store +.next +.turbo +dist +coverage +.git +.env +.env.local +.env.*.local +*.log +*.tsbuildinfo +local-blobs +test-parse.* +e2e/test-results +e2e/report +e2e/.auth +.DS_Store +.vscode +.idea +.claude +docs diff --git a/apps/web/Dockerfile b/apps/web/Dockerfile new file mode 100644 index 0000000..0ec815d --- /dev/null +++ b/apps/web/Dockerfile @@ -0,0 +1,58 @@ +FROM node:20-alpine AS base + +RUN corepack enable && corepack prepare pnpm@9.15.4 --activate + +WORKDIR /app + +# Copy workspace root files +COPY package.json pnpm-workspace.yaml pnpm-lock.yaml* turbo.json tsconfig.json ./ + +# Copy all package.json files for dependency resolution +COPY apps/web/package.json apps/web/ +COPY packages/common/package.json packages/common/ +COPY packages/database/package.json packages/database/ +COPY packages/blob-storage/package.json packages/blob-storage/ +COPY packages/ai/package.json packages/ai/ +COPY packages/ingestion/package.json packages/ingestion/ +COPY packages/events/package.json packages/events/ +COPY packages/sharing/package.json packages/sharing/ + +# Install dependencies (shamefully-hoist for Next.js bundler compatibility) +RUN echo "shamefully-hoist=true" > .npmrc && pnpm install --frozen-lockfile + +# Copy all source +COPY packages/ packages/ +COPY apps/web/ apps/web/ + +# Copy root .env.example as build-time .env (real values come at runtime) +COPY .env.example .env + +# Build the Next.js app via turbo (resolves workspace deps correctly) +RUN pnpm turbo build --filter=@openvitals/web + +# Production stage +FROM node:20-alpine AS runner +WORKDIR /app + +ENV NODE_ENV=production +ENV NEXT_TELEMETRY_DISABLED=1 + +RUN addgroup --system --gid 1001 nodejs && \ + adduser --system --uid 1001 nextjs + +# Create blob directory with correct ownership +RUN mkdir -p /data/blobs && chown nextjs:nodejs /data/blobs + +# Copy standalone output +COPY --from=base /app/apps/web/.next/standalone ./ +COPY --from=base /app/apps/web/.next/static ./apps/web/.next/static +COPY --from=base /app/apps/web/public ./apps/web/public + +USER nextjs + +EXPOSE 3000 + +ENV PORT=3000 +ENV HOSTNAME="0.0.0.0" + +CMD ["node", "apps/web/server.js"] diff --git a/apps/web/next.config.ts b/apps/web/next.config.ts index 236ca86..1b3e415 100644 --- a/apps/web/next.config.ts +++ b/apps/web/next.config.ts @@ -1,13 +1,14 @@ -import type { NextConfig } from 'next'; +import type { NextConfig } from "next"; const nextConfig: NextConfig = { + output: "standalone", transpilePackages: [ - '@openvitals/common', - '@openvitals/database', - '@openvitals/blob-storage', - '@openvitals/ai', - '@openvitals/events', - '@openvitals/sharing', + "@openvitals/common", + "@openvitals/database", + "@openvitals/blob-storage", + "@openvitals/ai", + "@openvitals/events", + "@openvitals/sharing", ], }; diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..2bb5e33 --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,84 @@ +services: + postgres: + image: postgres:16-alpine + restart: unless-stopped + environment: + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB:-openvitals} + volumes: + - pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] + interval: 10s + timeout: 5s + retries: 5 + + web: + build: + context: . + dockerfile: apps/web/Dockerfile + restart: unless-stopped + ports: + - "${WEB_PORT:-3000}:3000" + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-openvitals} + BETTER_AUTH_SECRET: ${BETTER_AUTH_SECRET} + BETTER_AUTH_URL: ${BETTER_AUTH_URL:-http://localhost:3000} + NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL:-http://localhost:3000} + OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} + AI_DEFAULT_MODEL: ${AI_DEFAULT_MODEL:-google/gemini-2.5-flash} + BLOB_STORAGE_PROVIDER: local + LOCAL_BLOB_DIR: /data/blobs + RENDER_WORKER_URL: http://worker:4000 + RENDER_WEBHOOK_SECRET: ${RENDER_WEBHOOK_SECRET} + ENCRYPTION_KEY: ${ENCRYPTION_KEY} + NODE_ENV: production + volumes: + - blobdata:/data/blobs + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + + worker: + build: + context: . + dockerfile: services/ingestion-worker/Dockerfile + restart: unless-stopped + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-openvitals} + OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} + AI_DEFAULT_MODEL: ${AI_DEFAULT_MODEL:-google/gemini-2.5-flash} + BLOB_STORAGE_PROVIDER: local + LOCAL_BLOB_DIR: /data/blobs + RENDER_WEBHOOK_SECRET: ${RENDER_WEBHOOK_SECRET} + NODE_ENV: production + volumes: + - blobdata:/data/blobs + depends_on: + postgres: + condition: service_healthy + migrate: + condition: service_completed_successfully + + # Run migrations and seed on deploy (one-shot, exits after completion) + migrate: + build: + context: . + dockerfile: apps/web/Dockerfile + target: base + restart: "no" + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-openvitals} + working_dir: /app + command: > + sh -c "cd packages/database && pnpm db:migrate" + depends_on: + postgres: + condition: service_healthy + +volumes: + pgdata: + blobdata: diff --git a/services/ingestion-worker/Dockerfile b/services/ingestion-worker/Dockerfile index e08b229..2d2c400 100644 --- a/services/ingestion-worker/Dockerfile +++ b/services/ingestion-worker/Dockerfile @@ -35,6 +35,7 @@ COPY --from=base /app/services/ingestion-worker/dist ./dist COPY --from=base /app/services/ingestion-worker/package.json ./package.json COPY --from=base /app/node_modules/.pnpm/pdfjs-dist@*/node_modules/pdfjs-dist ./node_modules/pdfjs-dist + EXPOSE 4000 CMD ["node", "dist/server.js"] diff --git a/services/ingestion-worker/src/lib/pdf.ts b/services/ingestion-worker/src/lib/pdf.ts index c4ef5b6..0307300 100644 --- a/services/ingestion-worker/src/lib/pdf.ts +++ b/services/ingestion-worker/src/lib/pdf.ts @@ -14,17 +14,27 @@ interface TextItem { * with their column headers in table-based documents like lab trend reports. */ export async function extractTextFromPdf(buffer: Buffer): Promise { - const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs'); - const doc = await getDocument({ data: new Uint8Array(buffer) }).promise; + const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs"); + // Resolve the worker file from node_modules (not relative to this source file) + const { createRequire } = await import("module"); + const require = createRequire(import.meta.url); + pdfjs.GlobalWorkerOptions.workerSrc = + require.resolve("pdfjs-dist/legacy/build/pdf.worker.mjs"); + const doc = await pdfjs.getDocument({ + data: new Uint8Array(buffer), + useWorkerFetch: false, + isEvalSupported: false, + useSystemFonts: true, + }).promise; - let text = ''; + let text = ""; for (let i = 1; i <= doc.numPages; i++) { const page = await doc.getPage(i); const content = await page.getTextContent(); const items = content.items as TextItem[]; if (items.length === 0) { - text += '\n'; + text += "\n"; continue; } @@ -34,7 +44,7 @@ export async function extractTextFromPdf(buffer: Buffer): Promise { const rows: { y: number; items: { x: number; str: string }[] }[] = []; for (const item of items) { - if (!item.str.trim() && !item.str.includes(' ')) continue; + if (!item.str.trim() && !item.str.includes(" ")) continue; const x = item.transform[4]!; const y = item.transform[5]!; @@ -54,23 +64,23 @@ export async function extractTextFromPdf(buffer: Buffer): Promise { row.items.sort((a, b) => a.x - b.x); // Insert tab separators when there's a significant horizontal gap - let line = ''; + let line = ""; let prevEnd = -Infinity; for (const item of row.items) { const gap = item.x - prevEnd; if (prevEnd > -Infinity && gap > 15) { - line += '\t'; + line += "\t"; } else if (prevEnd > -Infinity && gap > 2) { - line += ' '; + line += " "; } line += item.str; // Estimate end position: x + approximate character width - prevEnd = item.x + (item.str.length * 5); + prevEnd = item.x + item.str.length * 5; } - text += line + '\n'; + text += line + "\n"; } - text += '\n'; + text += "\n"; } doc.destroy();