From 6b3fd6398ad298ab338531059f0b69216a3d09e4 Mon Sep 17 00:00:00 2001 From: xubinrui <1462311339@qq.com> Date: Thu, 9 Apr 2026 21:42:11 +0800 Subject: [PATCH 1/4] new transferagent --- .gitignore | 2 + apps/backend/src/index.js | 1 + apps/backend/src/routes/projects.js | 8 +- apps/backend/src/routes/transfer.js | 390 ++++- apps/backend/src/services/compileService.js | 225 ++- apps/backend/src/services/projectService.js | 7 +- .../src/services/transferAgent/fsTools.js | 30 + .../transferAgent/graphMineruAgent.js | 74 + .../services/transferAgent/graphNeurips.js | 87 + .../transferAgent/graphNeuripsAgent.js | 87 + .../services/transferAgent/llmUnifiedDiff.js | 281 ++++ .../services/transferAgent/neuripsRules.js | 65 + .../transferAgent/nodes/agentGenerator.js | 182 +++ .../transferAgent/nodes/agentPlanner.js | 224 +++ .../transferAgent/nodes/agentReviewer.js | 224 +++ .../transferAgent/nodes/analyzeSource.js | 57 +- .../transferAgent/nodes/analyzeTarget.js | 8 +- .../transferAgent/nodes/applyTransfer.js | 4 +- .../services/transferAgent/nodes/compile.js | 12 +- .../transferAgent/nodes/compileSource.js | 10 +- .../transferAgent/nodes/copyAssets.js | 35 +- .../services/transferAgent/nodes/draftPlan.js | 45 +- .../services/transferAgent/nodes/finalize.js | 48 +- .../transferAgent/nodes/fixCompile.js | 29 +- .../services/transferAgent/nodes/fixLayout.js | 6 + .../nodes/neurips/applyBibliography.js | 72 + .../transferAgent/nodes/neurips/applyBody.js | 74 + .../nodes/neurips/applyPreamble.js | 107 ++ .../nodes/neurips/blindConfirmBypass.js | 10 + .../nodes/neurips/consumeConfirmBlind.js | 13 + .../nodes/neurips/consumeConfirmPlan.js | 18 + .../transferAgent/nodes/neurips/intake.js | 16 + .../nodes/neurips/normalizeFigures.js | 313 ++++ .../nodes/neurips/policyCheck.js | 111 ++ .../nodes/neurips/prepareConfirmBlind.js | 47 + .../nodes/neurips/prepareConfirmPlan.js | 58 + .../nodes/neurips/sanitizeBlind.js | 71 + .../nodes/neurips/verifyBuild.js | 35 + 
.../services/transferAgent/progressMeta.js | 36 + .../src/services/transferAgent/skills/icml.js | 121 ++ .../services/transferAgent/skills/index.js | 34 + .../services/transferAgent/skills/neurips.js | 134 ++ .../transferAgent/skills/reviewerChecklist.js | 130 ++ .../src/services/transferAgent/state.js | 53 + .../services/transferAgent/tools/applyDiff.js | 52 + .../services/transferAgent/tools/copyAsset.js | 55 + .../services/transferAgent/tools/grepFile.js | 99 ++ .../src/services/transferAgent/tools/index.js | 77 + .../transferAgent/tools/listProjectTree.js | 42 + .../transferAgent/tools/measureFigures.js | 241 +++ .../transferAgent/tools/raiseQuestion.js | 58 + .../services/transferAgent/tools/readFile.js | 47 + .../services/transferAgent/tools/writeFile.js | 35 + .../transferAgent/transferDebugLog.js | 81 + .../transferAgent/transferNodeError.js | 25 + .../src/services/transferAgent/utils.js | 72 + apps/frontend/src/api/client.ts | 157 +- apps/frontend/src/app/App.css | 252 ++- apps/frontend/src/app/ProjectPage.tsx | 233 ++- apps/frontend/src/app/TransferPanel.tsx | 470 +++++- apps/frontend/src/vite-env.d.ts | 5 + apps/frontend/vite.config.ts | 32 +- icml/icml.md | 63 + neurips/checklist.tex | 251 +++ neurips/neurips.md | 421 +++++ neurips/neurips_2026.sty | 439 +++++ neurips/neurips_paper_template.tex | 86 + templates/icml/algorithm.sty | 79 + templates/icml/algorithmic.sty | 201 +++ templates/icml/fancyhdr.sty | 864 ++++++++++ templates/icml/icml2026.bst | 1443 +++++++++++++++++ templates/icml/icml2026.sty | 767 +++++++++ templates/icml/main.tex | 76 +- templates/manifest.json | 6 +- templates/neurips/checklist.tex | 251 +++ templates/neurips/main.tex | 83 +- templates/neurips/neurips_2026.sty | 439 +++++ 77 files changed, 10882 insertions(+), 214 deletions(-) create mode 100644 apps/backend/src/services/transferAgent/fsTools.js create mode 100644 apps/backend/src/services/transferAgent/graphMineruAgent.js create mode 100644 
apps/backend/src/services/transferAgent/graphNeurips.js create mode 100644 apps/backend/src/services/transferAgent/graphNeuripsAgent.js create mode 100644 apps/backend/src/services/transferAgent/llmUnifiedDiff.js create mode 100644 apps/backend/src/services/transferAgent/neuripsRules.js create mode 100644 apps/backend/src/services/transferAgent/nodes/agentGenerator.js create mode 100644 apps/backend/src/services/transferAgent/nodes/agentPlanner.js create mode 100644 apps/backend/src/services/transferAgent/nodes/agentReviewer.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/applyBibliography.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/applyBody.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/applyPreamble.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/blindConfirmBypass.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmBlind.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmPlan.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/intake.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/normalizeFigures.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/policyCheck.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmBlind.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmPlan.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/sanitizeBlind.js create mode 100644 apps/backend/src/services/transferAgent/nodes/neurips/verifyBuild.js create mode 100644 apps/backend/src/services/transferAgent/progressMeta.js create mode 100644 apps/backend/src/services/transferAgent/skills/icml.js create mode 100644 apps/backend/src/services/transferAgent/skills/index.js create mode 100644 
apps/backend/src/services/transferAgent/skills/neurips.js create mode 100644 apps/backend/src/services/transferAgent/skills/reviewerChecklist.js create mode 100644 apps/backend/src/services/transferAgent/tools/applyDiff.js create mode 100644 apps/backend/src/services/transferAgent/tools/copyAsset.js create mode 100644 apps/backend/src/services/transferAgent/tools/grepFile.js create mode 100644 apps/backend/src/services/transferAgent/tools/index.js create mode 100644 apps/backend/src/services/transferAgent/tools/listProjectTree.js create mode 100644 apps/backend/src/services/transferAgent/tools/measureFigures.js create mode 100644 apps/backend/src/services/transferAgent/tools/raiseQuestion.js create mode 100644 apps/backend/src/services/transferAgent/tools/readFile.js create mode 100644 apps/backend/src/services/transferAgent/tools/writeFile.js create mode 100644 apps/backend/src/services/transferAgent/transferDebugLog.js create mode 100644 apps/backend/src/services/transferAgent/transferNodeError.js create mode 100644 icml/icml.md create mode 100644 neurips/checklist.tex create mode 100644 neurips/neurips.md create mode 100644 neurips/neurips_2026.sty create mode 100644 neurips/neurips_paper_template.tex create mode 100644 templates/icml/algorithm.sty create mode 100644 templates/icml/algorithmic.sty create mode 100644 templates/icml/fancyhdr.sty create mode 100644 templates/icml/icml2026.bst create mode 100644 templates/icml/icml2026.sty create mode 100644 templates/neurips/checklist.tex create mode 100644 templates/neurips/neurips_2026.sty diff --git a/.gitignore b/.gitignore index 0f40381..c2868b4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ node_modules/ .DS_Store .env .env.local +.cursor + apps/frontend/dist/ apps/frontend/.vite/ apps/backend/.cache/ diff --git a/apps/backend/src/index.js b/apps/backend/src/index.js index 2a86173..a1245ff 100644 --- a/apps/backend/src/index.js +++ b/apps/backend/src/index.js @@ -24,6 +24,7 @@ const fastify = 
Fastify({ logger: true }); await fastify.register(cors, { origin: true }); await fastify.register(multipart, { + preservePath: true, limits: { fileSize: 200 * 1024 * 1024 } diff --git a/apps/backend/src/routes/projects.js b/apps/backend/src/routes/projects.js index 60f39fe..1b179c7 100644 --- a/apps/backend/src/routes/projects.js +++ b/apps/backend/src/routes/projects.js @@ -321,7 +321,13 @@ export function registerProjectRoutes(fastify) { const parts = req.parts(); for await (const part of parts) { if (part.type !== 'file') continue; - const relPath = sanitizeUploadPath(part.filename); + // With preservePath (busboy), filename keeps relative dirs (e.g. figs/a.png). + // Fallback for odd clients: basename-only still works. + const rawRel = + (typeof part.filename === 'string' && part.filename.trim()) || + (typeof part.filepath === 'string' && part.filepath.trim()) || + ''; + const relPath = sanitizeUploadPath(rawRel); if (!relPath) continue; const abs = safeJoin(projectRoot, relPath); await ensureDir(path.dirname(abs)); diff --git a/apps/backend/src/routes/transfer.js b/apps/backend/src/routes/transfer.js index afb467c..3118ff7 100644 --- a/apps/backend/src/routes/transfer.js +++ b/apps/backend/src/routes/transfer.js @@ -2,17 +2,94 @@ import crypto from 'crypto'; import path from 'path'; import { promises as fs } from 'fs'; import { buildTransferGraph } from '../services/transferAgent/graph.js'; +import { buildNeuripsLatexGraph } from '../services/transferAgent/graphNeurips.js'; +import { buildNeuripsAgentGraph } from '../services/transferAgent/graphNeuripsAgent.js'; import { buildMineruTransferGraph } from '../services/transferAgent/graphMineru.js'; +import { buildMineruAgentGraph } from '../services/transferAgent/graphMineruAgent.js'; import { resolveLLMConfig } from '../services/llmService.js'; import { resolveMineruConfig } from '../services/mineruService.js'; import { readTemplateManifest } from '../services/templateService.js'; import { DATA_DIR, TEMPLATE_DIR 
} from '../config/constants.js'; import { ensureDir, readJson, writeJson, copyDir } from '../utils/fsUtils.js'; +import { + transferDebugLog, + transferDebugProgressDelta, + transferDebugEntriesDelta, + announceTransferDebugOnce, +} from '../services/transferAgent/transferDebugLog.js'; +import { TransferNodeError } from '../services/transferAgent/transferNodeError.js'; // In-memory job store: jobId → { graph, state, status, progressLog } const jobs = new Map(); +const INVOKE_OPTS = { recursionLimit: 120 }; + +function isGraphInterruptErr(e) { + return e !== undefined && ['GraphInterrupt', 'NodeInterrupt'].includes(e?.name); +} + +function logTransferStepResult(jobId, job, st) { + if (!st || typeof st !== 'object') return; + announceTransferDebugOnce(); + transferDebugProgressDelta(jobId, job, st.progressLog); + transferDebugEntriesDelta(jobId, job, st.progressLogEntries); + transferDebugLog(jobId, 'log', 'step snapshot', { + status: st.status, + lastCompletedNode: st.lastCompletedNode, + currentPhase: st.currentPhase, + transferGraphKind: st.transferGraphKind, + completedNodesLen: Array.isArray(st.completedNodes) ? st.completedNodes.length : 0, + pendingQA: Array.isArray(st.pendingQA) ? st.pendingQA.length : st.pendingQA ? 
1 : 0, + compileOk: st.compileResult?.ok, + compileExit: st.compileResult?.status, + verifyBuildOk: st.verifyBuildResult?.ok, + verifyPattern: st.verifyBuildResult?.pattern, + layoutCheckOk: st.layoutCheckResult?.ok, + }); + if (st.compileResult && !st.compileResult.ok && st.compileResult.log) { + transferDebugLog( + jobId, + 'error', + 'compile failed — log tail', + String(st.compileResult.log).slice(-12000), + ); + } + if (st.verifyBuildResult && !st.verifyBuildResult.ok) { + transferDebugLog(jobId, 'warn', 'verifyBuild failed', st.verifyBuildResult); + if (st.compileResult?.log) { + transferDebugLog( + jobId, + 'warn', + 'compile log tail (for verify debug)', + String(st.compileResult.log).slice(-8000), + ); + } + } +} + +function buildTransferApiPayload(job, state) { + const st = state || job.state || {}; + const log = st.progressLog || job.progressLog || []; + return { + status: st.status || job.status || 'running', + progressLog: Array.isArray(log) ? log : [], + progressLogEntries: st.progressLogEntries || [], + currentNode: st.lastCompletedNode || '', + phase: st.currentPhase || '', + agentPhase: st.agentPhase || null, + currentIteration: st.currentIteration ?? null, + interruptedBeforeNode: st.interruptedBeforeNode || '', + completedNodes: st.completedNodes || [], + pendingQA: st.pendingQA ?? 
null, + error: job.error || st.error || null, + bundleNotes: st.bundleNotes || null, + transferGraphKind: st.transferGraphKind || job.state?.transferGraphKind || 'legacy', + liveProgress: job.liveProgress || null, + }; +} + export function registerTransferRoutes(fastify) { + console.log('[transfer] Routes registered — ICML agent graph support: ENABLED (v2)'); /** * POST /api/transfer/start @@ -28,6 +105,10 @@ export function registerTransferRoutes(fastify) { engine = 'pdflatex', layoutCheck = false, llmConfig, + venue, + doubleBlind, + preprint, + outputNotes, } = request.body || {}; if (!sourceProjectId || !sourceMainFile || !targetTemplateId || !targetMainFile) { @@ -65,19 +146,31 @@ export function registerTransferRoutes(fastify) { const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); await copyDir(templateRoot, projectRoot); - // Build transfer graph const jobId = crypto.randomUUID(); - const graph = buildTransferGraph(); + const useNeuripsGraph = targetTemplateId === 'neurips'; + const useAgentGraph = useNeuripsGraph || targetTemplateId === 'icml'; + const graph = useAgentGraph ? buildNeuripsAgentGraph() : buildTransferGraph(); + + const transferIntake = { + venue: venue || targetTemplateId || 'neurips', + doubleBlind: doubleBlind !== false, + preprint: !!preprint, + outputNotes: outputNotes || '', + }; const initialState = { sourceProjectId, sourceMainFile, targetProjectId: newProjectId, targetMainFile, + targetTemplateId, engine, layoutCheck, llmConfig: resolveLLMConfig(llmConfig), jobId, + transferGraphKind: useAgentGraph ? 
targetTemplateId : 'legacy', + transferIntake, + userConfirmations: {}, }; jobs.set(jobId, { @@ -87,6 +180,21 @@ export function registerTransferRoutes(fastify) { progressLog: [], hasStarted: false, iterator: null, + liveProgress: null, + _transferDebugLogLen: 0, + _transferDebugEntriesLen: 0, + }); + + announceTransferDebugOnce(); + transferDebugLog(jobId, 'log', 'POST /transfer/start (legacy)', { + targetTemplateId, + transferGraphKind: useAgentGraph ? targetTemplateId : 'legacy', + newProjectId, + sourceProjectId, + sourceMainFile, + targetMainFile, + engine, + layoutCheck, }); return { jobId, newProjectId }; @@ -105,33 +213,104 @@ export function registerTransferRoutes(fastify) { return reply.code(404).send({ error: 'Job not found.' }); } - // If waiting for images, don't proceed if (job.status === 'waiting_images') { - return { status: 'waiting_images', progressLog: job.progressLog }; + return buildTransferApiPayload(job, job.state); + } + + if (job.status === 'waiting_confirm') { + return buildTransferApiPayload(job, job.state); } try { job.status = 'running'; - const runConfig = { configurable: { thread_id: jobId } }; + // Initialize live progress for tool-level granularity + job.liveProgress = { activeRole: '', toolName: '', toolArgs: '', toolRound: 0, maxToolRounds: 0, lastUpdate: Date.now() }; + const runConfig = { configurable: { thread_id: jobId, _liveProgress: job.liveProgress }, ...INVOKE_OPTS }; const input = job.hasStarted ? null : job.state; - const result = await job.graph.invoke(input, runConfig); + let result; + try { + // Use graph.stream() instead of graph.invoke() for node-level granularity. + // streamMode 'values' yields the full accumulated state after each node completes, + // allowing the SSE poll to pick up intermediate progress. 
+ const stream = await job.graph.stream(input, { ...runConfig, streamMode: 'values' }); + for await (const snapshot of stream) { + // snapshot is the full accumulated state after this node completed + job.state = { ...job.state, ...snapshot }; + job.progressLog = job.state.progressLog || job.progressLog || []; + job.hasStarted = true; + transferDebugLog(jobId, 'log', `stream node completed: ${job.state.lastCompletedNode || '?'}`); + } + result = job.state; + } catch (invokeErr) { + if (isGraphInterruptErr(invokeErr)) { + const snap = await job.graph.getState(runConfig); + const values = snap?.values || {}; + job.hasStarted = true; + job.state = { ...job.state, ...values }; + job.progressLog = values.progressLog || job.progressLog || []; + + // Handle raiseQuestion interrupt from agentic nodes: + // The interrupt() call passes { type: 'raiseQuestion', pendingQA: [...] } + const interruptValues = snap?.tasks?.[0]?.interrupts?.[0]?.value; + if (interruptValues?.type === 'raiseQuestion' && interruptValues.pendingQA) { + job.state.pendingQA = interruptValues.pendingQA; + job.state.status = 'waiting_confirm'; + job.status = 'waiting_confirm'; + } else { + job.status = values.status || job.state.status || 'running'; + } + + job.error = undefined; + transferDebugLog(jobId, 'log', 'LangGraph interrupt — paused before next node (checkpoint saved)'); + logTransferStepResult(jobId, job, job.state); + return buildTransferApiPayload(job, job.state); + } + throw invokeErr; + } + // Also check for interrupt after stream completes normally (some LangGraph versions + // don't throw on interrupt when using stream) + try { + const snap = await job.graph.getState(runConfig); + const interruptValues = snap?.tasks?.[0]?.interrupts?.[0]?.value; + if (interruptValues?.type === 'raiseQuestion' && interruptValues.pendingQA) { + job.state.pendingQA = interruptValues.pendingQA; + job.state.status = 'waiting_confirm'; + job.status = 'waiting_confirm'; + job.error = undefined; + 
transferDebugLog(jobId, 'log', 'LangGraph interrupt detected after stream (checkpoint saved)'); + logTransferStepResult(jobId, job, job.state); + return buildTransferApiPayload(job, job.state); + } + } catch { /* getState may fail if graph fully completed — that's fine */ } + job.hasStarted = true; job.state = result; job.progressLog = result.progressLog || []; job.status = result.status || 'running'; + job.error = undefined; + // Clear live progress when step finishes + job.liveProgress = null; - return { - status: job.status, - progressLog: job.progressLog, - }; + logTransferStepResult(jobId, job, result); + + return buildTransferApiPayload(job, result); } catch (err) { const msg = err?.message || String(err || 'Unknown error'); job.status = 'error'; job.error = msg; - return reply.code(500).send({ + transferDebugLog(jobId, 'error', `POST /transfer/step failed: ${msg}`, err?.stack); + const payload = { error: msg, - progressLog: job.progressLog, - }); + ...buildTransferApiPayload(job, job.state), + }; + if (err instanceof TransferNodeError) { + payload.failedNode = err.node; + payload.failedPhase = err.phase; + payload.failedDetail = err.detail; + if (err.debugRelPath) payload.failedDebugPath = err.debugRelPath; + if (typeof err.inputChars === 'number') payload.failedInputChars = err.inputChars; + } + return reply.code(500).send(payload); } }); @@ -166,6 +345,44 @@ export function registerTransferRoutes(fastify) { job.state = { ...job.state, ...updated }; job.status = 'running'; + transferDebugLog(jobId, 'log', `submit-images: ${(images || []).length} page(s)`); + + return { ok: true }; + }); + + /** + * POST /api/transfer/submit-confirm + * Body: { jobId, answers: { [qaId]: string | string[] } } + */ + fastify.post('/api/transfer/submit-confirm', async (request, reply) => { + const { jobId, answers = {} } = request.body || {}; + const job = jobs.get(jobId); + if (!job) { + return reply.code(404).send({ error: 'Job not found.' 
}); + } + + if (job.status !== 'waiting_confirm') { + return reply.code(400).send({ error: 'Job is not waiting for confirmations.' }); + } + + const prev = job.state?.userConfirmations || {}; + const merged = { ...prev, ...answers }; + const updated = { userConfirmations: merged, status: 'running', pendingQA: null }; + + try { + if (job.hasStarted && typeof job.graph.updateState === 'function') { + await job.graph.updateState( + { configurable: { thread_id: jobId }, ...INVOKE_OPTS }, + updated, + ); + } + } catch { /* fallback below */ } + + job.state = { ...job.state, ...updated }; + job.status = 'running'; + + transferDebugLog(jobId, 'log', 'submit-confirm', { answerKeys: Object.keys(answers || {}) }); + return { ok: true }; }); @@ -180,10 +397,126 @@ export function registerTransferRoutes(fastify) { } return { - status: job.status, - progressLog: job.progressLog, - error: job.error || null, + transferGraphKind: job.state?.transferGraphKind || 'legacy', + ...buildTransferApiPayload(job, job.state), + }; + }); + + /** + * GET /api/transfer/stream/:jobId + * SSE endpoint — pushes real-time progress events to the frontend. + * + * Events emitted: + * event: progress — full payload (same shape as /status) + * event: done — final payload when job finishes (success/failed/error) + * + * The connection stays open and polls the in-memory job state + * every 500 ms, emitting an event whenever the state has changed + * (new completedNodes, phase change, status change, new log entries). + */ + fastify.get('/api/transfer/stream/:jobId', async (request, reply) => { + const { jobId } = request.params; + const job = jobs.get(jobId); + if (!job) { + return reply.code(404).send({ error: 'Job not found.' 
}); + } + + // SSE headers + reply.raw.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + 'X-Accel-Buffering': 'no', // disable nginx buffering + }); + + // Tracking: only send when something changed + let lastCompletedLen = 0; + let lastEntriesLen = 0; + let lastStatus = ''; + let lastPhase = ''; + let lastNode = ''; + let lastLpToolName = ''; + let lastLpToolRound = -1; + let lastLpActiveRole = ''; + let closed = false; + + request.raw.on('close', () => { closed = true; }); + + function sendEvent(eventName, data) { + if (closed) return; + try { + reply.raw.write(`event: ${eventName}\ndata: ${JSON.stringify(data)}\n\n`); + } catch { closed = true; } + } + + // Send initial state immediately + const initialPayload = { + transferGraphKind: job.state?.transferGraphKind || 'legacy', + ...buildTransferApiPayload(job, job.state), }; + sendEvent('progress', initialPayload); + lastCompletedLen = (initialPayload.completedNodes || []).length; + lastEntriesLen = (initialPayload.progressLogEntries || []).length; + lastStatus = initialPayload.status; + lastPhase = initialPayload.phase; + lastNode = initialPayload.currentNode; + + // Poll loop + const interval = setInterval(() => { + if (closed) { clearInterval(interval); return; } + + const j = jobs.get(jobId); + if (!j) { sendEvent('done', { status: 'not_found' }); clearInterval(interval); reply.raw.end(); return; } + + const payload = { + transferGraphKind: j.state?.transferGraphKind || 'legacy', + ...buildTransferApiPayload(j, j.state), + }; + + const completedLen = (payload.completedNodes || []).length; + const entriesLen = (payload.progressLogEntries || []).length; + const lp = payload.liveProgress; + + const changed = + payload.status !== lastStatus || + payload.phase !== lastPhase || + payload.currentNode !== lastNode || + completedLen !== lastCompletedLen || + entriesLen !== lastEntriesLen || + (lp && (lp.toolName !== lastLpToolName || lp.toolRound !== 
lastLpToolRound || lp.activeRole !== lastLpActiveRole)); + + if (changed) { + sendEvent('progress', payload); + lastCompletedLen = completedLen; + lastEntriesLen = entriesLen; + lastStatus = payload.status; + lastPhase = payload.phase; + lastNode = payload.currentNode; + if (lp) { + lastLpToolName = lp.toolName; + lastLpToolRound = lp.toolRound; + lastLpActiveRole = lp.activeRole; + } + } + + // Terminal states — send done and close + if (['success', 'failed', 'error'].includes(payload.status)) { + sendEvent('done', payload); + clearInterval(interval); + if (!closed) reply.raw.end(); + } + }, 500); + + // Keep-alive: send comment every 15s to prevent proxy timeout + const keepAlive = setInterval(() => { + if (closed) { clearInterval(keepAlive); return; } + try { reply.raw.write(': keepalive\n\n'); } catch { closed = true; } + }, 15000); + + request.raw.on('close', () => { + clearInterval(interval); + clearInterval(keepAlive); + }); }); /** @@ -245,21 +578,31 @@ export function registerTransferRoutes(fastify) { const templateRoot = path.join(TEMPLATE_DIR, targetTemplateId); await copyDir(templateRoot, projectRoot); - // Build MinerU transfer graph + // Build MinerU transfer graph — use agent hybrid for supported venues const jobId = crypto.randomUUID(); - const graph = buildMineruTransferGraph(); + const useAgentBackend = ['neurips', 'icml'].includes(targetTemplateId); + const graph = useAgentBackend ? buildMineruAgentGraph() : buildMineruTransferGraph(); const initialState = { sourceProjectId: sourceProjectId || '', sourceMainFile: sourceMainFile || '', targetProjectId: newProjectId, targetMainFile, + targetTemplateId, engine, layoutCheck, llmConfig: resolveLLMConfig(llmConfig), mineruConfig: resolveMineruConfig(mineruConfig), transferMode: 'mineru', jobId, + transferGraphKind: ['neurips', 'icml'].includes(targetTemplateId) ? 
targetTemplateId : 'legacy', + transferIntake: { + venue: targetTemplateId || 'neurips', + doubleBlind: true, + preprint: false, + outputNotes: '', + }, + userConfirmations: {}, }; jobs.set(jobId, { @@ -269,6 +612,17 @@ export function registerTransferRoutes(fastify) { progressLog: [], hasStarted: false, iterator: null, + liveProgress: null, + _transferDebugLogLen: 0, + _transferDebugEntriesLen: 0, + }); + + announceTransferDebugOnce(); + transferDebugLog(jobId, 'log', 'POST /transfer/start-mineru', { + targetTemplateId, + newProjectId, + transferGraphKind: ['neurips', 'icml'].includes(targetTemplateId) ? targetTemplateId : 'legacy', + hasSourceProject: !!sourceProjectId, }); return { jobId, newProjectId }; diff --git a/apps/backend/src/services/compileService.js b/apps/backend/src/services/compileService.js index 6e598df..91db5ec 100644 --- a/apps/backend/src/services/compileService.js +++ b/apps/backend/src/services/compileService.js @@ -8,16 +8,19 @@ import { getProjectRoot } from './projectService.js'; const SUPPORTED_ENGINES = ['pdflatex', 'xelatex', 'lualatex', 'latexmk', 'tectonic']; -function buildCommand(engine, outDir, mainFile) { +// Prefer system TeX Live binaries to avoid conda version mismatch with .fmt files +const SYSTEM_TEX_BIN = '/usr/bin'; + +function buildCommand(engine, mainFile) { switch (engine) { case 'pdflatex': case 'xelatex': case 'lualatex': - return { cmd: engine, args: ['-interaction=nonstopmode', `-output-directory=${outDir}`, mainFile] }; + return { cmd: path.join(SYSTEM_TEX_BIN, engine), args: ['-interaction=nonstopmode', mainFile] }; case 'latexmk': - return { cmd: 'latexmk', args: ['-pdf', '-interaction=nonstopmode', `-outdir=${outDir}`, mainFile] }; + return { cmd: 'latexmk', args: ['-pdf', '-interaction=nonstopmode', mainFile] }; case 'tectonic': - return { cmd: 'tectonic', args: ['--outdir', outDir, mainFile] }; + return { cmd: 'tectonic', args: [mainFile] }; default: return null; } @@ -64,56 +67,182 @@ export async function 
runCompile({ projectId, mainFile, engine = 'pdflatex' }) { logChunks.push(next.slice(0, remaining)); }; - const { cmd, args } = buildCommand(engine, outDir, mainFile); + const { cmd, args } = buildCommand(engine, mainFile); const needsBibPass = MULTI_PASS_ENGINES.includes(engine); + // Copy all project files to output directory + try { + const files = await fs.readdir(projectRoot, { withFileTypes: true }); + for (const file of files) { + if (file.name === '.compile' || file.name === 'node_modules') continue; + const srcPath = path.join(projectRoot, file.name); + const dstPath = path.join(outDir, file.name); + if (file.isDirectory()) { + await fs.cp(srcPath, dstPath, { recursive: true }); + } else { + await fs.copyFile(srcPath, dstPath); + } + } + pushLog(Buffer.from('[info] Copied project files to build directory.\n')); + } catch (err) { + await fs.rm(outDir, { recursive: true, force: true }); + return { ok: false, error: `Failed to copy project files: ${err.message}` }; + } + let code; try { // Pass 1: generate .aux with \citation{} entries - code = await runSpawn(cmd, args, projectRoot, pushLog); + code = await runSpawn(cmd, args, outDir, pushLog); if (needsBibPass) { const base = path.basename(mainFile, path.extname(mainFile)); const auxPath = path.join(outDir, `${base}.aux`); - // Detect whether to use biber or bibtex by checking .aux / source for biblatex - let useBiber = false; + // Check if user-provided .bib files exist in the project. + // IMPORTANT: check against the ORIGINAL project files (projectRoot), + // not outDir, because Pass 1 may auto-generate .bib files + // (e.g. revtex4-1 + apsrev4-1.bst creates *Notes.bib). + // Using outDir would give a false positive and cause bibtex to run, + // which overwrites the existing .bbl with an empty one. + // Also skip empty/placeholder .bib files (< 50 bytes, e.g. template stubs). + let hasUserBibFiles = false; try { - const auxContent = await fs.readFile(auxPath, 'utf8'); - // biblatex writes \abx@aux@... 
commands in .aux; traditional bibtex does not - useBiber = auxContent.includes('\\abx@aux@'); - } catch { /* .aux missing — skip bib pass */ } + const projFiles = await fs.readdir(projectRoot); + const bibFiles = projFiles.filter(f => f.endsWith('.bib')); + for (const bf of bibFiles) { + try { + const st = await fs.stat(path.join(projectRoot, bf)); + if (st.size >= 50) { hasUserBibFiles = true; break; } + } catch { /* ignore */ } + } + } catch { /* ignore */ } - // Also check the source .tex for \usepackage{biblatex} as a fallback - if (!useBiber) { + // Also check if a pre-compiled .bbl already exists in the project + const mainBblPath = path.join(outDir, `${base}.bbl`); + let hasPrecompiledBbl = false; + try { + await fs.access(mainBblPath); + hasPrecompiledBbl = true; + } catch { /* no main.bbl yet */ } + + // If no .bbl with the main name, look for one that matches + // \bibliography{} or \addbibresource{} or \input{*.bbl} references + if (!hasPrecompiledBbl) { try { - const texContent = await fs.readFile(safeJoin(projectRoot, mainFile), 'utf8'); - useBiber = /\\usepackage(\[.*?\])?\{biblatex\}/.test(texContent); + const texContent = await fs.readFile(path.join(outDir, mainFile), 'utf8'); + + // Check if tex uses \input{something.bbl} — in that case, + // the bbl is loaded directly and we don't need main.bbl at all. 
+ const inputBblRe = /\\input\s*\{\s*([^}]*\.bbl)\s*\}/g; + let inputBblMatch; + while ((inputBblMatch = inputBblRe.exec(texContent)) !== null) { + const bblName = inputBblMatch[1].trim(); + const bblPath = path.join(outDir, bblName); + try { + const st = await fs.stat(bblPath); + if (st.size > 100) { + // The tex directly \input's a real .bbl file — skip bibtex entirely + hasPrecompiledBbl = true; + pushLog(Buffer.from(`[info] Found \\input{${bblName}} (${st.size} bytes); using it directly.\n`)); + break; + } + } catch { /* file not found, continue */ } + } + + if (!hasPrecompiledBbl) { + // Collect bibliography names from \bibliography{a,b} and \addbibresource{a.bib} + const bibNames = []; + const bibPatternTrad = /\\bibliography\{([^}]+)\}/g; + let m; + while ((m = bibPatternTrad.exec(texContent)) !== null) { + m[1].split(',').forEach(ref => bibNames.push(ref.trim())); + } + const bibPatternRes = /\\addbibresource\{([^}]+)\}/g; + while ((m = bibPatternRes.exec(texContent)) !== null) { + bibNames.push(m[1].trim().replace(/\.bib$/i, '')); + } + + // Try each matched name to find a corresponding .bbl and copy it + for (const bibName of bibNames) { + if (bibName === base) continue; + const candidateBbl = path.join(outDir, `${bibName}.bbl`); + try { + await fs.access(candidateBbl); + await fs.copyFile(candidateBbl, mainBblPath); + hasPrecompiledBbl = true; + pushLog(Buffer.from(`[info] Copied ${bibName}.bbl to ${base}.bbl for LaTeX to use.\n`)); + break; + } catch { /* this .bbl not found, try next */ } + } + } + + // Fallback: if still no main.bbl, try any lone .bbl in the directory + if (!hasPrecompiledBbl) { + const allFiles = await fs.readdir(outDir); + const bblFiles = allFiles.filter(f => f.endsWith('.bbl')); + // If there's exactly one .bbl that's substantial (> 100 bytes), use it + const realBbls = []; + for (const bf of bblFiles) { + try { + const st = await fs.stat(path.join(outDir, bf)); + if (st.size > 100) realBbls.push(bf); + } catch { /* ignore */ } + 
} + if (realBbls.length === 1) { + await fs.copyFile(path.join(outDir, realBbls[0]), mainBblPath); + hasPrecompiledBbl = true; + pushLog(Buffer.from(`[info] Copied ${realBbls[0]} to ${base}.bbl (only substantial .bbl found).\n`)); + } + } } catch { /* ignore */ } } - const bibCmd = useBiber ? 'biber' : 'bibtex'; - const bibEnv = { - ...process.env, - BIBINPUTS: `${projectRoot}:`, - BSTINPUTS: `${projectRoot}:`, - }; - // Run bibtex/biber with cwd=outDir and relative base name to avoid - // openout_any=p blocking writes to absolute paths. - const bibArgs = useBiber - ? [`--input-directory=${projectRoot}`, base] - : [base]; + if (!hasUserBibFiles) { + // No user .bib files — do NOT run bibtex/biber as it would overwrite + // the existing .bbl with an empty one. Just use whatever .bbl is available. + if (hasPrecompiledBbl) { + pushLog(Buffer.from(`[info] No .bib files found; using existing ${base}.bbl (skipping bibtex/biber).\n`)); + } else { + pushLog(Buffer.from('[warn] No .bib or .bbl files found, citations will not resolve.\n')); + } + } else { + // Detect whether to use biber or bibtex by checking .aux / source for biblatex + let useBiber = false; + try { + const auxContent = await fs.readFile(auxPath, 'utf8'); + // biblatex writes \abx@aux@... commands in .aux; traditional bibtex does not + useBiber = auxContent.includes('\\abx@aux@'); + } catch { /* .aux missing — skip bib pass */ } - try { - await runSpawn(bibCmd, bibArgs, outDir, pushLog, bibEnv); - } catch { - // bibtex/biber not installed or failed — continue without it - pushLog(Buffer.from(`[warn] ${bibCmd} not available, skipping bibliography pass.\n`)); + // Also check the source .tex for \usepackage{biblatex} as a fallback + if (!useBiber) { + try { + const texContent = await fs.readFile(path.join(outDir, mainFile), 'utf8'); + useBiber = /\\usepackage(\[.*?\])?\{biblatex\}/.test(texContent); + } catch { /* ignore */ } + } + + const bibCmd = useBiber + ? 
path.join(SYSTEM_TEX_BIN, 'biber') + : path.join(SYSTEM_TEX_BIN, 'bibtex'); + const bibEnv = { + ...process.env, + BIBINPUTS: `${outDir}:`, + BSTINPUTS: `${outDir}:`, + }; + const bibArgs = useBiber ? [base] : [base]; + + try { + await runSpawn(bibCmd, bibArgs, outDir, pushLog, bibEnv); + } catch { + // bibtex/biber not installed or failed — continue without it + pushLog(Buffer.from(`[warn] ${bibCmd} not available, skipping bibliography pass.\n`)); + } } // Pass 2 + 3: resolve citations and cross-references - code = await runSpawn(cmd, args, projectRoot, pushLog); - code = await runSpawn(cmd, args, projectRoot, pushLog); + code = await runSpawn(cmd, args, outDir, pushLog); + code = await runSpawn(cmd, args, outDir, pushLog); } } catch (err) { await fs.rm(outDir, { recursive: true, force: true }); @@ -129,7 +258,35 @@ export async function runCompile({ projectId, mainFile, engine = 'pdflatex' }) { } catch { pdfBase64 = ''; } + + // Copy all .bbl files back to project root + try { + const files = await fs.readdir(outDir); + const bblFiles = files.filter(f => f.endsWith('.bbl')); + for (const bblFile of bblFiles) { + const srcPath = path.join(outDir, bblFile); + const dstPath = path.join(projectRoot, bblFile); + await fs.copyFile(srcPath, dstPath); + } + if (bblFiles.length > 0) { + pushLog(Buffer.from(`[info] Copied ${bblFiles.length} .bbl file(s) back to project.\n`)); + } + } catch { + // Ignore errors copying .bbl files + } + const log = logChunks.join(''); + + // Save compile log to project directory + try { + const logPath = path.join(projectRoot, 'compile.log'); + const timestamp = new Date().toISOString(); + const logContent = `=== Compile Log (${timestamp}) ===\nEngine: ${engine}\nMain File: ${mainFile}\n\n${log}`; + await fs.writeFile(logPath, logContent, 'utf8'); + } catch { + // Ignore errors saving log + } + await fs.rm(outDir, { recursive: true, force: true }); if (!pdfBase64) { return { ok: false, error: 'No PDF generated.', log, status: code ?? 
-1 }; diff --git a/apps/backend/src/services/projectService.js b/apps/backend/src/services/projectService.js index f8bb752..7af8600 100644 --- a/apps/backend/src/services/projectService.js +++ b/apps/backend/src/services/projectService.js @@ -3,8 +3,13 @@ import path from 'path'; import { DATA_DIR } from '../config/constants.js'; export async function getProjectRoot(id) { + if (!id) throw new Error('getProjectRoot: project id is required'); const projectRoot = path.join(DATA_DIR, id); const metaPath = path.join(projectRoot, 'project.json'); - await fs.access(metaPath); + try { + await fs.access(metaPath); + } catch { + throw new Error(`Project not found: ${id} (missing ${metaPath})`); + } return projectRoot; } diff --git a/apps/backend/src/services/transferAgent/fsTools.js b/apps/backend/src/services/transferAgent/fsTools.js new file mode 100644 index 0000000..9771919 --- /dev/null +++ b/apps/backend/src/services/transferAgent/fsTools.js @@ -0,0 +1,30 @@ +import { promises as fs } from 'fs'; +import { safeJoin } from '../../utils/pathUtils.js'; + +/** + * Read a file relative to workspace root (target project); throws on escape. + */ +export async function readWorkspaceFile(workspaceRoot, relPath) { + const abs = safeJoin(workspaceRoot, relPath); + return fs.readFile(abs, 'utf8'); +} + +/** + * Read a file relative to source read root; throws on escape. + */ +export async function readSourceFile(sourceReadRoot, relPath) { + const abs = safeJoin(sourceReadRoot, relPath); + return fs.readFile(abs, 'utf8'); +} + +/** + * True if path exists under workspace. 
+ */ +export async function workspaceFileExists(workspaceRoot, relPath) { + try { + await fs.access(safeJoin(workspaceRoot, relPath)); + return true; + } catch { + return false; + } +} diff --git a/apps/backend/src/services/transferAgent/graphMineruAgent.js b/apps/backend/src/services/transferAgent/graphMineruAgent.js new file mode 100644 index 0000000..f41fb18 --- /dev/null +++ b/apps/backend/src/services/transferAgent/graphMineruAgent.js @@ -0,0 +1,74 @@ +/** + * graphMineruAgent.js — MinerU + Agentic Transfer Graph + * + * Hybrid graph: MinerU front-end (PDF → Markdown) + Agent back-end (migration). + * + * compileSource → parsePdfWithMineru → planner → generator → reviewer ──┐ + * ↑ │ + * └──── revise (iteration < max) ┘ + * │ + * (pass)│ + * ▼ + * finalize + * + * The planner/generator/reviewer nodes receive the parsed Markdown content + * via state.sourceMarkdown and state.sourceImages, and use venue skills + * to produce the target LaTeX. + */ + +import { StateGraph, END, MemorySaver } from '@langchain/langgraph'; +import { TransferState } from './state.js'; +import { compileSource } from './nodes/compileSource.js'; +import { parsePdfWithMineru } from './nodes/parsePdfWithMineru.js'; +import { agentPlanner } from './nodes/agentPlanner.js'; +import { agentGenerator } from './nodes/agentGenerator.js'; +import { agentReviewer } from './nodes/agentReviewer.js'; +import { finalize } from './nodes/finalize.js'; + +/** + * Route after Reviewer: loop back to Planner or proceed to Finalize. + */ +function routeAfterReview(state) { + const review = state.reviewResult || {}; + const iteration = state.currentIteration || 0; + const maxIterations = state.maxIterations || 5; + + if (review.verdict === 'pass') return 'finalize'; + if (iteration >= maxIterations) return 'finalize'; + return 'planner'; +} + +/** + * Build the MinerU + Agent hybrid transfer graph. 
+ */ +export function buildMineruAgentGraph() { + const graph = new StateGraph(TransferState); + + // MinerU front-end: PDF → Markdown + graph.addNode('compileSource', compileSource); + graph.addNode('parsePdfWithMineru', parsePdfWithMineru); + + // Agent back-end: Markdown → LaTeX (venue-aware) + graph.addNode('planner', agentPlanner); + graph.addNode('generator', agentGenerator); + graph.addNode('reviewer', agentReviewer); + graph.addNode('finalize', finalize); + + // Wire edges + graph.setEntryPoint('compileSource'); + graph.addEdge('compileSource', 'parsePdfWithMineru'); + graph.addEdge('parsePdfWithMineru', 'planner'); + graph.addEdge('planner', 'generator'); + graph.addEdge('generator', 'reviewer'); + + graph.addConditionalEdges('reviewer', routeAfterReview, { + planner: 'planner', + finalize: 'finalize', + }); + + graph.addEdge('finalize', END); + + return graph.compile({ + checkpointer: new MemorySaver(), + }); +} diff --git a/apps/backend/src/services/transferAgent/graphNeurips.js b/apps/backend/src/services/transferAgent/graphNeurips.js new file mode 100644 index 0000000..8d116c6 --- /dev/null +++ b/apps/backend/src/services/transferAgent/graphNeurips.js @@ -0,0 +1,87 @@ +import { StateGraph, END, MemorySaver } from '@langchain/langgraph'; +import { TransferState } from './state.js'; +import { intake } from './nodes/neurips/intake.js'; +import { analyzeSource } from './nodes/analyzeSource.js'; +import { analyzeTarget } from './nodes/analyzeTarget.js'; +import { draftPlan } from './nodes/draftPlan.js'; +import { prepareConfirmPlan } from './nodes/neurips/prepareConfirmPlan.js'; +import { consumeConfirmPlan } from './nodes/neurips/consumeConfirmPlan.js'; +import { applyPreamble } from './nodes/neurips/applyPreamble.js'; +import { applyBody } from './nodes/neurips/applyBody.js'; +import { normalizeFigures } from './nodes/neurips/normalizeFigures.js'; +import { copyAssets } from './nodes/copyAssets.js'; +import { applyBibliography } from 
'./nodes/neurips/applyBibliography.js'; +import { prepareConfirmBlind } from './nodes/neurips/prepareConfirmBlind.js'; +import { consumeConfirmBlind } from './nodes/neurips/consumeConfirmBlind.js'; +import { blindConfirmBypass } from './nodes/neurips/blindConfirmBypass.js'; +import { sanitizeBlind } from './nodes/neurips/sanitizeBlind.js'; +import { policyCheck } from './nodes/neurips/policyCheck.js'; +import { finalize } from './nodes/finalize.js'; + +function routeBlind(state) { + if (state.pendingQA?.length) return 'consumeConfirmBlind'; + return 'blindConfirmBypass'; +} + +/** + * NeurIPS LaTeX→LaTeX transfer: stops after sanitizeBlind (no server pdflatex / fixCompile / layout). + * Authors compile locally. + */ +export function buildNeuripsLatexGraph() { + const graph = new StateGraph(TransferState); + + graph.addNode('intake', intake); + graph.addNode('analyzeSource', analyzeSource); + graph.addNode('analyzeTarget', analyzeTarget); + graph.addNode('draftPlan', draftPlan); + graph.addNode('prepareConfirmPlan', prepareConfirmPlan); + graph.addNode('consumeConfirmPlan', consumeConfirmPlan); + graph.addNode('applyPreamble', applyPreamble); + graph.addNode('applyBody', applyBody); + graph.addNode('normalizeFigures', normalizeFigures); + graph.addNode('copyAssets', copyAssets); + graph.addNode('applyBibliography', applyBibliography); + graph.addNode('prepareConfirmBlind', prepareConfirmBlind); + graph.addNode('consumeConfirmBlind', consumeConfirmBlind); + graph.addNode('blindConfirmBypass', blindConfirmBypass); + graph.addNode('sanitizeBlind', sanitizeBlind); + graph.addNode('policyCheck', policyCheck); + graph.addNode('finalize', finalize); + + graph.setEntryPoint('intake'); + + graph.addEdge('intake', 'analyzeSource'); + graph.addEdge('analyzeSource', 'analyzeTarget'); + graph.addEdge('analyzeTarget', 'draftPlan'); + graph.addEdge('draftPlan', 'prepareConfirmPlan'); + graph.addEdge('prepareConfirmPlan', 'consumeConfirmPlan'); + graph.addEdge('consumeConfirmPlan', 
'applyPreamble'); + graph.addEdge('applyPreamble', 'applyBody'); + graph.addEdge('applyBody', 'normalizeFigures'); + graph.addEdge('normalizeFigures', 'copyAssets'); + graph.addEdge('copyAssets', 'applyBibliography'); + graph.addEdge('applyBibliography', 'prepareConfirmBlind'); + graph.addConditionalEdges('prepareConfirmBlind', routeBlind, { + consumeConfirmBlind: 'consumeConfirmBlind', + blindConfirmBypass: 'blindConfirmBypass', + }); + graph.addEdge('consumeConfirmBlind', 'sanitizeBlind'); + graph.addEdge('blindConfirmBypass', 'sanitizeBlind'); + graph.addEdge('sanitizeBlind', 'policyCheck'); + graph.addEdge('policyCheck', 'finalize'); + graph.addEdge('finalize', END); + + return graph.compile({ + checkpointer: new MemorySaver(), + interruptBefore: [ + 'consumeConfirmPlan', + 'applyPreamble', + 'applyBody', + 'normalizeFigures', + 'applyBibliography', + 'consumeConfirmBlind', + 'sanitizeBlind', + 'policyCheck', + ], + }); +} diff --git a/apps/backend/src/services/transferAgent/graphNeuripsAgent.js b/apps/backend/src/services/transferAgent/graphNeuripsAgent.js new file mode 100644 index 0000000..fe4aaee --- /dev/null +++ b/apps/backend/src/services/transferAgent/graphNeuripsAgent.js @@ -0,0 +1,87 @@ +/** + * graphNeuripsAgent.js — Agentic NeurIPS Transfer Graph + * + * Replaces the 17-node pipeline (graphNeurips.js) with a 3-node agentic loop: + * + * ┌──────────────────────────────────────┐ + * │ │ + * ▼ │ + * planner ──► generator ──► reviewer ──────┤ + * │ │ + * (pass)│ (revise)│ + * ▼ │ + * finalize │ + * │ + * (max_iterations)───────┘ + * + * Each node is a ReAct-style agent with tool-calling capabilities. + * The NeurIPS specification is injected as a "skill" (system prompt). + * + * Human-in-the-loop: the raiseQuestion tool triggers LangGraph interrupt(), + * pausing the graph until the user provides answers via the API. 
+ */ + +import { StateGraph, END, MemorySaver } from '@langchain/langgraph'; +import { TransferState } from './state.js'; +import { agentPlanner } from './nodes/agentPlanner.js'; +import { agentGenerator } from './nodes/agentGenerator.js'; +import { agentReviewer } from './nodes/agentReviewer.js'; +import { finalize } from './nodes/finalize.js'; + +/** + * Route after Reviewer: loop back to Planner or proceed to Finalize. + */ +function routeAfterReview(state) { + const review = state.reviewResult || {}; + const iteration = state.currentIteration || 0; + const maxIterations = state.maxIterations || 5; + + // Pass → finalize + if (review.verdict === 'pass') { + return 'finalize'; + } + + // Max iterations exceeded → finalize anyway + if (iteration >= maxIterations) { + return 'finalize'; + } + + // Revise → loop back to planner + return 'planner'; +} + +/** + * Build the NeurIPS agentic transfer graph. + * + * This is a drop-in replacement for buildNeuripsLatexGraph(). + * The API surface (state shape, interrupt handling) is compatible + * with the existing route handlers. + */ +export function buildNeuripsAgentGraph() { + const graph = new StateGraph(TransferState); + + // Register nodes + graph.addNode('planner', agentPlanner); + graph.addNode('generator', agentGenerator); + graph.addNode('reviewer', agentReviewer); + graph.addNode('finalize', finalize); + + // Wire edges: linear planner → generator → reviewer + graph.setEntryPoint('planner'); + graph.addEdge('planner', 'generator'); + graph.addEdge('generator', 'reviewer'); + + // Conditional edge from reviewer: pass→finalize, revise→planner + graph.addConditionalEdges('reviewer', routeAfterReview, { + planner: 'planner', + finalize: 'finalize', + }); + + graph.addEdge('finalize', END); + + return graph.compile({ + checkpointer: new MemorySaver(), + // raiseQuestion tool triggers interrupt() internally; + // no need for interruptBefore on specific nodes. 
+ }); +} diff --git a/apps/backend/src/services/transferAgent/llmUnifiedDiff.js b/apps/backend/src/services/transferAgent/llmUnifiedDiff.js new file mode 100644 index 0000000..259efd8 --- /dev/null +++ b/apps/backend/src/services/transferAgent/llmUnifiedDiff.js @@ -0,0 +1,281 @@ +import path from 'path'; +import { promises as fs } from 'fs'; +import { applyPatch } from 'diff'; +import { stripCodeFences, rejectCatastrophicFullTexRewrite } from './utils.js'; +import { TransferNodeError } from './transferNodeError.js'; +import { ensureDir } from '../../utils/fsUtils.js'; + +/** Set OPENPRISM_TRANSFER_SAVE_LLM_DIFF=0 to skip writing raw/patch files under .agent_runs/…/llm_diff/ */ +function isLlmDiffArtifactSaveEnabled() { + const e = process.env.OPENPRISM_TRANSFER_SAVE_LLM_DIFF; + if (e === '0' || e === 'false' || e === 'no') return false; + return true; +} + +/** + * @param {{ projectRoot: string, jobId: string }} debug + * @returns {{ absDir: string, relPosix: string } | null} + */ +function resolveDiffDebugDir(debug, nodeName, runId) { + if (!debug?.projectRoot || !debug?.jobId) return null; + const folder = `${nodeName}-${runId}`; + const relPosix = `.agent_runs/${debug.jobId}/llm_diff/${folder}`; + const absDir = path.join(debug.projectRoot, '.agent_runs', debug.jobId, 'llm_diff', folder); + return { absDir, relPosix }; +} + +async function persistDiffAttempt(absDir, attempt, payload) { + const p = (n) => path.join(absDir, n); + await fs.writeFile(p(`attempt_${attempt}_raw.txt`), payload.raw, 'utf8'); + await fs.writeFile(p(`attempt_${attempt}_extracted.patch`), payload.patchText, 'utf8'); + await fs.writeFile( + p(`attempt_${attempt}_meta.json`), + `${JSON.stringify(payload.meta, null, 2)}\n`, + 'utf8', + ); +} + +/** + * Prompt appendix: require git unified diff for a single virtual path (matches applyPatch on full file text). 
+ */ +export function mainTexDiffInstructions(virtualPath = 'main.tex') { + const v = virtualPath.replace(/\\/g, '/'); + return ` + +Output ONLY a unified diff in git format. Do NOT output the full .tex file or any explanation outside the patch. +Patch headers MUST be exactly (use these paths): +--- a/${v} ++++ b/${v} + +Then @@ ... @@ hunks with context lines (space prefix), removals (-), additions (+). Every context line (leading space) and every removed line (-) MUST be copied verbatim from CURRENT_FILE — same characters, trailing spaces, and line breaks. Do not paraphrase or re-wrap lines. If the change is small, use a single hunk with 3+ lines of real context from the file. + +Multi-hunk / structure (critical for applyPatch): +- If edits are separated by any lines you are not changing (paragraphs, equations, \\subsection, blank lines, etc.), use SEPARATE @@ hunks. Do NOT end one hunk right after \\end{figure} and immediately continue with \\begin{figure*} unless those lines are truly adjacent in CURRENT_FILE with nothing between them. +- If one hunk spans two distant regions, EVERY intervening line must appear unchanged as context lines (leading space) inside that same hunk. Safer: split into multiple hunks, each anchored at the real line numbers in CURRENT_FILE. +- In each @@ -OLDSTART,OLDCOUNT +NEWSTART,NEWCOUNT @@ header: OLDCOUNT must equal the number of lines in this hunk that start with SPACE or MINUS (old-file side). NEWCOUNT must equal the number of lines that start with SPACE or PLUS (new-file side). Wrong counts cause patch rejection. + +Example of valid minimal patch: +--- a/${v} ++++ b/${v} +@@ -1,3 +1,3 @@ + line1 +-old ++new + line3 +`; +} + +/** + * Strip prose/fences and keep the first unified diff block. + */ +export function extractUnifiedDiff(raw) { + if (raw == null) return ''; + let s = typeof raw === 'string' ? raw : String(raw); + // Handle ```diff ... 
``` wrapped output + const diffFence = s.match(/```(?:diff|patch)?\s*\n([\s\S]*?)```/i); + if (diffFence) s = diffFence[1].trim(); + else s = stripCodeFences(s); + + const gitIdx = s.search(/^diff --git\s/m); + const minusIdx = s.search(/^---\s+/m); + const start = + gitIdx >= 0 ? gitIdx : minusIdx >= 0 ? minusIdx : -1; + if (start === -1) return ''; + return s.slice(start).trimEnd(); +} + +/** + * @returns {{ ok: true, text: string } | { ok: false, reason: string }} + */ +export function applyUnifiedDiffToMainTex(baseTex, patchText) { + const patch = (patchText || '').trim(); + if (!patch) return { ok: false, reason: 'empty_diff' }; + try { + const result = applyPatch(baseTex, patch); + if (result === false) return { ok: false, reason: 'hunk_mismatch' }; + return { ok: true, text: result }; + } catch (e) { + return { + ok: false, + reason: `parse_or_apply: ${e?.message || String(e)}`, + }; + } +} + +/** + * After a successful apply: decide if we should retry the LLM. + * @returns {{ retry: boolean, reason?: string }} + */ +export function shouldRetryTexEdit(prevTex, nextTex) { + if (prevTex === nextTex) { + return { retry: true, reason: 'no_op_patch' }; + } + const catastrophic = rejectCatastrophicFullTexRewrite(prevTex, nextTex); + if (catastrophic) { + return { retry: true, reason: catastrophic }; + } + return { retry: false }; +} + +const DEFAULT_MAX = 3; + +/** Map machine reason → hint for the next LLM attempt */ +function retryHintForFailure(reason) { + const r = reason || ''; + if (r === 'hunk_mismatch') { + return `${r}: patch could not be aligned — context/remove lines must match CURRENT_FILE exactly. If you merged two distant edits into one hunk, split into separate @@ hunks and include every line between them as context (or do not skip intervening paragraphs/equations). Fix @@ OLDCOUNT/NEWCOUNT to match space/-/+ line counts.`; + } + if (r === 'empty_diff') { + return `${r}: no valid unified diff found in your reply. 
Output only the patch starting with --- a/`; + } + if (r.startsWith('parse_or_apply')) { + return `${r}: malformed patch syntax. Use standard unified diff with ---/+++/@@ and lines starting with space, -, or +.`; + } + if (r === 'no_op_patch') { + return `${r}: patch applied but file unchanged; include real +/- edits for the requested normalization.`; + } + if (r === 'output too short') { + return `${r}: result was far shorter than the source; do not delete large regions — small targeted hunks only.`; + } + return r; +} + +/** + * @param {object} opts + * @param {{ invoke: (messages: unknown[]) => Promise<{ content: unknown }> }} opts.llm + * @param {string} opts.baseTex - current file content + * @param {(failureNote: string) => string} opts.buildPrompt - full user prompt; failureNote is '' or PREVIOUS_ATTEMPT block + * @param {string} opts.nodeName + * @param {string} opts.phase + * @param {number} [opts.maxAttempts] + * @param {{ projectRoot: string, jobId: string }} [opts.debug] — saves each attempt under .agent_runs/<jobId>/llm_diff/<nodeName>-<runId>/ + * @returns {Promise<string>} merged text after successful patch + */ +export async function runLlmUnifiedDiffWithRetries({ + llm, + baseTex, + buildPrompt, + nodeName, + phase, + maxAttempts = DEFAULT_MAX, + debug, +}) { + let lastFailure = ''; + const runId = Date.now(); + const debugResolved = + isLlmDiffArtifactSaveEnabled() ? 
resolveDiffDebugDir(debug, nodeName, runId) : null; + let absDebugDir = null; + if (debugResolved) { + absDebugDir = debugResolved.absDir; + await ensureDir(absDebugDir); + await fs.writeFile(path.join(absDebugDir, 'input_main.tex'), baseTex, 'utf8'); + await fs.writeFile( + path.join(absDebugDir, 'README.txt'), + [ + 'OpenPrism unified-diff LLM debug bundle.', + 'input_main.tex — file content before this node ran.', + 'attempt_N_raw.txt — full model reply.', + 'attempt_N_extracted.patch — text passed to applyPatch after extractUnifiedDiff.', + 'attempt_N_meta.json — apply result and retry reasons.', + 'summary.json — written if all attempts fail.', + '', + 'Disable: OPENPRISM_TRANSFER_SAVE_LLM_DIFF=0', + '', + ].join('\n'), + 'utf8', + ); + } + + const attemptSummaries = []; + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const failureNote = lastFailure + ? `\n\nPREVIOUS_ATTEMPT_FAILED: ${retryHintForFailure(lastFailure)}\nReply with ONLY a corrected unified diff; headers --- a/ and +++ b/ must match the instructions.` + : ''; + const prompt = buildPrompt(failureNote); + const response = await llm.invoke([{ role: 'user', content: prompt }]); + const raw = + typeof response.content === 'string' + ? response.content + : Array.isArray(response.content) + ? response.content.map((p) => (typeof p === 'string' ? p : p?.text || '')).join('') + : ''; + + const patchText = extractUnifiedDiff(raw); + const applied = applyUnifiedDiffToMainTex(baseTex, patchText); + + let retryAfterApply = false; + let postApplyReason = ''; + if (applied.ok) { + const { retry, reason } = shouldRetryTexEdit(baseTex, applied.text); + retryAfterApply = retry; + postApplyReason = reason || ''; + } + + if (absDebugDir) { + await persistDiffAttempt(absDebugDir, attempt, { + raw, + patchText, + meta: { + attempt, + ts: new Date().toISOString(), + baseTexLength: baseTex.length, + rawLength: raw.length, + patchLength: patchText.length, + applyOk: applied.ok, + applyReason: applied.ok ? 
undefined : applied.reason, + postApplyRetry: retryAfterApply, + postApplyReason: retryAfterApply ? postApplyReason : undefined, + }, + }); + } + + attemptSummaries.push({ + attempt, + applyOk: applied.ok, + applyReason: applied.ok ? null : applied.reason, + postApplyRetry: retryAfterApply, + postApplyReason: retryAfterApply ? postApplyReason : null, + }); + + if (!applied.ok) { + lastFailure = applied.reason || 'apply_failed'; + continue; + } + + if (retryAfterApply) { + lastFailure = retryHintForFailure(postApplyReason || 'retry'); + continue; + } + + return applied.text; + } + + const relPath = debugResolved?.relPosix; + if (absDebugDir) { + await fs.writeFile( + path.join(absDebugDir, 'summary.json'), + `${JSON.stringify( + { + nodeName, + phase, + lastFailure: lastFailure || 'unknown', + maxAttempts, + inputTexChars: baseTex.length, + attempts: attemptSummaries, + }, + null, + 2, + )}\n`, + 'utf8', + ); + } + + const detail = lastFailure || 'unknown'; + const inputLen = baseTex.length; + const msg = relPath + ? `[${nodeName}] Unified diff failed after ${maxAttempts} attempt(s): ${detail} — input ${inputLen} chars — LLM outputs saved under ${relPath}/` + : `[${nodeName}] Unified diff failed after ${maxAttempts} attempt(s): ${detail} — input ${inputLen} chars`; + + throw new TransferNodeError(nodeName, phase, detail, msg, relPath, inputLen); +} diff --git a/apps/backend/src/services/transferAgent/neuripsRules.js b/apps/backend/src/services/transferAgent/neuripsRules.js new file mode 100644 index 0000000..aeb8f29 --- /dev/null +++ b/apps/backend/src/services/transferAgent/neuripsRules.js @@ -0,0 +1,65 @@ +import { promises as fs } from 'fs'; +import path from 'path'; +import { REPO_ROOT } from '../../config/constants.js'; + +/** + * Per-venue rules cache: venueId → { content, mtimeMs } + */ +const cache = new Map(); + +/** + * Load venue rules markdown from disk (cached in process memory). 
+ * + * Convention: rules file lives at `${REPO_ROOT}/${venueId}/${venueId}.md` + * e.g. neurips/neurips.md, icml/icml.md + * + * @param {string} venueId — e.g. 'neurips', 'icml', 'cvpr' + * @returns {Promise} + */ +export async function loadVenueRules(venueId) { + const filePath = path.join(REPO_ROOT, venueId, `${venueId}.md`); + try { + const st = await fs.stat(filePath); + const cached = cache.get(venueId); + if (cached && cached.mtimeMs === st.mtimeMs) { + return cached.content; + } + const content = await fs.readFile(filePath, 'utf8'); + cache.set(venueId, { content, mtimeMs: st.mtimeMs }); + return content; + } catch { + return ''; + } +} + +/** + * Synchronous accessor (after warm-up via loadVenueRules). + */ +export function getVenueRulesSync(venueId) { + return cache.get(venueId)?.content || ''; +} + +/** + * Format rules as an LLM prompt block. + */ +export function formatVenueHandbookBlock(venueId, fullMd) { + const label = venueId.toUpperCase(); + if (!fullMd?.trim()) { + return `\n\n[${label} handbook missing on disk — use template comments only.]\n`; + } + return `\n\n--- ${label}_FULL_HANDBOOK (Markdown, authoritative; follow strictly) ---\n${fullMd}\n--- END_${label}_FULL_HANDBOOK ---\n`; +} + +// ────────── Backward-compatible NeurIPS aliases ────────── + +export async function loadNeuripsRulesFull() { + return loadVenueRules('neurips'); +} + +export function getNeuripsRulesSync() { + return getVenueRulesSync('neurips'); +} + +export function formatNeuripsHandbookBlock(fullMd) { + return formatVenueHandbookBlock('neurips', fullMd); +} diff --git a/apps/backend/src/services/transferAgent/nodes/agentGenerator.js b/apps/backend/src/services/transferAgent/nodes/agentGenerator.js new file mode 100644 index 0000000..b9e5724 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/agentGenerator.js @@ -0,0 +1,182 @@ +/** + * agentGenerator — Generator node for the NeurIPS agentic transfer. 
+ * + * The Generator takes the migration plan from the Planner and executes it + * by reading source files, writing/patching target files, and copying assets. + * It operates autonomously through tool calls, deciding the order and strategy + * of modifications (preamble first, then body, then figures, then bibliography, etc.). + * + * Tools available: readFile, writeFile, applyDiff, grepFile, listProjectTree, copyAsset + */ + +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; +import { buildVenueSkillFromState } from '../skills/index.js'; +import { createGeneratorTools } from '../tools/index.js'; +import { NeuripsPhase, progressUpdate } from '../progressMeta.js'; +import { briefToolArgs } from '../utils.js'; + +const MAX_TOOL_ROUNDS = 40; + +/** + * Run the Generator agent. + * + * Receives the migration plan and autonomously executes it through tool calls. + */ +export async function agentGenerator(state, config) { + const iteration = state.currentIteration || 0; + const plan = state.migrationPlan || state.transferPlan || {}; + const lp = config?.configurable?._liveProgress; + + // Build tools + const ctx = { + sourceReadRoot: state.sourceReadRoot || state.sourceProjectRoot, + workspaceRoot: state.workspaceRoot || state.targetProjectRoot, + jobId: state.jobId, + }; + const tools = createGeneratorTools(ctx); + + // Build LLM + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + const llmWithTools = llm.bindTools(tools); + + // Build system prompt + const skill = await buildVenueSkillFromState(state); + + // Build user message + const reviewContext = + iteration > 0 && state.reviewResult + ? 
`\n\nREVIEWER FEEDBACK FROM PREVIOUS ITERATION: +${JSON.stringify(state.reviewResult, null, 2)} + +Fix the issues identified by the Reviewer. Read the current state of files before making changes.` + : ''; + + const userConfirmations = state.userConfirmations || {}; + const hasConfirmations = Object.keys(userConfirmations).length > 0; + + const venue = (state.transferIntake?.venue || 'neurips').toUpperCase(); + const isMineruMode = state.transferMode === 'mineru'; + const sourceNote = isMineruMode + ? `\nSOURCE MODE: MinerU (PDF → Markdown → LaTeX) +The source content is in Markdown format under _mineru_output/ in the target project. +Read the Markdown files and convert the content to LaTeX for the ${venue} template. +Images from the PDF are also in _mineru_output/ — use copyAsset to move them to the project root if needed.\n` + : ''; + const userMessage = `You are the GENERATOR. Execute the migration plan by reading source files and writing/patching the target ${venue} project. +${sourceNote} +MIGRATION PLAN: +${JSON.stringify(plan, null, 2)} + +${hasConfirmations ? `USER CONFIRMATIONS:\n${JSON.stringify(userConfirmations, null, 2)}\n` : ''} + +Source main file: "${state.sourceMainFile}" +Target main file: "${state.targetMainFile}" +${reviewContext} + +EXECUTION INSTRUCTIONS: +1. First, use listProjectTree("source") and listProjectTree("target") to see what's available +2. Use readFile to read both source and target main .tex files +3. Execute the migration following the CRITICAL CONSTRAINTS in your system prompt. General order: + a. PREAMBLE: Read source preamble → generate venue-compliant preamble per your system prompt rules → writeFile or applyDiff + b. BODY: Read source body → migrate content following section mapping → writeFile or applyDiff + c. FIGURES/TABLES: Normalize figure environments per venue rules (single-column venues: figure*→figure; two-column venues: keep figure* for full-width) + d. 
ASSETS: Use copyAsset to copy all referenced .bib, .bbl, images, .sty/.cls/.bst files + e. BIBLIOGRAPHY: Align \\cite commands and bibliography mechanism per venue rules in your system prompt + f. BLIND COMPLIANCE (if doubleBlind): Sanitize \\hypersetup{pdfauthor={}}, anonymize identifying content + g. VENUE-SPECIFIC STRUCTURE: Follow any venue-specific structural requirements from your system prompt (e.g. checklist for NeurIPS, impact statement for ICML) +4. After each major step, re-read the file to verify your changes + +STRATEGY NOTES: +- For the initial full migration (iteration 0), prefer writeFile for the complete .tex rewrite +- For subsequent fix iterations, prefer applyDiff for surgical corrections +- Always use applyDiff if you're only changing a few lines +- Always readFile BEFORE writeFile or applyDiff to get the current file state + +When you are done with all modifications, output: +Migration complete. Applied: [brief summary of what was done]`; + + // Run tool-calling loop + const messages = [ + { role: 'system', content: skill }, + { role: 'user', content: userMessage }, + ]; + + let summary = ''; + let toolCallCount = 0; + + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + if (lp) { lp.activeRole = 'generator'; lp.toolName = 'llm'; lp.toolArgs = ''; lp.toolRound = round; lp.maxToolRounds = MAX_TOOL_ROUNDS; lp.lastUpdate = Date.now(); } + const response = await llmWithTools.invoke(messages); + messages.push(response); + + // Check for tool calls + if (response.tool_calls && response.tool_calls.length > 0) { + for (const toolCall of response.tool_calls) { + const tool = tools.find((t) => t.name === toolCall.name); + if (!tool) { + messages.push({ + role: 'tool', + content: `[ERROR] Unknown tool: ${toolCall.name}`, + tool_call_id: toolCall.id, + }); + continue; + } + if (lp) { lp.toolName = toolCall.name; lp.toolArgs = briefToolArgs(toolCall.name, toolCall.args); lp.lastUpdate = Date.now(); } + const result = await 
tool.invoke(toolCall.args); + toolCallCount++; + messages.push({ + role: 'tool', + content: typeof result === 'string' ? result : JSON.stringify(result), + tool_call_id: toolCall.id, + }); + } + continue; + } + + // No tool calls — check for completion signal + const content = + typeof response.content === 'string' + ? response.content + : Array.isArray(response.content) + ? response.content.map((p) => (typeof p === 'string' ? p : p?.text || '')).join('') + : ''; + + const doneMatch = content.match( + /([\s\S]*?)<\/GENERATOR_DONE>/, + ); + if (doneMatch) { + summary = doneMatch[1].trim(); + break; + } + + // If no done signal and no tool calls, it might be reasoning — let it continue + // but ask it to either use tools or signal completion + if (round > MAX_TOOL_ROUNDS - 5) { + messages.push({ + role: 'user', + content: + 'Please complete your remaining work and output summary when finished.', + }); + } + } + + if (!summary) { + summary = `Generator completed after ${toolCallCount} tool calls (max rounds reached).`; + } + + return { + agentPhase: 'reviewing', + ...progressUpdate( + 'agentGenerator', + NeuripsPhase.agent_generating, + `Iteration ${iteration}: ${summary} (${toolCallCount} tool calls).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/agentPlanner.js b/apps/backend/src/services/transferAgent/nodes/agentPlanner.js new file mode 100644 index 0000000..73f8a02 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/agentPlanner.js @@ -0,0 +1,224 @@ +/** + * agentPlanner — Planner node for the NeurIPS agentic transfer. + * + * The Planner autonomously explores source and target projects using tools, + * then produces a structured migration plan. On subsequent iterations + * (when Reviewer sends back issues), it revises the plan accordingly. 
+ * + * Tools available: readFile, grepFile, listProjectTree, raiseQuestion + */ + +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; +import { buildVenueSkillFromState } from '../skills/index.js'; +import { createReadOnlyTools } from '../tools/index.js'; +import { NeuripsPhase, progressUpdate } from '../progressMeta.js'; +import { briefToolArgs } from '../utils.js'; +import { analyzeSource, buildSourceProfile } from './analyzeSource.js'; +import { analyzeTarget } from './analyzeTarget.js'; + +const MAX_TOOL_ROUNDS = 20; + +/** + * Run the Planner agent. + * + * On iteration 0: performs source/target analysis, then calls LLM with tools + * to explore and produce a migration plan. + * + * On iteration N>0: receives reviewer feedback, revises the plan. + */ +export async function agentPlanner(state, config) { + const iteration = state.currentIteration || 0; + const lp = config?.configurable?._liveProgress; + + // --- First iteration: run source + target analysis --- + let analysisState = {}; + if (iteration === 0) { + // Reuse existing analysis logic (no LLM, pure heuristic) + const sourceResult = await analyzeSource(state); + const targetResult = await analyzeTarget({ ...state, ...sourceResult }); + analysisState = { ...sourceResult, ...targetResult }; + } + + const mergedState = { ...state, ...analysisState }; + + // Build tools with project roots + const ctx = { + sourceReadRoot: mergedState.sourceReadRoot || mergedState.sourceProjectRoot, + workspaceRoot: mergedState.workspaceRoot || mergedState.targetProjectRoot, + jobId: mergedState.jobId, + }; + const tools = createReadOnlyTools(ctx); + + // Build LLM + const { endpoint, apiKey, model } = resolveLLMConfig(mergedState.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + const llmWithTools = llm.bindTools(tools); + + // 
Build system prompt + const skill = await buildVenueSkillFromState(mergedState); + + // Build user message for this iteration + const isMineruMode = mergedState.transferMode === 'mineru'; + const sourceDesc = isMineruMode + ? `The source content has been parsed from PDF by MinerU into Markdown format. + - Markdown content is available in the target project under _mineru_output/ + - Images extracted from the PDF are also in _mineru_output/ + - You should read the Markdown content and convert it to LaTeX for the target template. + - The source project may also have the original .tex files for reference.` + : `Source main file: "${mergedState.sourceMainFile}"`; + + let userMessage; + if (iteration === 0) { + userMessage = `You are the PLANNER. Your job is to explore the source and target projects and produce a detailed migration plan. + +${isMineruMode ? 'SOURCE MODE: MinerU (PDF → Markdown → LaTeX)\n' : ''}INSTRUCTIONS: +1. Use listProjectTree to see what files exist in both projects +2. Use readFile to examine key files (${isMineruMode ? 'look for Markdown files in _mineru_output/ and' : `source main file: "${mergedState.sourceMainFile}",`} target main file: "${mergedState.targetMainFile}") +3. Analyze the source paper's structure, ${isMineruMode ? 'sections, figures, tables, equations, and references from the Markdown' : 'packages, bibliography mechanism, figures, and special formatting'} +4. Study the target template structure (follow the venue-specific rules in your system prompt) +5. 
If you need user input on ambiguous decisions (e.g., float strategy, content dropping), use raiseQuestion + +After exploring, output your migration plan as a JSON object wrapped in tags: + + +{ + "sectionMapping": [ + { "sourceSection": "...", "targetSection": "...", "action": "map|merge|create|drop" } + ], + "assetStrategy": { + "bibFiles": ["files to copy"], + "images": ["image files to copy"], + "styles": ["style files to copy"], + "bibCommand": "bibliography|addbibresource|input_bbl" + }, + "preambleStrategy": "description of how to handle preamble migration", + "bodyStrategy": "description of how to handle body migration", + "bibliographyStrategy": "description of bibliography handling", + "blindStrategy": "description of double-blind compliance steps (if applicable)", + "figureStrategy": "description of figure/table normalization", + "risks": ["potential issues to watch for"], + "notes": "any special instructions" +} +`; + } else { + const review = mergedState.reviewResult || {}; + const issues = (review.issues || []) + .map((iss, i) => ` ${i + 1}. [${iss.severity || 'medium'}] ${iss.description}`) + .join('\n'); + const suggestions = (review.suggestions || []).join('\n - '); + + userMessage = `You are the PLANNER (revision iteration ${iteration}). + +The Reviewer found the following issues with the previous migration: + +ISSUES: +${issues || ' (none)'} + +SUGGESTIONS: + - ${suggestions || '(none)'} + +PREVIOUS PLAN: +${JSON.stringify(mergedState.migrationPlan || {}, null, 2)} + +Please revise the migration plan to address these issues. Use tools to inspect the current state of target files if needed. 
+ +Output the revised plan in tags (same JSON format as before).`; + } + + // Run tool-calling loop + const messages = [ + { role: 'system', content: skill }, + { role: 'user', content: userMessage }, + ]; + + let plan = null; + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + if (lp) { lp.activeRole = 'planner'; lp.toolName = 'llm'; lp.toolArgs = ''; lp.toolRound = round; lp.maxToolRounds = MAX_TOOL_ROUNDS; lp.lastUpdate = Date.now(); } + const response = await llmWithTools.invoke(messages); + messages.push(response); + + // Check for tool calls + if (response.tool_calls && response.tool_calls.length > 0) { + for (const toolCall of response.tool_calls) { + const tool = tools.find((t) => t.name === toolCall.name); + if (!tool) { + messages.push({ + role: 'tool', + content: `[ERROR] Unknown tool: ${toolCall.name}`, + tool_call_id: toolCall.id, + }); + continue; + } + if (lp) { lp.toolName = toolCall.name; lp.toolArgs = briefToolArgs(toolCall.name, toolCall.args); lp.lastUpdate = Date.now(); } + const result = await tool.invoke(toolCall.args); + messages.push({ + role: 'tool', + content: typeof result === 'string' ? result : JSON.stringify(result), + tool_call_id: toolCall.id, + }); + } + continue; + } + + // No tool calls — extract plan from response + const content = + typeof response.content === 'string' + ? response.content + : Array.isArray(response.content) + ? response.content.map((p) => (typeof p === 'string' ? 
p : p?.text || '')).join('') + : ''; + + const planMatch = content.match( + /([\s\S]*?)<\/MIGRATION_PLAN>/, + ); + if (planMatch) { + try { + plan = JSON.parse(planMatch[1].trim()); + } catch { + // Try to extract JSON more aggressively + const { extractJSON } = await import('../utils.js'); + plan = extractJSON(planMatch[1]); + } + } + + if (!plan) { + // Ask the LLM to output the plan properly + messages.push({ + role: 'user', + content: + 'Please output your migration plan as a JSON object inside tags.', + }); + continue; + } + + break; + } + + // Fallback plan if LLM didn't produce one + if (!plan) { + plan = { + sectionMapping: [], + assetStrategy: {}, + notes: 'Planner failed to produce a structured plan after max rounds.', + _plannerError: true, + }; + } + + return { + ...analysisState, + migrationPlan: plan, + transferPlan: plan, // backward compat + agentPhase: 'generating', + ...progressUpdate( + 'agentPlanner', + NeuripsPhase.agent_planning, + `Iteration ${iteration}: migration plan ${plan._plannerError ? 'FAILED' : 'ready'} (${(plan.sectionMapping || []).length} section mappings).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/agentReviewer.js b/apps/backend/src/services/transferAgent/nodes/agentReviewer.js new file mode 100644 index 0000000..ba9baf1 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/agentReviewer.js @@ -0,0 +1,224 @@ +/** + * agentReviewer — Reviewer node for the agentic transfer. + * + * The Reviewer inspects the target project after the Generator has made changes, + * checking for venue compliance, correctness, and completeness. + * It produces a structured review result with verdict ('pass' or 'revise'). + * + * All venue-specific constraints are loaded from reviewerChecklist skill — + * the prompt skeleton here is venue-agnostic. 
+ * + * Tools available: readFile, grepFile, listProjectTree, raiseQuestion + */ + +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; +import { buildVenueSkillFromState } from '../skills/index.js'; +import { buildReviewChecklist } from '../skills/reviewerChecklist.js'; +import { createReviewerTools } from '../tools/index.js'; +import { NeuripsPhase, progressUpdate } from '../progressMeta.js'; +import { extractJSON, briefToolArgs } from '../utils.js'; + +const MAX_TOOL_ROUNDS = 20; + +/** + * Run the Reviewer agent. + * + * Inspects the current state of target files and produces a review verdict. + */ +export async function agentReviewer(state, config) { + const iteration = state.currentIteration || 0; + const intake = state.transferIntake || {}; + const lp = config?.configurable?._liveProgress; + + // Build tools + const ctx = { + sourceReadRoot: state.sourceReadRoot || state.sourceProjectRoot, + workspaceRoot: state.workspaceRoot || state.targetProjectRoot, + jobId: state.jobId, + }; + const tools = createReviewerTools(ctx); + + // Build LLM + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.1, + }); + const llmWithTools = llm.bindTools(tools); + + // Build system prompt (venue-specific skill) + const skill = await buildVenueSkillFromState(state); + + // Load venue-specific review checklist + const venueId = (intake.venue || state.transferGraphKind || 'neurips').toLowerCase(); + const venueUpper = venueId.toUpperCase(); + const checklist = buildReviewChecklist(venueId, { intake }); + + // ── Build user message (venue-agnostic skeleton) ── + + const userMessage = `You are the REVIEWER (iteration ${iteration}). Inspect the target ${venueUpper} project and determine if the migration is complete and correct. 
+ +Target main file: "${state.targetMainFile}" + +REVIEW CHECKLIST — check each item using tools: + +1. STRUCTURE & COMPILATION READINESS: + - \\documentclass{article} (not revtex, amsart, llncs, etc.) + ${checklist.structure} + - \\begin{document} ... \\end{document} present and well-formed + +2. CONTENT COMPLETENESS: + - All source sections mapped to target (compare with source) + - Mathematical content, equations preserved + - \\cite{}, \\ref{}, \\label{} references intact + - No placeholder text like "TODO", "INSERT HERE", "FIXME" in the body + +3. FIGURE/TABLE COMPLIANCE: + ${checklist.figures} + +4. BIBLIOGRAPHY: + - Bibliography mechanism is consistent (bibtex natbib, or \\input{.bbl}) + ${checklist.bibliography} + +5. ASSETS: + - All referenced images exist in target project + - Required .sty/.cls/.bst files present + +${checklist.policy} + +7. ${checklist.blind} + +INSTRUCTIONS: +1. Use readFile to read the target main .tex file +2. Use grepFile to check for specific patterns +3. Use listProjectTree to verify asset files exist +4. Compare key sections with the source if needed +5. 
If you discover an issue requiring user decision, use raiseQuestion + +After your review, output a JSON result in tags: + + +{ + "verdict": "pass" or "revise", + "issues": [ + { + "category": "structure|content|figures|bibliography|assets|policy|blind", + "severity": "high|medium|low", + "description": "What's wrong", + "suggestion": "How to fix it" + } + ], + "suggestions": ["General improvement suggestions"], + "summary": "Brief overall assessment" +} + + +Rules for verdict: +- "pass" = no high-severity issues, the file is submission-ready +- "revise" = has high or multiple medium-severity issues that must be fixed`; + + // Run tool-calling loop + const messages = [ + { role: 'system', content: skill }, + { role: 'user', content: userMessage }, + ]; + + let reviewResult = null; + + for (let round = 0; round < MAX_TOOL_ROUNDS; round++) { + if (lp) { lp.activeRole = 'reviewer'; lp.toolName = 'llm'; lp.toolArgs = ''; lp.toolRound = round; lp.maxToolRounds = MAX_TOOL_ROUNDS; lp.lastUpdate = Date.now(); } + const response = await llmWithTools.invoke(messages); + messages.push(response); + + // Check for tool calls + if (response.tool_calls && response.tool_calls.length > 0) { + for (const toolCall of response.tool_calls) { + const tool = tools.find((t) => t.name === toolCall.name); + if (!tool) { + messages.push({ + role: 'tool', + content: `[ERROR] Unknown tool: ${toolCall.name}`, + tool_call_id: toolCall.id, + }); + continue; + } + if (lp) { lp.toolName = toolCall.name; lp.toolArgs = briefToolArgs(toolCall.name, toolCall.args); lp.lastUpdate = Date.now(); } + const result = await tool.invoke(toolCall.args); + messages.push({ + role: 'tool', + content: typeof result === 'string' ? result : JSON.stringify(result), + tool_call_id: toolCall.id, + }); + } + continue; + } + + // No tool calls — extract review result + const content = + typeof response.content === 'string' + ? response.content + : Array.isArray(response.content) + ? 
response.content.map((p) => (typeof p === 'string' ? p : p?.text || '')).join('') + : ''; + + const reviewMatch = content.match( + /([\s\S]*?)<\/REVIEW_RESULT>/, + ); + if (reviewMatch) { + try { + reviewResult = JSON.parse(reviewMatch[1].trim()); + } catch { + reviewResult = extractJSON(reviewMatch[1]); + } + } + + if (!reviewResult) { + messages.push({ + role: 'user', + content: + 'Please output your review result as a JSON object inside tags.', + }); + continue; + } + + break; + } + + // Fallback + if (!reviewResult) { + reviewResult = { + verdict: 'pass', + issues: [], + suggestions: [], + summary: 'Reviewer could not complete structured review; passing by default.', + _reviewerError: true, + }; + } + + // Ensure verdict is valid + if (!['pass', 'revise'].includes(reviewResult.verdict)) { + reviewResult.verdict = reviewResult.issues?.some( + (i) => i.severity === 'high', + ) + ? 'revise' + : 'pass'; + } + + const isPass = reviewResult.verdict === 'pass'; + const nextIteration = isPass ? iteration : iteration + 1; + + return { + reviewResult, + currentIteration: nextIteration, + agentPhase: isPass ? 'finalized' : 'planning', + ...progressUpdate( + 'agentReviewer', + NeuripsPhase.agent_reviewing, + `Iteration ${iteration}: verdict=${reviewResult.verdict}, ${(reviewResult.issues || []).length} issues. ${reviewResult.summary || ''}`, + isPass ? 
'info' : 'warn', + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/analyzeSource.js b/apps/backend/src/services/transferAgent/nodes/analyzeSource.js index 50b31a4..d68814c 100644 --- a/apps/backend/src/services/transferAgent/nodes/analyzeSource.js +++ b/apps/backend/src/services/transferAgent/nodes/analyzeSource.js @@ -3,7 +3,7 @@ import path from 'path'; import { getProjectRoot } from '../../projectService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; import { listFilesRecursive } from '../../../utils/fsUtils.js'; -import { isTextFile } from '../../../utils/texUtils.js'; +import { progressUpdate } from '../progressMeta.js'; /** * Recursively resolve \input{} and \include{} references, @@ -87,6 +87,52 @@ function collectAssets(content, allFiles) { return assets; } +/** + * Heuristic profile of LaTeX source (no LLM). + */ +export function buildSourceProfile(content) { + const docMatch = content.match(/\\documentclass(?:\[[^\]]*\])?\{([^}]+)\}/); + const documentclass = docMatch ? 
docMatch[1].trim() : ''; + + const pkgRe = /\\usepackage(?:\[[^\]]*\])?\{([^}]+)\}/g; + const packages = new Set(); + let m; + while ((m = pkgRe.exec(content)) !== null) { + m[1].split(',').forEach((p) => packages.add(p.trim())); + } + + const twocolumn = /\\documentclass(?:\[[^\]]*twocolumn[^\]]*\])?\{[^}]+\}/.test(content) + || /\\usepackage(?:\[[^\]]*\])?\{twocolumn\}/.test(content); + + const hasBiblatex = packages.has('biblatex'); + const hasNatbib = packages.has('natbib'); + const hasInputBbl = /\\input\s*\{[^}]*\.bbl\}/i.test(content) + || /\\include\s*\{[^}]*\.bbl\}/i.test(content); + const hasBibtexCmd = /\\bibliography\s*\{/.test(content); + + let bibMechanism = 'none'; + if (hasBiblatex) bibMechanism = 'biblatex'; + else if (hasInputBbl) bibMechanism = 'input_bbl'; + else if (hasBibtexCmd || hasNatbib) bibMechanism = 'bibtex_natbib'; + + const figureStar = /\\begin\s*\{\s*figure\*\s*\}/i.test(content); + const tableStar = /\\begin\s*\{\s*table\*\s*\}/i.test(content); + + const revtex = /revtex|revtex4/i.test(documentclass); + + return { + documentclass, + packages: [...packages].sort(), + twocolumn, + figureStar, + tableStar, + revtex, + bibMechanism, + hasNatbib, + hasBiblatex, + }; +} + /** * analyzeSource node — reads source project, resolves inputs, * parses outline, collects assets. 
@@ -99,12 +145,19 @@ export async function analyzeSource(state) { const fullContent = await resolveInputs(projectRoot, state.sourceMainFile); const outline = parseOutline(fullContent); const assets = collectAssets(fullContent, allFiles); + const sourceProfile = buildSourceProfile(fullContent); return { sourceProjectRoot: projectRoot, + sourceReadRoot: projectRoot, sourceOutline: outline, sourceFullContent: fullContent, sourceAssets: assets, - progressLog: `[analyzeSource] Parsed ${outline.length} sections, found ${assets.bib.length} bib files, ${assets.images.length} images, ${assets.styles.length} style files.`, + sourceProfile, + ...progressUpdate( + 'analyzeSource', + 'source_analysis', + `Parsed ${outline.length} sections; bibMechanism=${sourceProfile.bibMechanism}; class=${sourceProfile.documentclass || '?'}`, + ), }; } diff --git a/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js b/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js index e29bb89..2924fb7 100644 --- a/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js +++ b/apps/backend/src/services/transferAgent/nodes/analyzeTarget.js @@ -3,6 +3,7 @@ import path from 'path'; import { getProjectRoot } from '../../projectService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; import { listFilesRecursive } from '../../../utils/fsUtils.js'; +import { progressUpdate } from '../progressMeta.js'; /** * Recursively resolve \input{} and \include references. @@ -72,9 +73,14 @@ export async function analyzeTarget(state) { return { targetProjectRoot: projectRoot, + workspaceRoot: projectRoot, targetOutline: outline, targetPreamble: preamble, targetTemplateContent: fullContent, - progressLog: `[analyzeTarget] Template has ${outline.length} sections. 
Preamble length: ${preamble.length} chars.`, + ...progressUpdate( + 'analyzeTarget', + 'source_analysis', + `Template ${outline.length} sections; preamble ${preamble.length} chars.`, + ), }; } diff --git a/apps/backend/src/services/transferAgent/nodes/applyTransfer.js b/apps/backend/src/services/transferAgent/nodes/applyTransfer.js index 0c67c88..e93c2d2 100644 --- a/apps/backend/src/services/transferAgent/nodes/applyTransfer.js +++ b/apps/backend/src/services/transferAgent/nodes/applyTransfer.js @@ -23,7 +23,7 @@ SOURCE CONTENT (full): ${state.sourceFullContent} RULES: -1. Keep the target preamble (everything before \\begin{document}) EXACTLY as-is +1. Keep the target preamble (everything before \\begin{document}) EXACTLY as-is — do NOT change \\documentclass, \\usepackage for the venue style, or any template-specific commands 2. Only modify content between \\begin{document} and \\end{document} 3. Follow the section mapping in the migration plan 4. Preserve ALL \\cite{}, \\ref{}, \\label{} commands from the source @@ -59,7 +59,7 @@ ${state.targetTemplateContent} ${imageList || '(none)'} ## RULES: -1. Keep the target preamble (everything before \\begin{document}) EXACTLY as-is +1. Keep the target preamble (everything before \\begin{document}) EXACTLY as-is — do NOT change \\documentclass, \\usepackage for the venue style, or any template-specific commands 2. Only modify content between \\begin{document} and \\end{document} 3. Map Markdown headings to the corresponding \\section{}, \\subsection{} etc. in the template 4. 
Formulas in the Markdown are already in LaTeX format ($...$ or $$...$$) — preserve them as-is diff --git a/apps/backend/src/services/transferAgent/nodes/compile.js b/apps/backend/src/services/transferAgent/nodes/compile.js index 1523818..e510031 100644 --- a/apps/backend/src/services/transferAgent/nodes/compile.js +++ b/apps/backend/src/services/transferAgent/nodes/compile.js @@ -1,4 +1,5 @@ import { runCompile } from '../../compileService.js'; +import { progressUpdate } from '../progressMeta.js'; /** * compile node — runs LaTeX compilation on the target project @@ -12,10 +13,19 @@ export async function compile(state) { }); const attempt = (state.compileAttempt || 0) + 1; + const msg = `Attempt ${attempt}: ${result.ok ? 'SUCCESS' : 'FAILED'} (exit ${result.status}).`; + + if (state.transferGraphKind === 'neurips') { + return { + compileResult: result, + compileAttempt: attempt, + ...progressUpdate('compile', 'compile', msg, result.ok ? 'info' : 'warn'), + }; + } return { compileResult: result, compileAttempt: attempt, - progressLog: `[compile] Attempt ${attempt}: ${result.ok ? 'SUCCESS' : 'FAILED'} (exit ${result.status}).`, + progressLog: `[compile] ${msg}`, }; } diff --git a/apps/backend/src/services/transferAgent/nodes/compileSource.js b/apps/backend/src/services/transferAgent/nodes/compileSource.js index 0bfd592..e2ea8a4 100644 --- a/apps/backend/src/services/transferAgent/nodes/compileSource.js +++ b/apps/backend/src/services/transferAgent/nodes/compileSource.js @@ -10,7 +10,15 @@ import { ensureDir } from '../../../utils/fsUtils.js'; * (e.g. user uploaded a PDF directly). */ export async function compileSource(state) { - const sourceProjectRoot = state.sourceProjectId ? 
await getProjectRoot(state.sourceProjectId) : undefined; + let sourceProjectRoot; + if (state.sourceProjectId) { + try { + sourceProjectRoot = await getProjectRoot(state.sourceProjectId); + } catch { + // Source project not found — not fatal if we have a PDF + sourceProjectRoot = undefined; + } + } // If user uploaded a PDF directly, skip compilation if (state.sourcePdfPath) { diff --git a/apps/backend/src/services/transferAgent/nodes/copyAssets.js b/apps/backend/src/services/transferAgent/nodes/copyAssets.js index b0f9cd1..928e31c 100644 --- a/apps/backend/src/services/transferAgent/nodes/copyAssets.js +++ b/apps/backend/src/services/transferAgent/nodes/copyAssets.js @@ -38,7 +38,7 @@ async function copySingleAsset(srcRoot, destRoot, relPath) { } /** - * Legacy mode: copy bib files, images, and style files from source project. + * Legacy mode: copy bib/bbl files, images, and style files from source project. */ async function copyAssetsLegacy(state) { const assets = state.sourceAssets || {}; @@ -49,6 +49,19 @@ async function copyAssetsLegacy(state) { results.push(r); } + // Copy .bbl files + if (state.sourceProjectRoot) { + const allFiles = await listFilesRecursive(state.sourceProjectRoot); + const bblFiles = allFiles + .filter(f => f.type === 'file' && path.extname(f.path).toLowerCase() === '.bbl') + .map(f => f.path); + + for (const bbl of bblFiles) { + const r = await copySingleAsset(state.sourceProjectRoot, state.targetProjectRoot, bbl); + results.push(r); + } + } + for (const img of (assets.images || [])) { const r = await copySingleAsset(state.sourceProjectRoot, state.targetProjectRoot, img); results.push(r); @@ -73,7 +86,7 @@ async function copyAssetsLegacy(state) { /** * MinerU mode: copy MinerU-extracted images to target project images/ dir, - * and optionally copy bib files from source project if available. + * and optionally copy bib/bbl files from source project if available. 
*/ async function copyAssetsMineru(state) { const images = state.sourceImages || []; @@ -110,8 +123,24 @@ async function copyAssetsMineru(state) { } } + // Copy .bbl files from source project if available + let bblCount = 0; + if (state.sourceProjectRoot) { + const allFiles = await listFilesRecursive(state.sourceProjectRoot); + const bblFiles = allFiles + .filter(f => f.type === 'file' && path.extname(f.path).toLowerCase() === '.bbl') + .map(f => f.path); + + for (const bbl of bblFiles) { + const r = await copySingleAsset( + state.sourceProjectRoot, state.targetProjectRoot, bbl + ); + if (r.status === 'copied') bblCount++; + } + } + return { - progressLog: `[copyAssets:mineru] Copied ${copiedCount} images, ${bibCount} bib files.`, + progressLog: `[copyAssets:mineru] Copied ${copiedCount} images, ${bibCount} bib files, ${bblCount} bbl files.`, }; } diff --git a/apps/backend/src/services/transferAgent/nodes/draftPlan.js b/apps/backend/src/services/transferAgent/nodes/draftPlan.js index b053141..5b4da47 100644 --- a/apps/backend/src/services/transferAgent/nodes/draftPlan.js +++ b/apps/backend/src/services/transferAgent/nodes/draftPlan.js @@ -1,6 +1,8 @@ import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; import { invokeLLMForJSON } from '../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../neuripsRules.js'; +import { progressUpdate } from '../progressMeta.js'; /** * draftPlan node — LLM generates a structured transfer plan @@ -16,6 +18,30 @@ export async function draftPlan(state) { temperature: 0.2, }); + const isNeurips = state.transferGraphKind === 'neurips'; + const handbook = isNeurips + ? formatNeuripsHandbookBlock(await loadNeuripsRulesFull()) + : ''; + + const extraNeurips = isNeurips + ? 
` +SOURCE_PROFILE (heuristic JSON): +${JSON.stringify(state.sourceProfile || {}, null, 2)} + +TRANSFER_INTAKE: +${JSON.stringify(state.transferIntake || {}, null, 2)} +${handbook} +` + : ''; + + const neuripsStructure = isNeurips + ? `, + "dependencies": ["ordered strings, e.g. natbib before cite fixes"], + "humanReview": ["items needing author judgment"], + "preambleNotes": "short preamble migration notes", + "bodyNotes": "short body migration notes"` + : ''; + const prompt = `You are a LaTeX template migration planner. Given a SOURCE paper outline and a TARGET template outline, produce a JSON migration plan. @@ -31,6 +57,7 @@ ${JSON.stringify(state.sourceAssets, null, 2)} TARGET PREAMBLE (first 2000 chars): ${(state.targetPreamble || '').slice(0, 2000)} +${extraNeurips} Produce a JSON object with this structure: { @@ -42,7 +69,7 @@ Produce a JSON object with this structure: "images": ["copy list"], "bibCommand": "bibliography|addbibresource" }, - "notes": "any special instructions for the migration" + "notes": "any special instructions for the migration"${neuripsStructure} } Rules: @@ -50,13 +77,17 @@ Rules: - If target has no matching section, use action "create" - If source section has no place in target, use action "drop" (rare) - Preserve all citations, references, labels, and figure/table environments -- Keep the target preamble unchanged +${isNeurips ? 
'- Follow NeurIPS handbook above for anonymous mode, floats, bibliography, and page limits' : '- Keep the target preamble unchanged'} - Output ONLY valid JSON, no markdown fences`; const planSchema = { sectionMapping: { type: 'array', required: true }, - assetStrategy: { type: 'object', required: true }, - notes: { type: 'string', required: false }, + assetStrategy: { type: 'object', required: true }, + notes: { type: 'string', required: false }, + dependencies: { type: 'array', required: false }, + humanReview: { type: 'array', required: false }, + preambleNotes: { type: 'string', required: false }, + bodyNotes: { type: 'string', required: false }, }; const { parsed, raw, retries } = await invokeLLMForJSON( @@ -70,6 +101,10 @@ Rules: return { transferPlan: plan, - progressLog: `[draftPlan] Generated migration plan with ${plan.sectionMapping?.length || 0} section mappings${retryNote}.`, + ...progressUpdate( + 'draftPlan', + 'migration_plan', + `Generated migration plan with ${plan.sectionMapping?.length || 0} section mappings${retryNote}.`, + ), }; } diff --git a/apps/backend/src/services/transferAgent/nodes/finalize.js b/apps/backend/src/services/transferAgent/nodes/finalize.js index f0387be..ecba67e 100644 --- a/apps/backend/src/services/transferAgent/nodes/finalize.js +++ b/apps/backend/src/services/transferAgent/nodes/finalize.js @@ -1,19 +1,57 @@ +import { NeuripsPhase, progressUpdate } from '../progressMeta.js'; + +/** + * Venue-specific bundle notes for the user. 
+ */ +const VENUE_BUNDLE_NOTES = { + neurips: [ + 'NeurIPS 流程在服务端编译前结束;请在本地用 pdflatex/bibtex 等自行生成 PDF。', + '提交材料:main.tex、neurips_2026.sty、checklist.tex、插图、所用 .bib 或 .bbl。', + '勿上传:.aux、.log、.out、.synctex.gz、.compile/、.agent_runs/', + ].join(' '), + icml: [ + 'ICML 流程在服务端编译前结束;请在本地用 pdflatex/bibtex 等自行生成 PDF。', + '提交材料:main.tex、icml2026.sty、icml2026.bst、插图、所用 .bib 或 .bbl。', + '勿上传:.aux、.log、.out、.synctex.gz、.compile/、.agent_runs/', + ].join(' '), +}; + /** * finalize node — sets final status and collects results. */ export async function finalize(state) { + const venue = (state.transferIntake?.venue || state.transferGraphKind || 'legacy').toLowerCase(); + const isAgentVenue = ['neurips', 'icml', 'cvpr', 'acl'].includes(venue); const compileOk = state.compileResult?.ok || false; const hasPdf = !!state.compileResult?.pdf; - const finalStatus = compileOk && hasPdf ? 'success' : 'failed'; - const error = !hasPdf - ? (state.compileResult?.error || 'No PDF generated after all attempts.') - : undefined; + const finalStatus = isAgentVenue + ? 'success' + : compileOk && hasPdf + ? 'success' + : 'failed'; + const error = isAgentVenue + ? undefined + : !hasPdf + ? (state.compileResult?.error || 'No PDF generated after all attempts.') + : undefined; + + const bundleNotes = VENUE_BUNDLE_NOTES[venue] || ''; + + const summaryMsg = isAgentVenue + ? `Transfer ${finalStatus} (no server compile). ${bundleNotes}` + : `Transfer ${finalStatus}. Compile attempts: ${state.compileAttempt}, Layout attempts: ${state.layoutAttempt}.${bundleNotes ? ` ${bundleNotes}` : ''}`; return { status: finalStatus, finalPdf: state.compileResult?.pdf || '', error, - progressLog: `[finalize] Transfer ${finalStatus}. Compile attempts: ${state.compileAttempt}, Layout attempts: ${state.layoutAttempt}.`, + bundleNotes, + ...progressUpdate( + 'finalize', + NeuripsPhase.finalize, + summaryMsg, + finalStatus === 'success' ? 
'info' : 'error', + ), }; } diff --git a/apps/backend/src/services/transferAgent/nodes/fixCompile.js b/apps/backend/src/services/transferAgent/nodes/fixCompile.js index 8d1e1da..45562f7 100644 --- a/apps/backend/src/services/transferAgent/nodes/fixCompile.js +++ b/apps/backend/src/services/transferAgent/nodes/fixCompile.js @@ -3,6 +3,8 @@ import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; import { writeFileWithSnapshot, stripCodeFences } from '../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../neuripsRules.js'; +import { progressUpdate } from '../progressMeta.js'; const MAX_LOG_TAIL = 8000; @@ -24,15 +26,30 @@ export async function fixCompile(state) { temperature: 0.2, }); + // Determine venue context so the LLM doesn't switch templates + const venue = state.transferIntake?.venue || state.transferGraphKind || 'unknown'; + const venueConstraint = ` +CRITICAL: This paper targets the "${venue.toUpperCase()}" venue. +- Do NOT change the \\usepackage{} for the venue style (e.g. icml2026, neurips_2026). +- Do NOT switch from one venue template to another. +- If a .sty file is missing, do NOT replace it with a different venue's .sty. +- Only fix actual LaTeX errors; preserve the venue template structure. +`; + + const neuripsBlock = state.transferGraphKind === 'neurips' + ? formatNeuripsHandbookBlock(await loadNeuripsRulesFull()) + : ''; + const prompt = `You are a LaTeX compilation error fixer. The following LaTeX file failed to compile. Fix the errors and return the corrected COMPLETE file. - +${venueConstraint} COMPILE LOG (last ${MAX_LOG_TAIL} chars): ${log} CURRENT FILE (${state.targetMainFile}): ${currentTex} +${neuripsBlock} Common fixes: - Missing packages: add \\usepackage{...} in preamble @@ -53,6 +70,16 @@ Output ONLY the complete corrected LaTeX file. 
No explanations, no markdown fenc state.jobId ); + if (state.transferGraphKind === 'neurips') { + return { + ...progressUpdate( + 'fixCompile', + 'compile', + `Applied LLM fix for compile attempt ${state.compileAttempt}.`, + ), + }; + } + return { progressLog: `[fixCompile] Applied LLM fix for compile attempt ${state.compileAttempt}.`, }; diff --git a/apps/backend/src/services/transferAgent/nodes/fixLayout.js b/apps/backend/src/services/transferAgent/nodes/fixLayout.js index 05e6ff5..c6f6caf 100644 --- a/apps/backend/src/services/transferAgent/nodes/fixLayout.js +++ b/apps/backend/src/services/transferAgent/nodes/fixLayout.js @@ -3,6 +3,7 @@ import { ChatOpenAI } from '@langchain/openai'; import { resolveLLMConfig, normalizeBaseURL } from '../../llmService.js'; import { safeJoin } from '../../../utils/pathUtils.js'; import { writeFileWithSnapshot, stripCodeFences } from '../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../neuripsRules.js'; /** * fixLayout node — LLM reads current main.tex + VLM layout issues, @@ -23,6 +24,10 @@ export async function fixLayout(state) { temperature: 0.2, }); + const neuripsBlock = state.transferGraphKind === 'neurips' + ? formatNeuripsHandbookBlock(await loadNeuripsRulesFull()) + : ''; + const prompt = `You are a LaTeX layout fixer. The following LaTeX file has layout issues identified by visual inspection. 
@@ -33,6 +38,7 @@ ${issuesText} CURRENT FILE (${state.targetMainFile}): ${currentTex} +${neuripsBlock} Common layout fixes: - Overflow: adjust figure width, use \\resizebox, or \\adjustbox diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/applyBibliography.js b/apps/backend/src/services/transferAgent/nodes/neurips/applyBibliography.js new file mode 100644 index 0000000..50250d6 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/applyBibliography.js @@ -0,0 +1,72 @@ +import { promises as fs } from 'fs'; +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../../llmService.js'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { writeFileWithSnapshot } from '../../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../../neuripsRules.js'; +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; +import { + mainTexDiffInstructions, + runLlmUnifiedDiffWithRetries, +} from '../../llmUnifiedDiff.js'; + +export async function applyBibliography(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + const currentTex = await fs.readFile(abs, 'utf8'); + const virtualPath = rel.replace(/\\/g, '/'); + + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + + const handbook = formatNeuripsHandbookBlock(await loadNeuripsRulesFull()); + const diffInstr = mainTexDiffInstructions(virtualPath); + const basePrompt = `Fix bibliography / citations block for NeurIPS 2026. + +CRITICAL RULES: +1. NeurIPS uses NUMERIC citations [1,2,3], NOT author-year (Author [2007]). +2. The file MUST contain "\\\\PassOptionsToPackage{numbers,compress,sort}{natbib}" BEFORE "\\\\documentclass". 
+ If it is missing or commented out, ADD it before \\\\documentclass. +3. Use \\\\bibliographystyle{unsrtnat} (NOT plainnat, which defaults to author-year). +4. Keep \\\\bibliography{...} pointing to the correct .bib file name. +5. If no .bib file exists and a .bbl file is present, that is fine — LaTeX will use the .bbl directly. + +USER_CONFIRMATIONS_JSON: +${JSON.stringify(state.userConfirmations || {})} + +SOURCE_PROFILE_JSON: +${JSON.stringify(state.sourceProfile || {}, null, 2)} + +CURRENT_FILE: +${currentTex} +${handbook} + +Align \\\\cite with the bibliography mechanism chosen; keep \\\\input{checklist.tex} and ack/references structure valid.${diffInstr}`; + + const merged = await runLlmUnifiedDiffWithRetries({ + llm, + baseTex: currentTex, + buildPrompt: (failureNote) => basePrompt + (failureNote || ''), + nodeName: 'applyBibliography', + phase: NeuripsPhase.bibliography, + maxAttempts: 3, + debug: { projectRoot: root, jobId: state.jobId }, + }); + + await writeFileWithSnapshot(root, rel, merged, state.jobId); + + return { + lastGoodPhase: 'bib', + ...progressUpdate( + 'applyBibliography', + NeuripsPhase.bibliography, + `Bibliography pass (unified diff applied, ${merged.length} chars).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/applyBody.js b/apps/backend/src/services/transferAgent/nodes/neurips/applyBody.js new file mode 100644 index 0000000..053d317 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/applyBody.js @@ -0,0 +1,74 @@ +import { promises as fs } from 'fs'; +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../../llmService.js'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { + writeFileWithSnapshot, + stripCodeFences, + splitTexDocument, + mergeTexDocument, +} from '../../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../../neuripsRules.js'; +import { NeuripsPhase, progressUpdate 
} from '../../progressMeta.js'; + +export async function applyBody(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + const currentTex = await fs.readFile(abs, 'utf8'); + const srcParts = splitTexDocument(state.sourceFullContent || ''); + const tgtParts = splitTexDocument(currentTex); + + if (!tgtParts.hasDocument || !srcParts.hasDocument) { + return { + ...progressUpdate('applyBody', NeuripsPhase.body, 'Missing document environment; skipped.'), + }; + } + + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + + const handbook = formatNeuripsHandbookBlock(await loadNeuripsRulesFull()); + const prompt = `You migrate the DOCUMENT BODY to NeurIPS 2026 structure. + +USER_CONFIRMATIONS_JSON: +${JSON.stringify(state.userConfirmations || {})} + +MIGRATION_PLAN_JSON: +${JSON.stringify(state.transferPlan || {}, null, 2)} + +SOURCE_BODY (\\begin{document}...\\end{document}): +${srcParts.body} + +CURRENT_TARGET_FILE (full, for reference of checklist/ack placement): +${currentTex} +${handbook} + +Output ONLY the document body block: from \\begin{document} through \\end{document} inclusive. Map sections per plan. Preserve all \\\\cite{}, \\\\ref{}, \\\\label{} and substantive math/figures/tables. Follow NeurIPS abstract (one paragraph) and sectioning rules from the handbook. No markdown fences.`; + + const response = await llm.invoke([{ role: 'user', content: prompt }]); + let newBody = stripCodeFences( + typeof response.content === 'string' ? 
response.content : '', + ).trim(); + + if (!newBody.includes('\\begin{document}')) { + newBody = `\\begin{document}\n\n${newBody}\n\n\\end{document}`; + } + + const merged = mergeTexDocument(tgtParts.preamble, newBody, tgtParts.tail); + await writeFileWithSnapshot(root, rel, merged, state.jobId); + + return { + lastGoodPhase: 'body', + ...progressUpdate( + 'applyBody', + NeuripsPhase.body, + `Wrote document body (${newBody.length} chars).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/applyPreamble.js b/apps/backend/src/services/transferAgent/nodes/neurips/applyPreamble.js new file mode 100644 index 0000000..62b1c48 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/applyPreamble.js @@ -0,0 +1,107 @@ +import { promises as fs } from 'fs'; +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../../llmService.js'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { + writeFileWithSnapshot, + stripCodeFences, + splitTexDocument, + mergeTexDocument, +} from '../../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../../neuripsRules.js'; +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +export async function applyPreamble(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + const currentTex = await fs.readFile(abs, 'utf8'); + const srcParts = splitTexDocument(state.sourceFullContent || ''); + const tgtParts = splitTexDocument(currentTex); + + if (!tgtParts.hasDocument) { + return { + ...progressUpdate('applyPreamble', NeuripsPhase.preamble, 'Target missing \\begin{document}; skipped preamble merge.'), + }; + } + + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: 
normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + + const handbook = formatNeuripsHandbookBlock(await loadNeuripsRulesFull()); + const intake = state.transferIntake || {}; + const isDoubleBlind = intake.doubleBlind !== false; + const isPreprint = !!intake.preprint; + const neuripsOption = isPreprint ? '[preprint]' : '[main]'; + const prompt = `You migrate a LaTeX preamble to NeurIPS 2026 (see handbook below). + +SUBMISSION MODE: +- doubleBlind: ${isDoubleBlind} +- preprint: ${isPreprint} +- THEREFORE: use \\\\usepackage${neuripsOption}{neurips_2026} +${isDoubleBlind ? '- MUST use [main] option (gives line numbers + anonymous mode). Do NOT use [preprint].' : '- Using [preprint] option (non-anonymous, no line numbers).'} +- MUST add \\\\PassOptionsToPackage{numbers,compress,sort}{natbib} BEFORE \\\\documentclass for numeric citations [1,2,3] + +USER_CONFIRMATIONS_JSON: +${JSON.stringify(state.userConfirmations || {})} + +MIGRATION_PLAN_JSON: +${JSON.stringify(state.transferPlan || {}, null, 2)} + +SOURCE_PROFILE_JSON: +${JSON.stringify(state.sourceProfile || {}, null, 2)} + +SOURCE_PREAMBLE_ONLY: +${srcParts.preamble || '(empty)'} + +CURRENT_TARGET_FILE: +${currentTex} +${handbook} + +Output ONLY the new preamble: from \\documentclass through the line immediately before \\begin{document}. Do NOT output \\begin{document} or anything after it. No markdown fences.`; + + const response = await llm.invoke([{ role: 'user', content: prompt }]); + let newPreamble = stripCodeFences( + typeof response.content === 'string' ? response.content : '', + ).trim(); + + if (newPreamble.includes('\\begin{document}')) { + newPreamble = newPreamble.split('\\begin{document}')[0].trimEnd(); + } + + // ---- Deterministic post-processing (do NOT rely on LLM for these) ---- + + // 1. Force correct neurips_2026 package option based on submission mode + const correctOption = isPreprint ? 
'[preprint]' : '[main]'; + // Match any \usepackage[...]{neurips_2026} or \usepackage{neurips_2026} + newPreamble = newPreamble.replace( + /\\usepackage(?:\s*\[[^\]]*\])?\s*\{neurips_2026\}/, + `\\usepackage${correctOption}{neurips_2026}`, + ); + + // 2. Ensure \PassOptionsToPackage{numbers,compress,sort}{natbib} exists before \documentclass + if (!/\\PassOptionsToPackage\s*\{[^}]*numbers[^}]*\}\s*\{natbib\}/.test(newPreamble)) { + // Insert before \documentclass + newPreamble = newPreamble.replace( + /(\\documentclass)/, + '\\PassOptionsToPackage{numbers,compress,sort}{natbib}\n$1', + ); + } + + const merged = mergeTexDocument(newPreamble, tgtParts.body, tgtParts.tail); + await writeFileWithSnapshot(root, rel, merged, state.jobId); + + return { + lastGoodPhase: 'preamble', + ...progressUpdate( + 'applyPreamble', + NeuripsPhase.preamble, + `Wrote preamble (${newPreamble.length} chars); body preserved for next step.`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/blindConfirmBypass.js b/apps/backend/src/services/transferAgent/nodes/neurips/blindConfirmBypass.js new file mode 100644 index 0000000..4db383a --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/blindConfirmBypass.js @@ -0,0 +1,10 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +/** Used when blind QA is skipped — avoids interrupt-before consumeConfirmBlind. 
*/ +export async function blindConfirmBypass() { + return progressUpdate( + 'blindConfirmBypass', + NeuripsPhase.compile, + 'Skipped consumeConfirmBlind (no blind QA).', + ); +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmBlind.js b/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmBlind.js new file mode 100644 index 0000000..e2a9955 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmBlind.js @@ -0,0 +1,13 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +export async function consumeConfirmBlind(state) { + return { + pendingQA: null, + status: 'running', + ...progressUpdate( + 'consumeConfirmBlind', + NeuripsPhase.compile, + 'Blind confirmations recorded; proceeding to compile.', + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmPlan.js b/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmPlan.js new file mode 100644 index 0000000..822b858 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/consumeConfirmPlan.js @@ -0,0 +1,18 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +/** + * Runs after user submits answers via /api/transfer/submit-confirm + */ +export async function consumeConfirmPlan(state) { + const answers = state.userConfirmations || {}; + const keys = Object.keys(answers); + return { + pendingQA: null, + status: 'running', + ...progressUpdate( + 'consumeConfirmPlan', + NeuripsPhase.migration_plan, + `Applied ${keys.length} confirmation answer(s).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/intake.js b/apps/backend/src/services/transferAgent/nodes/neurips/intake.js new file mode 100644 index 0000000..32b840f --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/intake.js @@ -0,0 +1,16 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; +import { 
loadNeuripsRulesFull } from '../../neuripsRules.js'; + +export async function intake(state) { + if (state.transferGraphKind === 'neurips') { + await loadNeuripsRulesFull(); + } + const t = state.transferIntake || {}; + return { + ...progressUpdate( + 'intake', + NeuripsPhase.intake, + `venue=${t.venue || 'neurips'} preprint=${!!t.preprint} doubleBlind=${t.doubleBlind !== false}`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/normalizeFigures.js b/apps/backend/src/services/transferAgent/nodes/neurips/normalizeFigures.js new file mode 100644 index 0000000..d79ebf5 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/normalizeFigures.js @@ -0,0 +1,313 @@ +import { promises as fs } from 'fs'; +import path from 'path'; +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../../llmService.js'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { writeFileWithSnapshot } from '../../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../../neuripsRules.js'; +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; +import { + mainTexDiffInstructions, + runLlmUnifiedDiffWithRetries, +} from '../../llmUnifiedDiff.js'; + +/* ------------------------------------------------------------------ */ +/* Lightweight figure / layout measurement (no external binaries) */ +/* ------------------------------------------------------------------ */ + +/** Known column-width (pt) for common document classes. 
*/ +const LAYOUT_DB = { + neurips: { textwidthPt: 396, columnwidthPt: 396, columns: 1 }, + article: { textwidthPt: 345, columnwidthPt: 345, columns: 1 }, + 'revtex4-1': { textwidthPt: 510, columnwidthPt: 246, columns: 2 }, + 'revtex4-2': { textwidthPt: 510, columnwidthPt: 246, columns: 2 }, + revtex: { textwidthPt: 510, columnwidthPt: 246, columns: 2 }, + IEEEtran: { textwidthPt: 516, columnwidthPt: 252, columns: 2 }, + llncs: { textwidthPt: 336, columnwidthPt: 336, columns: 1 }, + acmart: { textwidthPt: 506, columnwidthPt: 241, columns: 2 }, + cvpr: { textwidthPt: 496, columnwidthPt: 237, columns: 2 }, + icml: { textwidthPt: 487, columnwidthPt: 233, columns: 2 }, +}; + +/** Read PDF MediaBox from the first 8 KB of the file. */ +async function pdfPageSize(filePath) { + try { + const fd = await fs.open(filePath, 'r'); + const buf = Buffer.alloc(8192); + try { await fd.read(buf, 0, 8192, 0); } + finally { await fd.close(); } + const str = buf.toString('latin1'); + const m = str.match(/\/MediaBox\s*\[\s*(-?[\d.]+)\s+(-?[\d.]+)\s+(-?[\d.]+)\s+(-?[\d.]+)\s*\]/); + if (m) { + const w = parseFloat(m[3]) - parseFloat(m[1]); + const h = parseFloat(m[4]) - parseFloat(m[2]); + if (w > 0 && h > 0) return { widthPt: Math.round(w * 10) / 10, heightPt: Math.round(h * 10) / 10 }; + } + } catch { /* ignore */ } + return null; +} + +/** Resolve source layout: handle twocolumn flag overriding a single-col class. 
*/ +function resolveSourceLayout(sourceProfile) { + const cls = (sourceProfile?.documentclass || '').toLowerCase(); + let layout = LAYOUT_DB[cls] || null; + + // Check if twocolumn was set explicitly even though the class DB entry is single-column + if (layout && sourceProfile?.twocolumn && layout.columns === 1) { + layout = { + ...layout, + columnwidthPt: Math.round((layout.textwidthPt - 20) / 2), + columns: 2, + }; + } + // If no DB entry but twocolumn is true, fall back to a reasonable guess + if (!layout && sourceProfile?.twocolumn) { + layout = { textwidthPt: 500, columnwidthPt: 240, columns: 2 }; + } + return layout; +} + +/** + * Collect all \includegraphics from the tex, measure each image file, + * and compute recommended widths for the target layout. + */ +async function measureAllFigures(texContent, workspaceRoot, sourceProfile, venue) { + const srcLayout = resolveSourceLayout(sourceProfile); + const tgtLayout = LAYOUT_DB[(venue || 'neurips').toLowerCase()] || LAYOUT_DB.neurips; + + // Parse every \includegraphics[...]{file} + const figRe = /\\begin\s*\{\s*figure(\*?)\s*\}[\s\S]*?\\includegraphics(?:\[([^\]]*)\])?\{([^}]+)\}[\s\S]*?\\end\s*\{\s*figure\*?\s*\}/g; + const figures = []; + let m; + while ((m = figRe.exec(texContent)) !== null) { + const isStar = m[1] === '*'; + const opts = m[2] || ''; + const file = m[3].trim(); + figures.push({ file, opts, isStar }); + } + + if (figures.length === 0) return null; + + const measurements = []; + for (const fig of figures) { + const absPath = safeJoin(workspaceRoot, fig.file); + let naturalSize = null; + const ext = path.extname(fig.file).toLowerCase(); + if (ext === '.pdf') { + naturalSize = await pdfPageSize(absPath); + } + + // Source effective width = columnwidth for normal figure, textwidth for figure* + const srcEffective = srcLayout + ? (fig.isStar ? 
srcLayout.textwidthPt : srcLayout.columnwidthPt) + : null; + const tgtLinewidth = tgtLayout.columnwidthPt; + + let recommendedSpec = '\\linewidth'; + let reason = ''; + + if (srcEffective && tgtLinewidth) { + const ratio = srcEffective / tgtLinewidth; + + if (ratio < 0.75) { + // Source figure was narrower than NeurIPS \linewidth + const r = Math.round(ratio * 100) / 100; + recommendedSpec = `${r}\\linewidth`; + reason = `source colwidth ${Math.round(srcEffective)}pt < target ${Math.round(tgtLinewidth)}pt → scale to ${r}\\linewidth`; + } else if (ratio <= 1.05) { + recommendedSpec = '\\linewidth'; + reason = 'source and target widths similar → \\linewidth is fine'; + } else { + // Source was wider (figure* in twocolumn or wide class) + // Scale down to avoid overflow; cap at \linewidth + recommendedSpec = '\\linewidth'; + reason = `source was wider (${Math.round(srcEffective)}pt) but capped at \\linewidth (${Math.round(tgtLinewidth)}pt)`; + } + } + + // Height check: will the figure be taller than 60% of the page? + let heightWarning = ''; + if (naturalSize && tgtLinewidth) { + // If using recommended width, what is the resulting height? 
+ let usedWidth = tgtLinewidth; + const ratioMatch = recommendedSpec.match(/([\d.]+)\\linewidth/); + if (ratioMatch) usedWidth = parseFloat(ratioMatch[1]) * tgtLinewidth; + const scaledHeight = naturalSize.heightPt * (usedWidth / naturalSize.widthPt); + const pageTextHeight = 650; // NeurIPS ≈ 650 pt + if (scaledHeight > 0.60 * pageTextHeight) { + const safeRatio = Math.round((0.55 * pageTextHeight / naturalSize.heightPt) * (naturalSize.widthPt / tgtLinewidth) * 100) / 100; + const capped = Math.min(safeRatio, 1.0); + recommendedSpec = `${capped}\\linewidth`; + heightWarning = `at full width figure would be ${Math.round(scaledHeight)}pt tall (${Math.round(scaledHeight / pageTextHeight * 100)}% of page) → reduced to ${capped}\\linewidth`; + reason = heightWarning; + } + } + + measurements.push({ + file: fig.file, + isStar: fig.isStar, + currentOpts: fig.opts, + naturalSizePt: naturalSize ? `${naturalSize.widthPt} × ${naturalSize.heightPt}` : 'unknown', + recommendedWidth: recommendedSpec, + reason, + }); + } + + return { + sourceLayout: srcLayout + ? 
`${sourceProfile.documentclass}, ${srcLayout.columns}-col, colwidth=${srcLayout.columnwidthPt}pt, textwidth=${srcLayout.textwidthPt}pt` + : `${sourceProfile?.documentclass || 'unknown'} (layout not in DB)`, + targetLayout: `neurips, 1-col, linewidth=${tgtLayout.columnwidthPt}pt`, + figures: measurements, + }; +} + +/* ------------------------------------------------------------------ */ +/* Node entry point */ +/* ------------------------------------------------------------------ */ + +export async function normalizeFigures(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + const currentTex = await fs.readFile(abs, 'utf8'); + const virtualPath = rel.replace(/\\/g, '/'); + + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + + const handbook = formatNeuripsHandbookBlock(await loadNeuripsRulesFull()); + const diffInstr = mainTexDiffInstructions(virtualPath); + const lineCount = currentTex.split(/\r\n|\r|\n/).length; + const hasFigure = /\\begin\s*\{\s*figure\*?\s*\}/.test(currentTex); + const hasTable = /\\begin\s*\{\s*table\*?\s*\}/.test(currentTex); + + // ---- Measure figures and compute scaling recommendations ---- + const venue = state.transferIntake?.venue || 'neurips'; + const measurement = hasFigure + ? 
await measureAllFigures(currentTex, root, state.sourceProfile, venue) + : null; + + let figureScalingBlock = ''; + if (measurement && measurement.figures.length > 0) { + figureScalingBlock = ` +FIGURE SCALING REPORT (computed from source and target layouts — follow these): +Source layout: ${measurement.sourceLayout} +Target layout: ${measurement.targetLayout} + +${measurement.figures.map((f, i) => + ` Figure ${i + 1}: ${f.file} + Natural size: ${f.naturalSizePt} + Current opts: ${f.currentOpts || '(none)'} + → Recommended width: ${f.recommendedWidth} + Reason: ${f.reason}` +).join('\n')} + +IMPORTANT: Apply the recommended widths above to each \\includegraphics. +`; + } + + const basePrompt = `Adjust figures/tables/paths in this NeurIPS-bound LaTeX file. + +FILE_FACTS (read before writing any @@ hunk): +- CURRENT_FILE has exactly ${lineCount} lines (including blanks). @@ line numbers must stay within this range. +- Contains \\begin{figure} or \\begin{figure*}: ${hasFigure ? 'yes' : 'NO — do not invent figure environments or PDF names that are not in FILE'}. +- Contains \\begin{table}: ${hasTable ? 'yes' : 'NO — do not invent table environments'}. +${figureScalingBlock} +FLOAT PLACEMENT RULES: +- Use \\begin{figure}[htbp] (NOT just [t]) so LaTeX can place figures near their first reference. +- NEVER use \\begin{figure}[H] (requires extra package and forces bad page breaks). +- Convert figure* to figure (NeurIPS is single-column; figure* is unnecessary). + +USER_CONFIRMATIONS_JSON: +${JSON.stringify(state.userConfirmations || {})} + +FILE: +${currentTex} +${handbook} + +Rules: prefer single-column figure/table; fix \\includegraphics widths per FIGURE SCALING REPORT above; add \\graphicspath if needed; respect float policy in handbook. +Do NOT add substantive caption prose, "explain the figure", or editorial instructions inside \\caption{...} — only layout/path/float-type fixes per handbook. 
+If there are no figure/table environments in FILE, only change preamble (e.g. \\graphicspath, packages) or make no structural edits; never hallucinate missing floats. + +Figure/table patches: each float environment you change should usually be its own @@ hunk (or a short group of adjacent lines). Two figures are often separated by paragraphs, \\beq/\\eeq, or \\subsection — those lines stay in the file; do not skip them in the diff. Open CURRENT_FILE, locate each \\begin{figure}…\\end{figure} (or figure*) block you touch, and emit a hunk whose context includes only lines that really appear consecutively there.${diffInstr}`; + + const merged = await runLlmUnifiedDiffWithRetries({ + llm, + baseTex: currentTex, + buildPrompt: (failureNote) => basePrompt + (failureNote || ''), + nodeName: 'normalizeFigures', + phase: NeuripsPhase.figures, + maxAttempts: 3, + debug: { projectRoot: root, jobId: state.jobId }, + }); + + // ---- Deterministic post-processing (do NOT rely on LLM for these) ---- + let postProcessed = merged; + let postFixLog = []; + + // 1. Fix float placement: [t], [b], [!t], [!b] → [htbp] + // This is critical: [t]-only often pushes figures to the end of the document. + postProcessed = postProcessed.replace( + /\\begin\s*\{(figure|table)\*?\}\s*\[([^\]]*)\]/g, + (match, env, opts) => { + // Already has h or htbp — leave alone + if (/h/.test(opts) && /[tbp]/.test(opts)) return match; + const star = match.includes('*') ? '*' : ''; + postFixLog.push(`\\begin{${env}${star}}[${opts}] → [htbp]`); + return `\\begin{${env}${star}}[htbp]`; + }, + ); + + // 2. Convert figure* → figure ONLY for single-column venues (e.g. NeurIPS) + // Two-column venues (ICML, CVPR, ACL) NEED figure* for full-width figures. 
+ const tgtLayout = LAYOUT_DB[(state.transferIntake?.venue || 'neurips').toLowerCase()] || LAYOUT_DB.neurips; + if (tgtLayout.columns === 1) { + postProcessed = postProcessed.replace(/\\begin\s*\{\s*figure\*\s*\}/g, () => { + postFixLog.push('figure* → figure (single-column venue)'); + return '\\begin{figure}'; + }); + postProcessed = postProcessed.replace(/\\end\s*\{\s*figure\*\s*\}/g, () => '\\end{figure}'); + } + + // 3. Apply recommended widths from measurement (deterministic, not LLM) + if (measurement && measurement.figures.length > 0) { + for (const fig of measurement.figures) { + if (fig.recommendedWidth && fig.recommendedWidth !== '\\linewidth') { + // Replace width=\linewidth or width=\columnwidth for this specific file + const fileEscaped = fig.file.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const widthRe = new RegExp( + `(\\\\includegraphics\\s*\\[(?:[^\\]]*?)width\\s*=\\s*)(?:\\\\linewidth|\\\\columnwidth|1(?:\\.0)?\\\\(?:linewidth|columnwidth))(([^\\]]*?)\\]\\s*\\{${fileEscaped}\\})`, + 'g', + ); + const before = postProcessed; + postProcessed = postProcessed.replace(widthRe, `$1${fig.recommendedWidth}$2`); + if (postProcessed !== before) { + postFixLog.push(`${fig.file}: width → ${fig.recommendedWidth}`); + } + } + } + } + + if (postFixLog.length > 0) { + // Surface deterministic fixes in the server log; the progressUpdate + // message below also carries this summary for the job timeline. + console.info(`[normalizeFigures] Deterministic post-fixes: ${postFixLog.join('; ')}`); + } + + await writeFileWithSnapshot(root, rel, postProcessed, state.jobId); + + return { + figureMeasurement: measurement, + ...progressUpdate( + 'normalizeFigures', + NeuripsPhase.figures, + `Normalized floats and graphics (${measurement?.figures.length || 0} figures measured, ${postFixLog.length} deterministic fixes: ${postFixLog.join('; ') || 'none'}).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/policyCheck.js 
b/apps/backend/src/services/transferAgent/nodes/neurips/policyCheck.js new file mode 100644 index 0000000..96092a3 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/policyCheck.js @@ -0,0 +1,111 @@ +import { promises as fs } from 'fs'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +/** + * Lightweight policy check + deterministic structure fixes (no LLM). + * + * Ensures NeurIPS structural rules that the LLM agent often gets wrong: + * 1. \input{checklist.tex} must be the LAST thing before \end{document} + * 2. \appendix + appendix content must come BEFORE checklist, not after + */ +export async function policyCheck(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + let tex = ''; + try { + tex = await fs.readFile(abs, 'utf8'); + } catch { + return { + ...progressUpdate( + 'policyCheck', + NeuripsPhase.policy, + 'Could not read main.tex for policy check.', + 'error', + ), + }; + } + + const issues = []; + const fixes = []; + const venue = (state.transferIntake?.venue || 'neurips').toLowerCase(); + const isNeurips = venue === 'neurips'; + + // NeurIPS-specific: checklist checks + if (isNeurips) { + if (/\\answerTODO/.test(tex)) { + issues.push('found \\\\answerTODO (fill checklist)'); + } + } + + const hasChecklist = /\\input\s*\{\s*checklist(?:\.tex)?\s*\}/.test(tex) + || /\\include\s*\{\s*checklist(?:\.tex)?\s*\}/.test(tex); + if (isNeurips && !hasChecklist) { + issues.push('checklist.tex not \\input/include'); + } + + // ---- Deterministic fix: ensure checklist is LAST before \end{document} ---- + // This is a NeurIPS-specific requirement; other venues don't have mandatory checklist. 
+ if (isNeurips && hasChecklist) { + // Find the checklist \input line and \end{document} + const checklistRe = /^[ \t]*\\(?:input|include)\s*\{\s*checklist(?:\.tex)?\s*\}[ \t]*$/m; + const endDocRe = /^[ \t]*\\end\s*\{\s*document\s*\}[ \t]*$/m; + const checklistMatch = checklistRe.exec(tex); + const endDocMatch = endDocRe.exec(tex); + + if (checklistMatch && endDocMatch) { + const checklistPos = checklistMatch.index; + const endDocPos = endDocMatch.index; + + // Get everything between checklist and \end{document} + const afterChecklist = tex.slice( + checklistPos + checklistMatch[0].length, + endDocPos, + ).trim(); + + // If there's substantive content after checklist (appendix, \input, \section, etc.) + // that is NOT just whitespace/newpage, we need to reorder + const hasContentAfterChecklist = afterChecklist.length > 0 + && !/^[\s]*(?:\\newpage[\s]*)*$/.test(afterChecklist); + + if (hasContentAfterChecklist) { + // Extract the content that's wrongly after checklist + const contentAfterChecklist = afterChecklist; + + // Also grab any \newpage before checklist + const beforeChecklist = tex.slice(0, checklistPos); + const afterEndDoc = tex.slice(endDocPos); + + // Rebuild: beforeChecklist + movedContent + \newpage + checklist + \end{document} + const checklistLine = checklistMatch[0]; + + tex = beforeChecklist.trimEnd() + + '\n\n' + contentAfterChecklist.trim() + + '\n\n\\newpage\n' + checklistLine + '\n\n' + + afterEndDoc; + + fixes.push('moved appendix/content before checklist (checklist must be last before \\end{document})'); + } + } + } + + // Write back if fixes were applied + if (fixes.length > 0) { + try { + await fs.writeFile(abs, tex, 'utf8'); + } catch { + issues.push('failed to write structure fix'); + } + } + + const allNotes = [...issues, ...fixes]; + const level = issues.length ? 'warn' : 'info'; + const msg = allNotes.length + ? 
`Policy: ${allNotes.join('; ')}` + : 'Policy check: checklist present and correctly positioned, no answerTODO.'; + + return { + ...progressUpdate('policyCheck', NeuripsPhase.policy, msg, level), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmBlind.js b/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmBlind.js new file mode 100644 index 0000000..464d41e --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmBlind.js @@ -0,0 +1,47 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +export async function prepareConfirmBlind(state) { + const intake = state.transferIntake || {}; + if (intake.doubleBlind === false || intake.preprint) { + return { + pendingQA: null, + status: 'running', + ...progressUpdate( + 'prepareConfirmBlind', + NeuripsPhase.blind, + 'Skipped blind QA (preprint or non-double-blind).', + ), + }; + } + + const pendingQA = [ + { + id: 'anon_citations', + prompt: '参考文献中是否可能存在可识别本人/本组的条目,需要匿名化或改为第三人称引用?', + type: 'single', + options: [ + '需要,请按 neurips.md 双盲条款尽量匿名化文内与文献表', + '不需要,源稿已匿名', + ], + }, + { + id: 'self_referential', + prompt: '正文是否包含 “our previous work” / 项目主页 / GitHub 等可识别链接?', + type: 'single', + options: [ + '有,请改写为匿名表述或删除链接', + '无或已处理', + ], + }, + ]; + + return { + pendingQA, + status: 'waiting_confirm', + ...progressUpdate( + 'prepareConfirmBlind', + NeuripsPhase.blind_qa, + 'Blind compliance questions ready.', + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmPlan.js b/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmPlan.js new file mode 100644 index 0000000..ebca88c --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/prepareConfirmPlan.js @@ -0,0 +1,58 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +/** + * Sets pending QA before consumeConfirmPlan (graph interrupts before consume). 
+ */ +export async function prepareConfirmPlan(state) { + const plan = state.transferPlan || {}; + const profile = state.sourceProfile || {}; + + const pendingQA = [ + { + id: 'float_strategy', + prompt: '源稿含双栏通栏图 (figure*/table*) 或自定义浮动体策略。迁移时如何处理?', + type: 'single', + options: [ + '改为 NeurIPS 单栏 figure/table(推荐)', + '尽量保留结构,我稍后手动改', + ], + }, + { + id: 'bibliography_strategy', + prompt: `检测到文献机制倾向:${profile.bibMechanism || 'unknown'}。是否按 NeurIPS 模板默认 thebibliography / BibTeX 路径收敛?`, + type: 'single', + options: [ + '是,按模板与 neurips.md 收敛', + '否,保留现有 .bbl / biblatex 结构并仅做最小修补', + ], + }, + { + id: 'content_drop', + prompt: '迁移计划中有 drop/merge 段落时,是否允许删除源稿中无法映射的小节?', + type: 'single', + options: [ + '不允许删除正文;无法映射则合并到最近小节', + '允许按计划在极少数情况 drop(我会在 QA 后检查)', + ], + }, + ]; + + if (plan.notes) { + pendingQA.push({ + id: 'plan_notes_ack', + prompt: `Planner notes(请确认已理解):\n${plan.notes.slice(0, 1200)}`, + type: 'single', + options: ['已理解并继续', '暂停,我先改源项目'], + }); + } + + return { + pendingQA, + status: 'waiting_confirm', + ...progressUpdate( + 'prepareConfirmPlan', + NeuripsPhase.qa_plan, + `Prepared ${pendingQA.length} confirmation question(s).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/sanitizeBlind.js b/apps/backend/src/services/transferAgent/nodes/neurips/sanitizeBlind.js new file mode 100644 index 0000000..400923c --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/sanitizeBlind.js @@ -0,0 +1,71 @@ +import { promises as fs } from 'fs'; +import { ChatOpenAI } from '@langchain/openai'; +import { resolveLLMConfig, normalizeBaseURL } from '../../../llmService.js'; +import { safeJoin } from '../../../../utils/pathUtils.js'; +import { writeFileWithSnapshot } from '../../utils.js'; +import { loadNeuripsRulesFull, formatNeuripsHandbookBlock } from '../../neuripsRules.js'; +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; +import { + mainTexDiffInstructions, + runLlmUnifiedDiffWithRetries, +} 
from '../../llmUnifiedDiff.js'; + +export async function sanitizeBlind(state) { + const root = state.workspaceRoot || state.targetProjectRoot; + const rel = state.targetMainFile; + const abs = safeJoin(root, rel); + const currentTex = await fs.readFile(abs, 'utf8'); + const virtualPath = rel.replace(/\\/g, '/'); + + const intake = state.transferIntake || {}; + if (intake.doubleBlind === false || intake.preprint) { + return { + ...progressUpdate( + 'sanitizeBlind', + NeuripsPhase.blind, + 'Skipped anonymization (preprint or non-double-blind).', + ), + }; + } + + const { endpoint, apiKey, model } = resolveLLMConfig(state.llmConfig); + const llm = new ChatOpenAI({ + modelName: model, + openAIApiKey: apiKey, + configuration: { baseURL: normalizeBaseURL(endpoint) }, + temperature: 0.2, + }); + + const handbook = formatNeuripsHandbookBlock(await loadNeuripsRulesFull()); + const diffInstr = mainTexDiffInstructions(virtualPath); + const basePrompt = `Apply double-blind / PDF metadata sanitization for NeurIPS anonymous submission. 
+ +BLIND_QA_ANSWERS_JSON: +${JSON.stringify(state.userConfirmations || {})} + +FILE: +${currentTex} +${handbook} + +Ensure \\hypersetup{pdfauthor={}} (or equivalent), remove identifying URLs in text if required by answers, anonymize self-citations per handbook.${diffInstr}`; + + const merged = await runLlmUnifiedDiffWithRetries({ + llm, + baseTex: currentTex, + buildPrompt: (failureNote) => basePrompt + (failureNote || ''), + nodeName: 'sanitizeBlind', + phase: NeuripsPhase.blind, + maxAttempts: 3, + debug: { projectRoot: root, jobId: state.jobId }, + }); + + await writeFileWithSnapshot(root, rel, merged, state.jobId); + + return { + ...progressUpdate( + 'sanitizeBlind', + NeuripsPhase.blind, + `Blind sanitization pass (unified diff applied, ${merged.length} chars).`, + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/nodes/neurips/verifyBuild.js b/apps/backend/src/services/transferAgent/nodes/neurips/verifyBuild.js new file mode 100644 index 0000000..49d7651 --- /dev/null +++ b/apps/backend/src/services/transferAgent/nodes/neurips/verifyBuild.js @@ -0,0 +1,35 @@ +import { NeuripsPhase, progressUpdate } from '../../progressMeta.js'; + +const BAD_PATTERNS = [ + /undefined references/i, + /Citation.*undefined/i, + /There were undefined citations/i, + /^! LaTeX Error/m, + /Fatal error/i, +]; + +/** + * Post-compile log gate (compile may exit 0 with residual issues). + */ +export async function verifyBuild(state) { + const log = state.compileResult?.log || ''; + let hit = ''; + for (const re of BAD_PATTERNS) { + if (re.test(log)) { + hit = re.source; + break; + } + } + + const ok = !hit; + return { + verifyBuildResult: { ok, pattern: hit || null }, + buildFailureReason: ok ? '' : `verifyBuild: log matched ${hit}`, + ...progressUpdate( + 'verifyBuild', + NeuripsPhase.verify, + ok ? 'Log check passed (no fatal/undefined patterns).' : `Log check FAILED (${hit}).`, + ok ? 
'info' : 'warn', + ), + }; +} diff --git a/apps/backend/src/services/transferAgent/progressMeta.js b/apps/backend/src/services/transferAgent/progressMeta.js new file mode 100644 index 0000000..e188d63 --- /dev/null +++ b/apps/backend/src/services/transferAgent/progressMeta.js @@ -0,0 +1,36 @@ +/** + * Stable phase ids for NeurIPS UI timeline (map nodes → phase). + */ +export const NeuripsPhase = { + intake: 'intake', + source_analysis: 'source_analysis', + migration_plan: 'migration_plan', + qa_plan: 'qa_plan', + preamble: 'preamble', + body: 'body', + figures: 'figures', + assets: 'assets', + bibliography: 'bibliography', + blind_qa: 'blind_qa', + blind: 'blind', + compile: 'compile', + verify: 'verify', + policy: 'policy', + finalize: 'finalize', + layout: 'layout', + // --- Agent loop phases --- + agent_planning: 'agent_planning', + agent_generating: 'agent_generating', + agent_reviewing: 'agent_reviewing', +}; + +export function progressUpdate(node, phase, message, level = 'info') { + return { + lastCompletedNode: node, + currentPhase: phase, + interruptedBeforeNode: '', + completedNodes: [node], + progressLog: `[${node}] ${message}`, + progressLogEntries: [{ node, level, message, ts: Date.now() }], + }; +} diff --git a/apps/backend/src/services/transferAgent/skills/icml.js b/apps/backend/src/services/transferAgent/skills/icml.js new file mode 100644 index 0000000..ce56593 --- /dev/null +++ b/apps/backend/src/services/transferAgent/skills/icml.js @@ -0,0 +1,121 @@ +/** + * ICML Skill — system prompt builder for ICML 2026. + */ + +import { loadVenueRules } from '../neuripsRules.js'; + +/** + * Build the ICML skill system prompt for the agentic transfer. 
+ */ +export function buildIcmlSkill({ + icmlHandbook, + sourceProfile, + transferIntake, + sourceOutline, + targetOutline, + sourceAssets, +}) { + const intake = transferIntake || {}; + const profile = sourceProfile || {}; + + return `You are an expert LaTeX paper template migration agent specializing in ICML 2026. + +Your mission: migrate a user's source paper into the ICML 2026 template, producing a submission-ready .tex file that compiles cleanly and passes all ICML formatting requirements. + +═══════════════════════════════════════════════════ +AVAILABLE TOOLS +═══════════════════════════════════════════════════ + +You have the following tools at your disposal. Call them as needed: + +• readFile(project, path) — Read a file from source or target project +• writeFile(path, content) — Write/overwrite a file in the target project (auto-snapshots) +• applyDiff(path, diff) — Apply a unified diff to a target file (surgical edits) +• grepFile(project, pattern, glob) — Regex search across project files +• listProjectTree(project) — List all files in a project +• copyAsset(srcPath, destPath?) — Copy a resource file from source to target +• raiseQuestion(questions) — Ask the user a question (ONLY when truly needed) + +═══════════════════════════════════════════════════ +ICML 2026 COMPLETE HANDBOOK +═══════════════════════════════════════════════════ + +${icmlHandbook || '[ICML handbook not available — use template comments and standard ICML conventions.]'} + +═══════════════════════════════════════════════════ +SOURCE PAPER PROFILE +═══════════════════════════════════════════════════ + +documentclass: ${profile.documentclass || 'unknown'} +packages: ${(profile.packages || []).join(', ') || 'unknown'} +bibMechanism: ${profile.bibMechanism || 'unknown'} +twocolumn: ${profile.twocolumn ?? 'unknown'} +figureStar: ${profile.figureStar ?? false} +tableStar: ${profile.tableStar ?? false} +revtex: ${profile.revtex ?? false} +natbib: ${profile.hasNatbib ?? 
false} +biblatex: ${profile.hasBiblatex ?? false} + +${sourceOutline ? `SOURCE OUTLINE:\n${JSON.stringify(sourceOutline, null, 2)}` : ''} +${targetOutline ? `TARGET TEMPLATE OUTLINE:\n${JSON.stringify(targetOutline, null, 2)}` : ''} +${sourceAssets ? `SOURCE ASSETS:\n${JSON.stringify(sourceAssets, null, 2)}` : ''} + +═══════════════════════════════════════════════════ +MIGRATION PARAMETERS +═══════════════════════════════════════════════════ + +venue: ${intake.venue || 'icml'} +doubleBlind: ${intake.doubleBlind !== false} +preprint: ${!!intake.preprint} +${intake.outputNotes ? `notes: ${intake.outputNotes}` : ''} + +═══════════════════════════════════════════════════ +CRITICAL CONSTRAINTS (MUST FOLLOW) +═══════════════════════════════════════════════════ + +1. \\documentclass MUST be {article} — never revtex, amsart, llncs, etc. +2. ICML package option depends on submission mode: + - doubleBlind=true → \\usepackage{icml2026} (anonymous, with line numbers) + - camera-ready → \\usepackage[accepted]{icml2026} (non-anonymous) + Check doubleBlind flag above and pick the correct option. +3. NEVER modify icml2026.sty — any geometry/font changes inside .sty → desk rejection +4. Paper size: US Letter. Do NOT load geometry. +5. Preserve ALL \\cite{}, \\ref{}, \\label{}, mathematical content, figures, tables +6. ICML is TWO-COLUMN: keep figure* for full-width figures, figure for single-column. Do NOT convert figure* to figure. +7. Use \\icmlauthor{Name}{affiliation} and \\icmlaffiliation{label}{...} for authors (NOT \\author{}) +8. Double-blind: NO author info, NO identifying URLs, self-cite in third person +9. No $$ ... $$ for display math — use equation/align environments (lineno compat) +10. Use \\bibliographystyle{icml2026} and \\bibliography{references} — APA author-year citations (NOT numeric) +11. If source uses biblatex: switch to natbib (loaded by icml2026.sty) +12. Acknowledgements: hidden in anonymous mode — keep section but content won't show +13. 
Impact Statement: required unnumbered section before References +14. Appendix goes AFTER references, submitted in same PDF (NOT separate file) +15. Main body max 8 pages (excluding references and appendices) + +═══════════════════════════════════════════════════ +BEST PRACTICES +═══════════════════════════════════════════════════ + +• Use applyDiff for surgical edits (small targeted changes) — safer than full rewrites +• Use writeFile for initial full-file generation or when the diff would be larger than the file +• Always readFile the current state of a file before modifying it +• Copy ALL referenced assets (images, .bib, .bbl, .sty, .cls, .bst) from source +• When uncertain about user intent, prefer conservative choices over raising questions +• ONLY call raiseQuestion for genuinely ambiguous decisions that affect the final output +`; +} + +/** + * Convenience: load handbook and build the skill. + */ +export async function buildIcmlSkillFromState(state) { + const handbook = await loadVenueRules('icml'); + return buildIcmlSkill({ + icmlHandbook: handbook, + sourceProfile: state.sourceProfile, + transferIntake: state.transferIntake, + sourceOutline: state.sourceOutline, + targetOutline: state.targetOutline, + sourceAssets: state.sourceAssets, + }); +} diff --git a/apps/backend/src/services/transferAgent/skills/index.js b/apps/backend/src/services/transferAgent/skills/index.js new file mode 100644 index 0000000..b7adc7c --- /dev/null +++ b/apps/backend/src/services/transferAgent/skills/index.js @@ -0,0 +1,34 @@ +/** + * Venue skill dispatcher — selects the correct skill builder based on venue. + * + * This is the single entry point all agent nodes should use instead of + * importing venue-specific skill builders directly. + */ + +import { buildNeuripsSkillFromState } from './neurips.js'; +import { buildIcmlSkillFromState } from './icml.js'; + +/** + * Resolve the venue from state (checks transferIntake.venue and transferGraphKind). 
+ */ +function resolveVenue(state) { + const intake = state.transferIntake || {}; + return intake.venue || state.transferGraphKind || ''; +} + +/** + * Build the venue-specific skill system prompt from state. + * + * @param {object} state — LangGraph TransferState + * @returns {Promise} — The system prompt string + */ +export async function buildVenueSkillFromState(state) { + const venue = resolveVenue(state); + switch (venue) { + case 'icml': + return buildIcmlSkillFromState(state); + case 'neurips': + default: + return buildNeuripsSkillFromState(state); + } +} diff --git a/apps/backend/src/services/transferAgent/skills/neurips.js b/apps/backend/src/services/transferAgent/skills/neurips.js new file mode 100644 index 0000000..361dafe --- /dev/null +++ b/apps/backend/src/services/transferAgent/skills/neurips.js @@ -0,0 +1,134 @@ +/** + * NeurIPS Skill — system prompt builder. + * + * Encapsulates the entire NeurIPS 2026 specification as an agent "skill". + * Instead of hardcoding rules in each pipeline node, the agent receives + * the full handbook + migration context as its system prompt and makes + * autonomous decisions through tool calls. + */ + +import { loadNeuripsRulesFull } from '../neuripsRules.js'; + +/** + * Build the NeurIPS skill system prompt for the agentic transfer. 
+ * + * @param {object} opts + * @param {string} opts.neuripsHandbook — full neurips.md content + * @param {object} opts.sourceProfile — heuristic source analysis (documentclass, packages, bibMechanism, …) + * @param {object} opts.transferIntake — { venue, doubleBlind, preprint, outputNotes } + * @param {object} [opts.sourceOutline] — parsed section outline of source + * @param {object} [opts.targetOutline] — parsed section outline of target template + * @param {object} [opts.sourceAssets] — { bib, images, styles } from source analysis + * @returns {string} + */ +export function buildNeuripsSkill({ + neuripsHandbook, + sourceProfile, + transferIntake, + sourceOutline, + targetOutline, + sourceAssets, +}) { + const intake = transferIntake || {}; + const profile = sourceProfile || {}; + + return `You are an expert LaTeX paper template migration agent specializing in NeurIPS 2026. + +Your mission: migrate a user's source paper into the NeurIPS 2026 template, producing a submission-ready .tex file that compiles cleanly and passes all NeurIPS formatting requirements. + +═══════════════════════════════════════════════════ +AVAILABLE TOOLS +═══════════════════════════════════════════════════ + +You have the following tools at your disposal. Call them as needed: + +• readFile(project, path) — Read a file from source or target project +• writeFile(path, content) — Write/overwrite a file in the target project (auto-snapshots) +• applyDiff(path, diff) — Apply a unified diff to a target file (surgical edits) +• grepFile(project, pattern, glob) — Regex search across project files +• listProjectTree(project) — List all files in a project +• copyAsset(srcPath, destPath?) 
— Copy a resource file from source to target +• raiseQuestion(questions) — Ask the user a question (ONLY when truly needed) + +═══════════════════════════════════════════════════ +NEURIPS 2026 COMPLETE HANDBOOK +═══════════════════════════════════════════════════ + +${neuripsHandbook || '[NeurIPS handbook not available — use template comments and standard NeurIPS conventions.]'} + +═══════════════════════════════════════════════════ +SOURCE PAPER PROFILE +═══════════════════════════════════════════════════ + +documentclass: ${profile.documentclass || 'unknown'} +packages: ${(profile.packages || []).join(', ') || 'unknown'} +bibMechanism: ${profile.bibMechanism || 'unknown'} +twocolumn: ${profile.twocolumn ?? 'unknown'} +figureStar: ${profile.figureStar ?? false} +tableStar: ${profile.tableStar ?? false} +revtex: ${profile.revtex ?? false} +natbib: ${profile.hasNatbib ?? false} +biblatex: ${profile.hasBiblatex ?? false} + +${sourceOutline ? `SOURCE OUTLINE:\n${JSON.stringify(sourceOutline, null, 2)}` : ''} +${targetOutline ? `TARGET TEMPLATE OUTLINE:\n${JSON.stringify(targetOutline, null, 2)}` : ''} +${sourceAssets ? `SOURCE ASSETS:\n${JSON.stringify(sourceAssets, null, 2)}` : ''} + +═══════════════════════════════════════════════════ +MIGRATION PARAMETERS +═══════════════════════════════════════════════════ + +venue: ${intake.venue || 'neurips'} +doubleBlind: ${intake.doubleBlind !== false} +preprint: ${!!intake.preprint} +${intake.outputNotes ? `notes: ${intake.outputNotes}` : ''} + +═══════════════════════════════════════════════════ +CRITICAL CONSTRAINTS (MUST FOLLOW) +═══════════════════════════════════════════════════ + +1. \\documentclass MUST be {article} — never revtex, amsart, llncs, etc. +2. 
neurips_2026 package option MUST match the submission mode: + - doubleBlind=true → \\usepackage[main]{neurips_2026} (anonymous + line numbers) + - preprint=true → \\usepackage[preprint]{neurips_2026} (non-anonymous, no line numbers) + - camera-ready → \\usepackage[main,final]{neurips_2026} + Check doubleBlind/preprint flags above and pick the correct option. +3. NEVER modify neurips_2026.sty — any geometry/font changes inside .sty → desk rejection +4. Paper size: US Letter. Do NOT load geometry with A4. +5. Preserve ALL \\cite{}, \\ref{}, \\label{}, mathematical content, figures, tables +6. figure* → figure, table* → table (NeurIPS is single-column); use \\begin{figure}[htbp] for flexible float placement +7. MUST \\input{checklist.tex} — missing checklist → desk rejection +8. Double-blind: \\hypersetup{pdfauthor={}} and author block shows "Anonymous Author(s)" +9. No $$ ... $$ for display math — use equation/align environments (lineno compat) +10. \\bibliography{} or \\input{*.bbl} for references; natbib loaded by default with numeric citations +11. If source uses biblatex: switch to natbib or use [nonatbib]{neurips_2026} +12. ack environment is hidden in anonymous mode — keep it but content won't show +13. 
MUST add \\PassOptionsToPackage{numbers,compress,sort}{natbib} BEFORE \\documentclass for numeric [1,2,3] citations + +═══════════════════════════════════════════════════ +BEST PRACTICES +═══════════════════════════════════════════════════ + +• Use applyDiff for surgical edits (small targeted changes) — safer than full rewrites +• Use writeFile for initial full-file generation or when the diff would be larger than the file +• Always readFile the current state of a file before modifying it +• Copy ALL referenced assets (images, .bib, .bbl, .sty, .cls, .bst) from source +• When uncertain about user intent, prefer conservative choices over raising questions +• ONLY call raiseQuestion for genuinely ambiguous decisions that affect the final output +`; +} + +/** + * Convenience: load handbook and build the skill. + */ +export async function buildNeuripsSkillFromState(state) { + const handbook = await loadNeuripsRulesFull(); + return buildNeuripsSkill({ + neuripsHandbook: handbook, + sourceProfile: state.sourceProfile, + transferIntake: state.transferIntake, + sourceOutline: state.sourceOutline, + targetOutline: state.targetOutline, + sourceAssets: state.sourceAssets, + }); +} diff --git a/apps/backend/src/services/transferAgent/skills/reviewerChecklist.js b/apps/backend/src/services/transferAgent/skills/reviewerChecklist.js new file mode 100644 index 0000000..dbd620b --- /dev/null +++ b/apps/backend/src/services/transferAgent/skills/reviewerChecklist.js @@ -0,0 +1,130 @@ +/** + * Reviewer skills — venue-specific review checklists. + * + * Each venue exports a function that returns the review checklist string + * to be injected into the reviewer's user message. + * + * The reviewer prompt skeleton is venue-agnostic; all venue-specific + * constraints live here for progressive disclosure and extensibility. 
+ */ + +// ───────────────────────────────────────────── +// NeurIPS +// ───────────────────────────────────────────── + +function neuripsReviewChecklist({ intake }) { + const isBlind = intake.doubleBlind !== false && !intake.preprint; + return { + structure: `- \\usepackage[main]{neurips_2026} (anonymous + line numbers) or \\usepackage[preprint]{neurips_2026} + - \\PassOptionsToPackage{numbers,compress,sort}{natbib} BEFORE \\documentclass + - No \\usepackage{geometry} (neurips_2026 handles layout)`, + + figures: `- No figure* or table* environments (NeurIPS is single-column) + - \\includegraphics paths point to files that exist + - Reasonable \\includegraphics widths (\\linewidth or fraction)`, + + bibliography: `- \\bibliographystyle is NeurIPS-compatible (unsrtnat, plainnat, abbrvnat) + - Numeric citations [1,2,3] (NOT author-year) + - .bib or .bbl files present in target project`, + + policy: `6. NEURIPS POLICY: + - \\input{checklist.tex} or \\include{checklist.tex} present + - No \\answerTODO remaining (checklist should be filled or template default) + - $$ ... $$ display math → equation/align (lineno compatibility)`, + + blind: isBlind + ? 
`BLIND COMPLIANCE: + - \\hypersetup{pdfauthor={}} or equivalent + - No identifying URLs (GitHub repos, project pages) unless user confirmed + - Self-citations in third-person form + - Author block shows "Anonymous Author(s)"` + : '(Single-blind or preprint — no anonymization needed)', + }; +} + +// ───────────────────────────────────────────── +// ICML +// ───────────────────────────────────────────── + +function icmlReviewChecklist({ intake }) { + const isBlind = intake.doubleBlind !== false; + return { + structure: `- \\usepackage{icml2026} (anonymous) or \\usepackage[accepted]{icml2026} (camera-ready) + - Do NOT add \\PassOptionsToPackage{numbers}{natbib} (ICML uses author-year) + - No \\usepackage{geometry} (icml2026.sty handles layout) + - Use \\icmlauthor / \\icmlaffiliation for author info (NOT \\author{})`, + + figures: `- figure* for full-width figures, figure for single-column (ICML is two-column) + - Do NOT convert figure* to figure + - \\includegraphics paths point to files that exist + - Reasonable \\includegraphics widths`, + + bibliography: `- \\bibliographystyle{icml2026} (APA author-year, NOT numeric) + - natbib loaded by icml2026.sty automatically + - .bib or .bbl files present in target project`, + + policy: `6. ICML POLICY: + - Impact Statement section present (unnumbered, before References) + - No checklist required (this is NOT NeurIPS — do NOT create checklist.tex) + - Do NOT create or reference neurips_2026.sty + - $$ ... $$ display math → equation/align (lineno compatibility) + - Appendix (if any) goes AFTER references in same PDF + - Main body max 8 pages (excluding references and appendices)`, + + blind: isBlind + ? 
`BLIND COMPLIANCE: + - \\hypersetup{pdfauthor={}} or equivalent + - No identifying URLs unless user confirmed + - Self-citations in third-person form + - Author info hidden (only visible with [accepted] option)` + : '(Camera-ready — author info should be visible)', + }; +} + +// ───────────────────────────────────────────── +// Fallback (generic) +// ───────────────────────────────────────────── + +function genericReviewChecklist({ intake }) { + return { + structure: `- Correct \\documentclass and style package for the target venue + - No conflicting geometry/font packages`, + + figures: `- Figure environments appropriate for the venue's column layout + - \\includegraphics paths point to files that exist + - Reasonable \\includegraphics widths`, + + bibliography: `- Bibliography mechanism consistent with venue requirements + - .bib or .bbl files present in target project`, + + policy: `6. VENUE POLICY: + - Follow venue-specific rules from your system prompt`, + + blind: intake.doubleBlind + ? `BLIND COMPLIANCE: + - No author-identifying information visible + - Self-citations in third-person form` + : '(No anonymization needed)', + }; +} + +// ───────────────────────────────────────────── +// Dispatcher +// ───────────────────────────────────────────── + +const VENUE_CHECKLIST_BUILDERS = { + neurips: neuripsReviewChecklist, + icml: icmlReviewChecklist, +}; + +/** + * Build the venue-specific review checklist sections. + * + * @param {string} venueId — e.g. 
'neurips', 'icml' + * @param {{ intake: object }} ctx — context with transferIntake + * @returns {{ structure, figures, bibliography, policy, blind }} + */ +export function buildReviewChecklist(venueId, ctx) { + const builder = VENUE_CHECKLIST_BUILDERS[venueId] || genericReviewChecklist; + return builder(ctx); +} diff --git a/apps/backend/src/services/transferAgent/state.js b/apps/backend/src/services/transferAgent/state.js index 367e15f..a063af5 100644 --- a/apps/backend/src/services/transferAgent/state.js +++ b/apps/backend/src/services/transferAgent/state.js @@ -9,6 +9,7 @@ export const TransferState = Annotation.Root({ sourceMainFile: Annotation({ reducer: replace }), targetProjectId: Annotation({ reducer: replace }), targetMainFile: Annotation({ reducer: replace }), + targetTemplateId: Annotation({ reducer: replace }), engine: Annotation({ reducer: replace, default: () => 'pdflatex' }), maxCompileLoops: Annotation({ reducer: replace, default: () => 5 }), maxLayoutLoops: Annotation({ reducer: replace, default: () => 3 }), @@ -16,11 +17,30 @@ export const TransferState = Annotation.Root({ llmConfig: Annotation({ reducer: replace }), jobId: Annotation({ reducer: replace }), + /** 'legacy' | 'neurips' — selects LangGraph topology */ + transferGraphKind: Annotation({ reducer: replace, default: () => 'legacy' }), + + // --- Workspace roots (explicit tool boundary) --- + workspaceRoot: Annotation({ reducer: replace }), + sourceReadRoot: Annotation({ reducer: replace }), + + // --- Intake (POST /transfer/start); no network tools --- + transferIntake: Annotation({ + reducer: replace, + default: () => ({ + venue: '', + doubleBlind: true, + preprint: false, + outputNotes: '', + }), + }), + // --- Source analysis --- sourceProjectRoot: Annotation({ reducer: replace }), sourceOutline: Annotation({ reducer: replace }), sourceFullContent: Annotation({ reducer: replace }), sourceAssets: Annotation({ reducer: replace }), + sourceProfile: Annotation({ reducer: replace }), // 
--- Target analysis --- targetProjectRoot: Annotation({ reducer: replace }), @@ -31,15 +51,33 @@ export const TransferState = Annotation.Root({ // --- Transfer plan --- transferPlan: Annotation({ reducer: replace }), + // --- Human QA --- + pendingQA: Annotation({ reducer: replace }), + userConfirmations: Annotation({ reducer: replace, default: () => ({}) }), + + // --- UI / progress (API surfaces these) --- + lastCompletedNode: Annotation({ reducer: replace, default: () => '' }), + currentPhase: Annotation({ reducer: replace, default: () => '' }), + /** Next node name when graph paused (interrupt-before); filled by route on GraphInterrupt */ + interruptedBeforeNode: Annotation({ reducer: replace, default: () => '' }), + completedNodes: Annotation({ reducer: appendList, default: () => [] }), + progressLogEntries: Annotation({ reducer: appendList, default: () => [] }), + // --- Compile loop --- compileResult: Annotation({ reducer: replace }), compileAttempt: Annotation({ reducer: replace, default: () => 0 }), + verifyBuildResult: Annotation({ reducer: replace }), + lastGoodPhase: Annotation({ reducer: replace }), + buildFailureReason: Annotation({ reducer: replace }), // --- Layout check --- pageImages: Annotation({ reducer: replace }), layoutCheckResult: Annotation({ reducer: replace }), layoutAttempt: Annotation({ reducer: replace, default: () => 0 }), + // --- Figure measurement (normalizeFigures) --- + figureMeasurement: Annotation({ reducer: replace }), + // --- MinerU pipeline --- transferMode: Annotation({ reducer: replace, default: () => 'legacy' }), mineruConfig: Annotation({ reducer: replace }), @@ -48,9 +86,24 @@ export const TransferState = Annotation.Root({ sourceImages: Annotation({ reducer: replace }), mineruOutputDir: Annotation({ reducer: replace }), + // --- Agentic loop (neurips-agent graph) --- + /** LLM message history for the agentic loop (accumulated across iterations) */ + agentMessages: Annotation({ reducer: appendList, default: () => [] }), 
+ /** Current Planner→Generator→Reviewer iteration (0-based) */ + currentIteration: Annotation({ reducer: replace, default: () => 0 }), + /** Maximum allowed iterations before forced finalize */ + maxIterations: Annotation({ reducer: replace, default: () => 5 }), + /** Structured migration plan produced by Planner */ + migrationPlan: Annotation({ reducer: replace }), + /** Review result from Reviewer: { verdict: 'pass'|'revise', issues: [], suggestions: [] } */ + reviewResult: Annotation({ reducer: replace }), + /** Current agent phase: 'planning' | 'generating' | 'reviewing' | 'finalized' */ + agentPhase: Annotation({ reducer: replace, default: () => 'planning' }), + // --- Final output --- finalPdf: Annotation({ reducer: replace }), status: Annotation({ reducer: replace, default: () => 'pending' }), error: Annotation({ reducer: replace }), progressLog: Annotation({ reducer: appendList, default: () => [] }), + bundleNotes: Annotation({ reducer: replace }), }); diff --git a/apps/backend/src/services/transferAgent/tools/applyDiff.js b/apps/backend/src/services/transferAgent/tools/applyDiff.js new file mode 100644 index 0000000..66732f2 --- /dev/null +++ b/apps/backend/src/services/transferAgent/tools/applyDiff.js @@ -0,0 +1,52 @@ +import { z } from 'zod'; +import { DynamicStructuredTool } from '@langchain/core/tools'; +import { promises as fs } from 'fs'; +import { safeJoin } from '../../../utils/pathUtils.js'; +import { writeFileWithSnapshot } from '../utils.js'; +import { + extractUnifiedDiff, + applyUnifiedDiffToMainTex, +} from '../llmUnifiedDiff.js'; + +/** + * Creates the applyDiff tool — applies a unified diff patch to a target file. + * Wraps the existing llmUnifiedDiff infrastructure. + * + * @param {{ workspaceRoot: string, jobId: string }} ctx + */ +export function createApplyDiffTool(ctx) { + return new DynamicStructuredTool({ + name: 'applyDiff', + description: + 'Apply a unified diff (git format) to a file in the target project. 
' + + 'The diff must include proper --- a/ and +++ b/ headers and @@ hunk headers. ' + + 'Context lines (space prefix) and removed lines (-) must match the file exactly. ' + + 'Returns OK with the new file length, or an error reason if the patch cannot be applied.', + schema: z.object({ + path: z + .string() + .describe('Relative file path in target project, e.g. "main.tex"'), + diff: z + .string() + .describe('Unified diff in git format (--- a/path, +++ b/path, @@ hunks)'), + }), + func: async ({ path, diff }) => { + try { + const abs = safeJoin(ctx.workspaceRoot, path); + const baseTex = await fs.readFile(abs, 'utf8'); + const patchText = extractUnifiedDiff(diff); + if (!patchText) { + return '[ERROR] No valid unified diff found in the provided text. Ensure --- a/ and +++ b/ headers are present.'; + } + const result = applyUnifiedDiffToMainTex(baseTex, patchText); + if (!result.ok) { + return `[ERROR] Patch failed: ${result.reason}. Context/remove lines must match the file exactly.`; + } + await writeFileWithSnapshot(ctx.workspaceRoot, path, result.text, ctx.jobId); + return `[OK] Patch applied successfully. File is now ${result.text.length} chars.`; + } catch (err) { + return `[ERROR] applyDiff failed on target:${path} — ${err.message}`; + } + }, + }); +} diff --git a/apps/backend/src/services/transferAgent/tools/copyAsset.js b/apps/backend/src/services/transferAgent/tools/copyAsset.js new file mode 100644 index 0000000..4acc67b --- /dev/null +++ b/apps/backend/src/services/transferAgent/tools/copyAsset.js @@ -0,0 +1,55 @@ +import { z } from 'zod'; +import { DynamicStructuredTool } from '@langchain/core/tools'; +import { promises as fs } from 'fs'; +import path from 'path'; +import { safeJoin } from '../../../utils/pathUtils.js'; +import { ensureDir } from '../../../utils/fsUtils.js'; + +/** + * Creates the copyAsset tool — copies a file from source project to target project. 
+ * + * @param {{ sourceReadRoot: string, workspaceRoot: string }} ctx + */ +export function createCopyAssetTool(ctx) { + return new DynamicStructuredTool({ + name: 'copyAsset', + description: + 'Copy a file from the source project to the target (workspace) project. ' + + 'Use this for .bib, .bbl, images (.png, .jpg, .pdf, .eps), ' + + 'and style files (.sty, .cls, .bst). ' + + 'If destPath is omitted, the file is placed at the same relative path.', + schema: z.object({ + srcPath: z + .string() + .describe('Relative file path in the source project, e.g. "refs.bib" or "figures/fig1.png"'), + destPath: z + .string() + .optional() + .describe('Destination path in target project. Defaults to same as srcPath.'), + }), + func: async ({ srcPath, destPath }) => { + try { + const dest = destPath || srcPath; + const srcAbs = safeJoin(ctx.sourceReadRoot, srcPath); + const destAbs = safeJoin(ctx.workspaceRoot, dest); + + // Check source exists + try { + await fs.access(srcAbs); + } catch { + return `[ERROR] Source file not found: source:${srcPath}`; + } + + // Ensure destination directory exists + await ensureDir(path.dirname(destAbs)); + + // Copy + await fs.copyFile(srcAbs, destAbs); + const stat = await fs.stat(destAbs); + return `[OK] Copied source:${srcPath} → target:${dest} (${stat.size} bytes)`; + } catch (err) { + return `[ERROR] Copy failed: ${err.message}`; + } + }, + }); +} diff --git a/apps/backend/src/services/transferAgent/tools/grepFile.js b/apps/backend/src/services/transferAgent/tools/grepFile.js new file mode 100644 index 0000000..887bdf6 --- /dev/null +++ b/apps/backend/src/services/transferAgent/tools/grepFile.js @@ -0,0 +1,99 @@ +import { z } from 'zod'; +import { DynamicStructuredTool } from '@langchain/core/tools'; +import { promises as fs } from 'fs'; +import path from 'path'; +import { safeJoin } from '../../../utils/pathUtils.js'; +import { listFilesRecursive } from '../../../utils/fsUtils.js'; + +/** + * Creates the grepFile tool — searches file 
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { promises as fs } from 'fs';
import path from 'path';
import { safeJoin } from '../../../utils/pathUtils.js';
import { listFilesRecursive } from '../../../utils/fsUtils.js';

/**
 * Creates the grepFile tool — searches file contents with a regex pattern.
 * Returns matching lines with one line of context above/below, capped at
 * 100 matches total across all files.
 *
 * @param {{ sourceReadRoot: string, workspaceRoot: string }} ctx
 */
export function createGrepFileTool(ctx) {
  return new DynamicStructuredTool({
    name: 'grepFile',
    description:
      'Search file contents in the source or target project using a regular expression. ' +
      'Returns matching lines with line numbers and surrounding context. ' +
      'Use glob to filter by file extension (e.g. "*.tex", "*.bib").',
    schema: z.object({
      project: z
        .enum(['source', 'target'])
        .describe('Which project to search'),
      pattern: z
        .string()
        .describe('Regular expression pattern to search for'),
      glob: z
        .string()
        .optional()
        .describe('File glob pattern to filter, e.g. "*.tex" or "*.bib"'),
    }),
    func: async ({ project, pattern, glob }) => {
      try {
        const root =
          project === 'source' ? ctx.sourceReadRoot : ctx.workspaceRoot;
        const allFiles = await listFilesRecursive(root);
        const files = allFiles
          .filter((f) => f.type === 'file')
          .filter((f) => {
            if (!glob) return true;
            // Simple glob: *.ext matches by suffix; anything else is a substring filter.
            if (glob.startsWith('*.')) {
              const ext = glob.slice(1); // e.g. ".tex"
              return f.path.endsWith(ext);
            }
            return f.path.includes(glob);
          });

        // FIX: was new RegExp(pattern, 'gim'). The 'g' flag makes test()
        // stateful via lastIndex and required a manual reset after every hit;
        // only a per-line boolean is needed, so 'im' is sufficient and safe.
        let re;
        try {
          re = new RegExp(pattern, 'im');
        } catch {
          return `[ERROR] Invalid regex pattern: ${pattern}`;
        }

        const results = [];
        let totalMatches = 0;
        const MAX_MATCHES = 100;

        for (const file of files) {
          if (totalMatches >= MAX_MATCHES) break;
          let content;
          try {
            content = await fs.readFile(safeJoin(root, file.path), 'utf8');
          } catch {
            // Unreadable (binary/permission) files are silently skipped.
            continue;
          }
          const lines = content.split('\n');
          for (let i = 0; i < lines.length; i++) {
            if (totalMatches >= MAX_MATCHES) break;
            if (!re.test(lines[i])) continue;
            const ctxStart = Math.max(0, i - 1);
            const ctxEnd = Math.min(lines.length - 1, i + 1);
            const snippet = [];
            for (let j = ctxStart; j <= ctxEnd; j++) {
              const prefix = j === i ? '>>>' : '   ';
              snippet.push(`${prefix} ${j + 1}: ${lines[j]}`);
            }
            results.push(`--- ${file.path} ---\n${snippet.join('\n')}`);
            totalMatches++;
          }
        }

        if (!results.length) {
          return `No matches found for /${pattern}/ in ${project} project${glob ? ` (glob: ${glob})` : ''}.`;
        }
        const truncNote =
          totalMatches >= MAX_MATCHES
            ? `\n\n[TRUNCATED — showing first ${MAX_MATCHES} matches]`
            : '';
        return results.join('\n\n') + truncNote;
      } catch (err) {
        return `[ERROR] grep failed: ${err.message}`;
      }
    },
  });
}
/**
 * Shared inspection trio (read / grep / list) used by the read-only tool sets.
 * Order is preserved exactly — it affects how tools are presented to the LLM.
 */
function inspectionTools(ctx) {
  return [
    createReadFileTool(ctx),
    createGrepFileTool(ctx),
    createListProjectTreeTool(ctx),
  ];
}

/**
 * Create a subset of tools (read-only) for Planner and Reviewer nodes.
 * These nodes should NOT write files or apply diffs.
 */
export function createReadOnlyTools(ctx) {
  return [...inspectionTools(ctx), createRaiseQuestionTool(ctx)];
}

/**
 * Create the full tool set for the Generator node.
 * Generator can read, write, diff, copy, and grep — but not raise questions.
 */
export function createGeneratorTools(ctx) {
  return [
    createReadFileTool(ctx),
    createWriteFileTool(ctx),
    createApplyDiffTool(ctx),
    createGrepFileTool(ctx),
    createListProjectTreeTool(ctx),
    createCopyAssetTool(ctx),
    createMeasureFiguresTool(ctx),
  ];
}

/**
 * Create tools for the Reviewer node.
 * Reviewer can read, grep, list, but also raiseQuestion for user confirmations.
 */
export function createReviewerTools(ctx) {
  return [...inspectionTools(ctx), createRaiseQuestionTool(ctx)];
}
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { listFilesRecursive } from '../../../utils/fsUtils.js';

/**
 * Creates the listProjectTree tool — lists every entry in a project directory
 * tree, marking directories with a "[dir]" tag so the agent can tell them
 * apart from plain files.
 *
 * @param {{ sourceReadRoot: string, workspaceRoot: string }} ctx
 */
export function createListProjectTreeTool(ctx) {
  return new DynamicStructuredTool({
    name: 'listProjectTree',
    description:
      'List all files in the source or target project directory tree. ' +
      'Returns file paths with their types (file/directory). ' +
      'Useful for understanding project structure before reading specific files.',
    schema: z.object({
      project: z
        .enum(['source', 'target'])
        .describe('Which project to list files from'),
    }),
    func: async ({ project }) => {
      try {
        const root =
          project === 'source' ? ctx.sourceReadRoot : ctx.workspaceRoot;
        const entries = await listFilesRecursive(root);
        if (!entries.length) {
          return `(empty — no files found in ${project} project)`;
        }
        const lines = [];
        for (const entry of entries) {
          const marker = entry.type === 'file' ? ' ' : ' [dir]';
          lines.push(`${marker} ${entry.path}`);
        }
        return `${project} project files:\n${lines.join('\n')}`;
      } catch (err) {
        return `[ERROR] Failed to list ${project} project tree: ${err.message}`;
      }
    },
  });
}
/**
 * Known text-width (in pt) for common document classes / layouts.
 * These are the width of the text body (single column) at default settings.
 *
 * For twocolumn documents the *column* width is roughly half of
 * textwidth minus columnsep, which is what \linewidth resolves to
 * inside a column.
 */
const LAYOUT_DB = {
  // NeurIPS: 5.5 in text width => 396 pt
  neurips: { textwidthPt: 396, columnwidthPt: 396, columns: 1 },
  // Standard article 10 pt, letterpaper: ~345 pt
  article: { textwidthPt: 345, columnwidthPt: 345, columns: 1 },
  // revtex4-1 / revtex4-2 twocolumn (APS default): textwidth ≈ 510 pt, colwidth ≈ 246 pt
  'revtex4-1': { textwidthPt: 510, columnwidthPt: 246, columns: 2 },
  'revtex4-2': { textwidthPt: 510, columnwidthPt: 246, columns: 2 },
  revtex: { textwidthPt: 510, columnwidthPt: 246, columns: 2 },
  // IEEEtran twocolumn: textwidth ≈ 516 pt, colwidth ≈ 252 pt
  IEEEtran: { textwidthPt: 516, columnwidthPt: 252, columns: 2 },
  // LNCS (Springer): textwidth ≈ 336 pt
  llncs: { textwidthPt: 336, columnwidthPt: 336, columns: 1 },
  // ACM acmart sigconf twocolumn: textwidth ≈ 506 pt, colwidth ≈ 241 pt
  acmart: { textwidthPt: 506, columnwidthPt: 241, columns: 2 },
  // CVPR / ICCV twocolumn: textwidth ≈ 496 pt, colwidth ≈ 237 pt
  cvpr: { textwidthPt: 496, columnwidthPt: 237, columns: 2 },
  // ICML: textwidth ≈ 487 pt, colwidth ≈ 233 pt
  icml: { textwidthPt: 487, columnwidthPt: 233, columns: 2 },
};

/**
 * Parse the MediaBox / page size from a PDF file header (first 8 KB).
 * Returns { widthPt, heightPt } or null.
 *
 * FIX: the file handle is now closed in a finally block — previously a
 * failing read() leaked the descriptor because close() was only reached on
 * the success path.
 */
async function pdfPageSize(filePath) {
  let fd;
  let buf;
  try {
    fd = await fs.open(filePath, 'r');
    buf = Buffer.alloc(8192);
    await fd.read(buf, 0, 8192, 0);
  } catch {
    return null;
  } finally {
    if (fd) await fd.close().catch(() => {});
  }

  const str = buf.toString('latin1');

  // Try /MediaBox [x0 y0 x1 y1]
  const mediaMatch = str.match(/\/MediaBox\s*\[\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\]/);
  if (mediaMatch) {
    const w = parseFloat(mediaMatch[3]) - parseFloat(mediaMatch[1]);
    const h = parseFloat(mediaMatch[4]) - parseFloat(mediaMatch[2]);
    if (w > 0 && h > 0) {
      return { widthPt: Math.round(w * 100) / 100, heightPt: Math.round(h * 100) / 100 };
    }
  }

  return null;
}

/**
 * Measure the natural dimensions of a raster image (PNG/JPG) in pt.
 * PNG dimensions are converted assuming 150 dpi; JPEG is not parsed
 * (would require scanning for the SOF marker) and returns null.
 *
 * FIX: handle is closed in finally (same leak as pdfPageSize), and the full
 * 8-byte PNG signature is checked instead of only the first two bytes.
 */
async function rasterSize(filePath) {
  let fd;
  const buf = Buffer.alloc(32);
  try {
    fd = await fs.open(filePath, 'r');
    await fd.read(buf, 0, 32, 0);
  } catch {
    return null;
  } finally {
    if (fd) await fd.close().catch(() => {});
  }

  // PNG signature: 89 50 4E 47 0D 0A 1A 0A; IHDR width at 16-19, height at 20-23 (big-endian)
  const PNG_SIG = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  if (PNG_SIG.every((b, i) => buf[i] === b)) {
    const w = buf.readUInt32BE(16);
    const h = buf.readUInt32BE(20);
    return { widthPt: w * 72 / 150, heightPt: h * 72 / 150 }; // assume 150 dpi
  }

  // JPEG: need to find SOF marker — simpler: just return null and let the tool skip
  return null;
}

/**
 * Compute the effective \linewidth (in pt) a figure sees.
 * For twocolumn documents, \linewidth inside a normal figure = columnwidth.
 * For figure*, \linewidth = textwidth.
 *
 * @param {{ textwidthPt: number, columnwidthPt: number }|null} layout
 * @param {boolean} isStar - true for figure* (full-width) environments
 * @returns {number|null} effective width in pt, or null when layout unknown
 */
function effectiveLinewidth(layout, isStar) {
  if (!layout) return null;
  return isStar ? layout.textwidthPt : layout.columnwidthPt;
}
/**
 * Convert a LaTeX width spec into points, given the layout it appears in.
 * Supports "\linewidth" / "\columnwidth" / "\textwidth" with an optional
 * numeric factor (e.g. "0.48\textwidth") and absolute units in/cm/mm/pt.
 *
 * @param {string|undefined} spec - the \includegraphics width argument
 * @param {{ textwidthPt: number, columnwidthPt: number }|null} layout
 * @param {boolean} isStar - whether the figure sat in a figure* environment
 * @returns {number|null} width in pt, or null when the spec is missing/unparseable
 */
function widthSpecToPt(spec, layout, isStar) {
  if (!spec || !layout) return null;
  const s = spec.trim();
  let m = s.match(/^([\d.]*)\s*\\(linewidth|columnwidth|textwidth)$/);
  if (m) {
    const factor = m[1] ? parseFloat(m[1]) : 1;
    const base =
      m[2] === 'textwidth'
        ? layout.textwidthPt
        : m[2] === 'columnwidth'
          ? layout.columnwidthPt
          : isStar // \linewidth resolves per environment
            ? layout.textwidthPt
            : layout.columnwidthPt;
    return factor * base;
  }
  m = s.match(/^([\d.]+)\s*(in|cm|mm|pt)$/);
  if (m) {
    const perUnitPt = { in: 72.27, cm: 28.45, mm: 2.845, pt: 1 };
    return parseFloat(m[1]) * perUnitPt[m[2]];
  }
  return null;
}

/**
 * Create the measureFigures tool.
 *
 * Does NOT call any external binary — reads PDF headers directly and uses
 * a built-in layout database for document-class dimensions.
 *
 * FIX: the schema's per-figure `currentWidth` field was declared and
 * documented but never used — every figure was assumed to be width=\linewidth.
 * It is now parsed (via widthSpecToPt) so e.g. "0.48\textwidth" figures get a
 * proportionally correct recommendation.
 *
 * @param {{ sourceReadRoot: string, workspaceRoot: string }} ctx
 */
export function createMeasureFiguresTool(ctx) {
  return new DynamicStructuredTool({
    name: 'measureFigures',
    description:
      'Measure figure image dimensions and compute recommended \\includegraphics width ' +
      'based on source and target document layouts. ' +
      'Returns a JSON report with per-figure measurements and scaling advice.',
    schema: z.object({
      sourceClass: z
        .string()
        .describe('Source document class, e.g. "revtex4-1", "article", "IEEEtran"'),
      sourceTwocolumn: z
        .boolean()
        .describe('Whether the source document uses twocolumn layout'),
      targetClass: z
        .string()
        .default('neurips')
        .describe('Target document class / template, e.g. "neurips"'),
      figures: z
        .array(
          z.object({
            file: z.string().describe('Image file path relative to workspace, e.g. "fig1.pdf"'),
            currentWidth: z
              .string()
              .optional()
              .describe('Current \\includegraphics width spec, e.g. "\\linewidth", "0.48\\textwidth", "3in"'),
            isStar: z
              .boolean()
              .default(false)
              .describe('Whether this figure is in a figure* (full-width) environment'),
          }),
        )
        .describe('List of figures to measure'),
    }),
    func: async ({ sourceClass, sourceTwocolumn, targetClass, figures }) => {
      try {
        // Resolve layouts
        let srcLayout = LAYOUT_DB[sourceClass] || null;
        // If the class itself is single-column but twocolumn flag is set,
        // approximate column width as (textwidth - 20 pt columnsep) / 2
        if (srcLayout && sourceTwocolumn && srcLayout.columns === 1) {
          srcLayout = {
            ...srcLayout,
            columnwidthPt: Math.round((srcLayout.textwidthPt - 20) / 2),
            columns: 2,
          };
        }
        const tgtLayout = LAYOUT_DB[targetClass] || LAYOUT_DB.neurips;

        const results = [];

        for (const fig of figures) {
          const absPath = safeJoin(ctx.workspaceRoot, fig.file);
          let naturalSize = null;

          // Try to measure the image
          const ext = path.extname(fig.file).toLowerCase();
          if (ext === '.pdf') {
            naturalSize = await pdfPageSize(absPath);
          } else if (['.png', '.jpg', '.jpeg'].includes(ext)) {
            naturalSize = await rasterSize(absPath);
          }

          // Effective widths the figure sees in source and target
          const srcLinewidth = effectiveLinewidth(srcLayout, fig.isStar);
          const tgtLinewidth = effectiveLinewidth(tgtLayout, false); // NeurIPS is always single-col

          // Honour the declared currentWidth; fall back to \linewidth when
          // it is absent or unparseable.
          const srcEffectivePt =
            widthSpecToPt(fig.currentWidth, srcLayout, fig.isStar) ?? srcLinewidth;

          // Determine recommended width
          let recommendation = '';
          let recommendedSpec = '';

          if (srcEffectivePt && tgtLinewidth) {
            // How wide (in pt) was the figure in the source vs the target \linewidth?
            const ratio = srcEffectivePt / tgtLinewidth;

            if (ratio < 0.75) {
              // Source figure was narrower than target \linewidth — keep as-is or minor adjust
              recommendedSpec = `${Math.round(ratio * 100) / 100}\\linewidth`;
              recommendation = `Source figure occupied ${Math.round(srcEffectivePt)}pt; target \\linewidth is ${Math.round(tgtLinewidth)}pt. Scale to ${recommendedSpec} to preserve visual proportion.`;
            } else if (ratio >= 0.75 && ratio <= 1.05) {
              // Close to full width — use \linewidth
              recommendedSpec = '\\linewidth';
              recommendation = `Source and target widths are similar — \\linewidth is fine.`;
            } else {
              // Source was wider than the target — never recommend more than 1.0
              const scaledRatio = Math.min(ratio, 1.0);
              recommendedSpec = `${Math.round(scaledRatio * 100) / 100}\\linewidth`;
              recommendation = `Source figure was ${Math.round(srcEffectivePt)}pt wide (${srcLayout?.columns === 2 ? 'figure* spanning full textwidth' : 'single column'}); target is ${Math.round(tgtLinewidth)}pt. Use ${recommendedSpec}.`;
            }
          }

          // If the figure is very tall relative to the target page, also warn
          let heightWarning = '';
          if (naturalSize && tgtLinewidth) {
            const scaledWidth = tgtLinewidth; // if using \linewidth
            const scaledHeight = naturalSize.heightPt * (scaledWidth / naturalSize.widthPt);
            const pageHeight = 650; // NeurIPS text height ≈ 650 pt
            const heightRatio = scaledHeight / pageHeight;
            if (heightRatio > 0.65) {
              heightWarning = `At \\linewidth, figure height would be ${Math.round(scaledHeight)}pt (${Math.round(heightRatio * 100)}% of page). Consider reducing width to ${Math.round(0.6 / heightRatio * 100) / 100}\\linewidth so it fits alongside text.`;
            }
          }

          results.push({
            file: fig.file,
            naturalSizePt: naturalSize
              ? `${naturalSize.widthPt} x ${naturalSize.heightPt}`
              : 'unknown',
            sourceEffectiveWidthPt: srcEffectivePt ? Math.round(srcEffectivePt) : null,
            targetLinewidthPt: tgtLinewidth ? Math.round(tgtLinewidth) : null,
            recommendedWidth: recommendedSpec,
            recommendation,
            heightWarning: heightWarning || null,
          });
        }

        const summary = {
          sourceLayout: srcLayout
            ? `${sourceClass}, ${srcLayout.columns}-column, textwidth=${srcLayout.textwidthPt}pt, colwidth=${srcLayout.columnwidthPt}pt`
            : `${sourceClass} (unknown layout)`,
          targetLayout: `${targetClass}, ${tgtLayout.columns}-column, textwidth=${tgtLayout.textwidthPt}pt`,
          figures: results,
        };

        return `[OK] ${JSON.stringify(summary, null, 2)}`;
      } catch (err) {
        return `[ERROR] measureFigures failed: ${err.message}`;
      }
    },
  });
}
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { interrupt } from '@langchain/langgraph';

/**
 * Creates the raiseQuestion tool — pauses the graph and asks the user one or
 * more questions. Calling it triggers a LangGraph interrupt; the graph stays
 * suspended until the frontend submits answers via POST /submit-confirm.
 *
 * @param {{ getState: () => object }} ctx — accessor for current graph state
 */
export function createRaiseQuestionTool(ctx) {
  return new DynamicStructuredTool({
    name: 'raiseQuestion',
    description:
      'Ask the user one or more questions and pause execution until they respond. ' +
      'Use this ONLY when you genuinely need user input to proceed ' +
      '(e.g. ambiguous migration choices, blind-review decisions). ' +
      'Do NOT use this for information you can determine from the files.',
    schema: z.object({
      questions: z
        .array(
          z.object({
            id: z.string().describe('Unique question identifier, e.g. "float_strategy"'),
            prompt: z.string().describe('The question text to show the user'),
            options: z
              .array(z.string())
              .describe('Available answer choices'),
          }),
        )
        .min(1)
        .max(5)
        .describe('Array of questions to ask'),
    }),
    func: async ({ questions }) => {
      // Normalise into the pendingQA shape the frontend renders.
      const pendingQA = [];
      for (const q of questions) {
        pendingQA.push({
          id: q.id,
          prompt: q.prompt,
          type: 'single',
          options: q.options,
        });
      }

      // interrupt() suspends the graph here; the runner surfaces the payload
      // to the API and execution resumes only after the user answers.
      interrupt({
        type: 'raiseQuestion',
        pendingQA,
      });

      // Reached only on resume — answers land in state.userConfirmations.
      return '[PAUSED] Questions sent to user. Awaiting response. When resumed, check state.userConfirmations for answers.';
    },
  });
}
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { readSourceFile, readWorkspaceFile } from '../fsTools.js';

/**
 * Creates the readFile tool bound to a specific job's workspace roots.
 * Reads any file from the source or target project; output is capped at
 * 60 000 characters with an explicit truncation marker.
 *
 * @param {{ sourceReadRoot: string, workspaceRoot: string }} ctx
 */
export function createReadFileTool(ctx) {
  const MAX_CHARS = 60_000;
  return new DynamicStructuredTool({
    name: 'readFile',
    description:
      'Read a file from the source or target (workspace) project. ' +
      'Use project="source" to read the original paper, project="target" to read the NeurIPS workspace being built. ' +
      'Returns the file content as a string (truncated to 60 000 chars).',
    schema: z.object({
      project: z
        .enum(['source', 'target'])
        .describe('Which project to read from'),
      path: z
        .string()
        .describe('Relative file path, e.g. "main.tex" or "sections/intro.tex"'),
    }),
    func: async ({ project, path }) => {
      try {
        let content;
        if (project === 'source') {
          content = await readSourceFile(ctx.sourceReadRoot, path);
        } else {
          content = await readWorkspaceFile(ctx.workspaceRoot, path);
        }
        if (content.length <= MAX_CHARS) {
          return content;
        }
        return (
          content.slice(0, MAX_CHARS) +
          `\n\n[TRUNCATED — file is ${content.length} chars total]`
        );
      } catch (err) {
        return `[ERROR] Could not read ${project}:${path} — ${err.message}`;
      }
    },
  });
}
import { z } from 'zod';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { writeFileWithSnapshot } from '../utils.js';

/**
 * Creates the writeFile tool — writes (or overwrites) a file in the target
 * workspace. writeFileWithSnapshot saves the previous version automatically,
 * so every write is recoverable per job.
 *
 * @param {{ workspaceRoot: string, jobId: string }} ctx
 */
export function createWriteFileTool(ctx) {
  return new DynamicStructuredTool({
    name: 'writeFile',
    description:
      'Write content to a file in the target (workspace) project. ' +
      'A snapshot of the previous version is saved automatically. ' +
      'Use this for creating or replacing .tex files, preambles, bib files, etc.',
    schema: z.object({
      path: z
        .string()
        .describe('Relative file path inside target project, e.g. "main.tex"'),
      content: z
        .string()
        .describe('The full file content to write'),
    }),
    func: async ({ path, content }) => {
      const { workspaceRoot, jobId } = ctx;
      try {
        await writeFileWithSnapshot(workspaceRoot, path, content, jobId);
        return `[OK] Wrote ${content.length} chars to target:${path}`;
      } catch (err) {
        return `[ERROR] Failed to write target:${path} — ${err.message}`;
      }
    },
  });
}
/**
 * Emit only the progress lines appended since the previous call.
 * The last-seen length is cached on the job object itself
 * (job._transferDebugLogLen), so shrinking arrays simply resync the counter.
 */
export function transferDebugProgressDelta(jobId, job, nextLogArray) {
  if (!isTransferDebugEnabled()) return;
  const current = Array.isArray(nextLogArray) ? nextLogArray : [];
  const seen = job._transferDebugLogLen ?? 0;
  job._transferDebugLogLen = current.length;
  if (current.length <= seen) return;
  const fresh = current.slice(seen);
  transferDebugLog(jobId, 'log', `progress +${fresh.length} line(s):`);
  for (const line of fresh) {
    transferDebugLog(jobId, 'log', ` | ${line}`);
  }
}

/**
 * Same delta-logging for structured entries (NeurIPS / progressMeta),
 * tracked separately via job._transferDebugEntriesLen.
 */
export function transferDebugEntriesDelta(jobId, job, entries) {
  if (!isTransferDebugEnabled()) return;
  const current = Array.isArray(entries) ? entries : [];
  const seen = job._transferDebugEntriesLen ?? 0;
  job._transferDebugEntriesLen = current.length;
  if (current.length <= seen) return;
  transferDebugLog(jobId, 'log', `progressLogEntries +${current.length - seen}:`, current.slice(seen));
}

// Module-level latch so the banner prints at most once per process.
let _announced;

/**
 * Print a one-time banner explaining that transfer debug logging is active
 * and how to switch it off. No-op when debugging is disabled.
 */
export function announceTransferDebugOnce() {
  if (_announced || !isTransferDebugEnabled()) return;
  _announced = true;
  console.log(
    '[OpenPrism:transfer] 控制台调试已开启:每次 /transfer/step 会打印进度增量与状态快照;' +
      '关闭请设 OPENPRISM_TRANSFER_DEBUG=0',
  );
}
/**
 * Thrown when a NeurIPS transfer node exhausts LLM retries; surfaced to the
 * API as failedNode/failedPhase so the frontend can show where the graph died.
 */
export class TransferNodeError extends Error {
  /**
   * @param {string} node - Graph node name (e.g. applyBibliography)
   * @param {string} phase - NeuripsPhase value
   * @param {string} detail - Last failure reason summary
   * @param {string} [message] - Full error message for logs/UI; defaults to "[node] detail"
   * @param {string} [debugRelPath] - Project-relative dir with saved LLM raw/patch (e.g. .agent_runs/…/llm_diff/…)
   * @param {number} [inputChars] - Length of main.tex (or target file) fed to the diff step when it failed
   */
  constructor(node, phase, detail, message, debugRelPath, inputChars) {
    super(message || `[${node}] ${detail}`);
    this.name = 'TransferNodeError';
    this.node = node;
    this.phase = phase;
    this.detail = detail;
    /** @type {string | undefined} */
    this.debugRelPath = debugRelPath;
    /** @type {number | undefined} */
    this.inputChars = inputChars;
  }
}
/**
 * Produce a brief human-readable summary of tool call arguments (max ~100 chars).
 * Used by agent nodes to populate liveProgress.toolArgs.
 *
 * FIX: the copyAsset case read args.from/args.to and the grepFile case read
 * args.path — neither field exists in the corresponding tool schemas
 * (copyAsset uses srcPath/destPath; grepFile uses glob), so those summaries
 * were always blank.
 */
export function briefToolArgs(toolName, args) {
  if (!args || typeof args !== 'object') return '';
  try {
    switch (toolName) {
      case 'readFile':
        return `${args.project || 'target'}:${args.path || ''}`.slice(0, 100);
      case 'writeFile':
        return `${args.path || ''} (${(args.content || '').length} chars)`.slice(0, 100);
      case 'applyDiff':
        return `${args.path || ''} (diff ${(args.diff || '').length} chars)`.slice(0, 100);
      case 'grepFile': {
        // grepFile's schema is {project, pattern, glob} — there is no "path".
        const pat = (args.pattern || '').slice(0, 40);
        return (args.glob ? `pattern="${pat}" glob ${args.glob}` : `pattern="${pat}"`).slice(0, 100);
      }
      case 'listProjectTree':
        return args.project || 'target';
      case 'copyAsset':
        // copyAsset's schema is {srcPath, destPath}; destPath defaults to srcPath.
        return `${args.srcPath || ''} → ${args.destPath || args.srcPath || ''}`.slice(0, 100);
      case 'raiseQuestion':
        return `${(args.questions || []).length} question(s)`;
      default:
        return JSON.stringify(args).slice(0, 100);
    }
  } catch {
    return '';
  }
}

/**
 * Reject LLM "full .tex file" output that would wipe the project (empty or
 * far shorter than input).
 *
 * @returns {string|null} rejection reason, or null if OK to write
 */
export function rejectCatastrophicFullTexRewrite(previousContent, candidateContent) {
  const prevLen = (previousContent || '').length;
  const outLen = (candidateContent || '').trim().length;
  if (!outLen) return 'empty output';
  // Only guard substantial files: a sub-20% shrink of a >2000-char file is
  // almost certainly a truncated LLM response, not a legitimate rewrite.
  if (prevLen > 2000 && outLen < Math.floor(prevLen * 0.2)) return 'output too short';
  return null;
}

/**
 * Split LaTeX into preamble (before \begin{document}), body block (inclusive
 * of \begin{document} … \end{document}), and trailing tail text.
 */
export function splitTexDocument(tex) {
  const beginMark = '\\begin{document}';
  const endMark = '\\end{document}';
  const beginIdx = tex.indexOf(beginMark);
  const endIdx = tex.lastIndexOf(endMark);
  if (beginIdx === -1 || endIdx === -1 || endIdx < beginIdx) {
    return { preamble: tex.trimEnd(), body: '', tail: '', hasDocument: false };
  }
  const preamble = tex.slice(0, beginIdx).trimEnd();
  const bodyEnd = endIdx + endMark.length;
  const body = tex.slice(beginIdx, bodyEnd);
  const tail = tex.slice(bodyEnd);
  return { preamble, body, tail, hasDocument: true };
}
/**
 * Merge preamble + body + tail back into one document string (body must
 * include \begin{document} … \end{document}). The preamble is right-trimmed
 * and separated from the body by a blank line; a missing body collapses the
 * result to preamble + tail.
 */
export function mergeTexDocument(preamble, body, tail = '') {
  const head = (preamble || '').trimEnd();
  const mid = body || '';
  const rest = tail || '';
  if (!head && !mid) return rest;
  if (!mid) return `${head}${rest}`;
  return `${head}\n\n${mid}${rest}`;
}
`${basePath}/${rel}` : rel; form.append('files', file, finalPath); }); - const res = await fetch(`/api/projects/${projectId}/upload`, { + const res = await fetch(apiUrl(`/api/projects/${projectId}/upload`), { method: 'POST', body: form, headers: { @@ -319,7 +327,7 @@ export async function uploadTemplate(templateId: string, templateLabel: string, form.append('templateLabel', templateLabel); form.append('file', file); const lang = getLangHeader(); - const res = await fetch(`${API_BASE}/api/templates/upload`, { + const res = await fetch(apiUrl('/api/templates/upload'), { method: 'POST', headers: { 'x-lang': lang, ...getAuthHeader() }, body: form, @@ -384,7 +392,7 @@ export async function importZip(payload: { file: File; projectName?: string }) { if (payload.projectName) { form.append('projectName', payload.projectName); } - const res = await fetch('/api/projects/import-zip', { + const res = await fetch(apiUrl('/api/projects/import-zip'), { method: 'POST', body: form, headers: { @@ -407,7 +415,7 @@ export function importArxivSSE( if (payload.projectName) params.set('projectName', payload.projectName); const token = getCollabToken(); if (token) params.set('token', token); - const es = new EventSource(`/api/projects/import-arxiv-sse?${params.toString()}`); + const es = new EventSource(apiUrl(`/api/projects/import-arxiv-sse?${params.toString()}`)); es.addEventListener('progress', (e) => { if (onProgress) { @@ -450,7 +458,7 @@ export async function visionToLatex(payload: { if (payload.llmConfig) { form.append('llmConfig', JSON.stringify(payload.llmConfig)); } - const res = await fetch('/api/vision/latex', { + const res = await fetch(apiUrl('/api/vision/latex'), { method: 'POST', body: form, headers: { @@ -474,12 +482,58 @@ export interface TransferStartPayload { engine?: string; layoutCheck?: boolean; llmConfig?: Partial; + venue?: string; + doubleBlind?: boolean; + preprint?: boolean; + outputNotes?: string; +} + +export interface TransferQaItem { + id: string; + prompt: 
string; + type: 'single' | 'multi' | 'text'; + options?: string[]; +} + +export interface TransferProgressEntry { + node?: string; + level?: 'info' | 'warn' | 'error'; + message?: string; + ts?: number; +} + +export interface LiveProgress { + activeRole: string; + toolName: string; + toolArgs: string; + toolRound: number; + maxToolRounds: number; + lastUpdate: number; } export interface TransferStepResult { status: string; progressLog: string[]; + progressLogEntries?: TransferProgressEntry[]; + currentNode?: string; + phase?: string; + agentPhase?: string | null; + currentIteration?: number | null; + interruptedBeforeNode?: string; + completedNodes?: string[]; + pendingQA?: TransferQaItem[] | null; error?: string; + bundleNotes?: string | null; + transferGraphKind?: string; + liveProgress?: LiveProgress | null; + /** Present on 500 from /transfer/step when a graph node fails (e.g. diff retries exhausted) */ + failedNode?: string; + failedPhase?: string; + failedDetail?: string; + /** Project-relative path to saved LLM raw/patch (unified-diff nodes) */ + failedDebugPath?: string; + /** Length of target .tex input to the failed diff step */ + failedInputChars?: number; } export interface PageImage { @@ -495,10 +549,45 @@ export function transferStart(payload: TransferStartPayload) { }); } -export function transferStep(jobId: string) { - return request('/api/transfer/step', { +/** + * Parses JSON error bodies so TransferNodeError fields (failedNode, etc.) are available on thrown Error. + */ +export function transferStep(jobId: string): Promise { + const lang = getLangHeader(); + const mergedHeaders: Record = { + 'x-lang': lang, + ...getAuthHeader(), + 'Content-Type': 'application/json', + }; + return fetch(apiUrl('/api/transfer/step'), { method: 'POST', + headers: mergedHeaders, body: JSON.stringify({ jobId }), + }).then(async (res) => { + const text = await res.text(); + let body: Record = {}; + try { + body = text ? 
(JSON.parse(text) as Record) : {}; + } catch { + body = { error: text || 'Step failed' }; + } + if (!res.ok) { + const baseMsg = String(body.error ?? text ?? 'Step failed'); + const err = new Error(baseMsg) as Error & { + failedNode?: string; + failedPhase?: string; + failedDetail?: string; + failedDebugPath?: string; + failedInputChars?: number; + }; + if (typeof body.failedNode === 'string') err.failedNode = body.failedNode; + if (typeof body.failedPhase === 'string') err.failedPhase = body.failedPhase; + if (typeof body.failedDetail === 'string') err.failedDetail = body.failedDetail; + if (typeof body.failedDebugPath === 'string') err.failedDebugPath = body.failedDebugPath; + if (typeof body.failedInputChars === 'number') err.failedInputChars = body.failedInputChars; + throw err; + } + return body as unknown as TransferStepResult; }); } @@ -513,6 +602,52 @@ export function transferStatus(jobId: string) { return request(`/api/transfer/status/${jobId}`); } +/** + * Connect to the SSE progress stream for a transfer job. + * Returns an EventSource instance. Call .close() to disconnect. 
+ */ +export function transferStream( + jobId: string, + onProgress: (data: TransferStepResult) => void, + onDone?: (data: TransferStepResult) => void, + onError?: (err: Event) => void, +): EventSource { + const es = new EventSource(apiUrl(`/api/transfer/stream/${jobId}`)); + + es.addEventListener('progress', (e: MessageEvent) => { + try { + const data = JSON.parse(e.data) as TransferStepResult; + onProgress(data); + } catch { /* ignore parse errors */ } + }); + + es.addEventListener('done', (e: MessageEvent) => { + try { + const data = JSON.parse(e.data) as TransferStepResult; + (onDone || onProgress)(data); + } catch { /* ignore */ } + es.close(); + }); + + es.onerror = (e) => { + if (onError) onError(e); + // EventSource auto-reconnects on transient errors; + // only close on permanent failure (readyState === CLOSED) + if (es.readyState === EventSource.CLOSED) { + es.close(); + } + }; + + return es; +} + +export function transferSubmitConfirm(jobId: string, answers: Record) { + return request<{ ok: boolean }>('/api/transfer/submit-confirm', { + method: 'POST', + body: JSON.stringify({ jobId, answers }), + }); +} + // ─── MinerU Transfer API ─── export interface MineruConfig { @@ -543,7 +678,7 @@ export async function mineruTransferUploadPdf(jobId: string, pdfFile: File) { const form = new FormData(); form.append('jobId', jobId); form.append('pdf', pdfFile); - const res = await fetch('/api/transfer/upload-pdf', { + const res = await fetch(apiUrl('/api/transfer/upload-pdf'), { method: 'POST', body: form, headers: { diff --git a/apps/frontend/src/app/App.css b/apps/frontend/src/app/App.css index ec09d64..bc8e485 100644 --- a/apps/frontend/src/app/App.css +++ b/apps/frontend/src/app/App.css @@ -3680,62 +3680,252 @@ textarea.input { border-radius: 999px; } -/* ── Transfer Progress Widget ── */ -.transfer-widget { +/* ── Transfer progress: FAB + right drawer ── */ +.transfer-progress-fab { position: fixed; - bottom: 20px; - right: 20px; - width: 360px; - max-height: 
400px; - background: var(--panel); + bottom: 22px; + right: 22px; + z-index: 10050; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 2px; + min-width: 56px; + min-height: 56px; + padding: 8px 10px; border: 1px solid var(--border); - border-radius: 12px; + border-radius: 14px; + background: var(--panel); box-shadow: var(--shadow); - z-index: 1000; + color: var(--text); + cursor: pointer; + transition: transform 0.15s ease, box-shadow 0.15s ease; +} + +.transfer-progress-fab:hover { + transform: translateY(-2px); + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12); +} + +.transfer-progress-fab--pulse { + animation: transfer-fab-pulse 2s ease-in-out infinite; +} + +@keyframes transfer-fab-pulse { + 0%, 100% { box-shadow: var(--shadow), 0 0 0 0 rgba(21, 101, 192, 0.35); } + 50% { box-shadow: var(--shadow), 0 0 0 8px rgba(21, 101, 192, 0); } +} + +.transfer-progress-fab-icon { + display: flex; + line-height: 0; + opacity: 0.9; +} + +.transfer-progress-fab-label { + font-size: 10px; + font-weight: 600; + letter-spacing: 0.02em; +} + +.transfer-progress-drawer-root { + position: fixed; + inset: 0; + z-index: 10040; + pointer-events: none; +} + +.transfer-progress-drawer-root.is-open { + pointer-events: auto; +} + +.transfer-progress-drawer-backdrop { + position: absolute; + inset: 0; + background: rgba(0, 0, 0, 0.35); + opacity: 0; + transition: opacity 0.25s ease; +} + +.transfer-progress-drawer-root.is-open .transfer-progress-drawer-backdrop { + opacity: 1; +} + +.transfer-progress-drawer { + position: absolute; + top: 0; + right: 0; + bottom: 0; + width: min(440px, 92vw); + max-width: 100%; + background: var(--panel); + border-left: 1px solid var(--border); + box-shadow: -12px 0 40px rgba(0, 0, 0, 0.12); display: flex; flex-direction: column; + transform: translateX(100%); + transition: transform 0.28s cubic-bezier(0.22, 1, 0.36, 1); overflow: hidden; } -.transfer-widget-header { +.transfer-progress-drawer-root.is-open 
.transfer-progress-drawer { + transform: translateX(0); +} + +.transfer-progress-drawer-header { display: flex; - align-items: center; + align-items: flex-start; justify-content: space-between; - padding: 10px 14px; + gap: 10px; + padding: 16px 16px 12px; border-bottom: 1px solid var(--border); - font-size: 13px; - font-weight: 500; background: var(--panel-muted); + flex-shrink: 0; } -.transfer-widget-header .icon-btn { - width: 22px; - height: 22px; - font-size: 13px; +.transfer-progress-drawer-title { + margin: 0; + font-size: 15px; + font-weight: 600; + line-height: 1.3; +} + +.transfer-progress-drawer-actions { + display: flex; + align-items: center; + gap: 6px; + flex-shrink: 0; } -.transfer-widget-status { - padding: 8px 14px; +.transfer-progress-drawer-meta { + padding: 12px 16px; font-size: 12px; + line-height: 1.55; color: var(--text); + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.transfer-progress-drawer-meta strong { + margin-right: 6px; + color: var(--muted); + font-weight: 500; +} + +.transfer-progress-drawer-node { + font-weight: 400; + opacity: 0.88; +} + +.transfer-progress-drawer-badge { + display: inline-block; + margin-left: 8px; + padding: 2px 8px; + border-radius: 6px; + font-size: 11px; + font-weight: 500; +} + +.transfer-progress-drawer-badge--images { + background: rgba(184, 134, 11, 0.15); + color: #b8860b; +} + +.transfer-progress-drawer-badge--qa { + background: rgba(21, 101, 192, 0.12); + color: #1565c0; +} + +.transfer-progress-drawer-nodes { + padding: 10px 16px; + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.transfer-progress-drawer-nodes-label { + font-size: 11px; + font-weight: 600; + color: var(--muted); + margin-bottom: 6px; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.transfer-progress-drawer-nodes-list { + display: flex; + flex-wrap: wrap; + gap: 6px; +} + +.transfer-progress-drawer-node-chip { + font-size: 10px; + padding: 3px 8px; + border-radius: 6px; + background: 
rgba(120, 98, 83, 0.1); + font-family: 'JetBrains Mono', ui-monospace, monospace; } -.transfer-widget-error { - padding: 4px 14px 8px; +.transfer-progress-drawer-error { + margin: 0 16px 8px; + padding: 10px 12px; font-size: 12px; - color: #d32f2f; + color: #c62828; + background: rgba(198, 40, 40, 0.08); + border-radius: 8px; + flex-shrink: 0; +} + +.transfer-progress-drawer-log-wrap { + flex: 1; + display: flex; + flex-direction: column; + min-height: 0; + padding: 12px 16px 20px; } -.transfer-widget-log { - padding: 8px 14px 12px; +.transfer-progress-drawer-log-title { font-size: 11px; - font-family: 'JetBrains Mono', monospace; - background: rgba(120, 98, 83, 0.06); - max-height: 220px; + font-weight: 600; + color: var(--muted); + margin-bottom: 8px; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.transfer-progress-drawer-log { + flex: 1; + min-height: 120px; overflow-y: auto; - line-height: 1.5; + font-size: 11px; + font-family: 'JetBrains Mono', ui-monospace, monospace; + line-height: 1.55; + padding: 12px; + border-radius: 10px; + background: rgba(120, 98, 83, 0.06); + border: 1px solid var(--border); } -.transfer-widget-log > div { - margin-bottom: 2px; +.transfer-progress-drawer-log-empty { + color: var(--muted); + font-style: italic; +} + +.transfer-progress-drawer-log-line { + margin-bottom: 4px; + word-break: break-word; +} + +.transfer-progress-drawer-log-line--warn { + color: #b8860b; +} + +.transfer-progress-drawer-log-line--error { + color: #c62828; +} + +.transfer-progress-drawer-log-node { + margin-right: 6px; + opacity: 0.75; + font-weight: 500; } diff --git a/apps/frontend/src/app/ProjectPage.tsx b/apps/frontend/src/app/ProjectPage.tsx index acb88ab..2681783 100644 --- a/apps/frontend/src/app/ProjectPage.tsx +++ b/apps/frontend/src/app/ProjectPage.tsx @@ -1,4 +1,5 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { createPortal } from 'react-dom'; import { useNavigate } from 'react-router-dom'; 
import { useTranslation } from 'react-i18next'; import { @@ -14,9 +15,11 @@ import { trashProject, updateProjectTags, permanentDeleteProject, - uploadTemplate + uploadTemplate, + transferStatus, + transferStream, } from '../api/client'; -import type { ProjectMeta, TemplateMeta, TemplateCategory } from '../api/client'; +import type { ProjectMeta, TemplateMeta, TemplateCategory, TransferProgressEntry, TransferStepResult } from '../api/client'; import TransferPanel from './TransferPanel'; type ViewFilter = 'all' | 'mine' | 'archived' | 'trash'; @@ -119,8 +122,93 @@ export default function ProjectPage() { const [activeJob, setActiveJob] = useState<{ jobId: string; status: string; progressLog: string[]; error?: string; sourceName?: string; + phase?: string; + currentNode?: string; + completedNodes?: string[]; + pendingQA?: unknown; + progressLogEntries?: TransferProgressEntry[]; } | null>(null); - const [jobWidgetOpen, setJobWidgetOpen] = useState(true); + /** 右侧滑出进度面板 */ + const [transferProgressDrawerOpen, setTransferProgressDrawerOpen] = useState(false); + + // SSE ref for progress streaming recovery + const recoverySSERef = useRef(null); + + // Recover active transfer job from sessionStorage on mount + useEffect(() => { + try { + const saved = sessionStorage.getItem('openprism-active-job'); + if (!saved) return; + const { jobId: savedJobId } = JSON.parse(saved); + if (!savedJobId) return; + + transferStatus(savedJobId).then((res: TransferStepResult) => { + if (!res || !res.status) { + sessionStorage.removeItem('openprism-active-job'); + return; + } + + // Restore the floating progress window + setActiveJob({ + jobId: savedJobId, + status: res.status, + progressLog: res.progressLog || [], + error: res.error, + phase: res.phase, + currentNode: res.currentNode, + completedNodes: res.completedNodes, + progressLogEntries: res.progressLogEntries, + }); + + const isTerminal = ['success', 'failed', 'error'].includes(res.status); + if (!isTerminal) { + // Job still active — 
connect SSE for real-time updates to the floating window + if (recoverySSERef.current) recoverySSERef.current.close(); + recoverySSERef.current = transferStream( + savedJobId, + (data) => { + setActiveJob((prev) => ({ + ...prev, + jobId: savedJobId, + status: data.status, + progressLog: data.progressLog || [], + error: data.error, + phase: data.phase, + currentNode: data.currentNode, + completedNodes: data.completedNodes, + progressLogEntries: data.progressLogEntries, + })); + }, + (data) => { + setActiveJob((prev) => ({ + ...prev, + jobId: savedJobId, + status: data.status, + progressLog: data.progressLog || [], + error: data.error, + phase: data.phase, + currentNode: data.currentNode, + completedNodes: data.completedNodes, + progressLogEntries: data.progressLogEntries, + })); + recoverySSERef.current = null; + if (['success', 'failed'].includes(data.status)) { + sessionStorage.removeItem('openprism-active-job'); + loadProjects(); + } + }, + ); + } + }).catch(() => { + sessionStorage.removeItem('openprism-active-job'); + }); + } catch { /* ignore */ } + + return () => { + if (recoverySSERef.current) { recoverySSERef.current.close(); recoverySSERef.current = null; } + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); // Template upload state const templateZipRef = useRef(null); @@ -902,7 +990,6 @@ export default function ProjectPage() { projectId={transferSource.id} onJobUpdate={(job) => { setActiveJob({ ...job, sourceName: transferSource.name }); - setJobWidgetOpen(true); if (job.status === 'success') loadProjects(); }} /> @@ -911,33 +998,119 @@ export default function ProjectPage() { )} - {/* Floating transfer progress widget */} - {activeJob && !transferOpen && jobWidgetOpen && ( -
-
- {t('模板转换')} — {activeJob.sourceName || ''} -
- - -
-
-
- {t('状态')}: {activeJob.status} + {/* 转换进度:悬浮按钮 + 右侧抽屉(Portal 到 body,避免被 overflow 裁剪;弹窗打开时也显示) */} + {typeof document !== 'undefined' && activeJob && createPortal( + <> + + +
+
setTransferProgressDrawerOpen(false)} + /> +
- {activeJob.error && ( -
{activeJob.error}
- )} - {activeJob.progressLog.length > 0 && ( -
- {activeJob.progressLog.map((line, i) => ( -
{line}
- ))} -
- )} -
+ , + document.body, )} {/* Settings Modal */} diff --git a/apps/frontend/src/app/TransferPanel.tsx b/apps/frontend/src/app/TransferPanel.tsx index 9144590..ff17b63 100644 --- a/apps/frontend/src/app/TransferPanel.tsx +++ b/apps/frontend/src/app/TransferPanel.tsx @@ -4,6 +4,9 @@ import { transferStart, transferStep, transferSubmitImages, + transferSubmitConfirm, + transferStream, + transferStatus, mineruTransferStart, mineruTransferUploadPdf, listTemplates, @@ -13,11 +16,25 @@ import type { LLMConfig, TemplateMeta, FileItem, + TransferQaItem, + TransferProgressEntry, + TransferStepResult, + LiveProgress, } from '../api/client'; interface TransferPanelProps { projectId: string; - onJobUpdate?: (job: { jobId: string; status: string; progressLog: string[]; error?: string }) => void; + onJobUpdate?: (job: { + jobId: string; + status: string; + progressLog: string[]; + error?: string; + phase?: string; + currentNode?: string; + completedNodes?: string[]; + pendingQA?: TransferQaItem[] | null; + progressLogEntries?: TransferProgressEntry[]; + }) => void; } type TransferMode = 'legacy' | 'mineru'; @@ -25,6 +42,49 @@ type MineruSource = 'project' | 'upload'; const ENGINES = ['pdflatex', 'xelatex', 'lualatex', 'latexmk'] as const; +function formatTransferStepFailure(err: unknown): string { + const e = err as Error & { + failedNode?: string; + failedPhase?: string; + failedDetail?: string; + failedDebugPath?: string; + failedInputChars?: number; + }; + const msg = e?.message || String(err || 'Step failed'); + const bits: string[] = []; + if (e.failedNode) bits.push(`节点 ${e.failedNode}`); + if (e.failedPhase) bits.push(`阶段 ${e.failedPhase}`); + if (e.failedDetail) bits.push(`原因 ${e.failedDetail}`); + if (typeof e.failedInputChars === 'number') bits.push(`输入 ${e.failedInputChars} 字符`); + if (e.failedDebugPath) bits.push(`调试文件 ${e.failedDebugPath}`); + return bits.length ? 
`${msg}\n${bits.join(' · ')}` : msg; +} + +/** NeurIPS 图阶段时间线(与后端 currentPhase 对齐) */ +const NEURIPS_PHASE_STEPS: { id: string; label: string }[] = [ + { id: 'intake', label: '摄入' }, + { id: 'source_analysis', label: '源稿/模板分析' }, + { id: 'migration_plan', label: '迁移计划' }, + { id: 'qa_plan', label: '计划确认 QA' }, + { id: 'preamble', label: '导言' }, + { id: 'body', label: '正文' }, + { id: 'figures', label: '图表' }, + { id: 'assets', label: '资源复制' }, + { id: 'bibliography', label: '参考文献' }, + { id: 'blind_qa', label: '双盲 QA' }, + { id: 'blind', label: '匿名处理' }, + { id: 'policy', label: '政策核对' }, + { id: 'finalize', label: '完成(本地编译)' }, +]; + +/** NeurIPS Agent 模式时间线 */ +const NEURIPS_AGENT_STEPS: { id: string; label: string }[] = [ + { id: 'agent_planning', label: '🧠 规划' }, + { id: 'agent_generating', label: '⚡ 执行' }, + { id: 'agent_reviewing', label: '🔍 审查' }, + { id: 'finalize', label: '✅ 完成' }, +]; + export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelProps) { const { t } = useTranslation(); @@ -42,6 +102,9 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP const [targetTemplateId, setTargetTemplateId] = useState(''); const [engine, setEngine] = useState('pdflatex'); const [layoutCheck, setLayoutCheck] = useState(false); + const [neuripsDoubleBlind, setNeuripsDoubleBlind] = useState(true); + const [neuripsPreprint, setNeuripsPreprint] = useState(false); + const [neuripsOutputNotes, setNeuripsOutputNotes] = useState(''); // LLM config — read from shared localStorage (set via ProjectPage / EditorPage settings) const SETTINGS_KEY = 'openprism-settings-v1'; @@ -89,8 +152,24 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP const [jobId, setJobId] = useState(''); const [status, setStatus] = useState('idle'); const [progressLog, setProgressLog] = useState([]); + const [progressLogEntries, setProgressLogEntries] = useState([]); + const [currentNode, setCurrentNode] = useState(''); + 
const [currentPhase, setCurrentPhase] = useState(''); + const [agentPhase, setAgentPhase] = useState(null); + const [currentIteration, setCurrentIteration] = useState(null); + const [completedNodes, setCompletedNodes] = useState([]); + const [pendingQA, setPendingQA] = useState(null); + const [qaAnswers, setQaAnswers] = useState>({}); + const [qaSubmitting, setQaSubmitting] = useState(false); + const [logFilterNode, setLogFilterNode] = useState(''); const [error, setError] = useState(''); const [running, setRunning] = useState(false); + const [transferGraphKind, setTransferGraphKind] = useState(''); + const [liveProgress, setLiveProgress] = useState(null); + + // SSE stream ref + const sseRef = useRef(null); + const JOB_STORAGE_KEY = 'openprism-active-job'; // Template list for target selection const [templates, setTemplates] = useState([]); @@ -161,6 +240,12 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP const targetMainFile = selectedTemplate?.mainFile || 'main.tex'; setError(''); setProgressLog([]); + setProgressLogEntries([]); + setCurrentNode(''); + setCurrentPhase(''); + setCompletedNodes([]); + setPendingQA(null); + setQaAnswers({}); setRunning(true); setStatus('starting'); @@ -186,6 +271,17 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP mineruConfig, }); setJobId(res.jobId); + try { sessionStorage.setItem(JOB_STORAGE_KEY, JSON.stringify({ jobId: res.jobId })); } catch { /* ignore */ } + onJobUpdate?.({ + jobId: res.jobId, + status: 'starting', + progressLog: [], + progressLogEntries: [], + currentNode: '', + phase: '', + completedNodes: [], + pendingQA: null, + }); // If uploading PDF, upload it before running graph if (mineruSource === 'upload' && uploadedPdf) { @@ -206,8 +302,27 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP engine, layoutCheck, llmConfig: buildLlmConfig(), + ...(targetTemplateId === 'neurips' + ? 
{ + venue: 'neurips', + doubleBlind: neuripsDoubleBlind, + preprint: neuripsPreprint, + outputNotes: neuripsOutputNotes, + } + : {}), }); setJobId(res.jobId); + try { sessionStorage.setItem(JOB_STORAGE_KEY, JSON.stringify({ jobId: res.jobId })); } catch { /* ignore */ } + onJobUpdate?.({ + jobId: res.jobId, + status: 'starting', + progressLog: [], + progressLogEntries: [], + currentNode: '', + phase: '', + completedNodes: [], + pendingQA: null, + }); setStatus('started'); await runGraph(res.jobId); } @@ -216,32 +331,174 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP setRunning(false); setStatus('error'); } - }, [transferMode, mineruSource, uploadedPdf, targetTemplateId, sourceMainFile, projectId, engine, layoutCheck, selectedTemplate, mineruApiBase, mineruToken]); + }, [transferMode, mineruSource, uploadedPdf, targetTemplateId, sourceMainFile, projectId, engine, layoutCheck, selectedTemplate, mineruApiBase, mineruToken, neuripsDoubleBlind, neuripsPreprint, neuripsOutputNotes, onJobUpdate]); + + const pushJobUpdate = useCallback((jid: string, res: TransferStepResult) => { + setProgressLog(res.progressLog || []); + setProgressLogEntries(res.progressLogEntries || []); + setCurrentNode(res.currentNode || ''); + setCurrentPhase(res.phase || ''); + setAgentPhase(res.agentPhase ?? null); + setCurrentIteration(res.currentIteration ?? null); + setCompletedNodes(res.completedNodes || []); + setPendingQA(res.pendingQA ?? null); + setStatus(res.status); + if (res.transferGraphKind) setTransferGraphKind(res.transferGraphKind); + setLiveProgress(res.liveProgress ?? null); + onJobUpdate?.({ + jobId: jid, + status: res.status, + progressLog: res.progressLog || [], + error: res.error, + phase: res.phase, + currentNode: res.currentNode, + completedNodes: res.completedNodes, + pendingQA: res.pendingQA ?? 
null, + progressLogEntries: res.progressLogEntries, + }); + }, [onJobUpdate]); + /** Connect SSE stream for real-time progress updates */ + const connectSSE = useCallback((jid: string) => { + // Close any existing SSE connection + if (sseRef.current) { sseRef.current.close(); sseRef.current = null; } + + const es = transferStream( + jid, + // onProgress + (data) => { + pushJobUpdate(jid, data); + // Handle terminal-like states from SSE + if (data.status === 'waiting_images' || data.status === 'waiting_confirm') { + setRunning(false); + } + }, + // onDone + (data) => { + pushJobUpdate(jid, data); + setRunning(false); + sseRef.current = null; + if (data.status === 'success' || data.status === 'failed') { + try { sessionStorage.removeItem(JOB_STORAGE_KEY); } catch { /* ignore */ } + } + }, + // onError + () => { + // SSE reconnects automatically; only log + }, + ); + sseRef.current = es; + }, [pushJobUpdate]); + + /** Drive the graph forward step by step, with SSE providing real-time updates */ const runGraph = useCallback(async (jid: string) => { + // Connect SSE for real-time progress display + connectSSE(jid); + // eslint-disable-next-line no-constant-condition while (true) { try { const res = await transferStep(jid); - setProgressLog(res.progressLog || []); - setStatus(res.status); - onJobUpdate?.({ jobId: jid, status: res.status, progressLog: res.progressLog || [], error: res.error }); + pushJobUpdate(jid, res); if (res.status === 'waiting_images') { setRunning(false); return; } - if (res.status === 'success' || res.status === 'failed') { setRunning(false); return; } - if (res.error) { setError(res.error); setRunning(false); return; } + if (res.status === 'waiting_confirm') { + setRunning(false); + return; + } + if (res.status === 'success' || res.status === 'failed') { + setRunning(false); + try { sessionStorage.removeItem(JOB_STORAGE_KEY); } catch { /* ignore */ } + return; + } + if (res.error) { + const bits: string[] = []; + if (res.failedNode) bits.push(`节点 
${res.failedNode}`); + if (res.failedPhase) bits.push(`阶段 ${res.failedPhase}`); + if (res.failedDetail) bits.push(`原因 ${res.failedDetail}`); + if (typeof res.failedInputChars === 'number') bits.push(`输入 ${res.failedInputChars} 字符`); + if (res.failedDebugPath) bits.push(`调试文件 ${res.failedDebugPath}`); + setError(bits.length ? `${res.error}\n${bits.join(' · ')}` : res.error); + setRunning(false); + return; + } - // Brief pause before next poll - await new Promise(r => setTimeout(r, 1000)); - } catch (err: any) { - setError(err.message || 'Step failed'); + await new Promise(r => setTimeout(r, 400)); + } catch (err: unknown) { + const display = formatTransferStepFailure(err); + setError(display); setRunning(false); setStatus('error'); - onJobUpdate?.({ jobId: jid, status: 'error', progressLog: [], error: err.message }); + onJobUpdate?.({ + jobId: jid, + status: 'error', + progressLog: [], + error: display, + }); return; } } - }, [onJobUpdate]); + }, [onJobUpdate, pushJobUpdate, connectSSE]); + + // Cleanup SSE on unmount + useEffect(() => { + return () => { + if (sseRef.current) { sseRef.current.close(); sseRef.current = null; } + }; + }, []); + + // Recover active job from sessionStorage on mount + useEffect(() => { + try { + const saved = sessionStorage.getItem(JOB_STORAGE_KEY); + if (!saved) return; + const { jobId: savedJobId } = JSON.parse(saved); + if (!savedJobId) return; + + // Try to recover state from backend + transferStatus(savedJobId).then((res) => { + if (!res || res.status === 'not_found') { + sessionStorage.removeItem(JOB_STORAGE_KEY); + return; + } + setJobId(savedJobId); + pushJobUpdate(savedJobId, res); + + const isTerminal = ['success', 'failed', 'error'].includes(res.status); + if (!isTerminal) { + // Job still running — reconnect SSE and resume driving + setRunning(true); + connectSSE(savedJobId); + // If waiting for user input, don't drive + if (res.status !== 'waiting_images' && res.status !== 'waiting_confirm') { + runGraph(savedJobId); + } 
else { + setRunning(false); + } + } + }).catch(() => { + sessionStorage.removeItem(JOB_STORAGE_KEY); + }); + } catch { /* ignore */ } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const handleSubmitQa = useCallback(async () => { + if (!jobId || !pendingQA?.length) return; + setQaSubmitting(true); + setError(''); + try { + await transferSubmitConfirm(jobId, qaAnswers); + setPendingQA(null); + setRunning(true); + setStatus('running'); + await runGraph(jobId); + } catch (err: any) { + setError(err.message || 'Confirm submit failed'); + } finally { + setQaSubmitting(false); + } + }, [jobId, pendingQA, qaAnswers, runGraph]); const chevronSvg = (open: boolean) => ( @@ -437,6 +694,28 @@ export default function TransferPanel({ projectId, onJobUpdate }: TransferPanelP {t('启用排版检查 (VLM)')} + {transferMode === 'legacy' && targetTemplateId === 'neurips' && ( +
+
NeurIPS 投稿选项
+ + + +