From 80a2797aa5130bdd7c3efbf2d08990b9c2535429 Mon Sep 17 00:00:00 2001 From: jmanhype Date: Mon, 16 Mar 2026 13:26:00 -0500 Subject: [PATCH 1/6] Add DenarioEx port and Elixir CI --- .github/workflows/elixir-denario-ex.yml | 46 ++ .gitignore | 2 + elixir/denario_ex/.formatter.exs | 4 + elixir/denario_ex/.gitignore | 23 + elixir/denario_ex/README.md | 100 ++++ elixir/denario_ex/lib/denario_ex.ex | 445 ++++++++++++++ elixir/denario_ex/lib/denario_ex/ai.ex | 33 ++ .../denario_ex/lib/denario_ex/application.ex | 20 + .../lib/denario_ex/cmbagent_loop.ex | 205 +++++++ .../lib/denario_ex/code_executor.ex | 7 + .../denario_ex/lib/denario_ex/key_manager.ex | 54 ++ .../lib/denario_ex/literature_workflow.ex | 467 +++++++++++++++ elixir/denario_ex/lib/denario_ex/llm.ex | 100 ++++ .../denario_ex/lib/denario_ex/llm_client.ex | 8 + elixir/denario_ex/lib/denario_ex/open_alex.ex | 121 ++++ .../lib/denario_ex/paper_workflow.ex | 514 +++++++++++++++++ .../lib/denario_ex/prompt_templates.ex | 81 +++ .../lib/denario_ex/python_executor.ex | 67 +++ .../lib/denario_ex/req_llm_client.ex | 115 ++++ elixir/denario_ex/lib/denario_ex/research.ex | 30 + .../lib/denario_ex/results_workflow.ex | 300 ++++++++++ .../lib/denario_ex/semantic_scholar.ex | 48 ++ .../lib/denario_ex/semantic_scholar_client.ex | 9 + elixir/denario_ex/lib/denario_ex/text.ex | 77 +++ .../lib/denario_ex/workflow_prompts.ex | 451 +++++++++++++++ elixir/denario_ex/mix.exs | 32 + elixir/denario_ex/mix.lock | 27 + elixir/denario_ex/test/denario_ex_test.exs | 109 ++++ .../test/denario_ex_workflows_test.exs | 545 ++++++++++++++++++ elixir/denario_ex/test/key_manager_test.exs | 54 ++ .../literature_workflow_resilience_test.exs | 59 ++ .../denario_ex/test/req_llm_client_test.exs | 81 +++ elixir/denario_ex/test/test_helper.exs | 1 + elixir/denario_ex/test/text_test.exs | 15 + 34 files changed, 4250 insertions(+) create mode 100644 .github/workflows/elixir-denario-ex.yml create mode 100644 elixir/denario_ex/.formatter.exs create mode 
100644 elixir/denario_ex/.gitignore create mode 100644 elixir/denario_ex/README.md create mode 100644 elixir/denario_ex/lib/denario_ex.ex create mode 100644 elixir/denario_ex/lib/denario_ex/ai.ex create mode 100644 elixir/denario_ex/lib/denario_ex/application.ex create mode 100644 elixir/denario_ex/lib/denario_ex/cmbagent_loop.ex create mode 100644 elixir/denario_ex/lib/denario_ex/code_executor.ex create mode 100644 elixir/denario_ex/lib/denario_ex/key_manager.ex create mode 100644 elixir/denario_ex/lib/denario_ex/literature_workflow.ex create mode 100644 elixir/denario_ex/lib/denario_ex/llm.ex create mode 100644 elixir/denario_ex/lib/denario_ex/llm_client.ex create mode 100644 elixir/denario_ex/lib/denario_ex/open_alex.ex create mode 100644 elixir/denario_ex/lib/denario_ex/paper_workflow.ex create mode 100644 elixir/denario_ex/lib/denario_ex/prompt_templates.ex create mode 100644 elixir/denario_ex/lib/denario_ex/python_executor.ex create mode 100644 elixir/denario_ex/lib/denario_ex/req_llm_client.ex create mode 100644 elixir/denario_ex/lib/denario_ex/research.ex create mode 100644 elixir/denario_ex/lib/denario_ex/results_workflow.ex create mode 100644 elixir/denario_ex/lib/denario_ex/semantic_scholar.ex create mode 100644 elixir/denario_ex/lib/denario_ex/semantic_scholar_client.ex create mode 100644 elixir/denario_ex/lib/denario_ex/text.ex create mode 100644 elixir/denario_ex/lib/denario_ex/workflow_prompts.ex create mode 100644 elixir/denario_ex/mix.exs create mode 100644 elixir/denario_ex/mix.lock create mode 100644 elixir/denario_ex/test/denario_ex_test.exs create mode 100644 elixir/denario_ex/test/denario_ex_workflows_test.exs create mode 100644 elixir/denario_ex/test/key_manager_test.exs create mode 100644 elixir/denario_ex/test/literature_workflow_resilience_test.exs create mode 100644 elixir/denario_ex/test/req_llm_client_test.exs create mode 100644 elixir/denario_ex/test/test_helper.exs create mode 100644 elixir/denario_ex/test/text_test.exs diff --git 
a/.github/workflows/elixir-denario-ex.yml b/.github/workflows/elixir-denario-ex.yml new file mode 100644 index 00000000..0d77aad7 --- /dev/null +++ b/.github/workflows/elixir-denario-ex.yml @@ -0,0 +1,46 @@ +name: Elixir DenarioEx CI + +on: + push: + paths: + - ".github/workflows/elixir-denario-ex.yml" + - "elixir/denario_ex/**" + pull_request: + paths: + - ".github/workflows/elixir-denario-ex.yml" + - "elixir/denario_ex/**" + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: elixir/denario_ex + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Erlang and Elixir + uses: erlef/setup-beam@v1 + with: + elixir-version: "1.18.4" + otp-version: "28.0" + + - name: Restore Mix cache + uses: actions/cache@v4 + with: + path: | + ~/.mix + ~/.hex + elixir/denario_ex/deps + elixir/denario_ex/_build + key: ${{ runner.os }}-elixir-denario-ex-${{ hashFiles('elixir/denario_ex/mix.lock') }} + restore-keys: | + ${{ runner.os }}-elixir-denario-ex- + + - name: Install dependencies + run: mix deps.get + + - name: Run tests + run: mix test diff --git a/.gitignore b/.gitignore index 853e9e80..85ad384d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ eggs/ .eggs/ lib/ lib64/ +!elixir/denario_ex/lib/ +!elixir/denario_ex/lib/** parts/ sdist/ var/ diff --git a/elixir/denario_ex/.formatter.exs b/elixir/denario_ex/.formatter.exs new file mode 100644 index 00000000..d2cda26e --- /dev/null +++ b/elixir/denario_ex/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/elixir/denario_ex/.gitignore b/elixir/denario_ex/.gitignore new file mode 100644 index 00000000..288c05b6 --- /dev/null +++ b/elixir/denario_ex/.gitignore @@ -0,0 +1,23 @@ +# The directory Mix will write compiled artifacts to. +/_build/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# The directory Mix downloads your dependencies sources to. 
+/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). +denario_ex-*.tar + +# Temporary files, for example, from tests. +/tmp/ diff --git a/elixir/denario_ex/README.md b/elixir/denario_ex/README.md new file mode 100644 index 00000000..b6e34028 --- /dev/null +++ b/elixir/denario_ex/README.md @@ -0,0 +1,100 @@ +# DenarioEx + +Elixir port of the Denario workflow. + +Implemented: + +- project/session lifecycle +- `input_files` persistence +- provider key loading +- `LLMDB` model resolution +- `ReqLLM`-backed fast idea and method generation +- `cmbagent`-style planning/control loop for idea and method generation +- `get_results/2` with code generation, execution, retries, and plot harvesting +- literature checking via Semantic Scholar with OpenAlex fallback +- paper generation to LaTeX with optional bibliography and compile step + +## Setup + +```bash +cd elixir/denario_ex +mix deps.get +mix test +``` + +## Temporary `req_llm` pin + +`DenarioEx` is temporarily pinned to a Git commit of `req_llm` instead of the Hex release. + +- Current pin: `jmanhype/req_llm@ee00b4553cd6823b48c1045b825565855a77a93b` +- Upstream PR: + +Why this exists: + +- `req_llm` `1.7.1` still injects `:max_tokens` for some OpenAI reasoning/object requests +- that triggers noisy `Renamed :max_tokens to :max_completion_tokens` warnings +- the pinned commit removes that library-side warning path + +When the upstream PR is merged and a new Hex release includes the fix, switch +`mix.exs` back to the published `{:req_llm, "~> ..."}` dependency and refresh `mix.lock`. 
+ +## Credentials + +The Elixir port reads these environment variables: + +- OpenAI: `OPENAI_API_KEY` +- Gemini: `GOOGLE_API_KEY` or `GEMINI_API_KEY` +- Anthropic: `ANTHROPIC_API_KEY` +- Perplexity: `PERPLEXITY_API_KEY` +- Semantic Scholar: `SEMANTIC_SCHOLAR_KEY`, `SEMANTIC_SCHOLAR_API_KEY`, or `S2_API_KEY` + +For citation-backed literature checking, export a Semantic Scholar key before running: + +```bash +export OPENAI_API_KEY=... +export SEMANTIC_SCHOLAR_API_KEY=... +``` + +Without a Semantic Scholar key, `check_idea/2` first falls back to OpenAlex. It only degrades to +`Idea literature search unavailable` if both providers fail. + +## Minimal usage + +```elixir +alias DenarioEx + +{:ok, denario} = + DenarioEx.new(project_dir: "/tmp/denario_elixir_demo", clear_project_dir: true) + +{:ok, denario} = + DenarioEx.set_data_description( + denario, + """ + Analyze a small hypothetical lab sensor dataset and propose one simple paper idea. + """ + ) + +{:ok, denario} = DenarioEx.get_idea_fast(denario, llm: "openai:gpt-4.1-mini") +{:ok, denario} = DenarioEx.get_method_fast(denario, llm: "openai:gpt-4.1-mini") +``` + +## Full workflow + +```elixir +alias DenarioEx + +{:ok, denario} = + DenarioEx.new(project_dir: "/tmp/denario_ex_full", clear_project_dir: true) + +{:ok, denario} = + DenarioEx.set_data_description( + denario, + "Generate a tiny synthetic anomaly-score dataset, summarize it, and write a short paper." 
+ ) + +{:ok, denario} = DenarioEx.get_idea(denario, mode: :cmbagent, planner_model: "openai:gpt-4.1-mini") +{:ok, denario} = DenarioEx.get_method(denario, mode: :cmbagent, planner_model: "openai:gpt-4.1-mini") +{:ok, denario} = DenarioEx.get_results(denario, planner_model: "openai:gpt-4.1-mini", engineer_model: "openai:gpt-4.1-mini") +{:ok, denario} = DenarioEx.check_idea(denario, llm: "openai:gpt-4.1-mini") +{:ok, denario} = DenarioEx.get_paper(denario, llm: "openai:gpt-4.1-mini", compile: false) +``` diff --git a/elixir/denario_ex/lib/denario_ex.ex b/elixir/denario_ex/lib/denario_ex.ex new file mode 100644 index 00000000..6dba3a20 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex.ex @@ -0,0 +1,445 @@ +defmodule DenarioEx do + @moduledoc """ + Initial Elixir port of the core Denario session model. + + This module mirrors the Python project's project-directory lifecycle and the + ReqLLM/LLMDB-backed "fast" idea/method generation path. + """ + + alias DenarioEx.{ + AI, + CMBAgentLoop, + KeyManager, + LLM, + LiteratureWorkflow, + PaperWorkflow, + PromptTemplates, + ReqLLMClient, + Research, + ResultsWorkflow, + Text + } + + @default_project_name "project" + @input_files "input_files" + @plots_folder "plots" + @paper_folder "paper" + @description_file "data_description.md" + @idea_file "idea.md" + @method_file "methods.md" + @results_file "results.md" + @literature_file "literature.md" + @paper_tex_file "paper_v4_final.tex" + @paper_pdf_file "paper_v4_final.pdf" + + @enforce_keys [:project_dir, :input_files_dir, :plots_dir, :keys, :research] + defstruct [:project_dir, :input_files_dir, :plots_dir, :keys, :research] + + @type t :: %__MODULE__{ + project_dir: String.t(), + input_files_dir: String.t(), + plots_dir: String.t(), + keys: KeyManager.t(), + research: Research.t() + } + + @type option :: + {:project_dir, String.t()} + | {:clear_project_dir, boolean()} + | {:keys, KeyManager.t()} + | {:research, Research.t()} + + @type generation_option :: + {:llm, 
String.t() | LLM.t()} + | {:iterations, pos_integer()} + | {:client, module()} + + @spec new([option()]) :: {:ok, t()} + def new(opts \\ []) do + project_dir = + opts + |> Keyword.get(:project_dir, Path.join(File.cwd!(), @default_project_name)) + |> Path.expand() + + clear_project_dir? = Keyword.get(opts, :clear_project_dir, false) + + if clear_project_dir? and File.exists?(project_dir) do + File.rm_rf!(project_dir) + end + + input_files_dir = Path.join(project_dir, @input_files) + plots_dir = Path.join(input_files_dir, @plots_folder) + + File.mkdir_p!(plots_dir) + + session = %__MODULE__{ + project_dir: project_dir, + input_files_dir: input_files_dir, + plots_dir: plots_dir, + keys: Keyword.get(opts, :keys, KeyManager.from_env()), + research: Keyword.get(opts, :research, %Research{}) + } + + {:ok, load_existing_content(session)} + end + + @spec set_data_description(t(), String.t()) :: {:ok, t()} + def set_data_description(%__MODULE__{} = session, data_description) do + write_field(session, @description_file, data_description, :data_description) + end + + @spec set_idea(t(), String.t()) :: {:ok, t()} + def set_idea(%__MODULE__{} = session, idea) do + write_field(session, @idea_file, idea, :idea) + end + + @spec set_method(t(), String.t()) :: {:ok, t()} + def set_method(%__MODULE__{} = session, method) do + write_field(session, @method_file, method, :methodology) + end + + @spec set_results(t(), String.t()) :: {:ok, t()} + def set_results(%__MODULE__{} = session, results) do + write_field(session, @results_file, results, :results) + end + + @spec set_literature(t(), String.t()) :: {:ok, t()} + def set_literature(%__MODULE__{} = session, literature) do + write_field(session, @literature_file, literature, :literature) + end + + @spec set_plots(t(), [String.t()] | nil) :: {:ok, t()} + def set_plots(%__MODULE__{} = session, plots \\ nil) do + plot_paths = + case plots do + nil -> Path.wildcard(Path.join(session.plots_dir, "*.png")) + values -> values + end + + 
copied_paths = + Enum.map(plot_paths, fn plot_path -> + destination = Path.join(session.plots_dir, Path.basename(plot_path)) + File.cp!(plot_path, destination) + destination + end) + + {:ok, %{session | research: %{session.research | plot_paths: copied_paths}}} + end + + @spec show_data_description(t()) :: String.t() + def show_data_description(%__MODULE__{} = session), do: session.research.data_description + + @spec show_idea(t()) :: String.t() + def show_idea(%__MODULE__{} = session), do: session.research.idea + + @spec show_method(t()) :: String.t() + def show_method(%__MODULE__{} = session), do: session.research.methodology + + @spec show_results(t()) :: String.t() + def show_results(%__MODULE__{} = session), do: session.research.results + + @spec show_literature(t()) :: String.t() + def show_literature(%__MODULE__{} = session), do: session.research.literature + + @spec get_idea(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_idea(%__MODULE__{} = session, opts \\ []) do + case Keyword.get(opts, :mode, "fast") do + mode when mode in ["fast", :fast] -> get_idea_fast(session, opts) + mode when mode in ["cmbagent", :cmbagent] -> get_idea_cmbagent(session, opts) + other -> {:error, {:invalid_mode, other}} + end + end + + @spec get_method(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_method(%__MODULE__{} = session, opts \\ []) do + case Keyword.get(opts, :mode, "fast") do + mode when mode in ["fast", :fast] -> get_method_fast(session, opts) + mode when mode in ["cmbagent", :cmbagent] -> get_method_cmbagent(session, opts) + other -> {:error, {:invalid_mode, other}} + end + end + + @spec get_idea_fast(t(), [generation_option()]) :: {:ok, t()} | {:error, term()} + def get_idea_fast(%__MODULE__{} = session, opts \\ []) do + iterations = Keyword.get(opts, :iterations, 4) + client = Keyword.get(opts, :client, ReqLLMClient) + + with {:ok, llm} <- LLM.parse(Keyword.get(opts, :llm, "gpt-4.1-mini")), + {:ok, final_idea} <- iterate_idea(session, client, 
llm, iterations, 0, "", ""), + {:ok, updated} <- set_idea(session, final_idea) do + {:ok, updated} + end + end + + @spec get_idea_cmbagent(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_idea_cmbagent(%__MODULE__{} = session, opts \\ []) do + client = Keyword.get(opts, :client, ReqLLMClient) + + with :ok <- ensure_present(session.research.data_description, :data_description), + {:ok, idea_maker_llm} <- LLM.parse(Keyword.get(opts, :idea_maker_model, "gpt-4o")), + {:ok, idea_hater_llm} <- LLM.parse(Keyword.get(opts, :idea_hater_model, "o3-mini")), + {:ok, final_llm} <- LLM.parse(Keyword.get(opts, :formatter_model, idea_maker_llm)), + {:ok, run} <- + CMBAgentLoop.run_text_task( + "idea", + %{data_description: session.research.data_description}, + client: client, + keys: session.keys, + planner_model: Keyword.get(opts, :planner_model, "gpt-4o"), + plan_reviewer_model: Keyword.get(opts, :plan_reviewer_model, "o3-mini"), + allowed_agents: ["idea_maker", "idea_hater"], + max_steps: Keyword.get(opts, :max_n_steps, 6), + agent_models: %{ + "idea_maker" => idea_maker_llm, + "idea_hater" => idea_hater_llm + }, + final_model: final_llm + ), + {:ok, updated} <- set_idea(session, run.output) do + {:ok, updated} + end + end + + @spec get_method_fast(t(), [generation_option()]) :: {:ok, t()} | {:error, term()} + def get_method_fast(%__MODULE__{} = session, opts \\ []) do + client = Keyword.get(opts, :client, ReqLLMClient) + + with {:ok, llm} <- LLM.parse(Keyword.get(opts, :llm, "gpt-4.1-mini")), + :ok <- ensure_present(session.research.data_description, :data_description), + :ok <- ensure_present(session.research.idea, :idea), + prompt <- + PromptTemplates.methods_fast_prompt( + session.research.data_description, + session.research.idea + ), + {:ok, raw_text} <- complete(client, prompt, llm, session.keys), + {:ok, methods} <- Text.extract_block(raw_text, "METHODS"), + cleaned <- Text.clean_section(methods, "METHODS"), + {:ok, updated} <- set_method(session, cleaned) do 
+ {:ok, updated} + end + end + + @spec get_method_cmbagent(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_method_cmbagent(%__MODULE__{} = session, opts \\ []) do + client = Keyword.get(opts, :client, ReqLLMClient) + + with :ok <- ensure_present(session.research.data_description, :data_description), + :ok <- ensure_present(session.research.idea, :idea), + {:ok, method_llm} <- LLM.parse(Keyword.get(opts, :method_generator_model, "gpt-4o")), + {:ok, final_llm} <- LLM.parse(Keyword.get(opts, :formatter_model, method_llm)), + {:ok, run} <- + CMBAgentLoop.run_text_task( + "method", + %{ + data_description: session.research.data_description, + idea: session.research.idea + }, + client: client, + keys: session.keys, + planner_model: Keyword.get(opts, :planner_model, "gpt-4o"), + plan_reviewer_model: Keyword.get(opts, :plan_reviewer_model, "o3-mini"), + allowed_agents: ["researcher"], + max_steps: Keyword.get(opts, :max_n_steps, 4), + agent_models: %{"researcher" => method_llm}, + final_model: final_llm + ), + {:ok, updated} <- set_method(session, run.output) do + {:ok, updated} + end + end + + @spec check_idea(t(), keyword()) :: {:ok, t()} | {:error, term()} + def check_idea(%__MODULE__{} = session, opts \\ []) do + case Keyword.get(opts, :mode, :semantic_scholar) do + mode when mode in [:semantic_scholar, "semantic_scholar"] -> + with :ok <- ensure_present(session.research.data_description, :data_description), + :ok <- ensure_present(session.research.idea, :idea), + {:ok, result} <- LiteratureWorkflow.run(session, opts), + {:ok, updated} <- set_literature(session, result.literature) do + {:ok, + %{ + updated + | research: %{ + updated.research + | literature_sources: result.sources + } + }} + end + + other -> + {:error, {:unsupported_literature_mode, other}} + end + end + + @spec get_results(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_results(%__MODULE__{} = session, opts \\ []) do + with :ok <- ensure_present(session.research.data_description, 
:data_description), + :ok <- ensure_present(session.research.idea, :idea), + :ok <- ensure_present(session.research.methodology, :methodology), + {:ok, result} <- ResultsWorkflow.run(session, opts), + {:ok, updated} <- set_results(session, result.results), + {:ok, updated} <- set_plots(updated, result.plot_paths) do + {:ok, updated} + end + end + + @spec get_paper(t(), keyword()) :: {:ok, t()} | {:error, term()} + def get_paper(%__MODULE__{} = session, opts \\ []) do + with :ok <- ensure_present(session.research.idea, :idea), + :ok <- ensure_present(session.research.methodology, :methodology), + :ok <- ensure_present(session.research.results, :results), + {:ok, result} <- PaperWorkflow.run(session, opts) do + {:ok, + %{ + session + | research: %{ + session.research + | keywords: result.keywords, + paper_tex_path: result.tex_path, + paper_pdf_path: result.pdf_path + } + }} + end + end + + defp iterate_idea( + _session, + _client, + _llm, + total_iterations, + current_iteration, + previous_ideas, + _criticism + ) + when current_iteration >= total_iterations do + last_idea = + previous_ideas + |> String.split("Idea:", trim: true) + |> List.last() + |> case do + nil -> "" + value -> String.trim(value) + end + + {:ok, last_idea} + end + + defp iterate_idea( + session, + client, + llm, + total_iterations, + current_iteration, + previous_ideas, + criticism + ) do + prompt = + PromptTemplates.idea_maker_prompt( + session.research.data_description, + previous_ideas, + criticism, + current_iteration + ) + + with {:ok, maker_text} <- complete(client, prompt, llm, session.keys), + {:ok, idea} <- Text.extract_block(maker_text, "IDEA") do + cleaned_idea = Text.clean_section(idea, "IDEA") + + updated_previous_ideas = + previous_ideas <> + "\n\nIteration #{current_iteration}:\nIdea: #{cleaned_idea}\n" + + if current_iteration + 1 >= total_iterations do + {:ok, cleaned_idea} + else + critic_prompt = + PromptTemplates.idea_hater_prompt( + session.research.data_description, + 
updated_previous_ideas, + cleaned_idea + ) + + with {:ok, critic_text} <- complete(client, critic_prompt, llm, session.keys), + {:ok, criticism} <- Text.extract_block(critic_text, "CRITIC") do + iterate_idea( + session, + client, + llm, + total_iterations, + current_iteration + 1, + updated_previous_ideas, + Text.clean_section(criticism, "CRITIC") + ) + end + end + end + end + + defp complete(client, prompt, %LLM{} = llm, %KeyManager{} = keys) do + AI.complete(client, prompt, llm, keys) + end + + defp ensure_present("", field), do: {:error, {:missing_field, field}} + defp ensure_present(nil, field), do: {:error, {:missing_field, field}} + defp ensure_present(_, _field), do: :ok + + defp write_field(%__MODULE__{} = session, filename, value, field) do + content = read_content!(value) + destination = Path.join(session.input_files_dir, filename) + File.write!(destination, content) + updated_research = Map.put(session.research, field, content) + {:ok, %{session | research: updated_research}} + end + + defp read_content!(value) when is_binary(value) do + if String.ends_with?(value, ".md") and File.regular?(value) do + File.read!(value) + else + value + end + end + + defp load_existing_content(%__MODULE__{} = session) do + research = + session.research + |> maybe_load_field(session.input_files_dir, @description_file, :data_description) + |> maybe_load_field(session.input_files_dir, @idea_file, :idea) + |> maybe_load_field(session.input_files_dir, @method_file, :methodology) + |> maybe_load_field(session.input_files_dir, @results_file, :results) + |> maybe_load_field(session.input_files_dir, @literature_file, :literature) + |> Map.put(:plot_paths, Path.wildcard(Path.join(session.plots_dir, "*.png"))) + |> maybe_set_path( + Path.join(session.project_dir, @paper_folder), + @paper_tex_file, + :paper_tex_path + ) + |> maybe_set_path( + Path.join(session.project_dir, @paper_folder), + @paper_pdf_file, + :paper_pdf_path + ) + + %{session | research: research} + end + + defp 
maybe_load_field(research, input_files_dir, filename, field) do + path = Path.join(input_files_dir, filename) + + if File.regular?(path) do + Map.put(research, field, File.read!(path)) + else + research + end + end + + defp maybe_set_path(research, folder, filename, field) do + path = Path.join(folder, filename) + + if File.regular?(path) do + Map.put(research, field, path) + else + research + end + end +end diff --git a/elixir/denario_ex/lib/denario_ex/ai.ex b/elixir/denario_ex/lib/denario_ex/ai.ex new file mode 100644 index 00000000..9f41a2cb --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/ai.ex @@ -0,0 +1,33 @@ +defmodule DenarioEx.AI do + @moduledoc false + + alias DenarioEx.{KeyManager, LLM} + + @spec complete(module(), String.t(), LLM.t(), KeyManager.t()) :: + {:ok, String.t()} | {:error, term()} + def complete(client, prompt, %LLM{} = llm, %KeyManager{} = keys) do + messages = [%{role: "user", content: prompt}] + + client.complete(messages, + api_key: KeyManager.api_key_for_provider(keys, llm.provider), + model: llm.spec, + model_metadata: llm.model, + temperature: llm.temperature, + max_output_tokens: llm.max_output_tokens + ) + end + + @spec generate_object(module(), String.t(), map(), LLM.t(), KeyManager.t()) :: + {:ok, map()} | {:error, term()} + def generate_object(client, prompt, schema, %LLM{} = llm, %KeyManager{} = keys) do + messages = [%{role: "user", content: prompt}] + + client.generate_object(messages, schema, + api_key: KeyManager.api_key_for_provider(keys, llm.provider), + model: llm.spec, + model_metadata: llm.model, + temperature: llm.temperature, + max_output_tokens: llm.max_output_tokens + ) + end +end diff --git a/elixir/denario_ex/lib/denario_ex/application.ex b/elixir/denario_ex/lib/denario_ex/application.ex new file mode 100644 index 00000000..2bd14807 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/application.ex @@ -0,0 +1,20 @@ +defmodule DenarioEx.Application do + # See https://hexdocs.pm/elixir/Application.html + # for 
more information on OTP Applications + @moduledoc false + + use Application + + @impl true + def start(_type, _args) do + children = [ + # Starts a worker by calling: DenarioEx.Worker.start_link(arg) + # {DenarioEx.Worker, arg} + ] + + # See https://hexdocs.pm/elixir/Supervisor.html + # for other strategies and supported options + opts = [strategy: :one_for_one, name: DenarioEx.Supervisor] + Supervisor.start_link(children, opts) + end +end diff --git a/elixir/denario_ex/lib/denario_ex/cmbagent_loop.ex b/elixir/denario_ex/lib/denario_ex/cmbagent_loop.ex new file mode 100644 index 00000000..5e1f7cf1 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/cmbagent_loop.ex @@ -0,0 +1,205 @@ +defmodule DenarioEx.CMBAgentLoop do + @moduledoc """ + Elixir-native replacement for the planning/control pattern used by the Python cmbagent flows. + """ + + alias DenarioEx.{AI, LLM, Text, WorkflowPrompts} + + @review_schema %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "approved" => %{"type" => "boolean"}, + "feedback" => %{"type" => "string"} + }, + "required" => ["approved", "feedback"] + } + + @spec plan_and_review(String.t(), map(), keyword()) :: + {:ok, %{summary: String.t(), steps: [map()], feedback: String.t()}} | {:error, term()} + def plan_and_review(task, context, opts) do + client = Keyword.fetch!(opts, :client) + keys = Keyword.fetch!(opts, :keys) + allowed_agents = Keyword.fetch!(opts, :allowed_agents) + max_steps = Keyword.get(opts, :max_steps, 6) + max_reviews = Keyword.get(opts, :max_reviews, 1) + + with {:ok, planner_llm} <- LLM.parse(Keyword.fetch!(opts, :planner_model)), + {:ok, reviewer_llm} <- LLM.parse(Keyword.fetch!(opts, :plan_reviewer_model)) do + do_plan_and_review( + task, + context, + client, + keys, + planner_llm, + reviewer_llm, + allowed_agents, + max_steps, + max_reviews, + nil + ) + end + end + + @spec run_text_task(String.t(), map(), keyword()) :: + {:ok, %{output: String.t(), plan: map(), step_outputs: 
[map()]}} | {:error, term()} + def run_text_task(task, context, opts) do + client = Keyword.fetch!(opts, :client) + keys = Keyword.fetch!(opts, :keys) + agent_models = Keyword.fetch!(opts, :agent_models) + final_model = Keyword.fetch!(opts, :final_model) + + with {:ok, plan} <- plan_and_review(task, context, opts), + {:ok, step_outputs} <- + run_text_steps(task, context, plan.steps, client, keys, agent_models), + final_prompt <- WorkflowPrompts.cmbagent_final_prompt(task, context, step_outputs), + {:ok, final_text} <- AI.complete(client, final_prompt, final_model, keys), + {:ok, output} <- extract_final_output(task, final_text) do + {:ok, %{output: output, plan: plan, step_outputs: step_outputs}} + end + end + + defp do_plan_and_review( + task, + context, + client, + keys, + planner_llm, + reviewer_llm, + allowed_agents, + max_steps, + remaining_reviews, + feedback + ) do + plan_prompt = + WorkflowPrompts.cmbagent_plan_prompt(task, context, allowed_agents, max_steps, feedback) + + with {:ok, plan_object} <- + AI.generate_object(client, plan_prompt, plan_schema(allowed_agents), planner_llm, keys), + {:ok, plan} <- normalize_plan(plan_object, allowed_agents, max_steps), + review_prompt <- WorkflowPrompts.cmbagent_plan_review_prompt(task, context, plan), + {:ok, review_object} <- + AI.generate_object(client, review_prompt, @review_schema, reviewer_llm, keys) do + if approved?(review_object) or remaining_reviews <= 0 do + {:ok, + %{summary: plan.summary, steps: plan.steps, feedback: review_feedback(review_object)}} + else + do_plan_and_review( + task, + context, + client, + keys, + planner_llm, + reviewer_llm, + allowed_agents, + max_steps, + remaining_reviews - 1, + review_feedback(review_object) + ) + end + end + end + + defp run_text_steps(task, context, steps, client, keys, agent_models) do + Enum.reduce_while(steps, {:ok, []}, fn step, {:ok, outputs} -> + agent = Text.fetch(step, "agent") + + with {:ok, llm} <- fetch_agent_model(agent_models, agent), + prompt <- 
WorkflowPrompts.cmbagent_step_prompt(task, agent, step, context, outputs), + {:ok, step_text} <- AI.complete(client, prompt, llm, keys), + {:ok, output} <- Text.extract_block(step_text, "STEP_OUTPUT") do + step_output = %{ + id: Text.fetch(step, "id"), + agent: agent, + goal: Text.fetch(step, "goal"), + output: Text.clean_section(output, "STEP_OUTPUT") + } + + {:cont, {:ok, outputs ++ [step_output]}} + else + {:error, reason} -> {:halt, {:error, reason}} + end + end) + end + + defp extract_final_output("idea", text) do + with {:ok, block} <- Text.extract_block(text, "IDEA") do + {:ok, Text.clean_section(block, "IDEA")} + end + end + + defp extract_final_output("method", text) do + with {:ok, block} <- Text.extract_block(text, "METHODS") do + {:ok, Text.clean_section(block, "METHODS")} + end + end + + defp fetch_agent_model(agent_models, agent) do + case Map.fetch(agent_models, agent) do + {:ok, %LLM{} = llm} -> {:ok, llm} + :error -> {:error, {:missing_agent_model, agent}} + end + end + + defp plan_schema(allowed_agents) do + %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "summary" => %{"type" => "string"}, + "steps" => %{ + "type" => "array", + "items" => %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "id" => %{"type" => "string"}, + "agent" => %{"type" => "string", "enum" => allowed_agents}, + "goal" => %{"type" => "string"}, + "deliverable" => %{"type" => "string"}, + "needs_code" => %{"type" => "boolean"} + }, + "required" => ["id", "agent", "goal", "deliverable", "needs_code"] + } + } + }, + "required" => ["summary", "steps"] + } + end + + defp normalize_plan(plan_object, allowed_agents, max_steps) do + steps = + plan_object + |> Text.fetch("steps") + |> List.wrap() + |> Enum.take(max_steps) + |> Enum.map(fn step -> + %{ + "id" => Text.fetch(step, "id"), + "agent" => Text.fetch(step, "agent"), + "goal" => Text.fetch(step, "goal"), + "deliverable" => Text.fetch(step, "deliverable"), + 
"needs_code" => truthy?(Text.fetch(step, "needs_code")) + } + end) + + valid? = + steps != [] and + Enum.all?(steps, fn step -> + step["id"] not in [nil, ""] and step["goal"] not in [nil, ""] and + step["deliverable"] not in [nil, ""] and step["agent"] in allowed_agents + end) + + if valid? do + {:ok, %{summary: Text.fetch(plan_object, "summary") || "", steps: steps}} + else + {:error, {:invalid_plan, plan_object}} + end + end + + defp approved?(review_object), do: truthy?(Text.fetch(review_object, "approved")) + defp review_feedback(review_object), do: Text.fetch(review_object, "feedback") || "" + + defp truthy?(value) when value in [true, "true", "TRUE", 1], do: true + defp truthy?(_value), do: false +end diff --git a/elixir/denario_ex/lib/denario_ex/code_executor.ex b/elixir/denario_ex/lib/denario_ex/code_executor.ex new file mode 100644 index 00000000..b49abd5e --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/code_executor.ex @@ -0,0 +1,7 @@ +defmodule DenarioEx.CodeExecutor do + @moduledoc """ + Behaviour for executing generated analysis code. + """ + + @callback execute(String.t(), keyword()) :: {:ok, map()} | {:error, map()} +end diff --git a/elixir/denario_ex/lib/denario_ex/key_manager.ex b/elixir/denario_ex/lib/denario_ex/key_manager.ex new file mode 100644 index 00000000..ce2703ff --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/key_manager.ex @@ -0,0 +1,54 @@ +defmodule DenarioEx.KeyManager do + @moduledoc """ + Loads provider credentials from the environment. 
+ """ + + @enforce_keys [] + defstruct anthropic: nil, + gemini: nil, + openai: nil, + perplexity: nil, + semantic_scholar: nil + + @type t :: %__MODULE__{ + anthropic: String.t() | nil, + gemini: String.t() | nil, + openai: String.t() | nil, + perplexity: String.t() | nil, + semantic_scholar: String.t() | nil + } + + @spec from_env() :: t() + def from_env do + %__MODULE__{ + openai: first_env(["OPENAI_API_KEY"]), + gemini: first_env(["GOOGLE_API_KEY", "GEMINI_API_KEY"]), + anthropic: first_env(["ANTHROPIC_API_KEY"]), + perplexity: first_env(["PERPLEXITY_API_KEY"]), + semantic_scholar: + first_env(["SEMANTIC_SCHOLAR_KEY", "SEMANTIC_SCHOLAR_API_KEY", "S2_API_KEY"]) + } + end + + @spec api_key_for_provider(t(), atom()) :: String.t() | nil + def api_key_for_provider(%__MODULE__{} = keys, provider) do + case provider do + :openai -> keys.openai + :anthropic -> keys.anthropic + :google -> keys.gemini + :google_vertex -> keys.gemini + :gemini -> keys.gemini + :perplexity -> keys.perplexity + _ -> nil + end + end + + defp first_env(names) do + Enum.find_value(names, fn name -> + case System.get_env(name) do + value when is_binary(value) and value != "" -> value + _ -> nil + end + end) + end +end diff --git a/elixir/denario_ex/lib/denario_ex/literature_workflow.ex b/elixir/denario_ex/lib/denario_ex/literature_workflow.ex new file mode 100644 index 00000000..1f91ee91 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/literature_workflow.ex @@ -0,0 +1,467 @@ +defmodule DenarioEx.LiteratureWorkflow do + @moduledoc false + + alias DenarioEx.{AI, LLM, OpenAlex, ReqLLMClient, SemanticScholar, Text, WorkflowPrompts} + + @decision_schema %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "reason" => %{"type" => "string"}, + "decision" => %{"type" => "string", "enum" => ["novel", "not novel", "query"]}, + "query" => %{"type" => "string"} + }, + "required" => ["reason", "decision", "query"] + } + + @selection_schema %{ + "type" => "object", + 
"additionalProperties" => false, + "properties" => %{ + "selected_paper_ids" => %{"type" => "array", "items" => %{"type" => "string"}}, + "rationale" => %{"type" => "string"} + }, + "required" => ["selected_paper_ids", "rationale"] + } + + @spec run(DenarioEx.t(), keyword()) :: {:ok, map()} | {:error, term()} + def run(session, opts \\ []) do + client = Keyword.get(opts, :client, ReqLLMClient) + semantic_scholar_client = Keyword.get(opts, :semantic_scholar_client, SemanticScholar) + fallback_literature_client = Keyword.get(opts, :fallback_literature_client, OpenAlex) + max_iterations = Keyword.get(opts, :max_iterations, 7) + literature_dir = Path.join(session.project_dir, "literature_output") + literature_log = Path.join(literature_dir, "literature.log") + + context = %{ + data_description: session.research.data_description, + idea: session.research.idea + } + + with {:ok, llm} <- LLM.parse(Keyword.get(opts, :llm, "gemini-2.5-flash")), + {:ok, state} <- + iterate( + 0, + max_iterations, + %{messages: "", papers_text: "", decision: "query", sources: []}, + context, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + session.keys, + literature_log + ), + summary_prompt <- + WorkflowPrompts.literature_summary_prompt(context, state.decision, state.messages), + {:ok, summary_text} <- AI.complete(client, summary_prompt, llm, session.keys), + {:ok, block} <- Text.extract_block_or_fallback(summary_text, "SUMMARY") do + File.mkdir_p!(literature_dir) + + {:ok, + %{ + literature: "Idea #{state.decision}\n\n" <> block, + sources: state.sources, + decision: state.decision, + log_path: literature_log + }} + end + end + + defp iterate( + iteration, + max_iterations, + state, + context, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + keys, + literature_log + ) do + prompt = + WorkflowPrompts.literature_decision_prompt( + context, + iteration, + max_iterations, + state.messages, + state.papers_text + ) + + with {:ok, object} <- 
AI.generate_object(client, prompt, @decision_schema, llm, keys), + decision <- normalize_decision(Text.fetch(object, "decision")), + reason <- Text.fetch(object, "reason") || "", + query <- Text.fetch(object, "query") || "" do + messages = + state.messages <> + "\nRound #{iteration}\nDecision: #{decision}\nReason: #{reason}\nQuery: #{query}\n" + + cond do + decision in ["novel", "not novel"] -> + {:ok, %{state | messages: messages, decision: decision}} + + iteration + 1 >= max_iterations -> + {:ok, %{state | messages: messages, decision: "novel"}} + + true -> + case semantic_scholar_client.search(query, keys, limit: 20) do + {:ok, result} -> + {papers_text, new_sources} = + normalize_papers(result, context, query, client, llm, keys) + + File.mkdir_p!(Path.dirname(literature_log)) + File.write!(literature_log, papers_text, [:append]) + + iterate( + iteration + 1, + max_iterations, + %{ + state + | messages: messages, + papers_text: papers_text, + sources: merge_sources(state.sources, new_sources), + decision: "query" + }, + context, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + keys, + literature_log + ) + + {:error, error} -> + handle_primary_search_error( + error, + query, + state, + messages, + context, + iteration, + max_iterations, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + keys, + literature_log + ) + end + end + end + end + + defp normalize_papers(result, context, query, client, llm, keys) do + papers = Text.fetch(result, "data") || [] + + normalized = + Enum.filter(papers, fn paper -> + abstract = Text.fetch(paper, "abstract") + is_binary(abstract) and abstract != "" + end) + + {selected_papers, selection_note} = + select_relevant_papers(normalized, context, query, client, llm, keys) + + papers_text = + if selected_papers == [] do + selection_note <> + "\nNo directly relevant papers were selected from the retrieved candidates.\n" + else + selection_note <> "\n" <> 
render_papers_text(selected_papers) + end + + {papers_text, selected_papers} + end + + defp merge_sources(existing, incoming) do + {_, merged} = + Enum.reduce(existing ++ incoming, {MapSet.new(), []}, fn source, {seen, acc} -> + key = + Text.fetch(source, "paperId") || + Text.slugify(Text.fetch(source, "title") || "paper") + + if MapSet.member?(seen, key) do + {seen, acc} + else + {MapSet.put(seen, key), acc ++ [source]} + end + end) + + merged + end + + defp normalize_decision(decision) when is_binary(decision) do + decision + |> String.downcase() + |> String.trim() + end + + defp normalize_decision(_decision), do: "query" + + defp select_relevant_papers([], _context, _query, _client, _llm, _keys), do: {[], ""} + + defp select_relevant_papers(papers, context, query, client, llm, keys) do + ranked = rank_papers(papers, context, query) + candidates = Enum.take(ranked, 10) + focus = focus_terms(context, query) + + prompt = + WorkflowPrompts.literature_selection_prompt( + context, + query, + render_papers_text(candidates, include_ids?: true, include_scores?: true) + ) + + case AI.generate_object(client, prompt, @selection_schema, llm, keys) do + {:ok, selection} -> + selected_ids = normalize_string_list(Text.fetch(selection, "selected_paper_ids")) + rationale = Text.fetch(selection, "rationale") || "" + selected = filter_selected_papers(candidates, selected_ids, focus) + {selected, "Selection rationale: #{rationale}\n"} + + {:error, _error} -> + fallback = + candidates + |> Enum.take(5) + |> Enum.filter(&paper_matches_focus?(&1, focus)) + + {fallback, "Selection rationale: heuristic ranking fallback.\n"} + end + end + + defp rank_papers(papers, context, query) do + query_terms = + tokenize( + "#{Map.get(context, :data_description, "")} #{Map.get(context, :idea, "")} #{query}" + ) + |> MapSet.new() + + Enum.sort_by( + papers, + fn paper -> + title = Text.fetch(paper, "title") || "" + abstract = Text.fetch(paper, "abstract") || "" + title_terms = tokenize(title) |> 
MapSet.new() + abstract_terms = tokenize(abstract) |> MapSet.new() + title_overlap = MapSet.intersection(query_terms, title_terms) |> MapSet.size() + abstract_overlap = MapSet.intersection(query_terms, abstract_terms) |> MapSet.size() + year = safe_integer(Text.fetch(paper, "year")) + citation_count = safe_integer(Text.fetch(paper, "citationCount")) + relevance_score = safe_float(Text.fetch(paper, "relevanceScore")) + + generic_penalty = + if generic_paper?(paper) and title_overlap < 2 do + 4.0 + else + 0.0 + end + + title_overlap * 3.0 + abstract_overlap + year_bonus(year) + + :math.log10(citation_count + 1) + relevance_score - generic_penalty + end, + :desc + ) + end + + defp render_papers_text(papers, opts \\ []) do + include_ids? = Keyword.get(opts, :include_ids?, false) + include_scores? = Keyword.get(opts, :include_scores?, false) + + Enum.map_join(papers, "\n\n", fn paper -> + authors = + paper + |> Text.fetch("authors") + |> List.wrap() + |> Enum.map_join(", ", fn author -> Text.fetch(author, "name") || "Unknown" end) + + id_line = + if include_ids? do + "Paper ID: #{Text.fetch(paper, "paperId")}\n" + else + "" + end + + score_line = + if include_scores? 
do + "Citation count: #{safe_integer(Text.fetch(paper, "citationCount"))}\n" + else + "" + end + + """ + #{id_line}Title: #{Text.fetch(paper, "title")} + Year: #{Text.fetch(paper, "year")} + #{score_line}Authors: #{authors} + Abstract: #{Text.fetch(paper, "abstract")} + URL: #{Text.fetch(paper, "url")} + """ + end) <> "\n" + end + + defp filter_selected_papers(papers, selected_ids, focus_terms) do + Enum.filter(papers, fn paper -> + id = Text.fetch(paper, "paperId") + id in selected_ids and paper_matches_focus?(paper, focus_terms) + end) + end + + defp normalize_string_list(values) when is_list(values) do + values + |> Enum.filter(&is_binary/1) + |> Enum.map(&String.trim/1) + |> Enum.reject(&(&1 == "")) + end + + defp normalize_string_list(_), do: [] + + defp tokenize(text) do + stopwords = + MapSet.new(~w( + a an and are as at be by for from in into is it of on or that the this to using with over within + data analysis workflow workflows using python plotting visualization visualisation tutorial tutorials + pipeline pipelines study studies method methods system systems approach approaches result results + generate generated small minimal hands handson journey building external required + experiment experiments tiny list directly print save keep self contained concise propose paper + simple scientific mean png + )) + + text + |> String.downcase() + |> String.replace(~r/[^a-z0-9\s]+/u, " ") + |> String.split(~r/\s+/, trim: true) + |> Enum.reject(&(String.length(&1) < 3 or MapSet.member?(stopwords, &1))) + end + + defp generic_paper?(paper) do + haystack = + "#{Text.fetch(paper, "title") || ""} #{Text.fetch(paper, "abstract") || ""}" + |> String.downcase() + + Enum.any?(["survey", "review", "overview", "tutorial"], &String.contains?(haystack, &1)) + end + + defp focus_terms(context, query) do + "#{Map.get(context, :idea, "")} #{query}" + |> tokenize() + |> MapSet.new() + end + + defp paper_matches_focus?(_paper, focus_terms) when map_size(focus_terms) == 0, do: true 
+ + defp paper_matches_focus?(paper, focus_terms) do + paper_terms = + "#{Text.fetch(paper, "title") || ""} #{Text.fetch(paper, "abstract") || ""}" + |> tokenize() + |> MapSet.new() + + overlap = MapSet.intersection(focus_terms, paper_terms) |> MapSet.size() + overlap >= min(2, map_size(focus_terms)) + end + + defp year_bonus(year) when year >= 2020, do: 2.0 + defp year_bonus(year) when year >= 2015, do: 1.0 + defp year_bonus(_year), do: 0.0 + + defp safe_integer(value) when is_integer(value), do: value + defp safe_integer(value) when is_float(value), do: round(value) + defp safe_integer(_value), do: 0 + + defp safe_float(value) when is_float(value), do: value + defp safe_float(value) when is_integer(value), do: value * 1.0 + defp safe_float(_value), do: 0.0 + + defp handle_primary_search_error( + error, + query, + state, + messages, + context, + iteration, + max_iterations, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + keys, + literature_log + ) do + primary_failure_note = search_failure_note(error) + + case fallback_literature_client.search(query, keys, limit: 20) do + {:ok, result} -> + {papers_text, new_sources} = + normalize_papers(result, context, query, client, llm, keys) + + File.mkdir_p!(Path.dirname(literature_log)) + + File.write!( + literature_log, + primary_failure_note <> "\nFalling back to OpenAlex.\n" <> papers_text, + [:append] + ) + + iterate( + iteration + 1, + max_iterations, + %{ + state + | messages: + messages <> + "Search status: #{primary_failure_note} Falling back to OpenAlex.\n", + papers_text: papers_text, + sources: merge_sources(state.sources, new_sources), + decision: "query" + }, + context, + client, + semantic_scholar_client, + fallback_literature_client, + llm, + keys, + literature_log + ) + + {:error, fallback_error} -> + failure_note = + primary_failure_note <> + " OpenAlex fallback also failed: #{search_failure_note(fallback_error)}" + + File.mkdir_p!(Path.dirname(literature_log)) + 
File.write!(literature_log, failure_note <> "\n", [:append]) + + {:ok, + %{ + state + | messages: messages <> "Search status: #{failure_note}\n", + decision: "literature search unavailable" + }} + end + end + + defp search_failure_note({:semantic_scholar_http_error, 429, _body}) do + "Semantic Scholar rate-limited the request (HTTP 429). The literature check could not be completed from the public API." + end + + defp search_failure_note({:semantic_scholar_http_error, status, _body}) do + "Semantic Scholar returned HTTP #{status}. The literature check could not be completed." + end + + defp search_failure_note({:semantic_scholar_request_error, message}) do + "Semantic Scholar request failed: #{message}" + end + + defp search_failure_note({:openalex_http_error, status, _body}) do + "OpenAlex returned HTTP #{status}." + end + + defp search_failure_note({:openalex_request_error, message}) do + "OpenAlex request failed: #{message}" + end + + defp search_failure_note(error) do + "Semantic Scholar request failed: #{inspect(error)}" + end +end diff --git a/elixir/denario_ex/lib/denario_ex/llm.ex b/elixir/denario_ex/lib/denario_ex/llm.ex new file mode 100644 index 00000000..afb006c3 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/llm.ex @@ -0,0 +1,100 @@ +defmodule DenarioEx.LLM do + @moduledoc """ + Normalizes Denario model inputs through LLMDB/ReqLLM. 
+ """ + + @default_max_output_tokens 16_384 + + @default_specs %{ + "gemini-2.0-flash" => {"google:gemini-2.0-flash", 0.7}, + "gemini-2.5-flash" => {"google:gemini-2.5-flash", 0.7}, + "gemini-2.5-pro" => {"google:gemini-2.5-pro", 0.7}, + "o3-mini" => {"openai:o3-mini", nil}, + "gpt-4o" => {"openai:gpt-4o", 0.5}, + "gpt-4.1" => {"openai:gpt-4.1", 0.5}, + "gpt-4.1-mini" => {"openai:gpt-4.1-mini", 0.5}, + "gpt-4o-mini" => {"openai:gpt-4o-mini", 0.5}, + "gpt-4.5" => {"openai:gpt-4.5-preview", 0.5}, + "gpt-5" => {"openai:gpt-5", nil}, + "gpt-5-mini" => {"openai:gpt-5-mini", nil} + } + + @enforce_keys [:spec, :model, :provider, :max_output_tokens] + defstruct [:spec, :model, :provider, :max_output_tokens, :temperature] + + @type t :: %__MODULE__{ + spec: String.t(), + model: LLMDB.Model.t(), + provider: atom(), + max_output_tokens: pos_integer(), + temperature: float() | nil + } + + @spec parse(String.t() | t()) :: {:ok, t()} + def parse(%__MODULE__{} = llm), do: {:ok, llm} + + def parse(name) when is_binary(name) do + {spec, temperature} = normalize_spec(name) + + with {:ok, model} <- resolve_model(spec) do + {:ok, + %__MODULE__{ + spec: spec, + model: model, + provider: model.provider, + max_output_tokens: max_output_tokens(model), + temperature: temperature + }} + end + end + + defp normalize_spec(name) do + case Map.get(@default_specs, name) do + {spec, temperature} -> + {spec, temperature} + + nil -> + spec = + if String.contains?(name, [":", "@"]) do + name + else + "openai:#{name}" + end + + {provider, model_id} = LLMDB.parse!(spec) + {LLMDB.format({provider, model_id}), default_temperature(provider, model_id)} + end + end + + defp resolve_model(spec) do + case LLMDB.model(spec) do + {:ok, model} -> + {:ok, model} + + {:error, _reason} -> + {provider, model_id} = LLMDB.parse!(spec) + ReqLLM.model(%{provider: provider, id: model_id}) + end + end + + defp max_output_tokens(%LLMDB.Model{limits: limits}) do + case limits do + %{output: output} when is_integer(output) 
and output > 0 -> output + %{"output" => output} when is_integer(output) and output > 0 -> output + _ -> @default_max_output_tokens + end + end + + defp default_temperature(provider, _model_id) when provider in [:google, :google_vertex], + do: 0.7 + + defp default_temperature(:openai, model_id) do + if reasoning_model?(model_id), do: nil, else: 0.5 + end + + defp default_temperature(_provider, _model_id), do: 0.5 + + defp reasoning_model?(model_id) do + String.starts_with?(model_id, ["gpt-5", "o1", "o3", "o4"]) + end +end diff --git a/elixir/denario_ex/lib/denario_ex/llm_client.ex b/elixir/denario_ex/lib/denario_ex/llm_client.ex new file mode 100644 index 00000000..63c40b14 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/llm_client.ex @@ -0,0 +1,8 @@ +defmodule DenarioEx.LLMClient do + @moduledoc """ + Behaviour for pluggable LLM clients. + """ + + @callback complete([map()], keyword()) :: {:ok, String.t()} | {:error, term()} + @callback generate_object([map()], map(), keyword()) :: {:ok, map()} | {:error, term()} +end diff --git a/elixir/denario_ex/lib/denario_ex/open_alex.ex b/elixir/denario_ex/lib/denario_ex/open_alex.ex new file mode 100644 index 00000000..9d6bf26b --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/open_alex.ex @@ -0,0 +1,121 @@ +defmodule DenarioEx.OpenAlex do + @moduledoc """ + OpenAlex search adapter used as a public fallback when Semantic Scholar is + unavailable or rate-limited. 
+ """ + + @behaviour DenarioEx.SemanticScholarClient + + @base_url "https://api.openalex.org/works" + @select_fields [ + "id", + "title", + "doi", + "publication_year", + "cited_by_count", + "relevance_score", + "type", + "authorships", + "primary_location", + "ids", + "open_access", + "abstract_inverted_index" + ] + + @impl true + def search(query, _keys, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + + params = [ + {"search", query}, + {"per-page", Integer.to_string(limit)}, + {"select", Enum.join(@select_fields, ",")}, + {"filter", "has_abstract:true,from_publication_date:2010-01-01"} + ] + + request = + Req.new( + url: @base_url, + connect_options: [timeout: 15_000], + receive_timeout: 30_000 + ) + + case Req.get(request, params: params) do + {:ok, %{status: 200, body: %{"results" => results}}} when is_list(results) -> + {:ok, %{"data" => Enum.map(results, &normalize_work/1), "source" => "openalex"}} + + {:ok, %{status: status, body: body}} -> + {:error, {:openalex_http_error, status, body}} + + {:error, error} -> + {:error, {:openalex_request_error, Exception.message(error)}} + end + end + + defp normalize_work(work) do + %{ + "paperId" => openalex_id(work), + "title" => Map.get(work, "title"), + "year" => Map.get(work, "publication_year"), + "citationCount" => Map.get(work, "cited_by_count"), + "relevanceScore" => Map.get(work, "relevance_score"), + "publicationType" => Map.get(work, "type"), + "abstract" => reconstruct_abstract(Map.get(work, "abstract_inverted_index")), + "url" => landing_page_url(work), + "authors" => normalize_authors(Map.get(work, "authorships", [])), + "externalIds" => normalize_external_ids(work), + "openAccessPdf" => %{"url" => pdf_url(work)}, + "retrievalSource" => "OpenAlex" + } + end + + defp openalex_id(work) do + work + |> Map.get("id", "") + |> String.replace_prefix("https://openalex.org/", "") + end + + defp landing_page_url(work) do + get_in(work, ["primary_location", "landing_page_url"]) || + get_in(work, ["ids", 
"doi"]) || + Map.get(work, "id") + end + + defp pdf_url(work) do + get_in(work, ["primary_location", "pdf_url"]) || + get_in(work, ["open_access", "oa_url"]) + end + + defp normalize_authors(authorships) when is_list(authorships) do + Enum.map(authorships, fn authorship -> + %{"name" => get_in(authorship, ["author", "display_name"]) || "Unknown"} + end) + end + + defp normalize_authors(_), do: [] + + defp normalize_external_ids(work) do + ids = Map.get(work, "ids", %{}) + + ids + |> Enum.reduce(%{}, fn + {"doi", value}, acc when is_binary(value) -> Map.put(acc, "DOI", value) + {"pmid", value}, acc when is_binary(value) -> Map.put(acc, "PubMed", value) + {"pmcid", value}, acc when is_binary(value) -> Map.put(acc, "PMCID", value) + {_key, _value}, acc -> acc + end) + end + + defp reconstruct_abstract(nil), do: nil + + defp reconstruct_abstract(index) when is_map(index) do + index + |> Enum.flat_map(fn {word, positions} -> + Enum.map(positions, fn position -> {position, word} end) + end) + |> Enum.sort_by(fn {position, _word} -> position end) + |> Enum.map_join(" ", fn {_position, word} -> word end) + end + + defp reconstruct_abstract(_), do: nil +end diff --git a/elixir/denario_ex/lib/denario_ex/paper_workflow.ex b/elixir/denario_ex/lib/denario_ex/paper_workflow.ex new file mode 100644 index 00000000..5b1983a9 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/paper_workflow.ex @@ -0,0 +1,514 @@ +defmodule DenarioEx.PaperWorkflow do + @moduledoc false + + alias DenarioEx.{AI, LLM, ReqLLMClient, Text, WorkflowPrompts} + + @asset_dir Path.expand("../../../../denario/paper_agents/LaTeX", __DIR__) + + @abstract_schema %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "title" => %{"type" => "string"}, + "abstract" => %{"type" => "string"} + }, + "required" => ["title", "abstract"] + } + + @spec run(DenarioEx.t(), keyword()) :: {:ok, map()} | {:error, term()} + def run(session, opts \\ []) do + client = Keyword.get(opts, :client, 
ReqLLMClient) + writer = Keyword.get(opts, :writer, "scientist") + journal = normalize_journal(Keyword.get(opts, :journal, :none)) + add_citations = Keyword.get(opts, :add_citations, true) + compile? = Keyword.get(opts, :compile, true) + paper_dir = Path.join(session.project_dir, "paper") + tex_name = "paper_v4_final.tex" + tex_path = Path.join(paper_dir, tex_name) + pdf_path = Path.join(paper_dir, "paper_v4_final.pdf") + preset = journal_preset(journal) + plot_paths = available_plot_paths(session) + + with {:ok, llm} <- LLM.parse(Keyword.get(opts, :llm, "gemini-2.5-flash")), + :ok <- File.mkdir_p(paper_dir), + :ok <- copy_assets(preset.files, paper_dir), + citation_context = citation_context(session.research.literature_sources), + keywords <- existing_keywords(session.research.keywords), + keywords <- + maybe_generate_keywords(keywords, client, llm, session.keys, writer, session.research), + {:ok, abstract_object} <- + AI.generate_object( + client, + WorkflowPrompts.paper_abstract_prompt( + writer, + paper_context(session.research), + citation_context + ), + @abstract_schema, + llm, + session.keys + ), + title = Text.fetch(abstract_object, "title") || "Untitled Paper", + abstract = Text.fetch(abstract_object, "abstract") || "", + {:ok, introduction} <- + generate_section( + client, + llm, + session.keys, + "Introduction", + writer, + section_context(session.research, title, abstract), + citation_context + ), + {:ok, methods} <- + generate_section( + client, + llm, + session.keys, + "Methods", + writer, + section_context(session.research, title, abstract), + citation_context + ), + {:ok, results} <- + generate_section( + client, + llm, + session.keys, + "Results", + writer, + section_context(session.research, title, abstract), + citation_context + ), + {:ok, results} <- + maybe_add_figures( + plot_paths, + client, + llm, + session.keys, + writer, + section_context(session.research, title, abstract) + |> Map.put(:paper_results, results) + ), + {:ok, conclusions} <- 
+ generate_section( + client, + llm, + session.keys, + "Conclusions", + writer, + section_context(%{session.research | results: results}, title, abstract), + citation_context + ), + :ok <- + maybe_write_bibliography(paper_dir, add_citations, session.research.literature_sources), + tex <- + render_latex( + preset, + title, + abstract, + keywords, + introduction, + methods, + results, + conclusions, + add_citations + ), + :ok <- File.write(tex_path, tex), + {:ok, compiled_pdf_path} <- + maybe_compile(compile?, tex_name, pdf_path, paper_dir, add_citations) do + {:ok, %{tex_path: tex_path, pdf_path: compiled_pdf_path, keywords: keywords}} + end + end + + defp paper_context(research) do + %{ + idea: research.idea, + methodology: research.methodology, + results: research.results + } + end + + defp section_context(research, title, abstract) do + %{ + title: title, + abstract: abstract, + idea: research.idea, + methodology: research.methodology, + results: research.results + } + end + + defp existing_keywords(keywords) when is_list(keywords), do: Enum.join(keywords, ", ") + + defp existing_keywords(keywords) when is_map(keywords), + do: Map.keys(keywords) |> Enum.join(", ") + + defp existing_keywords(_keywords), do: "" + + defp maybe_generate_keywords("", client, llm, keys, writer, research) do + prompt = WorkflowPrompts.paper_keywords_prompt(writer, paper_context(research)) + + with {:ok, response} <- AI.complete(client, prompt, llm, keys), + {:ok, block} <- Text.extract_block_or_fallback(response, "KEYWORDS") do + block + else + _ -> "" + end + end + + defp maybe_generate_keywords(keywords, _client, _llm, _keys, _writer, _research), do: keywords + + defp available_plot_paths(session) do + if session.research.plot_paths == [] do + Path.wildcard(Path.join(session.project_dir, "input_files/plots/*.png")) + else + session.research.plot_paths + end + end + + defp generate_section(client, llm, keys, section, writer, context, citation_context) do + prompt = 
WorkflowPrompts.paper_section_prompt(section, writer, context, citation_context) + block_name = String.upcase(section) + + with {:ok, response} <- AI.complete(client, prompt, llm, keys), + {:ok, block} <- Text.extract_block_or_fallback(response, block_name) do + {:ok, block} + end + end + + defp maybe_add_figures([], _client, _llm, _keys, _writer, context), + do: {:ok, Map.get(context, :paper_results, "")} + + defp maybe_add_figures(plot_paths, client, llm, keys, writer, context) do + figure_specs = + Enum.map_join(plot_paths, "\n\n", fn plot_path -> + plot_name = Path.basename(plot_path) + label = "fig:#{Text.slugify(Path.rootname(plot_name))}" + + prompt = WorkflowPrompts.paper_figure_caption_prompt(writer, context, plot_name) + + caption = + case AI.complete(client, prompt, llm, keys) do + {:ok, response} -> + case Text.extract_block_or_fallback(response, "CAPTION") do + {:ok, block} -> block + _ -> "Figure generated during the analysis." + end + + _ -> + "Figure generated during the analysis." 
+ end + + """ + File: #{plot_name} + Label: #{label} + Caption: #{caption} + """ + end) + + prompt = WorkflowPrompts.paper_refine_results_prompt(writer, context, figure_specs) + + with {:ok, response} <- AI.complete(client, prompt, llm, keys), + {:ok, block} <- Text.extract_block_or_fallback(response, "RESULTS") do + {:ok, normalize_figure_paths(block, plot_paths)} + end + end + + defp maybe_write_bibliography(_paper_dir, false, _sources), do: :ok + + defp maybe_write_bibliography(paper_dir, true, sources) do + bib_path = Path.join(paper_dir, "bibliography.bib") + entries = Enum.map_join(sources, "\n\n", &bib_entry/1) + File.write!(bib_path, entries) + :ok + end + + defp bib_entry(source) do + id = Text.fetch(source, "paperId") || Text.slugify(Text.fetch(source, "title") || "paper") + + authors = + source + |> Text.fetch("authors") + |> List.wrap() + |> Enum.map_join(" and ", fn author -> + escape_bib(Text.fetch(author, "name") || "Unknown") + end) + + title = escape_bib(Text.fetch(source, "title") || "Untitled") + year = Text.fetch(source, "year") || "2025" + url = escape_bib(Text.fetch(source, "url") || "") + + """ + @article{#{id}, + title = {#{title}}, + author = {#{authors}}, + year = {#{year}}, + url = {#{url}} + } + """ + end + + defp citation_context([]), do: "" + + defp citation_context(sources) do + Enum.map_join(sources, "\n", fn source -> + id = Text.fetch(source, "paperId") || Text.slugify(Text.fetch(source, "title") || "paper") + + authors = + source + |> Text.fetch("authors") + |> List.wrap() + |> Enum.map_join(", ", fn author -> Text.fetch(author, "name") || "Unknown" end) + + "#{id}: #{Text.fetch(source, "title")} (#{Text.fetch(source, "year")}) by #{authors}" + end) + end + + defp render_latex( + preset, + title, + abstract, + keywords, + introduction, + methods, + results, + conclusions, + add_citations + ) do + bibliography_block = + if add_citations do + "\\bibliography{bibliography}\n#{preset.bibliography_style}" + else + "" + end + + """ + 
\\documentclass[#{preset.layout}]{#{preset.article}} + \\usepackage{amsmath} + \\usepackage{graphicx} + \\usepackage{natbib} + #{preset.usepackage} + + \\begin{document} + #{preset.title.(title)} + #{preset.author.("Denario")} + #{preset.affiliation.("Anthropic, Gemini \\& OpenAI servers. Planet Earth.")} + #{preset.abstract.(abstract)} + #{preset.keywords.(keywords)} + + \\section{Introduction} + #{introduction} + + \\section{Methods} + #{methods} + + \\section{Results} + #{results} + + \\section{Conclusions} + #{conclusions} + + #{bibliography_block} + \\end{document} + """ + end + + defp maybe_compile(false, _tex_name, _pdf_path, _paper_dir, _add_citations), do: {:ok, nil} + + defp maybe_compile(true, tex_name, pdf_path, paper_dir, add_citations) do + if System.find_executable("xelatex") do + compile_tex(tex_name, pdf_path, paper_dir, add_citations) + else + {:ok, nil} + end + end + + defp compile_tex(tex_name, pdf_path, paper_dir, add_citations) do + {output_1, status_1} = + System.cmd("xelatex", ["-interaction=nonstopmode", tex_name], + cd: paper_dir, + stderr_to_stdout: true + ) + + if status_1 != 0 do + {:error, {:latex_compile_failed, output_1}} + else + if add_citations and File.exists?(Path.join(paper_dir, "bibliography.bib")) and + System.find_executable("bibtex") do + base = Path.rootname(tex_name) + System.cmd("bibtex", [base], cd: paper_dir, stderr_to_stdout: true) + end + + System.cmd("xelatex", ["-interaction=nonstopmode", tex_name], + cd: paper_dir, + stderr_to_stdout: true + ) + + System.cmd("xelatex", ["-interaction=nonstopmode", tex_name], + cd: paper_dir, + stderr_to_stdout: true + ) + + {:ok, if(File.exists?(pdf_path), do: pdf_path, else: nil)} + end + end + + defp copy_assets(files, paper_dir) do + Enum.each(files, fn file -> + source = Path.join(@asset_dir, file) + destination = Path.join(paper_dir, file) + + if File.exists?(source) do + File.cp!(source, destination) + end + end) + + :ok + end + + defp normalize_figure_paths(text, 
plot_paths) do + Enum.reduce(plot_paths, text, fn plot_path, acc -> + basename = Path.basename(plot_path) + normalized = "../input_files/plots/#{basename}" + + Regex.replace( + ~r/(\\includegraphics(?:\[[^\]]*\])?\{)([^}]*#{Regex.escape(basename)})\}/, + acc, + fn _match, prefix, _old_path -> "#{prefix}#{normalized}}" end + ) + end) + end + + defp normalize_journal(nil), do: :none + defp normalize_journal(:none), do: :none + defp normalize_journal("none"), do: :none + defp normalize_journal(:aas), do: :aas + defp normalize_journal("AAS"), do: :aas + defp normalize_journal(:aps), do: :aps + defp normalize_journal("APS"), do: :aps + defp normalize_journal(:icml), do: :icml + defp normalize_journal("ICML"), do: :icml + defp normalize_journal(:jhep), do: :jhep + defp normalize_journal("JHEP"), do: :jhep + defp normalize_journal(:neurips), do: :neurips + defp normalize_journal("NeurIPS"), do: :neurips + defp normalize_journal(:pasj), do: :pasj + defp normalize_journal("PASJ"), do: :pasj + defp normalize_journal(_journal), do: :none + + defp journal_preset(:aas) do + %{ + article: "aastex631", + layout: "twocolumn", + usepackage: "\\usepackage{aas_macros}", + title: &"\\title{#{&1}}", + author: &"\\author{#{&1}}", + affiliation: &"\\affiliation{#{&1}}", + abstract: &"\\begin{abstract}\n#{&1}\n\\end{abstract}", + keywords: &"\\keywords{#{&1}}", + bibliography_style: "\\bibliographystyle{aasjournal}", + files: ["aasjournal.bst", "aastex631.cls", "aas_macros.sty"] + } + end + + defp journal_preset(:aps) do + %{ + article: "revtex4-2", + layout: "aps", + usepackage: "", + title: &"\\title{#{&1}}", + author: &"\\author{#{&1}}", + affiliation: &"\\affiliation{#{&1}}", + abstract: &"\\begin{abstract}\n#{&1}\n\\end{abstract}\n\\maketitle", + keywords: fn _ -> "" end, + bibliography_style: "\\bibliographystyle{unsrt}", + files: [] + } + end + + defp journal_preset(:icml) do + %{ + article: "article", + layout: "", + usepackage: "\\usepackage[accepted]{icml2025}", + title: 
&"\\twocolumn[\n\\icmltitle{#{&1}}", + author: &"\\begin{icmlauthorlist}\n\\icmlauthor{#{&1}}{aff}\n\\end{icmlauthorlist}", + affiliation: &"\\icmlaffiliation{aff}{#{&1}}\n", + abstract: &"]\n\\printAffiliationsAndNotice{}\n\\begin{abstract}\n#{&1}\n\\end{abstract}", + keywords: &"\\icmlkeywords{#{&1}}", + bibliography_style: "\\bibliographystyle{icml2025}", + files: ["icml2025.sty", "icml2025.bst", "fancyhdr.sty"] + } + end + + defp journal_preset(:jhep) do + %{ + article: "article", + layout: "", + usepackage: "\\usepackage{jcappub}", + title: &"\\title{#{&1}}", + author: &"\\author{#{&1}}", + affiliation: &"\\affiliation{#{&1}}", + abstract: &"\\abstract{\n#{&1}\n}\n\\maketitle", + keywords: fn _ -> "" end, + bibliography_style: "\\bibliographystyle{JHEP}", + files: ["JHEP.bst", "jcappub.sty"] + } + end + + defp journal_preset(:neurips) do + %{ + article: "article", + layout: "", + usepackage: "\\usepackage[final]{neurips_2025}", + title: &"\\title{#{&1}}", + author: &"\\author{\n#{&1}\\\\", + affiliation: &"#{&1}\n}", + abstract: &"\\maketitle\n\\begin{abstract}\n#{&1}\n\\end{abstract}", + keywords: fn _ -> "" end, + bibliography_style: "\\bibliographystyle{unsrt}", + files: ["neurips_2025.sty"] + } + end + + defp journal_preset(:pasj) do + %{ + article: "pasj01", + layout: "twocolumn", + usepackage: "\\usepackage{aas_macros}", + title: &"\\title{#{&1}}", + author: &"\\author{#{&1}}", + affiliation: &"\\altaffiltext{1}{#{&1}}", + abstract: &"\\maketitle\n\\begin{abstract}\n#{&1}\n\\end{abstract}", + keywords: fn _ -> "" end, + bibliography_style: "\\bibliographystyle{aasjournal}", + files: ["aasjournal.bst", "pasj01.cls", "aas_macros.sty"] + } + end + + defp journal_preset(:none) do + %{ + article: "article", + layout: "", + usepackage: "", + title: &"\\title{#{&1}}", + author: &"\\author{#{&1}}", + affiliation: &"\\date{#{&1}}", + abstract: &"\\maketitle\n\\begin{abstract}\n#{&1}\n\\end{abstract}", + keywords: fn _ -> "" end, + bibliography_style: 
"\\bibliographystyle{unsrt}", + files: [] + } + end + + defp escape_bib(text) do + text + |> String.replace("\\", "\\\\") + |> String.replace("{", "\\{") + |> String.replace("}", "\\}") + |> String.replace("&", "\\&") + |> String.replace("%", "\\%") + |> String.replace("_", "\\_") + end +end diff --git a/elixir/denario_ex/lib/denario_ex/prompt_templates.ex b/elixir/denario_ex/lib/denario_ex/prompt_templates.ex new file mode 100644 index 00000000..37609638 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/prompt_templates.ex @@ -0,0 +1,81 @@ +defmodule DenarioEx.PromptTemplates do + @moduledoc false + + @spec idea_maker_prompt(String.t(), String.t(), String.t(), non_neg_integer()) :: String.t() + def idea_maker_prompt(data_description, previous_ideas, criticism, iteration) do + """ + Your goal is to generate a groundbreaking idea for a scientific paper. Generate an original idea given the data description. If available, take into account the criticism provided by another agent about the idea. Please stick to the guidelines mentioned in the data description. + + Iteration #{iteration} + + Data description: + #{data_description} + + Previous ideas: + #{previous_ideas} + + Criticisms: + #{criticism} + + Respond in the following format: + + \\begin{IDEA} + <IDEA> + \\end{IDEA} + + In <IDEA>, put the idea together with its description. Try to be brief in the description. Do not explain how you have addressed any criticism. + """ + end + + @spec idea_hater_prompt(String.t(), String.t(), String.t()) :: String.t() + def idea_hater_prompt(data_description, previous_ideas, current_idea) do + """ + Your goal is to critique an idea. You will be provided with the idea together with the initial data description used to make the idea. Be a harsh critic of the idea. Take into account feasibility, impact and any other factor you think. The goal of your criticisms is to improve the idea. If the idea is not feasible, suggest generating a new idea. 
When providing your feedback, take into account the guidelines in the data description. For instance, if a detailed idea is provided there, try to stick with it. + + Data description: + #{data_description} + + Previous ideas: + #{previous_ideas} + + Current idea: + #{current_idea} + + Respond in the following format: + + \\begin{CRITIC} + <CRITIC> + \\end{CRITIC} + + In <CRITIC>, put your criticism of the idea. Try to be brief in the description. + """ + end + + @spec methods_fast_prompt(String.t(), String.t()) :: String.t() + def methods_fast_prompt(data_description, idea) do + """ + You are provided with a data description and an idea for a scientific paper. Your task is to think about the methods to use in order to carry it out. + + Follow these instructions: + - generate a detailed description of the methodology that will be used to perform the research project. + - The description should clearly outline the steps, techniques, and rationale derived from the exploratory data analysis (EDA). + - The focus should be strictly on the methods and workflow for this specific project to be performed. Do not include any discussion of future directions, future work, project extensions, or limitations. + - The description should be written as if it were a senior researcher explaining to her research assistant how to perform the research necessary for this project. + - Just provide the methods, do not add a sentence at the beginning showing your thinking process. + + Data description: + #{data_description} + + Idea: + #{idea} + + Respond in this format: + + \\begin{METHODS} + <METHODS> + \\end{METHODS} + + In <METHODS>, put the methods you have generated. 
+ """ + end +end diff --git a/elixir/denario_ex/lib/denario_ex/python_executor.ex b/elixir/denario_ex/lib/denario_ex/python_executor.ex new file mode 100644 index 00000000..050b272b --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/python_executor.ex @@ -0,0 +1,67 @@ +defmodule DenarioEx.PythonExecutor do + @moduledoc """ + Executes generated Python scripts inside a workflow workspace. + """ + + @behaviour DenarioEx.CodeExecutor + + @impl true + def execute(code, opts) do + work_dir = Keyword.fetch!(opts, :work_dir) + step_id = Keyword.get(opts, :step_id, "step") + attempt = Keyword.get(opts, :attempt, 1) + python_command = Keyword.get(opts, :python_command, default_python_command()) + script_name = "#{step_id}_attempt_#{attempt}.py" + script_path = Path.join(work_dir, script_name) + + File.mkdir_p!(work_dir) + File.write!(script_path, code) + + try do + case System.cmd(python_command, [script_path], cd: work_dir, stderr_to_stdout: true) do + {output, 0} -> + {:ok, + %{ + "status" => 0, + "output" => output, + "script_path" => script_path, + "generated_files" => collect_generated_files(work_dir) + }} + + {output, status} -> + {:error, + %{ + "status" => status, + "output" => output, + "script_path" => script_path + }} + end + rescue + error -> + {:error, + %{ + "status" => 127, + "output" => Exception.message(error), + "script_path" => script_path + }} + end + end + + defp default_python_command do + repo_python = Path.expand("../../../../.venv/bin/python", __DIR__) + + cond do + File.exists?(repo_python) -> repo_python + System.find_executable("python3") -> "python3" + true -> "python" + end + end + + defp collect_generated_files(work_dir) do + extensions = ["png", "jpg", "jpeg", "pdf", "svg"] + + extensions + |> Enum.flat_map(fn ext -> Path.wildcard(Path.join(work_dir, "**/*.#{ext}")) end) + |> Enum.uniq() + end +end diff --git a/elixir/denario_ex/lib/denario_ex/req_llm_client.ex b/elixir/denario_ex/lib/denario_ex/req_llm_client.ex new file mode 100644 index 
00000000..530bf6b1 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/req_llm_client.ex @@ -0,0 +1,115 @@ +defmodule DenarioEx.ReqLLMClient do + @moduledoc """ + ReqLLM-backed client adapter for Denario text generation. + """ + + @behaviour DenarioEx.LLMClient + + alias ReqLLM.Response + alias ReqLLM.ToolCall + + @impl true + def complete(messages, opts) do + model = Keyword.get(opts, :model_metadata, Keyword.fetch!(opts, :model)) + + case ReqLLM.generate_text(model, messages, build_generation_opts(opts)) do + {:ok, response} -> + case Response.text(response) do + text when is_binary(text) and text != "" -> {:ok, text} + _ -> {:error, {:empty_response, response}} + end + + {:error, error} -> + {:error, error} + end + end + + @impl true + def generate_object(messages, schema, opts) do + model = Keyword.get(opts, :model_metadata, Keyword.fetch!(opts, :model)) + + case ReqLLM.generate_object(model, messages, schema, build_generation_opts(opts)) do + {:ok, response} -> + case extract_object_from_response(response) do + {:ok, object} when is_map(object) -> {:ok, object} + {:ok, _other} -> {:error, {:non_map_object_response, response}} + {:error, error} -> {:error, {:empty_object_response, error, response}} + end + + {:error, error} -> + {:error, error} + end + end + + @doc false + def build_generation_opts(opts) do + output_key = if openai_model?(opts), do: :max_completion_tokens, else: :max_tokens + + opts + |> Keyword.take([:api_key, :temperature, :provider_options]) + |> Keyword.put(output_key, Keyword.fetch!(opts, :max_output_tokens)) + |> maybe_put_openai_json_schema(opts) + end + + @doc false + def extract_object_from_response(response) do + case Response.unwrap_object(response) do + {:ok, object} -> + {:ok, object} + + {:error, _reason} -> + response + |> Response.tool_calls() + |> Enum.find_value(fn + %ToolCall{} = tool_call -> + case ToolCall.to_map(tool_call) do + %{name: "structured_output", arguments: arguments} when is_map(arguments) -> + {:ok, arguments} 
+ + _other -> + nil + end + + %{name: "structured_output", arguments: arguments} when is_map(arguments) -> + {:ok, arguments} + + %{"name" => "structured_output", "arguments" => arguments} when is_map(arguments) -> + {:ok, arguments} + + _other -> + nil + end) + |> case do + nil -> {:error, :no_structured_output_tool_call} + result -> result + end + end + end + + defp maybe_put_openai_json_schema(generation_opts, opts) do + if openai_model?(opts) do + Keyword.put(generation_opts, :provider_options, openai_provider_options(generation_opts)) + else + generation_opts + end + end + + defp openai_provider_options(generation_opts) do + generation_opts + |> Keyword.get(:provider_options, []) + |> Keyword.put(:openai_structured_output_mode, :json_schema) + end + + defp openai_model?(opts) do + case Keyword.get(opts, :model_metadata) do + %{provider: :openai} -> + true + + _ -> + case Keyword.get(opts, :model) do + "openai:" <> _rest -> true + _ -> false + end + end + end +end diff --git a/elixir/denario_ex/lib/denario_ex/research.ex b/elixir/denario_ex/lib/denario_ex/research.ex new file mode 100644 index 00000000..2db99cce --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/research.ex @@ -0,0 +1,30 @@ +defmodule DenarioEx.Research do + @moduledoc """ + Research state persisted inside a Denario session. 
+ """ + + @enforce_keys [] + defstruct data_description: "", + idea: "", + methodology: "", + results: "", + literature: "", + plot_paths: [], + keywords: %{}, + literature_sources: [], + paper_tex_path: nil, + paper_pdf_path: nil + + @type t :: %__MODULE__{ + data_description: String.t(), + idea: String.t(), + methodology: String.t(), + results: String.t(), + literature: String.t(), + plot_paths: [String.t()], + keywords: map() | list(), + literature_sources: [map()], + paper_tex_path: String.t() | nil, + paper_pdf_path: String.t() | nil + } +end diff --git a/elixir/denario_ex/lib/denario_ex/results_workflow.ex b/elixir/denario_ex/lib/denario_ex/results_workflow.ex new file mode 100644 index 00000000..d4cb6708 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/results_workflow.ex @@ -0,0 +1,300 @@ +defmodule DenarioEx.ResultsWorkflow do + @moduledoc false + + alias DenarioEx.{ + AI, + CMBAgentLoop, + LLM, + PythonExecutor, + ReqLLMClient, + Text, + WorkflowPrompts + } + + @engineer_schema %{ + "type" => "object", + "additionalProperties" => false, + "properties" => %{ + "summary" => %{"type" => "string"}, + "notes" => %{"type" => "string"}, + "code" => %{"type" => "string"} + }, + "required" => ["summary", "notes", "code"] + } + + @spec run(DenarioEx.t(), keyword()) :: {:ok, map()} | {:error, term()} + def run(session, opts \\ []) do + client = Keyword.get(opts, :client, ReqLLMClient) + executor = Keyword.get(opts, :executor, PythonExecutor) + involved_agents = Keyword.get(opts, :involved_agents, ["engineer", "researcher"]) + max_n_steps = Keyword.get(opts, :max_n_steps, 6) + max_n_attempts = Keyword.get(opts, :max_n_attempts, 10) + restart_at_step = Keyword.get(opts, :restart_at_step, -1) + hardware_constraints = Keyword.get(opts, :hardware_constraints, "") + experiment_dir = Path.join(session.project_dir, "experiment") + + context = %{ + data_description: session.research.data_description, + idea: session.research.idea, + methodology: 
session.research.methodology, + experiment_dir: experiment_dir + } + + with {:ok, planner_llm} <- LLM.parse(Keyword.get(opts, :planner_model, "gpt-4o")), + {:ok, reviewer_llm} <- LLM.parse(Keyword.get(opts, :plan_reviewer_model, "o3-mini")), + {:ok, engineer_llm} <- LLM.parse(Keyword.get(opts, :engineer_model, "gpt-4.1")), + {:ok, researcher_llm} <- LLM.parse(Keyword.get(opts, :researcher_model, "o3-mini")), + {:ok, formatter_llm} <- LLM.parse(Keyword.get(opts, :formatter_model, "o3-mini")), + {:ok, plan} <- + CMBAgentLoop.plan_and_review("results", context, + client: client, + keys: session.keys, + planner_model: planner_llm, + plan_reviewer_model: reviewer_llm, + allowed_agents: involved_agents, + max_steps: max_n_steps + ), + {:ok, step_outputs} <- + run_steps( + plan.steps, + context, + client, + executor, + session.keys, + engineer_llm, + researcher_llm, + max_n_attempts, + restart_at_step, + hardware_constraints, + experiment_dir + ), + final_prompt <- WorkflowPrompts.results_final_prompt(context, step_outputs), + {:ok, final_text} <- AI.complete(client, final_prompt, formatter_llm, session.keys), + {:ok, results_block} <- Text.extract_block(final_text, "RESULTS") do + log_path = Path.join(experiment_dir, "step_logs.md") + File.mkdir_p!(experiment_dir) + File.write!(log_path, render_step_log(plan.steps, step_outputs)) + + {:ok, + %{ + results: Text.clean_section(results_block, "RESULTS"), + plot_paths: collect_plot_paths(experiment_dir), + plan: plan, + step_outputs: step_outputs, + log_path: log_path + }} + end + end + + defp run_steps( + steps, + context, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + restart_at_step, + hardware_constraints, + experiment_dir + ) do + start_index = if restart_at_step < 0, do: 0, else: restart_at_step + + steps + |> Enum.with_index() + |> Enum.reduce_while({:ok, []}, fn {step, index}, {:ok, outputs} -> + if index < start_index do + {:cont, {:ok, outputs}} + else + case run_single_step( + 
step, + context, + outputs, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + hardware_constraints, + experiment_dir + ) do + {:ok, step_output} -> + {:cont, {:ok, outputs ++ [step_output]}} + + {:error, reason} -> + {:halt, {:error, reason}} + end + end + end) + end + + defp run_single_step( + step, + context, + outputs, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + hardware_constraints, + experiment_dir + ) do + needs_code = truthy?(Text.fetch(step, "needs_code")) + agent = Text.fetch(step, "agent") + + cond do + needs_code or agent == "engineer" -> + run_engineer_step( + step, + context, + outputs, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + hardware_constraints, + experiment_dir, + "" + ) + + true -> + prompt = WorkflowPrompts.results_step_summary_prompt(step, context, outputs, "", "") + + with {:ok, response} <- AI.complete(client, prompt, researcher_llm, keys), + {:ok, block} <- Text.extract_block(response, "STEP_OUTPUT") do + {:ok, + %{ + id: Text.fetch(step, "id"), + agent: agent, + goal: Text.fetch(step, "goal"), + output: Text.clean_section(block, "STEP_OUTPUT"), + execution_output: "" + }} + end + end + end + + defp run_engineer_step( + step, + context, + outputs, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + hardware_constraints, + experiment_dir, + previous_error, + attempt \\ 1 + ) do + prompt = + WorkflowPrompts.results_engineer_prompt( + step, + context, + outputs, + previous_error, + hardware_constraints + ) + + with {:ok, object} <- + AI.generate_object(client, prompt, @engineer_schema, engineer_llm, keys), + code when is_binary(code) and code != "" <- Text.fetch(object, "code") do + case executor.execute(code, + work_dir: experiment_dir, + step_id: Text.fetch(step, "id"), + attempt: attempt + ) do + {:ok, execution} -> + execution_output = Text.fetch(execution, "output") || "" + engineer_summary = 
Text.fetch(object, "summary") || "" + + summary_prompt = + WorkflowPrompts.results_step_summary_prompt( + step, + context, + outputs, + engineer_summary, + execution_output + ) + + with {:ok, response} <- AI.complete(client, summary_prompt, researcher_llm, keys), + {:ok, block} <- Text.extract_block(response, "STEP_OUTPUT") do + {:ok, + %{ + id: Text.fetch(step, "id"), + agent: Text.fetch(step, "agent"), + goal: Text.fetch(step, "goal"), + output: Text.clean_section(block, "STEP_OUTPUT"), + execution_output: execution_output + }} + end + + {:error, execution} when attempt < max_n_attempts -> + run_engineer_step( + step, + context, + outputs, + client, + executor, + keys, + engineer_llm, + researcher_llm, + max_n_attempts, + hardware_constraints, + experiment_dir, + Text.fetch(execution, "output") || inspect(execution), + attempt + 1 + ) + + {:error, execution} -> + {:error, {:results_step_failed, Text.fetch(step, "id"), execution}} + end + else + {:error, reason} -> + {:error, reason} + + _ -> + {:error, {:invalid_engineer_response, step}} + end + end + + defp collect_plot_paths(experiment_dir) do + ["png", "jpg", "jpeg", "pdf", "svg"] + |> Enum.flat_map(fn ext -> Path.wildcard(Path.join(experiment_dir, "**/*.#{ext}")) end) + |> Enum.uniq() + end + + defp render_step_log(steps, outputs) do + """ + # Results Workflow + + ## Planned Steps + #{Enum.map_join(steps, "\n", fn step -> "- #{Text.fetch(step, "id")}: #{Text.fetch(step, "goal")}" end)} + + ## Step Outputs + #{Enum.map_join(outputs, "\n\n", fn output -> """ + ### #{output.id} + Agent: #{output.agent} + Goal: #{output.goal} + + #{output.output} + + Execution output: + ``` + #{output.execution_output} + ``` + """ end)} + """ + end + + defp truthy?(value) when value in [true, "true", "TRUE", 1], do: true + defp truthy?(_value), do: false +end diff --git a/elixir/denario_ex/lib/denario_ex/semantic_scholar.ex b/elixir/denario_ex/lib/denario_ex/semantic_scholar.ex new file mode 100644 index 00000000..2f483b16 --- 
/dev/null +++ b/elixir/denario_ex/lib/denario_ex/semantic_scholar.ex @@ -0,0 +1,48 @@ +defmodule DenarioEx.SemanticScholar do + @moduledoc """ + Semantic Scholar search adapter. + """ + + @behaviour DenarioEx.SemanticScholarClient + + alias DenarioEx.KeyManager + + @base_url "https://api.semanticscholar.org/graph/v1/paper/search" + + @impl true + def search(query, %KeyManager{} = keys, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + + request = + Req.new( + url: @base_url, + headers: maybe_headers(keys), + connect_options: [timeout: 15_000], + receive_timeout: 30_000 + ) + + params = [ + query: query, + limit: limit, + fields: "title,authors,year,abstract,url,paperId,externalIds,openAccessPdf,citationCount" + ] + + case Req.get(request, params: params) do + {:ok, %{status: 200, body: body}} when is_map(body) -> + {:ok, body} + + {:ok, %{status: status, body: body}} -> + {:error, {:semantic_scholar_http_error, status, body}} + + {:error, error} -> + {:error, {:semantic_scholar_request_error, Exception.message(error)}} + end + end + + defp maybe_headers(%KeyManager{semantic_scholar: nil}), do: [] + defp maybe_headers(%KeyManager{semantic_scholar: ""}), do: [] + + defp maybe_headers(%KeyManager{semantic_scholar: key}) do + [{"x-api-key", key}] + end +end diff --git a/elixir/denario_ex/lib/denario_ex/semantic_scholar_client.ex b/elixir/denario_ex/lib/denario_ex/semantic_scholar_client.ex new file mode 100644 index 00000000..3579a8fe --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/semantic_scholar_client.ex @@ -0,0 +1,9 @@ +defmodule DenarioEx.SemanticScholarClient do + @moduledoc """ + Behaviour for Semantic Scholar search adapters. 
+ """ + + alias DenarioEx.KeyManager + + @callback search(String.t(), KeyManager.t(), keyword()) :: {:ok, map()} | {:error, term()} +end diff --git a/elixir/denario_ex/lib/denario_ex/text.ex b/elixir/denario_ex/lib/denario_ex/text.ex new file mode 100644 index 00000000..3361b1d2 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/text.ex @@ -0,0 +1,77 @@ +defmodule DenarioEx.Text do + @moduledoc false + + @spec extract_block(String.t(), String.t()) :: {:ok, String.t()} | {:error, term()} + def extract_block(text, block) when is_binary(text) and is_binary(block) do + regex = + Regex.compile!( + "\\\\begin\\{#{Regex.escape(block)}\\}(.*?)\\\\end\\{#{Regex.escape(block)}\\}", + [:dotall] + ) + + case Regex.run(regex, text, capture: :all_but_first) do + [content] -> {:ok, String.trim(content)} + _ -> {:error, {:missing_block, block}} + end + end + + @spec extract_block_or_fallback(String.t(), String.t()) :: {:ok, String.t()} | {:error, term()} + def extract_block_or_fallback(text, block) when is_binary(text) and is_binary(block) do + case extract_block(text, block) do + {:ok, content} -> + {:ok, clean_section(content, block)} + + {:error, {:missing_block, ^block}} -> + cleaned = clean_section(text, block) + + if cleaned == "" do + {:error, {:missing_block, block}} + else + {:ok, cleaned} + end + + {:error, error} -> + {:error, error} + end + end + + @spec clean_section(String.t(), String.t()) :: String.t() + def clean_section(text, section) do + [ + "\\documentclass{article}", + "\\begin{document}", + "\\end{document}", + "\\section{#{section}}", + "\\section*{#{section}}", + "\\begin{#{section}}", + "\\end{#{section}}", + "\\maketitle", + "", + "", + "", + "<{section}>", + "```latex", + "```", + "\\usepackage{amsmath}" + ] + |> Enum.reduce(text, &String.replace(&2, &1, "")) + |> String.trim() + end + + @spec slugify(String.t()) :: String.t() + def slugify(text) do + text + |> String.downcase() + |> String.replace(~r/[^a-z0-9]+/u, "_") + |> String.trim("_") + |> case 
do + "" -> "item" + slug -> slug + end + end + + @spec fetch(map(), String.t()) :: term() + def fetch(map, key) when is_map(map) and is_binary(key) do + Map.get(map, key) || Map.get(map, String.to_atom(key)) + end +end diff --git a/elixir/denario_ex/lib/denario_ex/workflow_prompts.ex b/elixir/denario_ex/lib/denario_ex/workflow_prompts.ex new file mode 100644 index 00000000..dacafdd5 --- /dev/null +++ b/elixir/denario_ex/lib/denario_ex/workflow_prompts.ex @@ -0,0 +1,451 @@ +defmodule DenarioEx.WorkflowPrompts do + @moduledoc false + + alias DenarioEx.Text + + @spec cmbagent_plan_prompt(String.t(), map(), [String.t()], pos_integer(), String.t() | nil) :: + String.t() + def cmbagent_plan_prompt(task, context, allowed_agents, max_steps, feedback \\ nil) do + """ + [DENARIO_PLAN][#{task}] + Build a concise execution plan for this Denario task. + + Allowed agents: #{Enum.join(allowed_agents, ", ")} + Maximum number of steps: #{max_steps} + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Existing results: + #{Map.get(context, :results, "")} + + Planner feedback from a previous review: + #{feedback || "none"} + + Return a focused plan that uses only the allowed agents. Keep it linear and concrete. + """ + end + + @spec cmbagent_plan_review_prompt(String.t(), map(), map()) :: String.t() + def cmbagent_plan_review_prompt(task, context, plan) do + """ + [DENARIO_PLAN_REVIEW][#{task}] + Review the proposed execution plan for feasibility and focus. + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Proposed plan summary: + #{Map.get(plan, :summary, "")} + + Proposed steps: + #{render_steps(Map.get(plan, :steps, []))} + + Approve only if the plan is concrete, bounded, and aligned with the task artifacts. 
+ """ + end + + @spec cmbagent_step_prompt(String.t(), String.t(), map(), map(), [map()]) :: String.t() + def cmbagent_step_prompt(task, agent, step, context, step_outputs) do + """ + [DENARIO_CMB_STEP][#{agent}] + [TASK][#{task}] + Execute the current Denario workflow step. + + Step id: #{Text.fetch(step, "id")} + Goal: #{Text.fetch(step, "goal")} + Deliverable: #{Text.fetch(step, "deliverable")} + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Existing step outputs: + #{render_step_outputs(step_outputs)} + + Respond exactly in this format: + + \\begin{STEP_OUTPUT} + + \\end{STEP_OUTPUT} + """ + end + + @spec cmbagent_final_prompt(String.t(), map(), [map()]) :: String.t() + def cmbagent_final_prompt("idea", context, step_outputs) do + """ + [DENARIO_CMB_FINAL][idea] + Turn the step outputs into one final research idea. + + Data description: + #{Map.get(context, :data_description, "")} + + Step outputs: + #{render_step_outputs(step_outputs)} + + Respond exactly in this format: + + \\begin{IDEA} + + \\end{IDEA} + """ + end + + def cmbagent_final_prompt("method", context, step_outputs) do + """ + [DENARIO_CMB_FINAL][method] + Turn the step outputs into one final project methodology. + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Step outputs: + #{render_step_outputs(step_outputs)} + + Respond exactly in this format: + + \\begin{METHODS} + + \\end{METHODS} + """ + end + + @spec results_engineer_prompt(map(), map(), [map()], String.t(), String.t()) :: String.t() + def results_engineer_prompt(step, context, step_outputs, previous_error, hardware_constraints) do + """ + [DENARIO_RESULTS_ENGINEER] + Generate Python code for the current experimental step. 
+ + Step id: #{Text.fetch(step, "id")} + Goal: #{Text.fetch(step, "goal")} + Deliverable: #{Text.fetch(step, "deliverable")} + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Previous completed step outputs: + #{render_step_outputs(step_outputs)} + + Hardware constraints: + #{if hardware_constraints == "", do: "none", else: hardware_constraints} + + Previous execution error: #{if previous_error == "", do: "none", else: previous_error} + + Return code that prints all quantitative information needed for a scientific results section. + """ + end + + @spec results_step_summary_prompt(map(), map(), [map()], String.t(), String.t()) :: String.t() + def results_step_summary_prompt(step, context, step_outputs, engineer_summary, execution_output) do + """ + [DENARIO_RESULTS_STEP_SUMMARY] + Summarize the completed experimental step for downstream scientific writing. + + Step id: #{Text.fetch(step, "id")} + Goal: #{Text.fetch(step, "goal")} + Deliverable: #{Text.fetch(step, "deliverable")} + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Previous completed step outputs: + #{render_step_outputs(step_outputs)} + + Engineer summary: + #{engineer_summary} + + Execution output: + #{execution_output} + + Respond exactly in this format: + + \\begin{STEP_OUTPUT} + + \\end{STEP_OUTPUT} + """ + end + + @spec results_final_prompt(map(), [map()]) :: String.t() + def results_final_prompt(context, step_outputs) do + """ + [DENARIO_RESULTS_FINAL] + Write the final results section in markdown using the completed experiment outputs. 
+ + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methodology: + #{Map.get(context, :methodology, "")} + + Step outputs: + #{render_step_outputs(step_outputs)} + + Respond exactly in this format: + + \\begin{RESULTS} + + \\end{RESULTS} + """ + end + + @spec literature_decision_prompt( + map(), + non_neg_integer(), + pos_integer(), + String.t(), + String.t() + ) :: + String.t() + def literature_decision_prompt(context, iteration, max_iterations, messages, papers_text) do + """ + [DENARIO_LITERATURE_DECISION] + Decide whether the idea appears novel, not novel, or whether another search query is needed. + + Round: #{iteration}/#{max_iterations} + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Previous literature reasoning: + #{if messages == "", do: "none", else: messages} + + Papers found this round: + #{if papers_text == "", do: "none", else: papers_text} + + When proposing a query, focus on the scientific problem, modality, domain, and evaluation setup. + Avoid implementation-only terms such as Python, plotting, tutorials, or generic workflow language. + + The first round must always return decision=query. + """ + end + + @spec literature_summary_prompt(map(), String.t(), String.t()) :: String.t() + def literature_summary_prompt(context, decision, messages) do + """ + [DENARIO_LITERATURE_SUMMARY] + Summarize the literature check and explain why the idea is #{decision}. 
+ + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Literature search history: + #{messages} + + Respond exactly in this format: + + \\begin{SUMMARY} + + \\end{SUMMARY} + """ + end + + @spec literature_selection_prompt(map(), String.t(), String.t()) :: String.t() + def literature_selection_prompt(context, query, candidates_text) do + """ + [DENARIO_LITERATURE_SELECT] + Select the papers that are genuinely relevant prior work for the proposed idea. + + Prefer papers that are close in: + - task + - data modality + - domain + - evaluation setup + + Avoid generic surveys or broad background papers unless they are directly relevant. + Select at most 6 papers. + If none are clearly relevant, return an empty list instead of weak matches. + + Data description: + #{Map.get(context, :data_description, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Search query: + #{query} + + Candidate papers: + #{candidates_text} + """ + end + + @spec paper_keywords_prompt(String.t(), map()) :: String.t() + def paper_keywords_prompt(writer, context) do + """ + [DENARIO_PAPER_KEYWORDS] + You are a #{writer}. Generate five concise paper keywords. + + Idea: + #{Map.get(context, :idea, "")} + + Methods: + #{Map.get(context, :methodology, "")} + + Results: + #{Map.get(context, :results, "")} + + Respond exactly in this format: + + \\begin{KEYWORDS} + + \\end{KEYWORDS} + """ + end + + @spec paper_abstract_prompt(String.t(), map(), String.t()) :: String.t() + def paper_abstract_prompt(writer, context, citation_context) do + """ + [DENARIO_PAPER_ABSTRACT] + You are a #{writer}. Write a title and abstract for the paper. 
+ + Idea: + #{Map.get(context, :idea, "")} + + Methods: + #{Map.get(context, :methodology, "")} + + Results: + #{Map.get(context, :results, "")} + + Available citations: + #{if citation_context == "", do: "none", else: citation_context} + """ + end + + @spec paper_section_prompt(String.t(), String.t(), map(), String.t()) :: String.t() + def paper_section_prompt(section, writer, context, citation_context) do + """ + [DENARIO_PAPER_SECTION][#{section}] + You are a #{writer}. Write the #{section} section of the paper in LaTeX. + + Paper title: + #{Map.get(context, :title, "")} + + Paper abstract: + #{Map.get(context, :abstract, "")} + + Idea: + #{Map.get(context, :idea, "")} + + Methods: + #{Map.get(context, :methodology, "")} + + Results: + #{Map.get(context, :results, "")} + + Available citations: + #{if citation_context == "", do: "none", else: citation_context} + + Only use \\cite{...} entries from the available citations above when relevant. + Respond exactly in this format: + + \\begin{#{String.upcase(section)}} + <#{String.upcase(section)}> + \\end{#{String.upcase(section)}} + """ + end + + @spec paper_figure_caption_prompt(String.t(), map(), String.t()) :: String.t() + def paper_figure_caption_prompt(writer, context, plot_name) do + """ + [DENARIO_PAPER_FIGURE_CAPTION] + You are a #{writer}. Write one short LaTeX figure caption. + + Plot name: + #{plot_name} + + Results: + #{Map.get(context, :results, "")} + + Respond exactly in this format: + + \\begin{CAPTION} + + \\end{CAPTION} + """ + end + + @spec paper_refine_results_prompt(String.t(), map(), String.t()) :: String.t() + def paper_refine_results_prompt(writer, context, figure_specs) do + """ + [DENARIO_PAPER_REFINE_RESULTS] + You are a #{writer}. Integrate the provided figure environments into the results section and reference them naturally. 
+ + Current results section: + #{Map.get(context, :paper_results, "")} + + Figure specifications: + #{figure_specs} + + Respond exactly in this format: + + \\begin{RESULTS} + + \\end{RESULTS} + """ + end + + defp render_steps(steps) do + steps + |> Enum.map_join("\n", fn step -> + "- #{Text.fetch(step, "id")}: agent=#{Text.fetch(step, "agent")} goal=#{Text.fetch(step, "goal")} deliverable=#{Text.fetch(step, "deliverable")} needs_code=#{Text.fetch(step, "needs_code")}" + end) + end + + defp render_step_outputs(step_outputs) do + if step_outputs == [] do + "none" + else + Enum.map_join(step_outputs, "\n\n", fn step_output -> + """ + Step #{Map.get(step_output, :id, "")} + Agent: #{Map.get(step_output, :agent, "")} + Goal: #{Map.get(step_output, :goal, "")} + Output: + #{Map.get(step_output, :output, "")} + """ + end) + end + end +end diff --git a/elixir/denario_ex/mix.exs b/elixir/denario_ex/mix.exs new file mode 100644 index 00000000..1e4dceb7 --- /dev/null +++ b/elixir/denario_ex/mix.exs @@ -0,0 +1,32 @@ +defmodule DenarioEx.MixProject do + use Mix.Project + + def project do + [ + app: :denario_ex, + version: "0.1.0", + elixir: "~> 1.18", + start_permanent: Mix.env() == :prod, + description: "Initial Elixir port of the Denario research workflow using ReqLLM and LLMDB", + deps: deps() + ] + end + + def application do + [ + extra_applications: [:logger, :ssl, :inets], + mod: {DenarioEx.Application, []} + ] + end + + defp deps do + [ + {:req, "~> 0.5.17"}, + {:llm_db, "~> 2026.3"}, + {:req_llm, + git: "https://github.com/jmanhype/req_llm.git", + ref: "ee00b4553cd6823b48c1045b825565855a77a93b", + override: true} + ] + end +end diff --git a/elixir/denario_ex/mix.lock b/elixir/denario_ex/mix.lock new file mode 100644 index 00000000..629d4f12 --- /dev/null +++ b/elixir/denario_ex/mix.lock @@ -0,0 +1,27 @@ +%{ + "abnf_parsec": {:hex, :abnf_parsec, "2.1.0", "c4e88d5d089f1698297c0daced12be1fb404e6e577ecf261313ebba5477941f9", [:mix], [{:nimble_parsec, "~> 1.4", [hex: 
:nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e0ed6290c7cc7e5020c006d1003520390c9bdd20f7c3f776bd49bfe3c5cd362a"}, + "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, + "dotenvy": {:hex, :dotenvy, "1.1.1", "00e318f3c51de9fafc4b48598447e386f19204dc18ca69886905bb8f8b08b667", [:mix], [], "hexpm", "c8269471b5701e9e56dc86509c1199ded2b33dce088c3471afcfef7839766d8e"}, + "ex_aws_auth": {:hex, :ex_aws_auth, "1.3.1", "3963992d6f7cb251b53573603c3615cec70c3f4d86199fdb865ff440295ef7a4", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: true]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: true]}], "hexpm", "025793aa08fa419aabdb652db60edbdb2e12346bd447988a1bb5854c4dd64903"}, + "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"}, + "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, + "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, + "jason": {:hex, :jason, "1.4.4", 
"b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "jsv": {:hex, :jsv, "0.16.0", "b29e44da822db9f52010edf9db75b58f016434d9862bd76d18aec7a4712cf318", [:mix], [{:abnf_parsec, "~> 2.0", [hex: :abnf_parsec, repo: "hexpm", optional: false]}, {:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:idna, "~> 6.1", [hex: :idna, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_options, "~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:poison, ">= 3.0.0 and < 7.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:texture, "~> 0.3", [hex: :texture, repo: "hexpm", optional: false]}], "hexpm", "a4b2aaf5f62641640519da5de479e5704f6f7c8b6e323692bf71b4800d7b69ee"}, + "llm_db": {:hex, :llm_db, "2026.3.2", "79eee95abf130e83de7fa22403f017c178b6c571feb5216bba4a417fdabca928", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:dotenvy, "~> 1.1", [hex: :dotenvy, repo: "hexpm", optional: false]}, {:igniter, "~> 0.7", [hex: :igniter, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.7", [hex: :toml, repo: "hexpm", optional: false]}, {:zoi, "~> 0.10", [hex: :zoi, repo: "hexpm", optional: false]}], "hexpm", "8181e192a8296decc46c8c56f4a2f95d7d9cdd9f11d343f0fe1c8cae5ec2090c"}, + "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, + "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, 
{:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"}, + "req_llm": {:git, "https://github.com/jmanhype/req_llm.git", "ee00b4553cd6823b48c1045b825565855a77a93b", [ref: "ee00b4553cd6823b48c1045b825565855a77a93b"]}, + "server_sent_events": {:hex, :server_sent_events, "0.2.1", "f83b34f01241302a8bf451efc8dde3a36c533d5715463c31c653f3db8695f636", [:mix], [], "hexpm", "c8099ce4f9acd610eb7c8e0f89dba7d5d1c13300ea9884b0bd8662401d3cf96f"}, + "splode": {:hex, :splode, "0.3.0", "ff8effecc509a51245df2f864ec78d849248647c37a75886033e3b1a53ca9470", [:mix], [], "hexpm", 
"73cfd0892d7316d6f2c93e6e8784bd6e137b2aa38443de52fd0a25171d106d81"}, + "telemetry": {:hex, :telemetry, "1.4.1", "ab6de178e2b29b58e8256b92b382ea3f590a47152ca3651ea857a6cae05ac423", [:rebar3], [], "hexpm", "2172e05a27531d3d31dd9782841065c50dd5c3c7699d95266b2edd54c2dafa1c"}, + "texture": {:hex, :texture, "0.3.2", "ca68fc2804ce05ffe33cded85d69b5ebadb0828233227accfe3c574e34fd4e3f", [:mix], [{:abnf_parsec, "~> 2.0", [hex: :abnf_parsec, repo: "hexpm", optional: false]}], "hexpm", "43bb1069d9cf4309ed6f0ff65ade787a76f986b821ab29d1c96b5b5102cb769c"}, + "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, + "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.1", "a48703a25c170eedadca83b11e88985af08d35f37c6f664d6dcfb106a97782fc", [:rebar3], [], "hexpm", "b3a917854ce3ae233619744ad1e0102e05673136776fb2fa76234f3e03b23642"}, + "uniq": {:hex, :uniq, "0.6.2", "51846518c037134c08bc5b773468007b155e543d53c8b39bafe95b0af487e406", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm", "95aa2a41ea331ef0a52d8ed12d3e730ef9af9dbc30f40646e6af334fbd7bc0fc"}, + "zoi": {:hex, :zoi, "0.17.2", "2ecc5ad4807afe920d30728a12ac9355d7b0796bd452e3e66335e4219caf6daa", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.1", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "c2fcb9f6b701f3333081dbd39df90bcb8f0179e89eb5b9587296eb9d24243beb"}, +} diff --git a/elixir/denario_ex/test/denario_ex_test.exs b/elixir/denario_ex/test/denario_ex_test.exs new file mode 100644 index 00000000..fc1a8f4f --- /dev/null +++ b/elixir/denario_ex/test/denario_ex_test.exs @@ -0,0 +1,109 @@ +defmodule DenarioExTest do + use ExUnit.Case, async: true + + alias DenarioEx + + defmodule FakeClient do + @behaviour DenarioEx.LLMClient + + @impl true + def complete([%{role: "user", 
content: prompt}], opts) do + send(self(), {:llm_call, prompt, opts[:model]}) + + cond do + String.contains?(prompt, "Current idea:") -> + {:ok, "\\begin{CRITIC}Make the idea more concrete and measurable.\\end{CRITIC}"} + + String.contains?(prompt, "Iteration 1") -> + {:ok, + "\\begin{IDEA}A concrete sensor robustness study with measurable outcomes.\\end{IDEA}"} + + String.contains?(prompt, "Iteration 0") -> + {:ok, "\\begin{IDEA}A broad sensor-analysis idea.\\end{IDEA}"} + + String.contains?(prompt, "\\begin{METHODS}") -> + {:ok, + "\\begin{METHODS}1. Load the dataset.\\n2. Compare the proposed signals.\\end{METHODS}"} + + true -> + {:error, {:unexpected_prompt, prompt}} + end + end + + @impl true + def generate_object(_messages, _schema, _opts) do + {:error, :not_used_in_this_test} + end + end + + setup do + project_dir = Path.join(System.tmp_dir!(), "denario_ex_#{System.unique_integer([:positive])}") + on_exit(fn -> File.rm_rf(project_dir) end) + {:ok, project_dir: project_dir} + end + + test "new/1 creates the expected project directories and persists markdown fields", %{ + project_dir: project_dir + } do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert File.dir?(Path.join(project_dir, "input_files")) + assert File.dir?(Path.join(project_dir, "input_files/plots")) + + assert {:ok, denario} = DenarioEx.set_data_description(denario, "Research description") + assert {:ok, denario} = DenarioEx.set_idea(denario, "Research idea") + assert {:ok, denario} = DenarioEx.set_method(denario, "Research method") + assert {:ok, _denario} = DenarioEx.set_results(denario, "Research results") + + assert File.read!(Path.join(project_dir, "input_files/data_description.md")) == + "Research description" + + assert File.read!(Path.join(project_dir, "input_files/idea.md")) == "Research idea" + assert File.read!(Path.join(project_dir, "input_files/methods.md")) == "Research method" + assert File.read!(Path.join(project_dir, 
"input_files/results.md")) == "Research results" + + assert {:ok, reloaded} = DenarioEx.new(project_dir: project_dir) + assert reloaded.research.data_description == "Research description" + assert reloaded.research.idea == "Research idea" + assert reloaded.research.methodology == "Research method" + assert reloaded.research.results == "Research results" + end + + test "fast workflows generate idea and methods through a pluggable client", %{ + project_dir: project_dir + } do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description( + denario, + "Analyze a small hypothetical lab sensor dataset and propose one paper idea." + ) + + assert {:ok, denario} = + DenarioEx.get_idea_fast( + denario, + client: FakeClient, + llm: "gpt-4.1-mini", + iterations: 2 + ) + + assert denario.research.idea == "A concrete sensor robustness study with measurable outcomes." + assert File.read!(Path.join(project_dir, "input_files/idea.md")) == denario.research.idea + + assert_received {:llm_call, prompt, "openai:gpt-4.1-mini"} + assert String.contains?(prompt, "groundbreaking idea") + + assert {:ok, denario} = + DenarioEx.get_method_fast( + denario, + client: FakeClient, + llm: "gpt-4.1-mini" + ) + + assert String.contains?(denario.research.methodology, "Load the dataset") + + assert File.read!(Path.join(project_dir, "input_files/methods.md")) == + denario.research.methodology + end +end diff --git a/elixir/denario_ex/test/denario_ex_workflows_test.exs b/elixir/denario_ex/test/denario_ex_workflows_test.exs new file mode 100644 index 00000000..dbe048a0 --- /dev/null +++ b/elixir/denario_ex/test/denario_ex_workflows_test.exs @@ -0,0 +1,545 @@ +defmodule DenarioExWorkflowsTest do + use ExUnit.Case, async: true + + alias DenarioEx + + defmodule FakeClient do + @behaviour DenarioEx.LLMClient + + @impl true + def complete([%{role: "user", content: prompt}], opts) do + send(self(), {:llm_text, prompt, 
opts[:model]}) + + cond do + String.contains?(prompt, "[DENARIO_CMB_STEP][idea_maker]") and + String.contains?(prompt, "Select the strongest idea") -> + {:ok, + "\\begin{STEP_OUTPUT}Adaptive urban microclimate anomaly detection using dense low-cost sensor arrays and interpretable time-series models.\\end{STEP_OUTPUT}"} + + String.contains?(prompt, "[DENARIO_CMB_STEP][idea_hater]") -> + {:ok, + "\\begin{STEP_OUTPUT}Focus the scope on one measurable anomaly-detection task and make the dataset assumptions explicit.\\end{STEP_OUTPUT}"} + + String.contains?(prompt, "[DENARIO_CMB_STEP][idea_maker]") -> + {:ok, + "\\begin{STEP_OUTPUT}Generate several candidate ideas around urban microclimate anomaly detection and pick the one with the clearest measurable outcome.\\end{STEP_OUTPUT}"} + + String.contains?(prompt, "[DENARIO_CMB_FINAL][idea]") -> + {:ok, + "\\begin{IDEA}Adaptive urban microclimate anomaly detection using dense low-cost sensor arrays and interpretable time-series models.\\end{IDEA}"} + + String.contains?(prompt, "[DENARIO_CMB_STEP][researcher]") and + String.contains?(prompt, "[TASK][method]") -> + {:ok, + "\\begin{STEP_OUTPUT}Define preprocessing, temporal validation splits, anomaly scoring, ablations, and calibration checks for the selected microclimate dataset.\\end{STEP_OUTPUT}"} + + String.contains?(prompt, "[DENARIO_CMB_FINAL][method]") -> + {:ok, + "\\begin{METHODS}1. Clean and align the sensor streams.\\n2. Build temporal train-validation-test splits.\\n3. Train an interpretable anomaly detector and compare against baselines.\\n4. 
Report quantitative metrics and calibration diagnostics.\\end{METHODS}"} + + String.contains?(prompt, "[DENARIO_RESULTS_STEP_SUMMARY]") -> + {:ok, + "\\begin{STEP_OUTPUT}The engineer produced reproducible summary statistics and saved a plot for the anomaly-score distribution.\\end{STEP_OUTPUT}"} + + String.contains?(prompt, "[DENARIO_RESULTS_FINAL]") -> + {:ok, + "\\begin{RESULTS}The anomaly detector achieved stable performance across temporal splits and produced a clear anomaly-score distribution plot. Quantitatively, the run shows strong separation between nominal and anomalous regimes, supporting the project idea.\\end{RESULTS}"} + + String.contains?(prompt, "[DENARIO_LITERATURE_SUMMARY]") -> + {:ok, + "\\begin{SUMMARY}The searched papers overlap with environmental anomaly monitoring broadly, but none combines dense low-cost urban microclimate sensing with the specific interpretable anomaly-detection framing proposed here. The idea can be considered novel relative to the retrieved work.\\end{SUMMARY}"} + + String.contains?(prompt, "[DENARIO_PAPER_KEYWORDS]") -> + {:ok, + "\\begin{KEYWORDS}anomaly detection, urban climate, sensor networks, interpretable models, time-series analysis\\end{KEYWORDS}"} + + String.contains?(prompt, "[DENARIO_PAPER_SECTION][Introduction]") -> + {:ok, + "\\begin{INTRODUCTION}Urban microclimate monitoring remains difficult because low-cost sensors drift, dense deployments produce noisy streams, and anomalies are rare. This paper frames the problem as interpretable anomaly detection over temporally aligned sensor networks and motivates why stable detection matters for urban operations.\\end{INTRODUCTION}"} + + String.contains?(prompt, "[DENARIO_PAPER_SECTION][Methods]") -> + {:ok, + "\\begin{METHODS}We align, clean, and calibrate the sensor streams before training an interpretable anomaly detector over temporally blocked splits. 
We compare against simple baselines and evaluate discrimination and calibration metrics.\\end{METHODS}"} + + String.contains?(prompt, "[DENARIO_PAPER_SECTION][Results]") -> + {:ok, + "\\begin{RESULTS}The proposed detector provides consistent separation between nominal and anomalous conditions and remains stable across blocked temporal evaluation. The resulting distribution plot highlights the operating region used for interpretation.\\end{RESULTS}"} + + String.contains?(prompt, "[DENARIO_PAPER_SECTION][Conclusions]") -> + {:ok, + "\\begin{CONCLUSIONS}Interpretable anomaly detection over dense urban microclimate sensor streams is feasible with careful temporal evaluation and calibration-aware reporting. The generated workflow supports reproducible environmental monitoring studies.\\end{CONCLUSIONS}"} + + String.contains?(prompt, "[DENARIO_PAPER_FIGURE_CAPTION]") -> + {:ok, + "\\begin{CAPTION}Distribution of anomaly scores across the evaluation split, highlighting separation between nominal and anomalous conditions.\\end{CAPTION}"} + + String.contains?(prompt, "[DENARIO_PAPER_REFINE_RESULTS]") -> + {:ok, + "\\begin{RESULTS}The proposed detector provides consistent separation between nominal and anomalous conditions and remains stable across blocked temporal evaluation. 
Figure \\ref{fig:anomaly_scores} visualizes the anomaly-score distribution used in the interpretation.\\begin{figure}[t]\\centering\\includegraphics[width=0.48\\textwidth]{anomaly_scores.png}\\caption{Distribution of anomaly scores across the evaluation split, highlighting separation between nominal and anomalous conditions.}\\label{fig:anomaly_scores}\\end{figure}\\end{RESULTS}"} + + true -> + {:error, {:unexpected_prompt, prompt}} + end + end + + @impl true + def generate_object([%{role: "user", content: prompt}], _schema, opts) do + send(self(), {:llm_object, prompt, opts[:model]}) + + cond do + String.contains?(prompt, "[DENARIO_PLAN][idea]") -> + {:ok, + %{ + "summary" => "Develop, critique, and finalize one research idea.", + "steps" => [ + %{ + "id" => "idea_1", + "agent" => "idea_maker", + "goal" => "Draft candidate ideas", + "deliverable" => "Candidate idea set", + "needs_code" => false + }, + %{ + "id" => "idea_2", + "agent" => "idea_hater", + "goal" => "Critique the candidate idea", + "deliverable" => "Actionable criticism", + "needs_code" => false + }, + %{ + "id" => "idea_3", + "agent" => "idea_maker", + "goal" => "Select the strongest idea", + "deliverable" => "Final idea draft", + "needs_code" => false + } + ] + }} + + String.contains?(prompt, "[DENARIO_PLAN][method]") -> + {:ok, + %{ + "summary" => "Turn the idea into an executable methodology.", + "steps" => [ + %{ + "id" => "method_1", + "agent" => "researcher", + "goal" => "Design the methodology", + "deliverable" => "Detailed methodological draft", + "needs_code" => false + } + ] + }} + + String.contains?(prompt, "[DENARIO_PLAN][results]") -> + {:ok, + %{ + "summary" => "Run one engineering step and then synthesize the findings.", + "steps" => [ + %{ + "id" => "results_1", + "agent" => "engineer", + "goal" => "Run the core analysis and generate a diagnostic plot", + "deliverable" => "Reproducible code, console output, and one plot", + "needs_code" => true + }, + %{ + "id" => "results_2", + 
"agent" => "researcher", + "goal" => "Summarize the quantitative findings", + "deliverable" => "Narrative summary of the evidence", + "needs_code" => false + } + ] + }} + + String.contains?(prompt, "[DENARIO_PLAN_REVIEW]") -> + {:ok, %{"approved" => true, "feedback" => "The plan is focused and feasible."}} + + String.contains?(prompt, "[DENARIO_RESULTS_ENGINEER]") and + String.contains?(prompt, "Previous execution error: none") -> + {:ok, + %{ + "summary" => "Generate a first analysis script.", + "notes" => "First attempt.", + "code" => "# FAIL_ONCE\nprint('first attempt')\n" + }} + + String.contains?(prompt, "[DENARIO_RESULTS_ENGINEER]") -> + {:ok, + %{ + "summary" => "Generate the corrected analysis script.", + "notes" => "Second attempt with plotting.", + "code" => "# SUCCESS\nprint('mean_score=0.91')\n" + }} + + String.contains?(prompt, "[DENARIO_LITERATURE_DECISION]") and + String.contains?(prompt, "Round: 0") -> + {:ok, + %{ + "reason" => + "The first round should broaden into a focused literature search before making a novelty claim.", + "decision" => "query", + "query" => "urban microclimate anomaly detection low-cost sensor network" + }} + + String.contains?(prompt, "[DENARIO_LITERATURE_DECISION]") -> + {:ok, + %{ + "reason" => + "The retrieved papers discuss environmental anomaly monitoring and urban sensing, but none matches the exact interpretable anomaly-detection framing or evaluation setup proposed here.", + "decision" => "novel", + "query" => "" + }} + + String.contains?(prompt, "[DENARIO_LITERATURE_SELECT]") and + String.contains?(prompt, "Paper ID: openalex-123") -> + {:ok, + %{ + "selected_paper_ids" => ["openalex-123"], + "rationale" => "This is the closest prior work in task and domain." + }} + + String.contains?(prompt, "[DENARIO_LITERATURE_SELECT]") -> + {:ok, + %{ + "selected_paper_ids" => ["paper-123"], + "rationale" => "This paper is the closest prior work to the proposed idea." 
+ }} + + String.contains?(prompt, "[DENARIO_PAPER_ABSTRACT]") -> + {:ok, + %{ + "title" => "Interpretable Anomaly Detection for Urban Microclimate Sensor Networks", + "abstract" => + "We study interpretable anomaly detection over dense urban microclimate sensor networks. Using temporally blocked evaluation and calibration-aware reporting, we show that the proposed workflow separates anomalous from nominal conditions while remaining operationally interpretable." + }} + + true -> + {:error, {:unexpected_object_prompt, prompt}} + end + end + end + + defmodule FakeExecutor do + @behaviour DenarioEx.CodeExecutor + + @impl true + def execute(code, opts) do + step_id = Keyword.fetch!(opts, :step_id) + work_dir = Keyword.fetch!(opts, :work_dir) + File.mkdir_p!(work_dir) + + failure_key = {:executor_failed_once, step_id} + + cond do + String.contains?(code, "FAIL_ONCE") and Process.get(failure_key) != true -> + Process.put(failure_key, true) + + {:error, + %{ + "status" => 1, + "output" => "Traceback: simulated failure" + }} + + true -> + plot_path = Path.join(work_dir, "anomaly_scores.png") + File.write!(plot_path, "fake png bytes") + + {:ok, + %{ + "status" => 0, + "output" => "mean_score=0.91\nsaved_plot=anomaly_scores.png", + "generated_files" => [plot_path] + }} + end + end + end + + defmodule FakeSemanticScholarClient do + @behaviour DenarioEx.SemanticScholarClient + + @impl true + def search(query, _keys, _opts) do + send(self(), {:semantic_scholar_query, query}) + + {:ok, + %{ + "total" => 1, + "data" => [ + %{ + "paperId" => "paper-123", + "title" => "Urban sensing for anomaly monitoring", + "year" => 2024, + "citationCount" => 37, + "abstract" => + "A broad study of anomaly monitoring in urban environmental sensor systems.", + "url" => "https://example.com/paper-123", + "authors" => [%{"name" => "A. Researcher"}, %{"name" => "B. 
Scientist"}], + "externalIds" => %{"ArXiv" => "2401.12345"}, + "openAccessPdf" => %{"url" => "https://example.com/paper-123.pdf"} + } + ] + }} + end + end + + defmodule RateLimitedSemanticScholarClient do + @behaviour DenarioEx.SemanticScholarClient + + @impl true + def search(query, _keys, _opts) do + send(self(), {:semantic_scholar_query, query}) + {:error, {:semantic_scholar_http_error, 429, %{"message" => "Too Many Requests"}}} + end + end + + defmodule FakeOpenAlexClient do + @behaviour DenarioEx.SemanticScholarClient + + @impl true + def search(query, _keys, _opts) do + send(self(), {:openalex_query, query}) + + {:ok, + %{ + "total" => 1, + "data" => [ + %{ + "paperId" => "openalex-123", + "title" => "OpenAlex urban sensing anomaly paper", + "year" => 2024, + "citationCount" => 18, + "relevanceScore" => 12.4, + "abstract" => + "A public-index paper about anomaly monitoring in urban environmental sensing.", + "url" => "https://openalex.org/W123", + "authors" => [%{"name" => "C. Author"}], + "externalIds" => %{"DOI" => "https://doi.org/10.1234/example"}, + "openAccessPdf" => %{"url" => "https://example.com/openalex-123.pdf"} + } + ] + }} + end + end + + setup do + project_dir = + Path.join(System.tmp_dir!(), "denario_ex_workflows_#{System.unique_integer([:positive])}") + + on_exit(fn -> File.rm_rf(project_dir) end) + {:ok, project_dir: project_dir} + end + + test "cmbagent loop ports idea and method generation", %{project_dir: project_dir} do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description( + denario, + "Analyze dense low-cost urban microclimate sensor data and build one concise research direction." 
+ ) + + assert {:ok, denario} = + DenarioEx.get_idea_cmbagent( + denario, + client: FakeClient, + planner_model: "openai:gpt-4.1-mini", + plan_reviewer_model: "openai:gpt-4.1-mini", + idea_maker_model: "openai:gpt-4.1-mini", + idea_hater_model: "openai:gpt-4.1-mini" + ) + + assert String.contains?(denario.research.idea, "urban microclimate anomaly detection") + assert File.read!(Path.join(project_dir, "input_files/idea.md")) == denario.research.idea + + assert {:ok, denario} = + DenarioEx.get_method_cmbagent( + denario, + client: FakeClient, + planner_model: "openai:gpt-4.1-mini", + plan_reviewer_model: "openai:gpt-4.1-mini", + method_generator_model: "openai:gpt-4.1-mini" + ) + + assert String.contains?(denario.research.methodology, "temporal train-validation-test splits") + + assert File.read!(Path.join(project_dir, "input_files/methods.md")) == + denario.research.methodology + end + + test "get_results runs the planning loop, retries code execution, and persists plots", %{ + project_dir: project_dir + } do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description(denario, "Small synthetic sensor dataset.") + + assert {:ok, denario} = DenarioEx.set_idea(denario, "Detect anomalies in sensor readings.") + + assert {:ok, denario} = + DenarioEx.set_method( + denario, + "Train an interpretable anomaly detector and produce one diagnostic figure." 
+ ) + + assert {:ok, denario} = + DenarioEx.get_results( + denario, + client: FakeClient, + executor: FakeExecutor, + planner_model: "openai:gpt-4.1-mini", + plan_reviewer_model: "openai:gpt-4.1-mini", + engineer_model: "openai:gpt-4.1-mini", + researcher_model: "openai:gpt-4.1-mini", + formatter_model: "openai:gpt-4.1-mini", + max_n_attempts: 2 + ) + + assert String.contains?(denario.research.results, "stable performance") + + assert File.read!(Path.join(project_dir, "input_files/results.md")) == + denario.research.results + + assert Enum.any?(denario.research.plot_paths, &String.ends_with?(&1, "anomaly_scores.png")) + assert File.exists?(Path.join(project_dir, "input_files/plots/anomaly_scores.png")) + end + + test "literature checking queries semantic scholar and writes literature.md", %{ + project_dir: project_dir + } do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description(denario, "Urban sensor anomaly project.") + + assert {:ok, denario} = + DenarioEx.set_idea( + denario, + "Interpretable anomaly detection for urban microclimate sensor networks." 
+ ) + + assert {:ok, denario} = + DenarioEx.check_idea( + denario, + client: FakeClient, + semantic_scholar_client: FakeSemanticScholarClient, + llm: "openai:gpt-4.1-mini", + max_iterations: 3 + ) + + assert String.contains?(denario.research.literature, "can be considered novel") + + assert File.read!(Path.join(project_dir, "input_files/literature.md")) == + denario.research.literature + + assert length(denario.research.literature_sources) == 1 + assert hd(denario.research.literature_sources)["paperId"] == "paper-123" + + assert_received {:semantic_scholar_query, + "urban microclimate anomaly detection low-cost sensor network"} + end + + test "literature checking falls back to OpenAlex when Semantic Scholar is rate-limited", %{ + project_dir: project_dir + } do + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description(denario, "Urban sensor anomaly project.") + + assert {:ok, denario} = + DenarioEx.set_idea( + denario, + "Interpretable anomaly detection for urban microclimate sensor networks." 
+ ) + + assert {:ok, denario} = + DenarioEx.check_idea( + denario, + client: FakeClient, + semantic_scholar_client: RateLimitedSemanticScholarClient, + fallback_literature_client: FakeOpenAlexClient, + llm: "openai:gpt-4.1-mini", + max_iterations: 3 + ) + + assert String.contains?(denario.research.literature, "can be considered novel") + assert length(denario.research.literature_sources) == 1 + + assert_received {:semantic_scholar_query, + "urban microclimate anomaly detection low-cost sensor network"} + + assert_received {:openalex_query, + "urban microclimate anomaly detection low-cost sensor network"} + end + + test "paper generation writes a journal-aware LaTeX draft from project artifacts", %{ + project_dir: project_dir + } do + plots_dir = Path.join(project_dir, "input_files/plots") + + assert {:ok, denario} = DenarioEx.new(project_dir: project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description(denario, "Urban sensor anomaly project.") + + assert {:ok, denario} = + DenarioEx.set_idea( + denario, + "Interpretable anomaly detection for urban microclimate sensor networks." + ) + + assert {:ok, denario} = + DenarioEx.set_method( + denario, + "Align the sensor streams, train the anomaly detector, and evaluate on blocked temporal splits." + ) + + assert {:ok, denario} = + DenarioEx.set_results( + denario, + "The detector separates anomalous from nominal periods and yields a diagnostic score distribution." 
defmodule DenarioEx.KeyManagerTest do
  # async: false because the tests mutate process-wide environment
  # variables that other tests could otherwise observe.
  use ExUnit.Case, async: false

  alias DenarioEx.KeyManager

  setup do
    # Snapshot the current values of every env var the KeyManager reads,
    # mapping each name to its value (or nil when unset).
    original =
      for name <- [
            "OPENAI_API_KEY",
            "GOOGLE_API_KEY",
            "GEMINI_API_KEY",
            "ANTHROPIC_API_KEY",
            "PERPLEXITY_API_KEY",
            "SEMANTIC_SCHOLAR_KEY",
            "SEMANTIC_SCHOLAR_API_KEY",
            "S2_API_KEY"
          ],
          into: %{} do
        {name, System.get_env(name)}
      end

    # Restore the snapshot after each test: unset vars that were unset,
    # re-set the rest to their original values.
    on_exit(fn ->
      Enum.each(original, fn
        {name, nil} -> System.delete_env(name)
        {name, value} -> System.put_env(name, value)
      end)
    end)

    # Start every test from a clean slate with all relevant vars unset.
    Enum.each(Map.keys(original), &System.delete_env/1)
    :ok
  end

  test "from_env reads common provider env vars and semantic scholar aliases" do
    System.put_env("OPENAI_API_KEY", "openai-key")
    System.put_env("GEMINI_API_KEY", "gemini-key")
    System.put_env("SEMANTIC_SCHOLAR_API_KEY", "s2-key")

    keys = KeyManager.from_env()

    assert keys.openai == "openai-key"
    assert keys.gemini == "gemini-key"
    # SEMANTIC_SCHOLAR_API_KEY is an accepted alias for the primary name.
    assert keys.semantic_scholar == "s2-key"
  end

  test "from_env prefers SEMANTIC_SCHOLAR_KEY over fallback aliases" do
    System.put_env("SEMANTIC_SCHOLAR_KEY", "primary-key")
    System.put_env("SEMANTIC_SCHOLAR_API_KEY", "secondary-key")
    System.put_env("S2_API_KEY", "tertiary-key")

    keys = KeyManager.from_env()

    # Alias precedence: SEMANTIC_SCHOLAR_KEY wins over the other two.
    assert keys.semantic_scholar == "primary-key"
  end
end
project_dir, clear_project_dir: true) + + assert {:ok, denario} = + DenarioEx.set_data_description(denario, "Synthetic anomaly-score study.") + + assert {:ok, denario} = DenarioEx.set_idea(denario, "Interpret synthetic anomaly scores.") + + assert {:ok, denario} = + DenarioEx.check_idea( + denario, + client: FakeClient, + semantic_scholar_client: RateLimitedSemanticScholarClient, + fallback_literature_client: FailingFallbackClient, + llm: "openai:gpt-4.1-mini", + max_iterations: 2 + ) + + assert String.contains?(denario.research.literature, "Idea literature search unavailable") + assert denario.research.literature_sources == [] + end +end diff --git a/elixir/denario_ex/test/req_llm_client_test.exs b/elixir/denario_ex/test/req_llm_client_test.exs new file mode 100644 index 00000000..f5ffa758 --- /dev/null +++ b/elixir/denario_ex/test/req_llm_client_test.exs @@ -0,0 +1,81 @@ +defmodule DenarioEx.ReqLLMClientTest do + use ExUnit.Case, async: true + + import ExUnit.CaptureLog + + alias DenarioEx.ReqLLMClient + alias ReqLLM.Message + alias ReqLLM.Provider.Options + alias ReqLLM.Providers.OpenAI + alias ReqLLM.Response + alias ReqLLM.ToolCall + + test "extract_object_from_response returns structured output from ReqLLM tool calls" do + response = %Response{ + id: "resp_test", + model: "gpt-4.1-mini-2025-04-14", + context: %ReqLLM.Context{messages: []}, + message: %Message{ + role: :assistant, + content: [], + tool_calls: [ + ToolCall.new("call_test", "structured_output", ~s({"approved":true,"feedback":"ok"})) + ], + metadata: %{} + }, + object: nil, + stream?: false, + stream: nil, + usage: nil, + finish_reason: :tool_calls, + provider_meta: %{}, + error: nil + } + + assert {:ok, %{"approved" => true, "feedback" => "ok"}} = + ReqLLMClient.extract_object_from_response(response) + end + + test "build_generation_opts uses max_completion_tokens for openai models" do + opts = + ReqLLMClient.build_generation_opts( + model: "openai:gpt-4.1-mini", + max_output_tokens: 321, + 
temperature: 0.2 + ) + + assert opts[:max_completion_tokens] == 321 + refute Keyword.has_key?(opts, :max_tokens) + assert opts[:provider_options][:openai_structured_output_mode] == :json_schema + end + + test "build_generation_opts keeps max_tokens for non-openai models" do + opts = + ReqLLMClient.build_generation_opts( + model: "anthropic:claude-sonnet-4-5", + max_output_tokens: 321, + temperature: 0.2 + ) + + assert opts[:max_tokens] == 321 + refute Keyword.has_key?(opts, :max_completion_tokens) + end + + test "options processing does not synthesize max_tokens when max_completion_tokens is present" do + {:ok, model} = ReqLLM.model("openai:gpt-4.1-mini") + + log = + capture_log(fn -> + assert {:ok, processed} = + Options.process(OpenAI, :object, model, + max_completion_tokens: 123, + operation: :object + ) + + assert processed[:max_completion_tokens] == 123 + refute Keyword.has_key?(processed, :max_tokens) + end) + + refute log =~ "Renamed :max_tokens to :max_completion_tokens" + end +end diff --git a/elixir/denario_ex/test/test_helper.exs b/elixir/denario_ex/test/test_helper.exs new file mode 100644 index 00000000..869559e7 --- /dev/null +++ b/elixir/denario_ex/test/test_helper.exs @@ -0,0 +1 @@ +ExUnit.start() diff --git a/elixir/denario_ex/test/text_test.exs b/elixir/denario_ex/test/text_test.exs new file mode 100644 index 00000000..2baa05a9 --- /dev/null +++ b/elixir/denario_ex/test/text_test.exs @@ -0,0 +1,15 @@ +defmodule DenarioEx.TextTest do + use ExUnit.Case, async: true + + alias DenarioEx.Text + + test "extract_block_or_fallback returns wrapped content when block markers exist" do + assert {:ok, "hello"} = + Text.extract_block_or_fallback("\\begin{SUMMARY}hello\\end{SUMMARY}", "SUMMARY") + end + + test "extract_block_or_fallback returns cleaned raw text when block markers are missing" do + assert {:ok, "plain summary text"} = + Text.extract_block_or_fallback("plain summary text", "SUMMARY") + end +end From b7ff05c75058a4c12f70446d0d1182d317c8b7ae Mon 
Sep 17 00:00:00 2001 From: jmanhype Date: Tue, 17 Mar 2026 14:12:42 -0500 Subject: [PATCH 2/6] Prepare v1.0.1-rc1 --- .gitignore | 1 + denario/__init__.py | 4 ++++ denario/_compat.py | 35 +++++++++++++++++++++++++++++++++++ denario/denario.py | 14 +++++++++++--- tests/test_compat.py | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 denario/_compat.py create mode 100644 tests/test_compat.py diff --git a/.gitignore b/.gitignore index 85ad384d..c0b5961d 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ cover/ # Django stuff: *.log +erl_crash.dump local_settings.py db.sqlite3 db.sqlite3-journal diff --git a/denario/__init__.py b/denario/__init__.py index e9b1de40..5efb0734 100644 --- a/denario/__init__.py +++ b/denario/__init__.py @@ -1,3 +1,7 @@ +from ._compat import patch_mistralai_for_cmbagent + +patch_mistralai_for_cmbagent() + from .denario import Denario, Research, Journal, LLM, models, KeyManager from .config import REPO_DIR diff --git a/denario/_compat.py b/denario/_compat.py new file mode 100644 index 00000000..b7811daa --- /dev/null +++ b/denario/_compat.py @@ -0,0 +1,35 @@ +"""Compatibility helpers for third-party dependencies.""" + +from __future__ import annotations + +import importlib + + +def patch_mistralai_for_cmbagent() -> None: + """Expose legacy top-level Mistral symbols expected by ``cmbagent``. + + ``cmbagent`` imports ``Mistral`` and ``DocumentURLChunk`` from the top-level + ``mistralai`` module, while newer ``mistralai`` releases place them under + nested modules. Patch those attributes in when needed so Denario keeps + working across both layouts. 
+ """ + + try: + mistralai = importlib.import_module("mistralai") + except Exception: + return + + try: + if not hasattr(mistralai, "Mistral"): + mistral_sdk = importlib.import_module("mistralai.client.sdk") + mistralai.Mistral = mistral_sdk.Mistral + + if not hasattr(mistralai, "DocumentURLChunk"): + document_module = importlib.import_module( + "mistralai.client.models.documenturlchunk" + ) + mistralai.DocumentURLChunk = document_module.DocumentURLChunk + except Exception: + # Leave the environment unchanged if the dependency moves again; + # downstream imports will surface the real error in that case. + return diff --git a/denario/denario.py b/denario/denario.py index aa1dd710..41e160bc 100644 --- a/denario/denario.py +++ b/denario/denario.py @@ -755,8 +755,14 @@ def get_results(self, self.research.results = experiment.results self.research.plot_paths = experiment.plot_paths - # move plots to the plots folder in input_files/plots - ## Clearing the folder + # Move plots to the plots folder in input_files/plots. + # Some cmbagent runs return plot paths but leave the destination missing; + # guard against that so plots always land in a directory, never a file path. + if os.path.isfile(self.plots_folder): + os.remove(self.plots_folder) + os.makedirs(self.plots_folder, exist_ok=True) + + # Clear any previous plot outputs. 
if os.path.exists(self.plots_folder): for file in os.listdir(self.plots_folder): file_path = os.path.join(self.plots_folder, file) @@ -765,7 +771,9 @@ def get_results(self, elif os.path.isdir(file_path): shutil.rmtree(file_path) for plot_path in self.research.plot_paths: - shutil.move(plot_path, self.plots_folder) + if os.path.exists(plot_path): + destination = os.path.join(self.plots_folder, os.path.basename(plot_path)) + shutil.move(plot_path, destination) # Write results to file results_path = os.path.join(self.project_dir, INPUT_FILES, RESULTS_FILE) diff --git a/tests/test_compat.py b/tests/test_compat.py new file mode 100644 index 00000000..3899aea2 --- /dev/null +++ b/tests/test_compat.py @@ -0,0 +1,41 @@ +import importlib.util +import unittest +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import patch + + +def load_compat_module(): + module_path = Path(__file__).resolve().parents[1] / "denario" / "_compat.py" + spec = importlib.util.spec_from_file_location("denario_compat", module_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + +class CompatTests(unittest.TestCase): + def test_patch_mistralai_for_cmbagent_adds_legacy_top_level_symbols(self): + compat = load_compat_module() + fake_mistralai = SimpleNamespace() + fake_sdk = SimpleNamespace(Mistral=object()) + fake_document = SimpleNamespace(DocumentURLChunk=object()) + + modules = { + "mistralai": fake_mistralai, + "mistralai.client.sdk": fake_sdk, + "mistralai.client.models.documenturlchunk": fake_document, + } + + def fake_import_module(name): + return modules[name] + + with patch.object(compat.importlib, "import_module", side_effect=fake_import_module): + compat.patch_mistralai_for_cmbagent() + + self.assertIs(fake_mistralai.Mistral, fake_sdk.Mistral) + self.assertIs(fake_mistralai.DocumentURLChunk, fake_document.DocumentURLChunk) + + +if __name__ == "__main__": + unittest.main() 
From 64996b926b8744909a06254e74efa0e468adef41 Mon Sep 17 00:00:00 2001 From: jmanhype Date: Tue, 17 Mar 2026 14:18:12 -0500 Subject: [PATCH 3/6] Add authorship confirmation gate for paper generation --- README.md | 8 +++ denario/__init__.py | 3 +- denario/config.py | 1 + denario/denario.py | 119 ++++++++++++++++++++++++++++++++-- denario/exceptions.py | 2 + denario/research.py | 5 ++ docs/get_started.md | 8 +++ tests/test_authorship_gate.py | 44 +++++++++++++ 8 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 denario/exceptions.py create mode 100644 tests/test_authorship_gate.py diff --git a/README.md b/README.md index 76da1306..4e2d7eba 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,14 @@ With the methodology setup, perform the required computations and get the plots den.get_results() ``` +Before paper generation, Denario now requires explicit human sign-off that the generated artifacts were reviewed and that a human accepts authorship responsibility. + +```python +den.confirm_authorship( + "I checked the claims against the results, reviewed the citations, and rewrote the sections I will stand behind." +) +``` + Finally, generate a latex article with the results. You can specify the journal style, in this example we choose the [APS (Physical Review Journals)](https://journals.aps.org/) style. 
```python diff --git a/denario/__init__.py b/denario/__init__.py index 5efb0734..df2ec36a 100644 --- a/denario/__init__.py +++ b/denario/__init__.py @@ -4,8 +4,9 @@ from .denario import Denario, Research, Journal, LLM, models, KeyManager from .config import REPO_DIR +from .exceptions import AuthorshipConfirmationError -__all__ = ['Denario', 'Research', 'Journal', 'REPO_DIR', 'LLM', "models", "KeyManager"] +__all__ = ['Denario', 'Research', 'Journal', 'REPO_DIR', 'LLM', "models", "KeyManager", "AuthorshipConfirmationError"] from importlib.metadata import version, PackageNotFoundError diff --git a/denario/config.py b/denario/config.py index 1590b132..3222943c 100644 --- a/denario/config.py +++ b/denario/config.py @@ -23,3 +23,4 @@ RESULTS_FILE = "results.md" LITERATURE_FILE = "literature.md" REFEREE_FILE = "referee.md" +AUTHORSHIP_CONFIRMATION_FILE = "authorship_confirmation.md" diff --git a/denario/denario.py b/denario/denario.py index 41e160bc..8d2dd59c 100644 --- a/denario/denario.py +++ b/denario/denario.py @@ -3,11 +3,12 @@ import time import os import shutil +from datetime import datetime, UTC from pathlib import Path from PIL import Image import cmbagent -from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, LITERATURE_FILE +from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, LITERATURE_FILE, AUTHORSHIP_CONFIRMATION_FILE from .research import Research from .key_manager import KeyManager from .llm import LLM, models @@ -18,6 +19,7 @@ from .paper_agents.agents_graph import build_graph from .utils import llm_parser, input_check, check_file_paths, in_notebook from .langgraph_agents.agents_graph import build_lg_graph +from .exceptions import AuthorshipConfirmationError from cmbagent import preprocess_task class Denario: @@ -57,6 +59,9 @@ def __init__(self, self.project_dir = project_dir self.plots_folder = 
os.path.join(self.project_dir, INPUT_FILES, PLOTS_FOLDER) + self.authorship_confirmation_path = os.path.join( + self.project_dir, INPUT_FILES, AUTHORSHIP_CONFIRMATION_FILE + ) # Ensure the folder exists os.makedirs(self.plots_folder, exist_ok=True) @@ -92,20 +97,100 @@ def reset(self) -> None: def setter(self, field: str | None, file: str) -> str: """Base method for setting the content of idea, method or results.""" + path = os.path.join(self.project_dir, INPUT_FILES, file) + previous_value = None + if os.path.exists(path): + with open(path, 'r') as f: + previous_value = f.read() + if field is None: try: - with open(os.path.join(self.project_dir, INPUT_FILES, file), 'r') as f: + with open(path, 'r') as f: field = f.read() except FileNotFoundError: raise FileNotFoundError("Please provide an input string or path to a markdown file.") field = input_check(field) - with open(os.path.join(self.project_dir, INPUT_FILES, file), 'w') as f: + with open(path, 'w') as f: f.write(field) + if previous_value is not None and previous_value != field: + self._invalidate_authorship_confirmation() + return field + def _invalidate_authorship_confirmation(self) -> None: + self.research.authorship_confirmation = "" + if os.path.exists(self.authorship_confirmation_path): + os.remove(self.authorship_confirmation_path) + + def _load_authorship_confirmation(self) -> str: + if self.research.authorship_confirmation: + return self.research.authorship_confirmation + + try: + with open(self.authorship_confirmation_path, 'r') as f: + self.research.authorship_confirmation = f.read() + except FileNotFoundError: + self.research.authorship_confirmation = "" + + return self.research.authorship_confirmation + + def confirm_authorship( + self, + summary: str, + *, + reviewed_claims: bool = True, + reviewed_citations: bool = True, + accepts_responsibility: bool = True, + ) -> str: + """Record explicit human sign-off before paper generation. 
+ + Args: + summary: Brief description of what the human reviewed or rewrote. + reviewed_claims: Confirm that claims were checked against the artifacts. + reviewed_citations: Confirm that citations/references were checked. + accepts_responsibility: Confirm that a human accepts authorship responsibility. + """ + + summary = input_check(summary).strip() + if not summary: + raise ValueError("Please provide a non-empty authorship review summary.") + if not reviewed_claims or not reviewed_citations or not accepts_responsibility: + raise ValueError( + "Authorship confirmation requires claims review, citation review, and responsibility acceptance." + ) + + confirmed_at = datetime.now(UTC).replace(microsecond=0).isoformat() + confirmation = ( + "# Authorship Confirmation\n\n" + f"Confirmed at: {confirmed_at}\n\n" + "- Reviewed claims: yes\n" + "- Reviewed citations: yes\n" + "- Accepts human authorship responsibility: yes\n\n" + "## Review Summary\n\n" + f"{summary}\n" + ) + + with open(self.authorship_confirmation_path, 'w') as f: + f.write(confirmation) + + self.research.authorship_confirmation = confirmation + print(f"Authorship confirmation written to: {self.authorship_confirmation_path}") + return confirmation + + def _require_authorship_confirmation(self) -> str: + confirmation = self._load_authorship_confirmation() + if confirmation.strip(): + return confirmation + + raise AuthorshipConfirmationError( + "Paper generation requires explicit human sign-off. " + "Review the generated artifacts, then call " + "confirm_authorship(summary=...) before get_paper()." + ) + def set_data_description(self, data_description: str | None = None) -> None: """ Set the description of the data and tools to be used by the agents. 
@@ -136,6 +221,8 @@ def set_results(self, results: str | None = None) -> None: def set_plots(self, plots: list[str] | list[Image.Image] | None = None) -> None: """Manually set the plots from their path.""" + provided_plots = plots is not None + if plots is None: plots = [str(p) for p in (Path(self.project_dir) / "input_files" / "Plots").glob("*.png")] @@ -150,6 +237,9 @@ def set_plots(self, plots: list[str] | list[Image.Image] | None = None) -> None: img.save( os.path.join(self.project_dir, INPUT_FILES, PLOTS_FOLDER, plot_name) ) + if provided_plots: + self._invalidate_authorship_confirmation() + def set_all(self) -> None: """Set all Research fields if present in the working directory""" @@ -260,6 +350,7 @@ def enhance_data_description(self, print(f"Enhanced text from file length: {len(enhanced_text)}") # Update the research object with enhanced text + previous_data_description = self.research.data_description self.research.data_description = enhanced_text # Create the input_files directory if it doesn't exist @@ -273,6 +364,9 @@ def enhance_data_description(self, # set the enhanced text to the research object self.research.data_description = enhanced_text + + if previous_data_description != enhanced_text: + self._invalidate_authorship_confirmation() print(f"Enhanced text written to: {os.path.join(input_files_dir, DESCRIPTION_FILE)}") @@ -361,6 +455,7 @@ def get_idea_cmagent(self, f.write(idea) self.idea = idea + self._invalidate_authorship_confirmation() def get_idea_fast(self, llm: LLM | str = models["gemini-2.0-flash"], @@ -410,6 +505,7 @@ def get_idea_fast(self, minutes = int(elapsed_time // 60) seconds = int(elapsed_time % 60) print(f"Idea generated in {minutes} min {seconds} sec.") + self._invalidate_authorship_confirmation() def check_idea(self, mode : str = 'semantic_scholar', @@ -634,6 +730,7 @@ def get_method_cmbagent(self, method_path = os.path.join(self.project_dir, INPUT_FILES, METHOD_FILE) with open(method_path, 'w') as f: f.write(methododology) + 
self._invalidate_authorship_confirmation() def get_method_fast(self, llm: LLM | str = models["gemini-2.0-flash"], @@ -684,6 +781,7 @@ def get_method_fast(self, minutes = int(elapsed_time // 60) seconds = int(elapsed_time % 60) print(f"Methods generated in {minutes} min {seconds} sec.") + self._invalidate_authorship_confirmation() def get_results(self, involved_agents: List[str] = ['engineer', 'researcher'], @@ -779,6 +877,7 @@ def get_results(self, results_path = os.path.join(self.project_dir, INPUT_FILES, RESULTS_FILE) with open(results_path, 'w') as f: f.write(self.research.results) + self._invalidate_authorship_confirmation() def get_keywords(self, input_text: str, n_keywords: int = 5, kw_type: str = 'unesco') -> None: """ @@ -803,6 +902,7 @@ def get_paper(self, writer: str = 'scientist', cmbagent_keywords: bool = False, add_citations=True, + require_authorship_confirmation: bool = True, ) -> None: """ Generate a full paper based on the files in input_files: @@ -828,7 +928,11 @@ def get_paper(self, writer: set the style and tone to write. E.g. astrophysicist, biologist, chemist cmbagent_keywords: whether to use CMBAgent to select the keywords add_citations: whether to add citations to the paper or not + require_authorship_confirmation: require explicit human review before paper writing """ + + if require_authorship_confirmation: + self._require_authorship_confirmation() # Start timer start_time = time.time() @@ -929,4 +1033,11 @@ def research_pilot(self, data_description: str | None = None) -> None: self.get_idea() self.get_method() self.get_results() - self.get_paper() + try: + self.get_paper() + except AuthorshipConfirmationError as exc: + print(exc) + print( + "Denario stopped before paper generation. Review the artifacts, call " + "confirm_authorship(summary=...), then rerun get_paper()." 
+ ) diff --git a/denario/exceptions.py b/denario/exceptions.py new file mode 100644 index 00000000..ead6bd17 --- /dev/null +++ b/denario/exceptions.py @@ -0,0 +1,2 @@ +class AuthorshipConfirmationError(RuntimeError): + """Raised when paper generation is attempted without human sign-off.""" diff --git a/denario/research.py b/denario/research.py index d8ba4b48..963761d4 100644 --- a/denario/research.py +++ b/denario/research.py @@ -11,6 +11,11 @@ class Research(BaseModel): """The methodology of the project.""" results: str = Field(default="", description="The results of the project") """The results of the project.""" + authorship_confirmation: str = Field( + default="", + description="Human confirmation that the generated artifacts were reviewed before paper writing.", + ) + """Human confirmation that generated artifacts were reviewed before paper writing.""" plot_paths: List[str] = Field(default_factory=list, description="The plot paths of the project") """The plot paths of the project.""" keywords: Dict[str, str] | list = Field(default_factory=dict, description="The keywords describing the project") diff --git a/docs/get_started.md b/docs/get_started.md index 09bb720b..4cd986ce 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -46,6 +46,14 @@ With the methodology setup, perform the required computations and get the plots den.get_results() ``` +Before paper generation, explicitly confirm that a human reviewed the outputs and accepts authorship responsibility. + +```python +den.confirm_authorship( + "I checked the claims against the results, reviewed the citations, and rewrote the sections I will stand behind." +) +``` + Finally, generate a latex article with the results. You can specify the journal style, in this example we choose the [APS (Physical Review Journals)](https://journals.aps.org/) style. 
```python diff --git a/tests/test_authorship_gate.py b/tests/test_authorship_gate.py new file mode 100644 index 00000000..262d9301 --- /dev/null +++ b/tests/test_authorship_gate.py @@ -0,0 +1,44 @@ +import tempfile +import unittest +from pathlib import Path + +from denario import AuthorshipConfirmationError, Denario +from denario.config import AUTHORSHIP_CONFIRMATION_FILE, INPUT_FILES + + +class AuthorshipGateTests(unittest.TestCase): + def test_get_paper_requires_authorship_confirmation_before_running(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = Denario(project_dir=tmpdir) + den.set_data_description("Dataset description") + den.set_idea("Research idea") + den.set_method("Methodology") + den.set_results("Results") + + with self.assertRaises(AuthorshipConfirmationError): + den.get_paper() + + def test_confirmation_is_written_and_invalidated_on_artifact_change(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = Denario(project_dir=tmpdir) + den.set_data_description("Dataset description") + den.set_idea("Research idea") + den.set_method("Methodology") + den.set_results("Results") + + den.confirm_authorship( + "Reviewed claims, checked citations, and rewrote the abstract/results framing." 
+ ) + + confirmation_path = Path(tmpdir) / INPUT_FILES / AUTHORSHIP_CONFIRMATION_FILE + self.assertTrue(confirmation_path.exists()) + self.assertIn("Reviewed claims: yes", confirmation_path.read_text()) + + den.set_results("Updated results after manual review") + + self.assertFalse(confirmation_path.exists()) + self.assertEqual(den.research.authorship_confirmation, "") + + +if __name__ == "__main__": + unittest.main() From 202d5f28246f1fc1f6ef0ed9c3aea93d6df4eaab Mon Sep 17 00:00:00 2001 From: jmanhype Date: Tue, 17 Mar 2026 14:31:13 -0500 Subject: [PATCH 4/6] Add researcher statement artifact for paper framing --- README.md | 10 ++++++++ denario/config.py | 1 + denario/denario.py | 17 +++++++++++-- denario/paper_agents/parameters.py | 2 ++ denario/paper_agents/prompts.py | 31 ++++++++++++++++++++++ denario/paper_agents/reader.py | 6 ++--- denario/research.py | 5 ++++ docs/get_started.md | 10 ++++++++ tests/test_researcher_statement.py | 41 ++++++++++++++++++++++++++++++ 9 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 tests/test_researcher_statement.py diff --git a/README.md b/README.md index 4e2d7eba..408f58ed 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,14 @@ Generate a research idea from that data specification. den.get_idea() ``` +Optionally, give Denario a short researcher statement so the paper-writing stages preserve your framing and priorities rather than defaulting only to generic scientific tone. + +```python +den.set_researcher_statement( + "Emphasize robustness and measurement limits. Do not overclaim causality." +) +``` + Generate the methodology required for working on that idea. 
```python @@ -118,6 +126,8 @@ You can also manually provide any info as a string or markdown file in an interm den.set_method(path_to_the_method_file.md) ``` +You can also provide a `researcher_statement.md` artifact through `set_researcher_statement(...)` if you want the paper-writing stages to preserve a particular stance, emphasis, or constraint. + ## DenarioApp You can run Denario using a GUI through the [DenarioApp](https://github.com/AstroPilot-AI/DenarioApp). diff --git a/denario/config.py b/denario/config.py index 3222943c..73fd7e28 100644 --- a/denario/config.py +++ b/denario/config.py @@ -18,6 +18,7 @@ PAPER_FOLDER = "paper" DESCRIPTION_FILE = "data_description.md" +RESEARCHER_STATEMENT_FILE = "researcher_statement.md" IDEA_FILE = "idea.md" METHOD_FILE = "methods.md" RESULTS_FILE = "results.md" diff --git a/denario/denario.py b/denario/denario.py index 8d2dd59c..c82688ef 100644 --- a/denario/denario.py +++ b/denario/denario.py @@ -8,7 +8,7 @@ from PIL import Image import cmbagent -from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, LITERATURE_FILE, AUTHORSHIP_CONFIRMATION_FILE +from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, RESEARCHER_STATEMENT_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, LITERATURE_FILE, AUTHORSHIP_CONFIRMATION_FILE from .research import Research from .key_manager import KeyManager from .llm import LLM, models @@ -203,6 +203,13 @@ def set_data_description(self, data_description: str | None = None) -> None: check_file_paths(self.research.data_description) + def set_researcher_statement(self, researcher_statement: str | None = None) -> None: + """Set the user's framing, stance, or non-negotiable perspective for paper writing.""" + + self.research.researcher_statement = self.setter( + researcher_statement, RESEARCHER_STATEMENT_FILE + ) + def set_idea(self, idea: str | None = None) -> None: """Manually set an idea, either directly 
from a string or providing the path of a markdown file with the idea.""" @@ -245,6 +252,7 @@ def set_all(self) -> None: for setter in ( self.set_data_description, + self.set_researcher_statement, self.set_idea, self.set_method, self.set_results, @@ -275,9 +283,14 @@ def show_data_description(self) -> None: def show_idea(self) -> None: """Show the provided or generated idea by the `set_idea` or `get_idea` methods.""" - + self.printer(self.research.idea) + def show_researcher_statement(self) -> None: + """Show the provided researcher statement.""" + + self.printer(self.research.researcher_statement) + def show_method(self) -> None: """Show the provided or generated methods by `set_method` or `get_method`.""" diff --git a/denario/paper_agents/parameters.py b/denario/paper_agents/parameters.py index e8b6f507..61229119 100644 --- a/denario/paper_agents/parameters.py +++ b/denario/paper_agents/parameters.py @@ -24,6 +24,7 @@ class PAPER(TypedDict): # Class for Input/Output files class FILES(TypedDict): Folder: str #name of the project file + ResearcherStatement: str #name of the file containing the user's research framing Idea: str #name of the file containing the project idea Methods: str #name of the file containing the methods Results: str #name of the file containing the results @@ -43,6 +44,7 @@ class FILES(TypedDict): # Idea class class IDEA(TypedDict): + ResearcherStatement: str | None Idea: str Methods: str Results: str diff --git a/denario/paper_agents/prompts.py b/denario/paper_agents/prompts.py index f8c208fa..c402bab7 100644 --- a/denario/paper_agents/prompts.py +++ b/denario/paper_agents/prompts.py @@ -1,6 +1,19 @@ from langchain_core.messages import HumanMessage, SystemMessage +def researcher_statement_context(state): + statement = state['idea'].get('ResearcherStatement') + if not statement: + return "" + + return rf""" +Researcher statement: +{statement} + +Use this statement to preserve the researcher's framing, emphasis, caveats, and perspective. 
Follow it when it is compatible with the actual methods and results, and do not invent support that is not present in the artifacts. +""" + + def idea_prompt(topic): return [ SystemMessage(content='''You are a scientist and your role is to generate a groundbreaking idea for a PhD student thesis.'''), @@ -83,6 +96,8 @@ def abstract_prompt(state, attempt): Results: {state['idea']['Results']} +{researcher_statement_context(state)} + **Respond in exactly this format** ```json @@ -106,6 +121,8 @@ def abstract_reflection(state): Results: {state['idea']['Results']} +{researcher_statement_context(state)} + Previous abstract: {state['paper']['Abstract']} @@ -142,6 +159,8 @@ def introduction_prompt(state): Paper methods: {state['idea']['Methods']} +{researcher_statement_context(state)} + Please respond in this format: \\begin{{Introduction}} @@ -180,6 +199,8 @@ def introduction_reflection(state): Paper methods: {state['idea']['Methods']} +{researcher_statement_context(state)} + Previous paper introduction: {state['paper']['Introduction']} @@ -220,6 +241,8 @@ def methods_prompt(state): Short description of paper methods: {state['idea']['Methods']} +{researcher_statement_context(state)} + Respond in this format: \begin{{Methods}} @@ -260,6 +283,8 @@ def results_prompt(state): Paper short results: {state['idea']['Results']} +{researcher_statement_context(state)} + Respond in this format: \begin{{Results}} @@ -333,6 +358,8 @@ def conclusions_prompt(state): Results: {state['paper']['Results']} +{researcher_statement_context(state)} + Follow these guidelines: - Write in LaTex - Briefly describe what is the problem and how this paper tries to solve it @@ -597,6 +624,8 @@ def cmbagent_keywords_prompt(state): Methods: {state['idea']['Methods']} + +{researcher_statement_context(state)} """ def keyword_prompt(state): @@ -630,6 +659,8 @@ def keyword_prompt(state): Methods: {state['idea']['Methods']} +{researcher_statement_context(state)} + AAS keywords list: diff --git 
a/denario/paper_agents/reader.py b/denario/paper_agents/reader.py index e7dfcb8c..576fd3fb 100644 --- a/denario/paper_agents/reader.py +++ b/denario/paper_agents/reader.py @@ -10,7 +10,7 @@ from .parameters import GraphState from .latex_presets import journal_dict -from ..config import INPUT_FILES, IDEA_FILE, METHOD_FILE, RESULTS_FILE, PAPER_FOLDER, PLOTS_FOLDER, LaTeX_DIR +from ..config import INPUT_FILES, RESEARCHER_STATEMENT_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, PAPER_FOLDER, PLOTS_FOLDER, LaTeX_DIR def preprocess_node(state: GraphState, config: RunnableConfig): @@ -50,6 +50,7 @@ def preprocess_node(state: GraphState, config: RunnableConfig): # set the name of the other files state['files'] = {**state['files'], + "ResearcherStatement": f"{RESEARCHER_STATEMENT_FILE}", "Idea": f"{IDEA_FILE}", #name of file containing idea description "Methods": f"{METHOD_FILE}", #name of file with methods description "Results": f"{RESULTS_FILE}", #name of file with results description @@ -70,7 +71,7 @@ def preprocess_node(state: GraphState, config: RunnableConfig): # read input files idea = {} - for key in ["Idea", "Methods", "Results"]: + for key in ["ResearcherStatement", "Idea", "Methods", "Results"]: path = Path(f"{state['files']['Folder']}/{INPUT_FILES}/{state['files'][key]}") if path.exists(): with path.open("r", encoding="utf-8") as f: @@ -160,4 +161,3 @@ def preprocess_node(state: GraphState, config: RunnableConfig): "paper": {**state['paper'], "summary": ""}, "time": state['time'], } - diff --git a/denario/research.py b/denario/research.py index 963761d4..f3dc75f4 100644 --- a/denario/research.py +++ b/denario/research.py @@ -5,6 +5,11 @@ class Research(BaseModel): """Research class.""" data_description: str = Field(default="", description="The data description of the project") """The data description of the project.""" + researcher_statement: str = Field( + default="", + description="The user's framing, stance, and perspective for the paper-writing stages.", + ) + 
"""The user's framing, stance, and perspective for the paper-writing stages.""" idea: str = Field(default="", description="The idea of the project") """The idea of the project.""" methodology: str = Field(default="", description="The methodology of the project") diff --git a/docs/get_started.md b/docs/get_started.md index 4cd986ce..a1d3df0a 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -21,6 +21,14 @@ Generate a research idea from that data specification. den.get_idea() ``` +Optionally, give Denario a short researcher statement so the paper-writing stages preserve your framing and priorities rather than defaulting only to generic scientific tone. + +```python +den.set_researcher_statement( + "Emphasize robustness and measurement limits. Do not overclaim causality." +) +``` + This will trigger a planning and control workflow to design the ide. For a faster method you can use: ```python @@ -67,3 +75,5 @@ You can also manually provide any info as a string or markdown file in an interm ```python den.set_method(path_to_the_method_file.md) ``` + +You can also provide a `researcher_statement.md` artifact through `set_researcher_statement(...)` if you want the paper-writing stages to preserve a particular stance, emphasis, or constraint. diff --git a/tests/test_researcher_statement.py b/tests/test_researcher_statement.py new file mode 100644 index 00000000..a375aaa5 --- /dev/null +++ b/tests/test_researcher_statement.py @@ -0,0 +1,41 @@ +import tempfile +import unittest + +from denario import Denario +from denario.paper_agents.prompts import abstract_prompt + + +class ResearcherStatementTests(unittest.TestCase): + def test_set_researcher_statement_persists_on_disk(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = Denario(project_dir=tmpdir) + den.set_researcher_statement( + "Prioritize robustness over novelty and avoid causal language." 
+ ) + + reloaded = Denario(project_dir=tmpdir) + self.assertEqual( + reloaded.research.researcher_statement, + "Prioritize robustness over novelty and avoid causal language.", + ) + + def test_paper_prompts_include_researcher_statement_when_present(self): + state = { + "writer": "scientist", + "idea": { + "ResearcherStatement": "Keep the framing conservative and emphasize reproducibility.", + "Idea": "Idea text", + "Methods": "Method text", + "Results": "Result text", + }, + "paper": {"Abstract": ""}, + } + + prompt = abstract_prompt(state, 1)[1].content + + self.assertIn("Researcher statement:", prompt) + self.assertIn("Keep the framing conservative", prompt) + + +if __name__ == "__main__": + unittest.main() From 00bfa4c61d20a5f96c026a88344e1af8f9969536 Mon Sep 17 00:00:00 2001 From: jmanhype Date: Tue, 17 Mar 2026 14:43:19 -0500 Subject: [PATCH 5/6] Add branching workflow for comparing ideas and methods --- README.md | 16 ++ denario/config.py | 5 + denario/denario.py | 270 ++++++++++++++++++++++++++++++- denario/research.py | 4 + docs/get_started.md | 16 ++ tests/test_branching_workflow.py | 80 +++++++++ 6 files changed, 390 insertions(+), 1 deletion(-) create mode 100644 tests/test_branching_workflow.py diff --git a/README.md b/README.md index 408f58ed..d5a00614 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,20 @@ den.set_researcher_statement( ) ``` +If you want to compare multiple directions before converging, generate candidate branches and build a comparison template: + +```python +den.generate_idea_branches(count=3) +den.build_idea_comparison( + criteria=[ + "Novelty", + "Feasibility with this dataset", + "Fit to the intended paper contribution", + ] +) +den.select_idea_candidate(2) +``` + Generate the methodology required for working on that idea. 
```python @@ -128,6 +142,8 @@ den.set_method(path_to_the_method_file.md) You can also provide a `researcher_statement.md` artifact through `set_researcher_statement(...)` if you want the paper-writing stages to preserve a particular stance, emphasis, or constraint. +Similarly, `generate_idea_branches(...)`, `generate_method_branches(...)`, `build_idea_comparison(...)`, `build_method_comparison(...)`, `select_idea_candidate(...)`, and `select_method_candidate(...)` support a compare-before-converge workflow. + ## DenarioApp You can run Denario using a GUI through the [DenarioApp](https://github.com/AstroPilot-AI/DenarioApp). diff --git a/denario/config.py b/denario/config.py index 73fd7e28..78c14e8b 100644 --- a/denario/config.py +++ b/denario/config.py @@ -20,8 +20,13 @@ DESCRIPTION_FILE = "data_description.md" RESEARCHER_STATEMENT_FILE = "researcher_statement.md" IDEA_FILE = "idea.md" +IDEA_CANDIDATES_FILE = "idea_candidates.md" +IDEA_COMPARISON_FILE = "idea_comparison.md" METHOD_FILE = "methods.md" +METHOD_CANDIDATES_FILE = "method_candidates.md" +METHOD_COMPARISON_FILE = "method_comparison.md" RESULTS_FILE = "results.md" LITERATURE_FILE = "literature.md" REFEREE_FILE = "referee.md" AUTHORSHIP_CONFIRMATION_FILE = "authorship_confirmation.md" +BRANCH_WORKSPACES_DIR = "branch_workspaces" diff --git a/denario/denario.py b/denario/denario.py index c82688ef..850295ad 100644 --- a/denario/denario.py +++ b/denario/denario.py @@ -3,12 +3,13 @@ import time import os import shutil +import re from datetime import datetime, UTC from pathlib import Path from PIL import Image import cmbagent -from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, RESEARCHER_STATEMENT_FILE, IDEA_FILE, METHOD_FILE, RESULTS_FILE, LITERATURE_FILE, AUTHORSHIP_CONFIRMATION_FILE +from .config import DEFAUL_PROJECT_NAME, INPUT_FILES, PLOTS_FOLDER, DESCRIPTION_FILE, RESEARCHER_STATEMENT_FILE, IDEA_FILE, IDEA_CANDIDATES_FILE, IDEA_COMPARISON_FILE, METHOD_FILE, 
METHOD_CANDIDATES_FILE, METHOD_COMPARISON_FILE, RESULTS_FILE, LITERATURE_FILE, AUTHORSHIP_CONFIRMATION_FILE, BRANCH_WORKSPACES_DIR from .research import Research from .key_manager import KeyManager from .llm import LLM, models @@ -191,6 +192,121 @@ def _require_authorship_confirmation(self) -> str: "confirm_authorship(summary=...) before get_paper()." ) + def _candidate_file_for_kind(self, kind: str) -> str: + if kind == "idea": + return IDEA_CANDIDATES_FILE + if kind == "method": + return METHOD_CANDIDATES_FILE + raise ValueError("Candidate kind must be either 'idea' or 'method'.") + + def _comparison_file_for_kind(self, kind: str) -> str: + if kind == "idea": + return IDEA_COMPARISON_FILE + if kind == "method": + return METHOD_COMPARISON_FILE + raise ValueError("Comparison kind must be either 'idea' or 'method'.") + + def _candidate_attr_for_kind(self, kind: str) -> str: + if kind == "idea": + return "idea_candidates" + if kind == "method": + return "method_candidates" + raise ValueError("Candidate kind must be either 'idea' or 'method'.") + + def _selected_setter_for_kind(self, kind: str): + if kind == "idea": + return self.set_idea + if kind == "method": + return self.set_method + raise ValueError("Candidate kind must be either 'idea' or 'method'.") + + def _ensure_data_description_loaded(self) -> str: + if not self.research.data_description: + self.set_data_description() + return self.research.data_description + + def _ensure_idea_loaded(self) -> str: + if not self.research.idea: + self.set_idea() + return self.research.idea + + def _serialize_candidates(self, kind: str, candidates: list[str]) -> str: + kind_title = "Idea" if kind == "idea" else "Method" + sections = [f"# {kind_title} Candidates", ""] + for index, candidate in enumerate(candidates, start=1): + sections.extend( + [ + f"## Candidate {index}", + "", + candidate.strip(), + "", + ] + ) + return "\n".join(sections).strip() + "\n" + + def _parse_candidates(self, text: str) -> list[str]: + matches = 
re.findall( + r"(?ms)^## Candidate \d+\s*\n(.*?)(?=^## Candidate \d+\s*\n|\Z)", + text.strip(), + ) + candidates = [match.strip() for match in matches if match.strip()] + if candidates: + return candidates + text = text.strip() + return [text] if text else [] + + def _set_candidates(self, kind: str, candidates: list[str] | str | None) -> list[str]: + path = os.path.join( + self.project_dir, INPUT_FILES, self._candidate_file_for_kind(kind) + ) + + if candidates is None: + with open(path, 'r') as f: + text = f.read() + parsed = self._parse_candidates(text) + elif isinstance(candidates, str): + parsed = self._parse_candidates(input_check(candidates)) + else: + parsed = [] + for candidate in candidates: + cleaned = input_check(candidate).strip() + if cleaned: + parsed.append(cleaned) + + if not parsed: + raise ValueError(f"No {kind} candidates were provided.") + + with open(path, 'w') as f: + f.write(self._serialize_candidates(kind, parsed)) + + setattr(self.research, self._candidate_attr_for_kind(kind), parsed) + return parsed + + def _load_candidates(self, kind: str) -> list[str]: + attr = self._candidate_attr_for_kind(kind) + cached = getattr(self.research, attr) + if cached: + return cached + + path = os.path.join( + self.project_dir, INPUT_FILES, self._candidate_file_for_kind(kind) + ) + with open(path, 'r') as f: + parsed = self._parse_candidates(f.read()) + setattr(self.research, attr, parsed) + return parsed + + def _branch_workspace_root(self, kind: str) -> Path: + folder = "ideas" if kind == "idea" else "methods" + return Path(self.project_dir) / BRANCH_WORKSPACES_DIR / folder + + def _prepare_branch_runner(self, branch_dir: Path): + runner = self.__class__(project_dir=str(branch_dir), clear_project_dir=True) + runner.set_data_description(self._ensure_data_description_loaded()) + if self.research.researcher_statement: + runner.set_researcher_statement(self.research.researcher_statement) + return runner + def set_data_description(self, data_description: str | 
None = None) -> None: """ Set the description of the data and tools to be used by the agents. @@ -210,6 +326,146 @@ def set_researcher_statement(self, researcher_statement: str | None = None) -> N researcher_statement, RESEARCHER_STATEMENT_FILE ) + def set_idea_candidates(self, idea_candidates: list[str] | str | None = None) -> None: + """Persist multiple idea candidates for later comparison and selection.""" + + self.research.idea_candidates = self._set_candidates("idea", idea_candidates) + + def set_method_candidates(self, method_candidates: list[str] | str | None = None) -> None: + """Persist multiple methodology candidates for later comparison and selection.""" + + self.research.method_candidates = self._set_candidates("method", method_candidates) + + def generate_idea_branches(self, count: int = 3, **kwargs) -> list[str]: + """Generate multiple idea branches without overwriting the selected idea.""" + + if count < 2: + raise ValueError("Idea branching requires at least 2 candidates.") + + branch_root = self._branch_workspace_root("idea") + if branch_root.exists(): + shutil.rmtree(branch_root) + branch_root.mkdir(parents=True, exist_ok=True) + + candidates: list[str] = [] + for index in range(1, count + 1): + branch_dir = branch_root / f"idea_branch_{index:02d}" + runner = self._prepare_branch_runner(branch_dir) + runner.get_idea(**kwargs) + if not runner.research.idea: + runner.set_idea() + candidates.append(runner.research.idea) + + self.set_idea_candidates(candidates) + self.build_idea_comparison() + return candidates + + def generate_method_branches(self, count: int = 3, **kwargs) -> list[str]: + """Generate multiple methodology branches using the currently selected idea.""" + + if count < 2: + raise ValueError("Method branching requires at least 2 candidates.") + + selected_idea = self._ensure_idea_loaded() + + branch_root = self._branch_workspace_root("method") + if branch_root.exists(): + shutil.rmtree(branch_root) + branch_root.mkdir(parents=True, 
exist_ok=True) + + candidates: list[str] = [] + for index in range(1, count + 1): + branch_dir = branch_root / f"method_branch_{index:02d}" + runner = self._prepare_branch_runner(branch_dir) + runner.set_idea(selected_idea) + runner.get_method(**kwargs) + if not runner.research.methodology: + runner.set_method() + candidates.append(runner.research.methodology) + + self.set_method_candidates(candidates) + self.build_method_comparison() + return candidates + + def build_idea_comparison(self, criteria: list[str] | None = None) -> str: + """Write a human-first comparison template for idea branches.""" + + return self._build_comparison("idea", criteria=criteria) + + def build_method_comparison(self, criteria: list[str] | None = None) -> str: + """Write a human-first comparison template for method branches.""" + + return self._build_comparison("method", criteria=criteria) + + def _build_comparison(self, kind: str, criteria: list[str] | None = None) -> str: + candidates = self._load_candidates(kind) + kind_title = "Idea" if kind == "idea" else "Method" + criteria = criteria or [ + "Novelty or differentiation", + "Feasibility with the available data and tools", + "Clarity and scientific defensibility", + "Fit with the intended paper contribution", + ] + + lines = [ + f"# {kind_title} Comparison", + "", + f"Use this file to compare candidate {kind.lower()} branches before selecting one.", + "", + "## Criteria", + "", + ] + lines.extend([f"- {criterion}" for criterion in criteria]) + lines.extend(["", "## Decision", "", "- Selected candidate: ", "- Why: ", ""]) + + for index, candidate in enumerate(candidates, start=1): + lines.extend( + [ + f"## Candidate {index}", + "", + "### Strengths", + "", + "### Risks", + "", + "### Notes", + "", + "### Candidate Text", + "", + candidate.strip(), + "", + ] + ) + + comparison = "\n".join(lines).strip() + "\n" + path = os.path.join( + self.project_dir, INPUT_FILES, self._comparison_file_for_kind(kind) + ) + with open(path, 'w') as f: + 
f.write(comparison) + return comparison + + def select_idea_candidate(self, index: int) -> str: + """Select one idea candidate as the active idea artifact.""" + + return self._select_candidate("idea", index) + + def select_method_candidate(self, index: int) -> str: + """Select one methodology candidate as the active methods artifact.""" + + return self._select_candidate("method", index) + + def _select_candidate(self, kind: str, index: int) -> str: + candidates = self._load_candidates(kind) + if index < 1 or index > len(candidates): + raise IndexError( + f"{kind.title()} candidate index must be between 1 and {len(candidates)}." + ) + + candidate = candidates[index - 1] + setter = self._selected_setter_for_kind(kind) + setter(candidate) + return candidate + def set_idea(self, idea: str | None = None) -> None: """Manually set an idea, either directly from a string or providing the path of a markdown file with the idea.""" @@ -253,7 +509,9 @@ def set_all(self) -> None: for setter in ( self.set_data_description, self.set_researcher_statement, + self.set_idea_candidates, self.set_idea, + self.set_method_candidates, self.set_method, self.set_results, self.set_plots, @@ -291,6 +549,16 @@ def show_researcher_statement(self) -> None: self.printer(self.research.researcher_statement) + def show_idea_candidates(self) -> None: + """Show the stored idea candidates.""" + + self.printer(self._serialize_candidates("idea", self._load_candidates("idea"))) + + def show_method_candidates(self) -> None: + """Show the stored method candidates.""" + + self.printer(self._serialize_candidates("method", self._load_candidates("method"))) + def show_method(self) -> None: """Show the provided or generated methods by `set_method` or `get_method`.""" diff --git a/denario/research.py b/denario/research.py index f3dc75f4..6bf1476e 100644 --- a/denario/research.py +++ b/denario/research.py @@ -10,8 +10,12 @@ class Research(BaseModel): description="The user's framing, stance, and perspective for the 
paper-writing stages.", ) """The user's framing, stance, and perspective for the paper-writing stages.""" + idea_candidates: List[str] = Field(default_factory=list, description="Candidate idea branches for comparison") + """Candidate idea branches for comparison.""" idea: str = Field(default="", description="The idea of the project") """The idea of the project.""" + method_candidates: List[str] = Field(default_factory=list, description="Candidate method branches for comparison") + """Candidate method branches for comparison.""" methodology: str = Field(default="", description="The methodology of the project") """The methodology of the project.""" results: str = Field(default="", description="The results of the project") diff --git a/docs/get_started.md b/docs/get_started.md index a1d3df0a..52b471cd 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -29,6 +29,20 @@ den.set_researcher_statement( ) ``` +If you want to compare multiple directions before converging, generate candidate branches and build a comparison template: + +```python +den.generate_idea_branches(count=3) +den.build_idea_comparison( + criteria=[ + "Novelty", + "Feasibility with this dataset", + "Fit to the intended paper contribution", + ] +) +den.select_idea_candidate(2) +``` + This will trigger a planning and control workflow to design the ide. For a faster method you can use: ```python @@ -77,3 +91,5 @@ den.set_method(path_to_the_method_file.md) ``` You can also provide a `researcher_statement.md` artifact through `set_researcher_statement(...)` if you want the paper-writing stages to preserve a particular stance, emphasis, or constraint. + +Similarly, `generate_idea_branches(...)`, `generate_method_branches(...)`, `build_idea_comparison(...)`, `build_method_comparison(...)`, `select_idea_candidate(...)`, and `select_method_candidate(...)` support a compare-before-converge workflow. 
diff --git a/tests/test_branching_workflow.py b/tests/test_branching_workflow.py new file mode 100644 index 00000000..d8de3e6f --- /dev/null +++ b/tests/test_branching_workflow.py @@ -0,0 +1,80 @@ +import tempfile +import unittest +from pathlib import Path + +from denario import Denario +from denario.config import ( + IDEA_CANDIDATES_FILE, + IDEA_COMPARISON_FILE, + IDEA_FILE, + INPUT_FILES, + METHOD_CANDIDATES_FILE, + METHOD_COMPARISON_FILE, + METHOD_FILE, +) + + +class FakeBranchDenario(Denario): + def get_idea(self, **kwargs) -> None: # type: ignore[override] + idea = f"Idea from {Path(self.project_dir).name}" + self.research.idea = idea + with open(Path(self.project_dir) / INPUT_FILES / IDEA_FILE, "w") as f: + f.write(idea) + + def get_method(self, **kwargs) -> None: # type: ignore[override] + method = f"Method from {Path(self.project_dir).name} using {self.research.idea}" + self.research.methodology = method + with open(Path(self.project_dir) / INPUT_FILES / METHOD_FILE, "w") as f: + f.write(method) + + +class BranchingWorkflowTests(unittest.TestCase): + def test_generate_idea_branches_persists_candidates_and_comparison(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = FakeBranchDenario(project_dir=tmpdir) + den.set_data_description("Dataset description") + + candidates = den.generate_idea_branches(count=3) + + self.assertEqual(len(candidates), 3) + self.assertEqual(den.research.idea, "") + self.assertTrue( + (Path(tmpdir) / INPUT_FILES / IDEA_CANDIDATES_FILE).exists() + ) + self.assertTrue( + (Path(tmpdir) / INPUT_FILES / IDEA_COMPARISON_FILE).exists() + ) + + def test_select_idea_candidate_promotes_candidate_to_primary_artifact(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = Denario(project_dir=tmpdir) + den.set_idea_candidates(["Idea A", "Idea B"]) + + selected = den.select_idea_candidate(2) + + self.assertEqual(selected, "Idea B") + self.assertEqual(den.research.idea, "Idea B") + self.assertEqual((Path(tmpdir) / INPUT_FILES / 
IDEA_FILE).read_text(), "Idea B") + + def test_generate_method_branches_uses_selected_idea_and_round_trips(self): + with tempfile.TemporaryDirectory() as tmpdir: + den = FakeBranchDenario(project_dir=tmpdir) + den.set_data_description("Dataset description") + den.set_idea("Chosen idea") + + candidates = den.generate_method_branches(count=2) + + self.assertEqual(len(candidates), 2) + self.assertTrue( + (Path(tmpdir) / INPUT_FILES / METHOD_CANDIDATES_FILE).exists() + ) + self.assertTrue( + (Path(tmpdir) / INPUT_FILES / METHOD_COMPARISON_FILE).exists() + ) + + reloaded = Denario(project_dir=tmpdir) + self.assertEqual(len(reloaded.research.method_candidates), 2) + + +if __name__ == "__main__": + unittest.main() From 4a673b87413bcf5ee5ea563e7cf81720eef7dc16 Mon Sep 17 00:00:00 2001 From: jmanhype Date: Tue, 17 Mar 2026 15:33:48 -0500 Subject: [PATCH 6/6] Align package version with dignity beta line --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b964b1fe..4dfdd70c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "denario" -version = "1.0.1" +version = "1.1.0b6" description = "Modular Multi-Agent System for Scientific Research Assistance" authors = [ { name = "Pablo Villanueva-Domingo" }, @@ -41,7 +41,7 @@ Repository = "https://github.com/AstroPilot-AI/Denario" denario = "denario.cli:main" [project.optional-dependencies] -app = ["denario_app>=1.0.0"] +app = ["denario_app>=1.1.0b1"] docs = [ "mkdocs", "mkdocstrings[python]",