From da29ca7353a52b2f4fce62fbfbc8d95f9fca23c4 Mon Sep 17 00:00:00 2001
From: aritraroy24 <aritraroy24@gmail.com>
Date: Thu, 2 Apr 2026 10:25:31 +0100
Subject: [PATCH 1/3] docs: update citations, readme and add API key guide for
 v0.1.6

---
 .gitignore                            |   8 +-
 CHANGELOG.md                          |  14 ++
 CITATION.cff                          |  19 +-
 README.md                             |  17 +-
 docs/about/changelog.md               |  22 +-
 docs/about/citation.md                |  17 +-
 docs/getting-started/api-key-guide.md | 322 ++++++++++++++++++++++++++
 7 files changed, 395 insertions(+), 24 deletions(-)
 create mode 100644 docs/getting-started/api-key-guide.md

diff --git a/.gitignore b/.gitignore
index a1b8848a..a3f4f2a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -180,4 +180,10 @@ cython_debug/
 examples/db/10.*
 tests example/
 
-applications
\ No newline at end of file
+applications
+vlm_test
+examples/vlm_piezo_test
+
+# Claude files
+CLAUDE.md
+.claude
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index faeeea1e..34351bd4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,12 @@
+## [0.1.6] - 02-04-2026
+### Changed
+- Updated [README.md](README.md), [CITATION.cff](CITATION.cff) and docs with the published version (advance article) of the ComProScanner paper in _Digital Discovery_ as fully open access:
+  - [ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature](https://doi.org/10.1039/D5DD00521C) 
+
+### Added
+- Guide for API key creation for various LLM providers and publisher APIs added to the documentation at `docs/getting-started/api-key-guide.md` with detailed instructions for each provider.
+
+---
 ## [0.1.5] - 08-02-2026
 
 ### Added
@@ -69,6 +78,7 @@
 
 - README badges section converted from HTML to markdown format for better compatibility across platforms.
 
+---
 ## [0.1.4] - 02-12-2025
 
 ### Added
@@ -104,6 +114,7 @@
   - [ComProScanner Logo](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/comproscanner_logo.png)
   - [ComProScanner Workflow](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/overall_workflow.png)
 
+---
 ## [0.1.3] - 04-11-2025
 
 ### Fixed
@@ -112,6 +123,7 @@
   - Changed from `from langchain.text_splitter import RecursiveCharacterTextSplitter`
   - To `from langchain.text_splitter.recursive_character import RecursiveCharacterTextSplitter`
 
+---
 ## [0.1.2] - 24-10-2025
 
 ### Added
@@ -119,6 +131,7 @@
 - Link to ComProScanner preprint on arXiv in the documentation index page and README.md:
   - [arXiv:2510.20362](https://arxiv.org/abs/2510.20362)
 
+---
 ## [0.1.1] - 22-10-2025
 
 ### Fixed
@@ -127,6 +140,7 @@
   - [ComProScanner Logo](https://i.ibb.co/whHSbGvT/comproscanner-logo.png)
   - [ComProScanner Workflow](https://i.ibb.co/QWd2qd3/overall-workflow.png)
 
+---
 ## [0.1.0] - 22-10-2025
 
 ### Added
diff --git a/CITATION.cff b/CITATION.cff
index 723c2600..56b4a26c 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -16,7 +16,7 @@ contact:
   - family-names: Roy
     given-names: Aritra
     orcid: "https://orcid.org/0000-0002-4928-2935"
-message: If you use this software, please cite our article on arXiv.
+message: If you use this software, please cite our article in Digital Discovery.
 preferred-citation:
   authors:
     - family-names: Roy
@@ -31,21 +31,28 @@ preferred-citation:
     - family-names: Gattinoni
       given-names: Chiara
       orcid: "https://orcid.org/0000-0002-3376-6374"
-  date-published: 2025-10-23
+  doi: "10.1039/D5DD00521C"
   identifiers:
+    - type: doi
+      value: "10.1039/D5DD00521C"
+      description: "Peer-reviewed article"
     - type: other
       value: "arXiv:2510.20362"
       description: "arXiv preprint"
-  title: "ComProScanner: A multi-agent based framework for composition-property structured data extraction from scientific literature"
+  journal: "Digital Discovery"
+  publisher:
+    name: "RSC"
+  status: advance-online
+  title: "ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature"
   type: article
-  url: "https://arxiv.org/abs/2510.20362"
+  url: "https://doi.org/10.1039/D5DD00521C"
 repository-code: "https://github.com/slimeslab/ComProScanner"
 license: MIT
 title: "ComProScanner: A multi-agent based framework for composition-property structured data extraction from scientific literature"
 type: software
 url: "https://slimeslab.github.io/ComProScanner/"
-version: "0.1.4"
-date-released: 2025-12-03
+version: "0.1.6"
+date-released: 2026-04-02
 keywords:
   - materials science
   - data extraction
diff --git a/README.md b/README.md
index 2d5a4e05..b1609a5a 100644
--- a/README.md
+++ b/README.md
@@ -169,14 +169,15 @@ eval_visualizer.plot_multiple_radar_charts(
 If you use ComProScanner in your research, please cite:
 
 ```bibtex
-@misc{roy2025comproscannermultiagentbasedframework,
-      title={ComProScanner: A multi-agent based framework for composition-property structured data extraction from scientific literature},
-      author={Aritra Roy and Enrico Grisan and John Buckeridge and Chiara Gattinoni},
-      year={2025},
-      eprint={2510.20362},
-      archivePrefix={arXiv},
-      primaryClass={physics.comp-ph},
-      url={https://arxiv.org/abs/2510.20362},
+@Article{roy2026comproscannermultiagentbasedframework,
+author ="Roy, Aritra and Grisan, Enrico and Buckeridge, John and Gattinoni, Chiara",
+title  ="ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature",
+journal  ="Digital Discovery",
+year  ="2026",
+pages  ="Accepted",
+publisher  ="RSC",
+doi  ="10.1039/D5DD00521C",
+url  ="https://doi.org/10.1039/D5DD00521C"
 }
 ```
 
diff --git a/docs/about/changelog.md b/docs/about/changelog.md
index eb6852fa..0770dc03 100644
--- a/docs/about/changelog.md
+++ b/docs/about/changelog.md
@@ -1,6 +1,16 @@
-## Unreleased
+## [0.1.6] - 02-04-2026
+### Changed
+- Updated [README.md](README.md), [CITATION.cff](CITATION.cff) and docs with the published version (advance article) of the ComProScanner paper in _Digital Discovery_ as fully open access:
+  - [ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature](https://doi.org/10.1039/D5DD00521C) 
+
+### Added
+- Guide for API key creation for various LLM providers and publisher APIs added to the documentation at `docs/getting-started/api-key-guide.md` with detailed instructions for each provider.
+
+---
+## [0.1.5] - 08-02-2026
 
 ### Added
+- Data related to comparison with other agentic data extraction frameworks added for the ComProScanner paper in the `examples/piezo_test/comparing_existing_frameworks` folder.
 
 - New parameter `apply_advanced_cleaning` added to data cleaning methods in `data_cleaner.py`. When set to `True`, it triggers the advanced cleaning pipeline.
 
@@ -38,6 +48,11 @@
 - [CITATION.cff](https://github.com/slimeslab/ComProScanner/blob/main/CITATION.cff) added for standardized citation information based on the latest release and arXiv preprint.
 
 ### Fixed
+- OAWorks API is replaced with OpenAlex API as OAWorks is no longer available.
+
+- Empty/corrupted PDF handled in `pdf_processor.py` and `wiley_processor.py` to avoid having GLYPH errors during text extraction.
+
+- Data extraction failures fixed if composition-property text data is empty.
 
 - CSV progress tracking in `elsevier_processor.py`:
 
@@ -63,6 +78,7 @@
 
 - README badges section converted from HTML to markdown format for better compatibility across platforms.
 
+---
 ## [0.1.4] - 02-12-2025
 
 ### Added
@@ -96,6 +112,7 @@
 
 - README images updated with raw GitHub links for better reliability: [ComProScanner Logo](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/comproscanner_logo.png), [ComProScanner Workflow](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/overall_workflow.png)
 
+---
 ## [0.1.3] - 04-11-2025
 
 ### Fixed
@@ -104,18 +121,21 @@
   - Changed from `from langchain.text_splitter import RecursiveCharacterTextSplitter`
   - To `from langchain.text_splitter.recursive_character import RecursiveCharacterTextSplitter`
 
+---
 ## [0.1.2] - 24-10-2025
 
 ### Added
 
 - Link to ComProScanner preprint on arXiv in the documentation index page and README.md: [arXiv:2510.20362](https://arxiv.org/abs/2510.20362)
 
+---
 ## [0.1.1] - 22-10-2025
 
 ### Fixed
 
 - README images updated with external image link to fix PyPI rendering issue. [ComProScanner Logo](https://i.ibb.co/whHSbGvT/comproscanner-logo.png), [ComProScanner Workflow](https://i.ibb.co/QWd2qd3/overall-workflow.png)
 
+---
 ## [0.1.0] - 22-10-2025
 
 ### Added
diff --git a/docs/about/citation.md b/docs/about/citation.md
index b0d3be68..977b3882 100644
--- a/docs/about/citation.md
+++ b/docs/about/citation.md
@@ -3,13 +3,14 @@
 If you use ComProScanner in your research, please cite our related paper:
 
 ```bibtex
-@misc{roy2025comproscannermultiagentbasedframework,
-      title={ComProScanner: A multi-agent based framework for composition-property structured data extraction from scientific literature},
-      author={Aritra Roy and Enrico Grisan and John Buckeridge and Chiara Gattinoni},
-      year={2025},
-      eprint={2510.20362},
-      archivePrefix={arXiv},
-      primaryClass={physics.comp-ph},
-      url={https://arxiv.org/abs/2510.20362},
+@Article{roy2026comproscannermultiagentbasedframework,
+author ="Roy, Aritra and Grisan, Enrico and Buckeridge, John and Gattinoni, Chiara",
+title  ="ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature",
+journal  ="Digital Discovery",
+year  ="2026",
+pages  ="Accepted",
+publisher  ="RSC",
+doi  ="10.1039/D5DD00521C",
+url  ="https://doi.org/10.1039/D5DD00521C"
 }
 ```
diff --git a/docs/getting-started/api-key-guide.md b/docs/getting-started/api-key-guide.md
new file mode 100644
index 00000000..0cb974ba
--- /dev/null
+++ b/docs/getting-started/api-key-guide.md
@@ -0,0 +1,322 @@
+# API Key Guide
+
+This page explains which provider credentials ComProScanner can use, what each one is used for, and how to generate or obtain them.
+
+## Overview
+
+ComProScanner can work with three groups of external providers:
+
+!!! important "Which credentials do you actually need?"
+
+    | Provider group | Requirement level |
+    | --- | --- |
+    | **Publisher/content providers for article access** | **Optional** for manual or local workflows, but **required** for automated article retrieval. |
+    | **LLM providers for extraction, vision models and RAG** | **At least one required** for extraction, vision models and RAG workflows. Note that the default models differ between extraction/RAG and vision-language workflows. |
+    | **Default embedding provider for vector database creation** | **Optional.** A Hugging Face token is only needed for gated or private models; public models work without one. |
+
+Use only the providers relevant to your workflow. You do not need every key.
+
+## Publisher Providers
+
+### Elsevier / Scopus
+
+Environment variable: `SCOPUS_API_KEY`
+
+Used for:
+
+- Scopus-based metadata retrieval
+- Elsevier article retrieval in XML format
+
+How to get it:
+
+1. Create or sign in to your [Elsevier developer account](https://dev.elsevier.com/).
+2. Open the [API key management area](https://dev.elsevier.com/apikey/manage).
+3. Create a key for Scopus or content APIs.
+4. Copy the generated key into your `.env` file as `SCOPUS_API_KEY`.
+
+```bash
+SCOPUS_API_KEY=your_scopus_api_key
+```
+
+### Springer Nature Open Access API
+
+Environment variable: `SPRINGER_OPENACCESS_API_KEY`
+
+Used for:
+
+- Springer Open Access article retrieval in XML format
+
+How to get it:
+
+1. Create or sign in to your [Springer Nature account](https://dev.springernature.com/).
+2. Fill out the form to request an Open Access API key at [https://dev.springernature.com/register/](https://dev.springernature.com/register/).
+3. Get the Open Access API key from the [Springer Nature API management page](https://datasolutions.springernature.com/account/api-management/).
+4. Copy the key into your `.env` file.
+
+```bash
+SPRINGER_OPENACCESS_API_KEY=your_springer_openaccess_api_key
+```
+
+### Springer Nature TDM API
+
+Environment variable: `SPRINGER_TDM_API_KEY`
+
+Used for:
+
+- Springer subscription article retrieval in XML format
+
+How to get it:
+
+1. Subscribe to the Springer Nature TDM service via [https://dev.springernature.com/subscription/](https://dev.springernature.com/subscription/) and select the appropriate access level based on your institution and use case.
+2. Copy the issued TDM key or token into your `.env` file.
+
+```bash
+SPRINGER_TDM_API_KEY=your_springer_tdm_api_key
+```
+
+### Wiley TDM API
+
+Environment variable: `WILEY_API_KEY`
+
+Used for:
+
+- Wiley full-text article download as PDF
+
+How to get it:
+
+1. Create your [Wiley account](https://onlinelibrary.wiley.com/action/registration).
+2. Log in to your Wiley account and go to the "**Get a Text and Data Mining Token**" section at [https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining](https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining).
+3. Accept the terms and conditions to generate your API token.
+4. Copy the API token into your `.env` file.
+
+```bash
+WILEY_API_KEY=your_wiley_api_key
+```
+
+### IOP Publishing
+
+Environment variable: `IOP_papers_path` (*not an API key but a required path variable for processing IOP Science XML files*)
+
+Used for:
+
+- Local processing of IOP Science XML files downloaded in bulk
+
+How to get it:
+
+1. Email [contentsupport@ioppublishing.org](mailto:contentsupport@ioppublishing.org) to request bulk access to the IOP Science XML files. Access is typically provided through SFTP, as IOP Publishing does not provide direct API access for bulk downloads.
+2. Once you have access, download the XML files to a local directory.
+3. Set `IOP_papers_path` to the absolute local folder path containing all the downloaded files.
+
+```bash
+IOP_papers_path=/absolute/path/to/iop_papers
+```
+
+## LLM Providers
+
+These providers can be used for extraction models, RAG chat models, and vision-language models where supported by your configuration.
+
+### OpenAI
+
+Environment variable: `OPENAI_API_KEY`
+
+Typical model prefixes: `openai/...` or OpenAI model names directly
+
+How to get it:
+
+1. Create or sign in to your [OpenAI account](https://platform.openai.com/).
+2. Open the [API keys section](https://platform.openai.com/api-keys).
+3. Create a new secret key.
+4. Store it in `.env`.
+
+```bash
+OPENAI_API_KEY=your_openai_api_key
+```
+
+### Google Gemini
+
+Environment variable: `GEMINI_API_KEY`
+
+Typical model prefixes: `gemini/...`
+
+How to get it:
+
+1. Create or sign in to your [Google AI Studio account](https://aistudio.google.com/).
+2. Generate an API key from the [Gemini API key page](https://aistudio.google.com/app/apikey).
+3. Store it in `.env` as `GEMINI_API_KEY`.
+
+```bash
+GEMINI_API_KEY=your_gemini_api_key
+```
+
+### Anthropic
+
+Environment variable: `ANTHROPIC_API_KEY`
+
+Typical model prefixes: `anthropic/...`
+
+How to get it:
+
+1. Create or sign in to your [Anthropic Console account](https://console.anthropic.com/).
+2. Create a new API key from the [Anthropic keys page](https://console.anthropic.com/settings/keys).
+3. Store it in `.env`.
+
+```bash
+ANTHROPIC_API_KEY=your_anthropic_api_key
+```
+
+### DeepSeek
+
+Environment variable: `DEEPSEEK_API_KEY`
+
+Typical model prefixes: `deepseek/...`
+
+How to get it:
+
+1. Create or sign in to your [DeepSeek platform account](https://platform.deepseek.com/).
+2. Generate an API key from the [DeepSeek API keys page](https://platform.deepseek.com/api_keys).
+3. Store it in `.env`.
+
+```bash
+DEEPSEEK_API_KEY=your_deepseek_api_key
+```
+
+### OpenRouter
+
+Environment variable: `OPENROUTER_API_KEY`
+
+Typical model prefixes: `openrouter/...`
+
+How to get it:
+
+1. Create or sign in to your [OpenRouter account](https://openrouter.ai/).
+2. Generate an API key from the [OpenRouter keys page](https://openrouter.ai/keys).
+3. Store it in `.env`.
+
+```bash
+OPENROUTER_API_KEY=your_openrouter_api_key
+```
+
+### Together AI
+
+Environment variable: `TOGETHER_API_KEY`
+
+Typical model prefixes: `together_ai/...`
+
+How to get it:
+
+1. Create or sign in to your [Together AI account](https://www.together.ai/).
+2. Generate an API key from the [Together AI API keys page](https://api.together.ai/settings/api-keys).
+3. Store it in `.env`.
+
+```bash
+TOGETHER_API_KEY=your_together_api_key
+```
+
+### Cohere
+
+Environment variable: `COHERE_API_KEY`
+
+Typical model prefixes: `cohere/...`
+
+How to get it:
+
+1. Create or sign in to your [Cohere account](https://dashboard.cohere.com/).
+2. Create an API key from the [Cohere API keys page](https://dashboard.cohere.com/api-keys).
+3. Store it in `.env`.
+
+```bash
+COHERE_API_KEY=your_cohere_api_key
+```
+
+### Fireworks AI
+
+Environment variable: `FIREWORKS_API_KEY`
+
+Typical model prefixes: `fireworks_ai/...`
+
+How to get it:
+
+1. Create or sign in to your [Fireworks AI account](https://fireworks.ai/).
+2. Generate an API key from the [Fireworks AI API keys page](https://app.fireworks.ai/settings/users/api-keys).
+3. Store it in `.env`.
+
+```bash
+FIREWORKS_API_KEY=your_fireworks_api_key
+```
+
+### Ollama
+
+Environment variable: none required
+
+Used for:
+
+- Local model inference
+
+How to set it up:
+
+1. Install Ollama from the [main Ollama website](https://ollama.com/).
+2. Pull the model you want to use by following the [Ollama library and setup docs](https://ollama.com/library).
+3. Set `base_url` or `rag_base_url` if needed, such as `http://localhost:11434`.
+
+## Default Embedding Provider
+
+### Hugging Face
+
+Environment variable: `HF_TOKEN`
+
+Used for:
+
+- Accessing the default Hugging Face embedding model workflow
+- Accessing gated or rate-limited Hugging Face models
+- Optional embedding/model downloads when required
+
+How to get it:
+
+1. Create or sign in to your [Hugging Face account](https://huggingface.co/).
+2. Open the [access tokens page](https://huggingface.co/settings/tokens).
+3. Create a new token with the required permissions.
+4. Store it in `.env`.
+
+```bash
+HF_TOKEN=your_huggingface_token
+```
+
+## Recommended `.env` Template
+
+Use the subset you need:
+
+```bash
+# Publisher providers
+SCOPUS_API_KEY=your_scopus_api_key
+SPRINGER_OPENACCESS_API_KEY=your_springer_openaccess_api_key
+SPRINGER_TDM_API_KEY=your_springer_tdm_api_key
+WILEY_API_KEY=your_wiley_api_key
+IOP_papers_path=/absolute/path/to/iop_papers
+
+# LLM providers
+OPENAI_API_KEY=your_openai_api_key
+GEMINI_API_KEY=your_gemini_api_key
+ANTHROPIC_API_KEY=your_anthropic_api_key
+DEEPSEEK_API_KEY=your_deepseek_api_key
+OPENROUTER_API_KEY=your_openrouter_api_key
+TOGETHER_API_KEY=your_together_api_key
+COHERE_API_KEY=your_cohere_api_key
+FIREWORKS_API_KEY=your_fireworks_api_key
+
+# Model and embedding access
+HF_TOKEN=your_huggingface_token
+```
+
+## Notes
+
+- Keep all keys in your local `.env` file and never commit them to version control.
+- For most users, the minimum setup is one publisher source plus one LLM provider.
+- If you use Gemini models, use `GEMINI_API_KEY`.
+- If you use gated or private Hugging Face models, make sure `HF_TOKEN` is available; public models work without a token.
+
+## Related Pages
+
+- [Installation](installation.md)
+- [Article Processing](../usage/article-processing.md)
+- [Data Extraction](../usage/data-extraction.md)
+- [RAG Configuration](../rag-config.md)

From 5516bd792860139c8bff420d8b2196d0da40d25c Mon Sep 17 00:00:00 2001
From: aritraroy24 <aritraroy24@gmail.com>
Date: Thu, 2 Apr 2026 10:26:04 +0100
Subject: [PATCH 2/3] chore: bump version to 0.1.6

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3c6085be..3c07ded7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "comproscanner"
-version = "0.1.5"
+version = "0.1.6"
 description = "Multi-agent system for extracting and processing structured composition-property data from scientific literature"
 readme = "README.md"
 authors = [{ name = "Aritra Roy", email = "contact@aritraroy.live" }]

From 2df5f0a2d8d7318726e6bed80cad034dc3763ba0 Mon Sep 17 00:00:00 2001
From: aritraroy24 <aritraroy24@gmail.com>
Date: Thu, 2 Apr 2026 11:11:17 +0100
Subject: [PATCH 3/3] fix: standardize model prefixes in rag_tool and update
 changelog

---
 .gitignore                                     |  4 ----
 CHANGELOG.md                                   | 18 +++++++++++-------
 docs/about/changelog.md                        | 14 +++++++-------
 docs/getting-started/api-key-guide.md          | 11 ++++++-----
 docs/rag-config.md                             |  6 +++---
 .../extract_flow/tools/rag_tool.py             | 10 +++++-----
 6 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/.gitignore b/.gitignore
index d3db6400..b031c1a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,10 +186,6 @@ applications
 vlm_test
 examples/vlm_piezo_test
 
-# Claude files
-CLAUDE.md
-.claude
-
 # Test results
 db
 results
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23b298d0..3695ada2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,7 +24,7 @@
 - `process_articles()` now routes user-provided `doi_list` by `general_publisher` from metadata and sends each DOI only to its matching source processor.
 
 ---
-## [0.1.6] - 02-04-2026
+## [0.1.6] - 2026-04-02
 ### Changed
 - Updated [README.md](README.md), [CITATION.cff](CITATION.cff) and docs with the published version (advance article) of the ComProScanner paper in _Digital Discovery_ as fully open access:
   - [ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature](https://doi.org/10.1039/D5DD00521C) 
@@ -32,8 +32,12 @@
 ### Added
 - Guide for API key creation for various LLM providers and publisher APIs added to the documentation at `docs/getting-started/api-key-guide.md` with detailed instructions for each provider.
 
+### Fixed
+- Model prefix handling in `rag_tool.py` standardized to reflect the docs.
+- `HF_TOKEN` documentation clarified as optional — only required for gated or private Hugging Face models.
+
 ---
-## [0.1.5] - 08-02-2026
+## [0.1.5] - 2026-02-08
 
 ### Added
 - Data related to comparison with other agentic data extraction frameworks added for the ComProScanner paper in the `examples/piezo_test/comparing_existing_frameworks` folder.
@@ -105,7 +109,7 @@
 - README badges section converted from HTML to markdown format for better compatibility across platforms.
 
 ---
-## [0.1.4] - 02-12-2025
+## [0.1.4] - 2025-12-02
 
 ### Added
 
@@ -141,7 +145,7 @@
   - [ComProScanner Workflow](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/overall_workflow.png)
 
 ---
-## [0.1.3] - 04-11-2025
+## [0.1.3] - 2025-11-04
 
 ### Fixed
 
@@ -150,7 +154,7 @@
   - To `from langchain.text_splitter.recursive_character import RecursiveCharacterTextSplitter`
 
 ---
-## [0.1.2] - 24-10-2025
+## [0.1.2] - 2025-10-24
 
 ### Added
 
@@ -158,7 +162,7 @@
   - [arXiv:2510.20362](https://arxiv.org/abs/2510.20362)
 
 ---
-## [0.1.1] - 22-10-2025
+## [0.1.1] - 2025-10-22
 
 ### Fixed
 
@@ -167,7 +171,7 @@
   - [ComProScanner Workflow](https://i.ibb.co/QWd2qd3/overall-workflow.png)
 
 ---
-## [0.1.0] - 22-10-2025
+## [0.1.0] - 2025-10-22
 
 ### Added
 
diff --git a/docs/about/changelog.md b/docs/about/changelog.md
index 23b298d0..438b8421 100644
--- a/docs/about/changelog.md
+++ b/docs/about/changelog.md
@@ -24,7 +24,7 @@
 - `process_articles()` now routes user-provided `doi_list` by `general_publisher` from metadata and sends each DOI only to its matching source processor.
 
 ---
-## [0.1.6] - 02-04-2026
+## [0.1.6] - 2026-04-02
 ### Changed
 - Updated [README.md](README.md), [CITATION.cff](CITATION.cff) and docs with the published version (advance article) of the ComProScanner paper in _Digital Discovery_ as fully open access:
   - [ComProScanner: a multi-agent based framework for composition-property structured data extraction from scientific literature](https://doi.org/10.1039/D5DD00521C) 
@@ -33,7 +33,7 @@
 - Guide for API key creation for various LLM providers and publisher APIs added to the documentation at `docs/getting-started/api-key-guide.md` with detailed instructions for each provider.
 
 ---
-## [0.1.5] - 08-02-2026
+## [0.1.5] - 2026-02-08
 
 ### Added
 - Data related to comparison with other agentic data extraction frameworks added for the ComProScanner paper in the `examples/piezo_test/comparing_existing_frameworks` folder.
@@ -105,7 +105,7 @@
 - README badges section converted from HTML to markdown format for better compatibility across platforms.
 
 ---
-## [0.1.4] - 02-12-2025
+## [0.1.4] - 2025-12-02
 
 ### Added
 
@@ -141,7 +141,7 @@
   - [ComProScanner Workflow](https://raw.githubusercontent.com/aritraroy24/ComProScanner/main/assets/overall_workflow.png)
 
 ---
-## [0.1.3] - 04-11-2025
+## [0.1.3] - 2025-11-04
 
 ### Fixed
 
@@ -150,7 +150,7 @@
   - To `from langchain.text_splitter.recursive_character import RecursiveCharacterTextSplitter`
 
 ---
-## [0.1.2] - 24-10-2025
+## [0.1.2] - 2025-10-24
 
 ### Added
 
@@ -158,7 +158,7 @@
   - [arXiv:2510.20362](https://arxiv.org/abs/2510.20362)
 
 ---
-## [0.1.1] - 22-10-2025
+## [0.1.1] - 2025-10-22
 
 ### Fixed
 
@@ -167,7 +167,7 @@
   - [ComProScanner Workflow](https://i.ibb.co/QWd2qd3/overall-workflow.png)
 
 ---
-## [0.1.0] - 22-10-2025
+## [0.1.0] - 2025-10-22
 
 ### Added
 
diff --git a/docs/getting-started/api-key-guide.md b/docs/getting-started/api-key-guide.md
index 0cb974ba..81645188 100644
--- a/docs/getting-started/api-key-guide.md
+++ b/docs/getting-started/api-key-guide.md
@@ -200,7 +200,7 @@ OPENROUTER_API_KEY=your_openrouter_api_key
 
 Environment variable: `TOGETHER_API_KEY`
 
-Typical model prefixes: `together_ai/...`
+Typical model prefixes: `together/...`
 
 How to get it:
 
@@ -232,7 +232,7 @@ COHERE_API_KEY=your_cohere_api_key
 
 Environment variable: `FIREWORKS_API_KEY`
 
-Typical model prefixes: `fireworks_ai/...`
+Typical model prefixes: `fireworks/...`
 
 How to get it:
 
@@ -264,11 +264,12 @@ How to set it up:
 
 Environment variable: `HF_TOKEN`
 
+> **Optional.** Only required for downloading gated or private Hugging Face models. Public models work without a token.
+
 Used for:
 
-- Accessing the default Hugging Face embedding model workflow
-- Accessing gated or rate-limited Hugging Face models
-- Optional embedding/model downloads when required
+- Accessing gated or private Hugging Face models
+- Rate-limited API access
 
 How to get it:
 
diff --git a/docs/rag-config.md b/docs/rag-config.md
index 9b843481..52376775 100644
--- a/docs/rag-config.md
+++ b/docs/rag-config.md
@@ -123,7 +123,7 @@ scanner.extract_composition_property_data(
 scanner.extract_composition_property_data(
     main_extraction_keyword="d33",
     rag_db_path="embeddings/piezo",
-    rag_chat_model="deepseek-chat",
+    rag_chat_model="deepseek/deepseek-chat",
     rag_max_tokens=1024,
     rag_top_k=4,
 )
@@ -178,7 +178,7 @@ scanner.extract_composition_property_data(
 scanner.extract_composition_property_data(
     main_extraction_keyword="d33",
     rag_db_path="embeddings/piezo",
-    rag_chat_model="together_ai/meta-llama/Llama-3-70b-chat-hf",
+    rag_chat_model="together/meta-llama/Llama-3-70b-chat-hf",
     rag_max_tokens=1024,
     rag_top_k=4,
 )
@@ -220,7 +220,7 @@ scanner.extract_composition_property_data(
 scanner.extract_composition_property_data(
     main_extraction_keyword="d33",
     rag_db_path="embeddings/piezo",
-    rag_chat_model="fireworks_ai/accounts/fireworks/models/llama-v3-8b-instruct",
+    rag_chat_model="fireworks/models/llama-v3-8b-instruct",
     rag_max_tokens=1024,
     rag_top_k=4,
 )
diff --git a/src/comproscanner/extract_flow/tools/rag_tool.py b/src/comproscanner/extract_flow/tools/rag_tool.py
index deff690d..b8ee8220 100644
--- a/src/comproscanner/extract_flow/tools/rag_tool.py
+++ b/src/comproscanner/extract_flow/tools/rag_tool.py
@@ -82,28 +82,28 @@ def _get_llm(self) -> BaseChatModel:
             "callbacks": callbacks,
         }
         # OpenAI models
-        if model.startswith(("gpt-", "text-", "o1", "o3")):
+        if model.startswith(("openai/", "gpt-", "text-", "o1", "o3")):
             self._check_package_exists("langchain_openai", model)
             from langchain_openai import ChatOpenAI
 
             return ChatOpenAI(model=model, request_timeout=1000, **common_params)
 
         # Deepseek models
-        if model.startswith("deepseek"):
+        if model.startswith("deepseek/"):
             self._check_package_exists("langchain_deepseek", model)
             from langchain_deepseek import ChatDeepSeek
 
             return ChatDeepSeek(model=model, request_timeout=1000, **common_params)
 
         # Google Gemini models
-        elif model.startswith("gemini-"):
+        elif model.startswith("gemini/"):
             self._check_package_exists("langchain_google_genai", model)
             from langchain_google_genai import ChatGoogleGenerativeAI
 
             return ChatGoogleGenerativeAI(model=model, **common_params)
 
         # Anthropic Claude models
-        elif model.startswith("claude-"):
+        elif model.startswith("claude/"):
             self._check_package_exists("langchain_anthropic", model)
             from langchain_anthropic import ChatAnthropic
 
@@ -143,7 +143,7 @@ def _get_llm(self) -> BaseChatModel:
             return ChatCohere(model=model_name, **common_params)
 
         # Fireworks models
-        elif model.startswith(("fireworks/", "accounts/fireworks")):
+        elif model.startswith(("fireworks/")):
             self._check_package_exists("langchain_fireworks", model)
             from langchain_fireworks import ChatFireworks