diff --git a/pages/generative-apis/menu.ts b/pages/generative-apis/menu.ts index 2edb20b386..f20c875a76 100644 --- a/pages/generative-apis/menu.ts +++ b/pages/generative-apis/menu.ts @@ -147,6 +147,10 @@ export const generativeApisMenu = { label: 'Security and reliability in Generative APIs', slug: 'security-and-reliability', }, + { + label: 'Understanding Generative APIs costs', + slug: 'cost-estimator', + }, { label: 'Adding AI to VS Code using Continue', slug: 'adding-ai-to-vscode-using-continue', diff --git a/pages/generative-apis/reference-content/cost-estimator.mdx b/pages/generative-apis/reference-content/cost-estimator.mdx new file mode 100644 index 0000000000..b72af3d49b --- /dev/null +++ b/pages/generative-apis/reference-content/cost-estimator.mdx @@ -0,0 +1,35 @@ +--- +title: Understanding Generative APIs costs +description: Compare costs for the different deployment options of Scaleway Generative APIs - Serverless versus Dedicated Deployment. +tags: generative-apis how-much cost charge +dates: + validation: 2026-05-11 + posted: 2026-05-11 +--- + +Understanding the financial impact of AI workloads is essential for making informed decisions. The Cost estimator (available via the [Scaleway console](https://console.scaleway.com/generative-api/cost-estimator)) provides a clear, side‑by‑side view of how the same generative workload behaves under the different deployment options offered by Scaleway: [Generative APIs - Serverless versus Generative APIs Dedicated Deployment](/generative-apis/faq/#what-is-the-difference-between-serverless-and-dedicated-deployment). + + + - The Cost estimator provides an estimate based on standard benchmarks, assuming significant concurrency and a low cache hit rate. Only performance tests in production, based on your actual workload, can provide a fully accurate estimate. + - Performance may vary significantly for extreme input/output ratios (e.g., 100:1 or 1:10). 
In these cases, the workload is dominated by either input processing or output generation, and that side becomes the bottleneck. + - For dedicated deployments, caching is implicit and exclusive to each user. This can significantly improve performance for use cases with many similar input tokens, such as a long system prompt with a common prefix shared across requests, as is typical of extended conversations. + + +## Compare costs + +1. Log in to the [Scaleway console](https://console.scaleway.com/). + +2. Click **Generative APIs** in the **AI** section of the side menu. + +3. Select the **Cost estimator** tab. + +4. Model your workload by setting the following: + - **Number of users** + - **Queries per user per day** + - **Hours of usage/day** + - **Load** + +5. Set your chosen **Model** and **GPU**. + The estimator instantly calculates the total monthly cost for both Serverless and Dedicated modes. + +6. Compare cost differences side‑by‑side. \ No newline at end of file