learning-commons-org · adnanrhussain · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/.github/workflows/test-sdk-typescript.yml b/.github/workflows/test-sdk-typescript.yml
@@ -6,12 +6,12 @@ on:
       - main
     paths:
       - 'sdks/typescript/**'
-      - 'evals/prompts/**'
+      - 'evals/**/*.txt'
       - '.github/workflows/test-sdk-typescript.yml'
   pull_request:
     paths:
       - 'sdks/typescript/**'
-      - 'evals/prompts/**'
+      - 'evals/**/*.txt'
       - '.github/workflows/test-sdk-typescript.yml'
 
 jobs:

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,4 +1,4 @@
 {
   "sdks/typescript": "0.4.0",
-  "evals/prompts": "1.4.0"
+  "evals": "1.4.0"
 }
diff --git a/README.md b/README.md
@@ -45,8 +45,8 @@ To use the evaluators, clone the repository and follow the instructions below.
 If you’d like to download or access our evaluators and datasets directly, follow the links below.
 
 * Evaluators literacy package
-  * [Prompts](./evals/prompts)
-  * [Notebooks](./evals)
+  * [Qualitative Text Complexity](./evals/literacy/qualitative-text-complexity/)
+  * [Examples (Python notebooks)](./evals/literacy/qualitative-text-complexity/examples/)
 * Datasets
   * [Learning Commons annotations of CLEAR for qualitative text complexity v1.0 2025-09-02.csv](https://aidt-evaluators-files-public-prod.s3.us-west-2.amazonaws.com/Learning+Commons+annotations+of+CLEAR+for+qualitative+text+complexity+v1.0+2025-09-02.csv)
 
@@ -82,7 +82,7 @@ Remember to activate the virtual environment for each new shell session when wor
 The required packages are listed in the `requirements.txt` file.
 
 ```shell
-pip install -r evals/requirements.txt
+pip install -r evals/utils/requirements.txt
 ```
 
 ### 3. Set your API keys
@@ -132,7 +132,7 @@ Remember to activate the virtual environment for each new shell session when wor
 ### 2. Install dependencies
 
 ```cmd
-pip install -r evals/requirements.txt
+pip install -r evals/utils/requirements.txt
 ```
 
 ### 3. Set your API keys
@@ -171,7 +171,7 @@ jupyter lab
 
 Jupyter will open in your web browser (usually at `http://localhost:8888`).
 
-2. Browse into the `evals` folder, then double click on the evaluator you want to try.
+2. Browse to `evals/literacy/qualitative-text-complexity/examples/`, then double-click the notebook for the evaluator you want to try.
 3. You can now copy the text you want to evaluate into the last code cell of the notebook to run an evaluator on your text sample.
 
 If you prefer using an IDE with Python and Jupyter notebook support, such as VSCode with Microsoft's Python and Jupyter extensions, please refer to Microsoft's instructions for their installation and configuration.)

diff --git a/evals/prompts/CHANGELOG.md → evals/CHANGELOG.md b/evals/prompts/CHANGELOG.md → evals/CHANGELOG.md
@@ -1,10 +1,18 @@
-# Prompts Changelog
+# Evals Changelog
 
 All notable changes to the evaluator prompt files will be documented here.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ---
+
+## [Unreleased]
+
+### Changed
+- Reorganized the package from `evals/prompts` into `evals`, using a domain-based folder structure (for example, `evals/literacy/qualitative-text-complexity/`).
+
+---
+
 ## [1.4.0] - 2026-03-20
 
 ### Added

diff --git a/evals/README.md b/evals/README.md
@@ -1,41 +1,14 @@
-## **Evaluators** Code and Prompts
+## Evals
 
-## **Requirements**
+This directory contains evaluator prompts, example notebooks, and setup utilities, organized by domain and category.
 
-Please follow the detailed instructions in the top-level [README](../README.md) to set up your environment.
+See the [Quickstart](../README.md#quickstart) in the root README for setup and running instructions.
 
-## **Quick Start**
+## Structure
 
-Use the provided Makefile to quickly set up your environment and install dependencies with `make` tool.
-
-### Install dependencies
-
-```shell
-make install
-```
-
-### Set your API keys
-
-Set `GOOGLE_API_KEY` and `OPENAI_API_KEY` in the environment variable in your shell session, or add to `.env` file.
-
-```shell
-export GOOGLE_API_KEY="..."
-export OPENAI_API_KEY="..."
-```
-
-### Run the Evaluator Code
-
-You are now ready to run the evaluator examples. We recommend using a Jupyter Notebook for interactive exploration.
-
-Start Jupyter Notebook:
-```shell
-make jupyter
-```
-
-Jupyter will open in your web browser (usually at http://localhost:8888).
-
-1. In Jupyter file browser, double click on the evaluator you want to try.
-2. Copy the text you want to evaluate into the last code cell of the notebook to try the evaluation of your text sample.
-3. Use the toolbar to run all cells, or run each cell in sequence until the end.
-
-    _If you prefer using an IDE with Python and Jupyter notebook support, such as VSCode with Microsoft's Python and Jupyter extensions, please refer to Microsoft's instructions for their installation and configuration._
+| Path | Description |
+| :--- | :--- |
+| [`literacy/`](./literacy/) | Literacy evaluators |
+| [`literacy/qualitative-text-complexity/`](./literacy/qualitative-text-complexity/) | Qualitative text complexity evaluators: Grade Level Appropriateness (GLA), Sentence Structure, Vocabulary, Conventionality, Subject Matter Knowledge (SMK) |
+| [`math/`](./math/) | Math evaluators (coming soon) |
+| [`utils/`](./utils/) | Setup scripts and dependencies |
diff --git a/evals/__init__.py b/evals/__init__.py
diff --git a/evals/prompts/README.md → ...acy/qualitative-text-complexity/README.md b/evals/prompts/README.md → ...acy/qualitative-text-complexity/README.md
diff --git a/evals/prompts/conventionality/system.txt → ...ext-complexity/conventionality/system.txt b/evals/prompts/conventionality/system.txt → ...ext-complexity/conventionality/system.txt
diff --git a/evals/prompts/conventionality/user.txt → ...-text-complexity/conventionality/user.txt b/evals/prompts/conventionality/user.txt → ...-text-complexity/conventionality/user.txt
diff --git a/evals/conventionality_evaluator.ipynb → .../examples/conventionality_evaluator.ipynb b/evals/conventionality_evaluator.ipynb → .../examples/conventionality_evaluator.ipynb
@@ -72,7 +72,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from prompts import conventionality_prompts as prompts\n",
+    "import conventionality_prompts as prompts\n",
     "\n",
     "# Set your api key in your environment, .env file, or enter when prompted.\n",
     "# os.environ['GOOGLE_API_KEY'] = 'YOUR API KEY'\n",

diff --git a/evals/prompts/conventionality_prompts.py → ...exity/examples/conventionality_prompts.py b/evals/prompts/conventionality_prompts.py → ...exity/examples/conventionality_prompts.py
diff --git a/evals/prompts/gla_prompts.py → ...e-text-complexity/examples/gla_prompts.py b/evals/prompts/gla_prompts.py → ...e-text-complexity/examples/gla_prompts.py
diff --git a/evals/grade_level_evaluator.ipynb → ...xity/examples/grade_level_evaluator.ipynb b/evals/grade_level_evaluator.ipynb → ...xity/examples/grade_level_evaluator.ipynb
@@ -126,7 +126,7 @@
    "outputs": [],
    "source": [
     "#This is the system prompt, user prompt and model output setting. Do not change this\n",
-    "from prompts.gla_prompts import gla_system_prompt, gla_user_prompt\n",
+    "from gla_prompts import gla_system_prompt, gla_user_prompt\n",
     "\n",
     "class OutputRanges(BaseModel):\n",
     "    reasoning: str = Field(description=\"your reasoning for your answer in numbered bullet points for 4 steps with a 5th bullet point for synthesis.\")\n",

diff --git a/evals/prompts/sent_str_prompts.py → ...t-complexity/examples/sent_str_prompts.py b/evals/prompts/sent_str_prompts.py → ...t-complexity/examples/sent_str_prompts.py
diff --git a/evals/sentence_structure_evaluator.ipynb → ...amples/sentence_structure_evaluator.ipynb b/evals/sentence_structure_evaluator.ipynb → ...amples/sentence_structure_evaluator.ipynb
@@ -115,7 +115,7 @@
    },
    "outputs": [],
    "source": [
-    "from prompts import sent_str_prompts as prompts\n",
+    "import sent_str_prompts as prompts\n",
     "\n",
     "# Set your api key in your environment, .env file, or enter when prompted.\n",
     "# os.environ['OPENAI_API_KEY'] = 'YOUR API KEY'\n",

diff --git a/evals/smk_evaluator.ipynb → ...t-complexity/examples/smk_evaluator.ipynb b/evals/smk_evaluator.ipynb → ...t-complexity/examples/smk_evaluator.ipynb
@@ -67,7 +67,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from prompts import smk_prompts as prompts\n",
+    "import smk_prompts as prompts\n",
     "\n",
     "# Set your api key in your environment, .env file, or enter when prompted.\n",
     "# os.environ['GOOGLE_API_KEY'] = 'YOUR API KEY'\n",

diff --git a/evals/prompts/smk_prompts.py → ...e-text-complexity/examples/smk_prompts.py b/evals/prompts/smk_prompts.py → ...e-text-complexity/examples/smk_prompts.py
diff --git a/evals/text_complexity_combo.ipynb → ...xity/examples/text_complexity_combo.ipynb b/evals/text_complexity_combo.ipynb → ...xity/examples/text_complexity_combo.ipynb
@@ -127,7 +127,7 @@
    },
    "outputs": [],
    "source": [
-    "from prompts import vocab_prompts as v_prompts, sent_str_prompts as s_prompts, smk_prompts as smk_prompts, conventionality_prompts as conv_prompts\n",
+    "import vocab_prompts as v_prompts, sent_str_prompts as s_prompts, smk_prompts as smk_prompts, conventionality_prompts as conv_prompts\n",
     "\n",
     "# Set your api keys in your environment, .env file, or enter when prompted.\n",
     "# os.environ['GOOGLE_API_KEY'] = 'YOUR API KEY'\n",

diff --git a/evals/prompts/vocab_prompts.py → ...text-complexity/examples/vocab_prompts.py b/evals/prompts/vocab_prompts.py → ...text-complexity/examples/vocab_prompts.py
diff --git a/evals/vocabulary_evaluator.ipynb → ...exity/examples/vocabulary_evaluator.ipynb b/evals/vocabulary_evaluator.ipynb → ...exity/examples/vocabulary_evaluator.ipynb
@@ -120,7 +120,7 @@
    },
    "outputs": [],
    "source": [
-    "from prompts import vocab_prompts as prompts\n",
+    "import vocab_prompts as prompts\n",
     "\n",
     "# Set your api keys in your environment, .env file, or enter when prompted.\n",
     "# os.environ['GOOGLE_API_KEY'] = 'YOUR API KEY'\n",

diff --git a/...ts/grade-level-appropriateness/system.txt → ...ty/grade-level-appropriateness/system.txt b/...ts/grade-level-appropriateness/system.txt → ...ty/grade-level-appropriateness/system.txt
diff --git a/...mpts/grade-level-appropriateness/user.txt → ...xity/grade-level-appropriateness/user.txt b/...mpts/grade-level-appropriateness/user.txt → ...xity/grade-level-appropriateness/user.txt
diff --git a/...ts/sentence-structure/analysis-system.txt → ...ty/sentence-structure/analysis-system.txt b/...ts/sentence-structure/analysis-system.txt → ...ty/sentence-structure/analysis-system.txt
diff --git a/...mpts/sentence-structure/analysis-user.txt → ...xity/sentence-structure/analysis-user.txt b/...mpts/sentence-structure/analysis-user.txt → ...xity/sentence-structure/analysis-user.txt
diff --git a/.../sentence-structure/complexity-system.txt → .../sentence-structure/complexity-system.txt b/.../sentence-structure/complexity-system.txt → .../sentence-structure/complexity-system.txt
diff --git a/...ts/sentence-structure/complexity-user.txt → ...ty/sentence-structure/complexity-user.txt b/...ts/sentence-structure/complexity-user.txt → ...ty/sentence-structure/complexity-user.txt
diff --git a/...pts/sentence-structure/rubric-grade-3.txt → ...ity/sentence-structure/rubric-grade-3.txt b/...pts/sentence-structure/rubric-grade-3.txt → ...ity/sentence-structure/rubric-grade-3.txt
diff --git a/...pts/sentence-structure/rubric-grade-4.txt → ...ity/sentence-structure/rubric-grade-4.txt b/...pts/sentence-structure/rubric-grade-4.txt → ...ity/sentence-structure/rubric-grade-4.txt
diff --git a/...sentence-structure/rubric-grades-5-12.txt → ...sentence-structure/rubric-grades-5-12.txt b/...sentence-structure/rubric-grades-5-12.txt → ...sentence-structure/rubric-grades-5-12.txt
diff --git a/...ompts/subject-matter-knowledge/system.txt → ...exity/subject-matter-knowledge/system.txt b/...ompts/subject-matter-knowledge/system.txt → ...exity/subject-matter-knowledge/system.txt
diff --git a/...prompts/subject-matter-knowledge/user.txt → ...plexity/subject-matter-knowledge/user.txt b/...prompts/subject-matter-knowledge/user.txt → ...plexity/subject-matter-knowledge/user.txt
diff --git a/...ompts/vocabulary/background-knowledge.txt → ...exity/vocabulary/background-knowledge.txt b/...ompts/vocabulary/background-knowledge.txt → ...exity/vocabulary/background-knowledge.txt
diff --git a/.../prompts/vocabulary/grades-3-4-system.txt → ...mplexity/vocabulary/grades-3-4-system.txt b/.../prompts/vocabulary/grades-3-4-system.txt → ...mplexity/vocabulary/grades-3-4-system.txt
diff --git a/evals/prompts/vocabulary/grades-3-4-user.txt → ...complexity/vocabulary/grades-3-4-user.txt b/evals/prompts/vocabulary/grades-3-4-user.txt → ...complexity/vocabulary/grades-3-4-user.txt
diff --git a/...rompts/vocabulary/other-grades-system.txt → ...lexity/vocabulary/other-grades-system.txt b/...rompts/vocabulary/other-grades-system.txt → ...lexity/vocabulary/other-grades-system.txt
diff --git a/.../prompts/vocabulary/other-grades-user.txt → ...mplexity/vocabulary/other-grades-user.txt b/.../prompts/vocabulary/other-grades-user.txt → ...mplexity/vocabulary/other-grades-user.txt
diff --git a/evals/math/README.md b/evals/math/README.md
@@ -0,0 +1,3 @@
+# Math Evaluators
+
+Coming soon.
diff --git a/evals/prompts/__init__.py b/evals/prompts/__init__.py
diff --git a/evals/.env.example → evals/utils/.env.example b/evals/.env.example → evals/utils/.env.example
diff --git a/evals/Makefile → evals/utils/Makefile b/evals/Makefile → evals/utils/Makefile
diff --git a/evals/install.sh → evals/utils/install.sh b/evals/install.sh → evals/utils/install.sh
diff --git a/evals/requirements.txt → evals/utils/requirements.txt b/evals/requirements.txt → evals/utils/requirements.txt
diff --git a/evals/run.sh → evals/utils/run.sh b/evals/run.sh → evals/utils/run.sh
diff --git a/release-please-config.json b/release-please-config.json
@@ -1,5 +1,6 @@
 {
     "include-v-in-tag": true,
+    "last-release-sha": "b17cfb728723d27277fcc4a986d95c6189010cd4",
     "pull-request-title-pattern": "chore${scope}: release${component} ${version}",
     "changelog-sections": [
         {
@@ -54,10 +55,10 @@
             "changelog-path": "CHANGELOG.md",
             "component": "sdks-typescript"
         },
-        "evals/prompts": {
+        "evals": {
             "release-type": "simple",
             "changelog-path": "CHANGELOG.md",
-            "component": "evals-prompts"
+            "component": "evals"
         }
     }
 }
diff --git a/sdks/typescript/src/prompts/conventionality/index.ts b/sdks/typescript/src/prompts/conventionality/index.ts
@@ -1,5 +1,5 @@
-import SYSTEM_PROMPT from '../../../../../evals/prompts/conventionality/system.txt';
-import USER_PROMPT_TEMPLATE from '../../../../../evals/prompts/conventionality/user.txt';
+import SYSTEM_PROMPT from '../../../../../evals/literacy/qualitative-text-complexity/conventionality/system.txt';
+import USER_PROMPT_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/conventionality/user.txt';
 
 /**
  * Get the Conventionality evaluator system prompt

diff --git a/sdks/typescript/src/prompts/grade-level-appropriateness/index.ts b/sdks/typescript/src/prompts/grade-level-appropriateness/index.ts
@@ -1,5 +1,5 @@
-import SYSTEM_PROMPT_TEMPLATE from '../../../../../evals/prompts/grade-level-appropriateness/system.txt';
-import USER_PROMPT_TEMPLATE from '../../../../../evals/prompts/grade-level-appropriateness/user.txt';
+import SYSTEM_PROMPT_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/grade-level-appropriateness/system.txt';
+import USER_PROMPT_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/grade-level-appropriateness/user.txt';
 
 /**
  * Get the system prompt for grade level appropriateness evaluation

diff --git a/sdks/typescript/src/prompts/sentence-structure/analysis.ts b/sdks/typescript/src/prompts/sentence-structure/analysis.ts
@@ -1,5 +1,5 @@
-import SYSTEM_PROMPT_ANALYSIS_TEMPLATE from '../../../../../evals/prompts/sentence-structure/analysis-system.txt';
-import USER_PROMPT_ANALYSIS_TEMPLATE from '../../../../../evals/prompts/sentence-structure/analysis-user.txt';
+import SYSTEM_PROMPT_ANALYSIS_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/analysis-system.txt';
+import USER_PROMPT_ANALYSIS_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/analysis-user.txt';
 
 /**
  * Get the system prompt for sentence grammatical analysis

diff --git a/sdks/typescript/src/prompts/sentence-structure/complexity.ts b/sdks/typescript/src/prompts/sentence-structure/complexity.ts
@@ -1,8 +1,8 @@
-import SYSTEM_PROMPT_COMPLEXITY_TEMPLATE from '../../../../../evals/prompts/sentence-structure/complexity-system.txt';
-import USER_PROMPT_COMPLEXITY_TEMPLATE from '../../../../../evals/prompts/sentence-structure/complexity-user.txt';
-import RUBRIC_GRADE_3 from '../../../../../evals/prompts/sentence-structure/rubric-grade-3.txt';
-import RUBRIC_GRADE_4 from '../../../../../evals/prompts/sentence-structure/rubric-grade-4.txt';
-import RUBRIC_GRADES_5_12 from '../../../../../evals/prompts/sentence-structure/rubric-grades-5-12.txt';
+import SYSTEM_PROMPT_COMPLEXITY_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/complexity-system.txt';
+import USER_PROMPT_COMPLEXITY_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/complexity-user.txt';
+import RUBRIC_GRADE_3 from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/rubric-grade-3.txt';
+import RUBRIC_GRADE_4 from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/rubric-grade-4.txt';
+import RUBRIC_GRADES_5_12 from '../../../../../evals/literacy/qualitative-text-complexity/sentence-structure/rubric-grades-5-12.txt';
 
 /**
  * Get the system prompt for sentence structure complexity evaluation

diff --git a/sdks/typescript/src/prompts/subject-matter-knowledge/index.ts b/sdks/typescript/src/prompts/subject-matter-knowledge/index.ts
@@ -1,5 +1,5 @@
-import SYSTEM_PROMPT from '../../../../../evals/prompts/subject-matter-knowledge/system.txt';
-import USER_PROMPT_TEMPLATE from '../../../../../evals/prompts/subject-matter-knowledge/user.txt';
+import SYSTEM_PROMPT from '../../../../../evals/literacy/qualitative-text-complexity/subject-matter-knowledge/system.txt';
+import USER_PROMPT_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/subject-matter-knowledge/user.txt';
 
 /**
  * Get the SMK evaluator system prompt

diff --git a/sdks/typescript/src/prompts/vocabulary/background-knowledge.ts b/sdks/typescript/src/prompts/vocabulary/background-knowledge.ts
@@ -1,4 +1,4 @@
-import BACKGROUND_KNOWLEDGE_TEMPLATE from '../../../../../evals/prompts/vocabulary/background-knowledge.txt';
+import BACKGROUND_KNOWLEDGE_TEMPLATE from '../../../../../evals/literacy/qualitative-text-complexity/vocabulary/background-knowledge.txt';
 
 /**
  * Generate the background knowledge prompt for a given text and grade level

diff --git a/sdks/typescript/src/prompts/vocabulary/system.ts b/sdks/typescript/src/prompts/vocabulary/system.ts
@@ -1,5 +1,5 @@
-import SYSTEM_PROMPT_GRADES_3_4 from '../../../../../evals/prompts/vocabulary/grades-3-4-system.txt';
-import SYSTEM_PROMPT_OTHER_GRADES from '../../../../../evals/prompts/vocabulary/other-grades-system.txt';
+import SYSTEM_PROMPT_GRADES_3_4 from '../../../../../evals/literacy/qualitative-text-complexity/vocabulary/grades-3-4-system.txt';
+import SYSTEM_PROMPT_OTHER_GRADES from '../../../../../evals/literacy/qualitative-text-complexity/vocabulary/other-grades-system.txt';
 
 /**
  * Get the appropriate system prompt based on grade level

diff --git a/sdks/typescript/src/prompts/vocabulary/user.ts b/sdks/typescript/src/prompts/vocabulary/user.ts
@@ -1,5 +1,5 @@
-import USER_PROMPT_TEMPLATE_GRADES_3_4 from '../../../../../evals/prompts/vocabulary/grades-3-4-user.txt';
-import USER_PROMPT_TEMPLATE_OTHER_GRADES from '../../../../../evals/prompts/vocabulary/other-grades-user.txt';
+import USER_PROMPT_TEMPLATE_GRADES_3_4 from '../../../../../evals/literacy/qualitative-text-complexity/vocabulary/grades-3-4-user.txt';
+import USER_PROMPT_TEMPLATE_OTHER_GRADES from '../../../../../evals/literacy/qualitative-text-complexity/vocabulary/other-grades-user.txt';
 
 /**
  * Generate the user prompt for vocabulary complexity evaluation

diff --git a/sdks/typescript/tests/README.md b/sdks/typescript/tests/README.md
@@ -194,7 +194,7 @@ interface TestResult {
 ## Test Strategy
 
 ### Local Development
-Tests run against `src/` with prompts copied from `../../evals/prompts/`:
+Tests run against `src/` with prompts copied from `../../evals/literacy/qualitative-text-complexity/`:
 ```bash
 npm run test:unit
 npm run test:integration