diff --git a/docs/alora.md b/docs/alora.md
index 75d02037..63b4a98a 100644
--- a/docs/alora.md
+++ b/docs/alora.md
@@ -37,7 +37,7 @@ Use the `m alora train` command to fine-tune a LoRA or aLoRA adapter requirement
 
 ```bash
 m alora train path/to/data.jsonl \
-  --basemodel ibm-granite/granite-3.2-8b-instruct \
+  --basemodel ibm-granite/granite-4.0-micro \
   --outfile ./checkpoints/alora_adapter \
   --adapter alora \
   --epochs 6 \
@@ -47,6 +47,10 @@ m alora train path/to/data.jsonl \
   --grad-accum 4
 ```
 
+> **Note on Model Selection**: Use non-hybrid models (e.g., `granite-4.0-micro`) for aLoRA training.
+> Hybrid models (`granite-4.0-h-micro`) are recommended for general inference, but adapters should be
+> trained on non-hybrid base models for compatibility with the `ibm-granite/rag-intrinsics-lib` repository.
+
 ### 📌 Parameters
 
 | Flag | Type | Default | Description |
diff --git a/docs/dev/requirement_aLoRA_rerouting.md b/docs/dev/requirement_aLoRA_rerouting.md
index 74794f8a..546f1606 100644
--- a/docs/dev/requirement_aLoRA_rerouting.md
+++ b/docs/dev/requirement_aLoRA_rerouting.md
@@ -37,10 +37,10 @@ from mellea.core import Requirement
 from mellea.backends.adapters import GraniteCommonAdapter
 
 m = start_session(
-    "huggingface.LocalHFBackend:ibm-granite/granite-3.2-8b-instruct")
+    "huggingface.LocalHFBackend:ibm-granite/granite-4.0-micro")
 
 # By default, the AloraRequirement uses a GraniteCommonAdapter with "requirement_check".
-m.backend.add_adapter(GraniteCommonAdapter("ibm-granite/rag-intrinsics-lib", "requirement_check", base_model_name="granite-3.2-8b-instruct"))
+m.backend.add_adapter(GraniteCommonAdapter("ibm-granite/rag-intrinsics-lib", "requirement_check", base_model_name="granite-4.0-micro"))
 
 m.instruct(
     "Corporate wants you to find the difference between these two strings:\n\naaa\naba")
diff --git a/docs/examples/aLora/101_example.py b/docs/examples/aLora/101_example.py
index 5ad625f9..1d5a6695 100644
--- a/docs/examples/aLora/101_example.py
+++ b/docs/examples/aLora/101_example.py
@@ -13,7 +13,7 @@
 
 # Define a backend and add the constraint aLora
 backend = LocalHFBackend(
-    model_id="ibm-granite/granite-3.2-8b-instruct", cache=SimpleLRUCache(5)
+    model_id="ibm-granite/granite-4.0-h-micro", cache=SimpleLRUCache(5)
 )
 
 custom_stembolt_failure_constraint = HFConstraintAlora(
diff --git a/docs/examples/generative_slots/generative_gsm8k.py b/docs/examples/generative_slots/generative_gsm8k.py
index efaf1e0d..44da109d 100644
--- a/docs/examples/generative_slots/generative_gsm8k.py
+++ b/docs/examples/generative_slots/generative_gsm8k.py
@@ -1,6 +1,6 @@
 # pytest: ollama, qualitative, llm, slow
 
-"""Example of chain-of-thought reasoning on a mathematical question from the GSM8K dataset, structured as code for improved performance with the granite-3.3-8B model. The original accuracy in standard "thinking" mode is approximately 80%, while this implementation achieves 85-89% accuracy—up to 9 points higher.
+"""Example of chain-of-thought reasoning on a mathematical question from the GSM8K dataset, structured as code for improved performance with Granite 4 models. The original accuracy in standard "thinking" mode is approximately 80%, while this implementation achieves 85-89% accuracy, up to 9 points higher.
 
 This demonstrates that generative decorators are sufficient for complex reasoning tasks: not only do they maintain or improve performance, but they also significantly enhance observability and control. For instance, the structured Thought titles can be easily surfaced in a UI, providing instant insight into the model's reasoning process.
 """
diff --git a/docs/examples/instruct_validate_repair/101_email_with_validate.py b/docs/examples/instruct_validate_repair/101_email_with_validate.py
index 742da99a..b510cf26 100644
--- a/docs/examples/instruct_validate_repair/101_email_with_validate.py
+++ b/docs/examples/instruct_validate_repair/101_email_with_validate.py
@@ -3,7 +3,7 @@
 from docs.examples.helper import req_print, w
 from mellea import start_session
 from mellea.backends import ModelOption
-from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.stdlib.sampling import RejectionSamplingStrategy
 
 # create a session using Granite 4 Micro (3B) on Ollama and a simple context [see below]
diff --git a/docs/examples/intrinsics/intrinsics.py b/docs/examples/intrinsics/intrinsics.py
index b7fcb267..862c8399 100644
--- a/docs/examples/intrinsics/intrinsics.py
+++ b/docs/examples/intrinsics/intrinsics.py
@@ -13,7 +13,7 @@
 # Create the backend. Example for a VLLM Server. Commented out in favor of the hugging face code for now.
 # # Assumes a locally running VLLM server.
 # backend = OpenAIBackend(
-#     model_id="ibm-granite/granite-3.3-8b-instruct",
+#     model_id="ibm-granite/granite-4.0-micro",
 #     base_url="http://0.0.0.0:8000/v1",
 #     api_key="EMPTY",
 # )
diff --git a/docs/examples/m_serve/client.py b/docs/examples/m_serve/client.py
index 31154be5..ea36c7c9 100644
--- a/docs/examples/m_serve/client.py
+++ b/docs/examples/m_serve/client.py
@@ -6,7 +6,7 @@
 
 response = client.chat.completions.create(
     messages=[{"role": "user", "content": "Find all the real roots of x^3 + 1."}],
-    model="granite3.3:8b",
+    model="granite4:micro-h",
 )
 
 print(response.choices[0])
diff --git a/docs/examples/tutorial/document_mobject.py b/docs/examples/tutorial/document_mobject.py
index d413d7d7..a3b55c50 100644
--- a/docs/examples/tutorial/document_mobject.py
+++ b/docs/examples/tutorial/document_mobject.py
@@ -1,7 +1,7 @@
 # pytest: ollama, qualitative, llm, requires_heavy_ram
 
 from mellea.backends import model_ids
-from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.stdlib.components.docs.richdocument import RichDocument
 
 rd = RichDocument.from_document_file("https://arxiv.org/pdf/1906.04043")
diff --git a/docs/kv_smash/hf_example.py b/docs/kv_smash/hf_example.py
index c5db6967..1964e213 100644
--- a/docs/kv_smash/hf_example.py
+++ b/docs/kv_smash/hf_example.py
@@ -2,7 +2,7 @@
 
 from mellea.backends import ModelOption
 from mellea.backends.huggingface import LocalHFBackend
-from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.core import CBlock
 from mellea.stdlib.components import Message
 from mellea.stdlib.context import ChatContext
@@ -30,7 +30,7 @@ async def example():
         role="user",
         content="What is the likely ZIP code of Nathan Fulton's work address?",
     )
-    backend = LocalHFBackend(model_id=IBM_GRANITE_3_3_8B)
+    backend = LocalHFBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO)
     mot = await backend._generate_from_context_with_kv_cache(
         action=msg, ctx=ctx, model_options={ModelOption.MAX_NEW_TOKENS: 64}
     )
diff --git a/docs/kv_smash/kv_with_chat.py b/docs/kv_smash/kv_with_chat.py
index bdf6f38e..7f43c539 100644
--- a/docs/kv_smash/kv_with_chat.py
+++ b/docs/kv_smash/kv_with_chat.py
@@ -2,9 +2,9 @@
 from mellea.backends.huggingface import LocalHFBackend
 from mellea.backends.kv_block_helpers import DynamicCache, merge_dynamic_caches
-from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 
-backend = LocalHFBackend(model_id=IBM_GRANITE_3_3_8B)
+backend = LocalHFBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO)
 model = backend._model
 tokenizer = backend._tokenizer
diff --git a/docs/kv_smash/kvcache.py b/docs/kv_smash/kvcache.py
index 51e1b5cc..40a472ac 100644
--- a/docs/kv_smash/kvcache.py
+++ b/docs/kv_smash/kvcache.py
@@ -9,7 +9,7 @@
 
 from mellea.backends.kv_block_helpers import DynamicCache, merge_dynamic_caches
 
-model_id = "ibm-granite/granite-3.3-8b-instruct"
+model_id = "ibm-granite/granite-4.0-tiny-preview"
 device = torch.device("mps")
 model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
 tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model_id)
diff --git a/docs/tutorial.md b/docs/tutorial.md
index 1892c250..f88a8f89 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -791,7 +791,7 @@ We will train a lightweight adapter with the `m alora train` command on this sma
 ```bash
 m alora train /to/stembolts_data.jsonl \
     --promptfile ./prompt_config.json \
-    --basemodel ibm-granite/granite-3.2-8b-instruct \
+    --basemodel ibm-granite/granite-4.0-h-micro \
     --outfile ./checkpoints/alora_adapter \
     --adapter alora \
     --epochs 6 \
@@ -1321,9 +1321,9 @@ Assuming a component's TemplateRepresentation contains a `template_order` field,
 
 If the default formatter searches the template path or the package, it uses the following logic:
 - look in the `.../templates/prompts/...` directory
-- traverse sub-directories in that path that match the formatter's model id (ie `ibm-granite/granite-3.2-8b-instruct` will match `.../templates/prompts/granite/granite-3-2/instruct`) or default (ie `.../templates/prompts/default`)
+- traverse sub-directories in that path that match the formatter's model id (i.e., `ibm-granite/granite-4.0-h-micro` will match `.../templates/prompts/granite/granite-4-0-h/micro`) or default (i.e., `.../templates/prompts/default`)
 - return the template at the deepest directory path
-- the default template formatter assumes that a model will only have one match in any given directory; in other words, traversing a `templates` directory with both `prompts/granite/...` and `prompts/ibm/...` for `ibm-granite/granite-3.2-8b-instruct` should not happen
+- the default template formatter assumes that a model will only have one match in any given directory; in other words, traversing a `templates` directory with both `prompts/granite/...` and `prompts/ibm/...` for `ibm-granite/granite-4.0-h-micro` should not happen
 
 #### Editing an Existing Class
 To customize the template and template representation of an existing class, simply create a new class that inherits from the class you want to edit. Then, override the format_for_llm function and create a new template.
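The directory-matching rule in the updated tutorial bullets can be made concrete with a small illustration. The helper below is a hypothetical sketch, not `TemplateFormatter`'s actual lookup code; the function name and splitting rules are assumptions that simply reproduce the Granite 4 example given above.

```python
# Hypothetical sketch of the model-id -> template-directory mapping described above.
# Not the real TemplateFormatter implementation; names and splitting rules are assumed.
def template_path_segments(hf_model_name: str) -> list[str]:
    """Map an HF model id to the sub-directories searched under templates/prompts/."""
    _org, model = hf_model_name.split("/", 1)    # "ibm-granite", "granite-4.0-h-micro"
    family, version_size = model.split("-", 1)   # "granite", "4.0-h-micro"
    version, _, size = version_size.replace(".", "-").rpartition("-")
    return [family, f"{family}-{version}", size]

# "ibm-granite/granite-4.0-h-micro" -> .../templates/prompts/granite/granite-4-0-h/micro
assert template_path_segments("ibm-granite/granite-4.0-h-micro") == [
    "granite", "granite-4-0-h", "micro",
]
```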
diff --git a/mellea/backends/model_ids.py b/mellea/backends/model_ids.py
index d20d3179..ed4621f1 100644
--- a/mellea/backends/model_ids.py
+++ b/mellea/backends/model_ids.py
@@ -25,14 +25,28 @@ class ModelIdentifier:
 #### IBM models ####
 ####################
 
-IBM_GRANITE_4_MICRO_3B = ModelIdentifier(
-    hf_model_name="ibm-granite/granite-4.0-micro",
-    ollama_name="granite4:micro",
+# Granite 4 Hybrid Models (Recommended for general use)
+IBM_GRANITE_4_HYBRID_MICRO = ModelIdentifier(
+    hf_model_name="ibm-granite/granite-4.0-h-micro",
+    ollama_name="granite4:micro-h",
+    watsonx_name=None,  # Only h-small available on Watsonx
+)
+
+IBM_GRANITE_4_HYBRID_TINY = ModelIdentifier(
+    hf_model_name="ibm-granite/granite-4.0-h-tiny",
+    ollama_name="granite4:tiny-h",
+    watsonx_name=None,  # Only h-small available on Watsonx
+)
+
+IBM_GRANITE_4_HYBRID_SMALL = ModelIdentifier(
+    hf_model_name="ibm-granite/granite-4.0-h-small",
+    ollama_name="granite4:small-h",
     watsonx_name="ibm/granite-4-h-small",
 )
 
-# todo: watsonx model is different from ollama model - should be same.
+# Deprecated Granite 3 models - kept for backward compatibility
+# These maintain their original model references (not upgraded to Granite 4)
 IBM_GRANITE_3_2_8B = ModelIdentifier(
     hf_model_name="ibm-granite/granite-3.2-8b-instruct",
     ollama_name="granite3.2:8b",
@@ -45,6 +59,23 @@ class ModelIdentifier:
     watsonx_name="ibm/granite-3-3-8b-instruct",
 )
 
+# Deprecated: Use IBM_GRANITE_4_HYBRID_MICRO or IBM_GRANITE_4_HYBRID_SMALL instead
+# Kept for backward compatibility with per-backend model selection:
+# - Ollama/HF: Uses MICRO (fits in CI memory constraints)
+# - Watsonx: Uses SMALL (required for watsonx support)
+IBM_GRANITE_4_MICRO_3B = ModelIdentifier(
+    hf_model_name="ibm-granite/granite-4.0-h-micro",
+    ollama_name="granite4:micro-h",
+    watsonx_name="ibm/granite-4-h-small",
+)
+
+# Granite 3.3 Vision Model (2B)
+IBM_GRANITE_3_3_VISION_2B = ModelIdentifier(
+    hf_model_name="ibm-granite/granite-vision-3.3-2b",
+    ollama_name="ibm/granite3.3-vision:2b",
+    watsonx_name=None,
+)
+
 IBM_GRANITE_GUARDIAN_3_0_2B = ModelIdentifier(
     hf_model_name="ibm-granite/granite-guardian-3.0-2b",
     ollama_name="granite3-guardian:2b",
diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py
index 8ad7796e..b9ae9432 100644
--- a/mellea/backends/openai.py
+++ b/mellea/backends/openai.py
@@ -83,7 +83,7 @@ def __init__(
         """Initialize and OpenAI compatible backend. For any additional kwargs that you need to pass the the client, pass them as a part of **kwargs.
 
         Args:
-            model_id : A generic model identifier or OpenAI compatible string. Defaults to model_ids.IBM_GRANITE_3_3_8B.
+            model_id : A generic model identifier or OpenAI compatible string. Defaults to model_ids.IBM_GRANITE_4_HYBRID_MICRO.
             formatter: A custom formatter based on backend.If None, defaults to TemplateFormatter
             base_url : Base url for LLM API. Defaults to None.
             model_options : Generation options to pass to the LLM. Defaults to None.
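A short usage sketch of the new identifiers may help here; it mirrors the constructor calls that already appear in the examples and tests above, and anything beyond those calls should be read as illustrative rather than as the library's documented API.

```python
# Sketch: selecting a Granite 4 hybrid model per backend via the new identifiers.
# The constructors and attribute names mirror usages shown elsewhere in this patch.
from mellea.backends.huggingface import LocalHFBackend
from mellea.backends.model_ids import (
    IBM_GRANITE_4_HYBRID_MICRO,
    IBM_GRANITE_4_HYBRID_SMALL,
)
from mellea.backends.ollama import OllamaModelBackend

# Local Hugging Face and Ollama runs can use the lightweight hybrid micro model.
hf_backend = LocalHFBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO)
ollama_backend = OllamaModelBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name)

# watsonx_name is None for the micro and tiny identifiers, so fall back to h-small.
watsonx_model = (
    IBM_GRANITE_4_HYBRID_MICRO.watsonx_name or IBM_GRANITE_4_HYBRID_SMALL.watsonx_name
)
```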
diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py
index 0fe1c2bd..8e03507f 100644
--- a/mellea/backends/watsonx.py
+++ b/mellea/backends/watsonx.py
@@ -54,7 +54,7 @@ class WatsonxAIBackend(FormatterBackend):
 
     def __init__(
         self,
-        model_id: str | ModelIdentifier = model_ids.IBM_GRANITE_3_3_8B,
+        model_id: str | ModelIdentifier = model_ids.IBM_GRANITE_4_HYBRID_SMALL,
         formatter: ChatFormatter | None = None,
         base_url: str | None = None,
         model_options: dict | None = None,
@@ -66,7 +66,7 @@ def __init__(
         """A generic watsonx backend that wraps around the ibm_watsonx_ai sdk.
 
         Args:
-            model_id : Model id. Defaults to model_ids.IBM_GRANITE_3_3_8B.
+            model_id : Model id. Defaults to model_ids.IBM_GRANITE_4_HYBRID_SMALL.
             formatter : input formatter. Defaults to TemplateFormatter in __init__.
             base_url : url for watson ML deployment. Defaults to env(WATSONX_URL).
             model_options : Global model options to pass to the model. Defaults to None.
diff --git a/test/backends/test_huggingface.py b/test/backends/test_huggingface.py
index 6b5bfb85..a8ee0a48 100644
--- a/test/backends/test_huggingface.py
+++ b/test/backends/test_huggingface.py
@@ -27,7 +27,7 @@
 ]
 
 from mellea import MelleaSession
-from mellea.backends import ModelOption
+from mellea.backends import ModelOption, model_ids
 from mellea.backends.adapters import GraniteCommonAdapter
 from mellea.backends.cache import SimpleLRUCache
 from mellea.backends.huggingface import LocalHFBackend, _assert_correct_adapters
@@ -46,7 +46,12 @@
 
 @pytest.fixture(scope="module")
 def backend():
-    """Shared HuggingFace backend for all tests in this module."""
+    """Shared HuggingFace backend for all tests in this module.
+
+    Uses Granite 3.3-8b for aLoRA adapter compatibility.
+    The ibm-granite/rag-intrinsics-lib repository only has adapters for
+    Granite 3.3 models. Granite 4 adapters are not yet available.
+    """
     backend = LocalHFBackend(
         model_id="ibm-granite/granite-3.3-8b-instruct",
         formatter=TemplateFormatter(model_id="ibm-granite/granite-4.0-tiny-preview"),
diff --git a/test/backends/test_litellm_ollama.py b/test/backends/test_litellm_ollama.py
index 7999528a..ece9c890 100644
--- a/test/backends/test_litellm_ollama.py
+++ b/test/backends/test_litellm_ollama.py
@@ -14,7 +14,7 @@
 from mellea.stdlib.context import SimpleContext
 from mellea.stdlib.sampling import RejectionSamplingStrategy
 
-_MODEL_ID = f"ollama_chat/{model_ids.IBM_GRANITE_4_MICRO_3B.ollama_name}"
+_MODEL_ID = f"ollama_chat/{model_ids.IBM_GRANITE_4_HYBRID_MICRO.ollama_name}"
 
 
 @pytest.fixture(scope="function")
diff --git a/test/backends/test_openai_ollama.py b/test/backends/test_openai_ollama.py
index d95648b0..95989729 100644
--- a/test/backends/test_openai_ollama.py
+++ b/test/backends/test_openai_ollama.py
@@ -12,7 +12,7 @@
 
 from mellea import MelleaSession
 from mellea.backends import ModelOption
-from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.backends.openai import OpenAIBackend
 from mellea.core import CBlock, ModelOutputThunk
 from mellea.formatters import TemplateFormatter
@@ -23,8 +23,8 @@ def backend(gh_run: int):
     """Shared OpenAI backend configured for Ollama."""
     return OpenAIBackend(
-        model_id=IBM_GRANITE_4_MICRO_3B.ollama_name,  # type: ignore
-        formatter=TemplateFormatter(model_id=IBM_GRANITE_4_MICRO_3B.hf_model_name),  # type: ignore
+        model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name,  # type: ignore
+        formatter=TemplateFormatter(model_id=IBM_GRANITE_4_HYBRID_MICRO.hf_model_name),  # type: ignore
         base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1",
         api_key="ollama",
     )
diff --git a/test/backends/test_openai_vllm/test_openai_vllm.py b/test/backends/test_openai_vllm/test_openai_vllm.py
index 2029dfe5..ad55fe39 100644
--- a/test/backends/test_openai_vllm/test_openai_vllm.py
+++ b/test/backends/test_openai_vllm/test_openai_vllm.py
@@ -133,7 +133,7 @@ class Answer(pydantic.BaseModel):
 class TestOpenAIALoraStuff:
     backend = OpenAIBackend(
         model_id="ibm-granite/granite-3.3-8b-instruct",
-        formatter=TemplateFormatter(model_id="ibm-granite/granite-4.0-tiny-preview"),
+        formatter=TemplateFormatter(model_id="ibm-granite/granite-3.3-8b-instruct"),
         base_url="http://localhost:8000/v1",
         api_key="EMPTY",
     )
diff --git a/test/backends/test_vision_ollama.py b/test/backends/test_vision_ollama.py
index e157d0e5..21782b1d 100644
--- a/test/backends/test_vision_ollama.py
+++ b/test/backends/test_vision_ollama.py
@@ -9,7 +9,7 @@
 pytestmark = [pytest.mark.ollama, pytest.mark.llm]
 
 from mellea import MelleaSession, start_session
-from mellea.backends import ModelOption
+from mellea.backends import ModelOption, model_ids
 from mellea.core import ImageBlock, ModelOutputThunk
 from mellea.stdlib.components import Instruction, Message
diff --git a/test/backends/test_vision_openai.py b/test/backends/test_vision_openai.py
index bfcfd681..599d6cbf 100644
--- a/test/backends/test_vision_openai.py
+++ b/test/backends/test_vision_openai.py
@@ -11,7 +11,7 @@
 
 from mellea import MelleaSession, start_session
 from mellea.backends import ModelOption
-from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.core import ImageBlock, ModelOutputThunk
 from mellea.stdlib.components import Instruction, Message
 
@@ -21,7 +21,7 @@ def m_session(gh_run):
     if gh_run == 1:
         m = start_session(
             "openai",
-            model_id=IBM_GRANITE_4_MICRO_3B.ollama_name,  # type: ignore
+            model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name,  # type: ignore
             base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1",
             api_key="ollama",
             model_options={ModelOption.MAX_NEW_TOKENS: 5},
diff --git a/test/backends/test_watsonx.py b/test/backends/test_watsonx.py
index 6902e675..306bd5d5 100644
--- a/test/backends/test_watsonx.py
+++ b/test/backends/test_watsonx.py
@@ -18,7 +18,7 @@
 ]
 
 from mellea import MelleaSession
-from mellea.backends import ModelOption
+from mellea.backends import ModelOption, model_ids
 from mellea.backends.watsonx import WatsonxAIBackend
 from mellea.core import CBlock, ModelOutputThunk
 from mellea.formatters import TemplateFormatter
@@ -32,8 +32,8 @@ def backend():
         pytest.skip("Skipping watsonx tests.")
     else:
         return WatsonxAIBackend(
-            model_id="ibm/granite-3-3-8b-instruct",
-            formatter=TemplateFormatter(model_id="ibm-granite/granite-3.3-8b-instruct"),
+            model_id=model_ids.IBM_GRANITE_4_HYBRID_SMALL,
+            formatter=TemplateFormatter(model_id=model_ids.IBM_GRANITE_4_HYBRID_SMALL),
         )
 
 
@@ -223,6 +223,21 @@ async def get_client_async():
     assert len(backend._client_cache.cache.values()) == 2
 
 
+def test_default_model():
+    """Verify WatsonxAIBackend uses correct default model."""
+    if int(os.environ.get("CICD", 0)) == 1:
+        pytest.skip("Skipping watsonx tests.")
+
+    # Create backend without specifying model_id
+    default_backend = WatsonxAIBackend()
+
+    # Verify it uses IBM_GRANITE_4_HYBRID_SMALL as default
+    assert default_backend._model_id == model_ids.IBM_GRANITE_4_HYBRID_SMALL, (
+        f"Expected default model to be IBM_GRANITE_4_HYBRID_SMALL, "
+        f"but got {default_backend._model_id}"
+    )
+
+
 if __name__ == "__main__":
     import pytest
diff --git a/test/core/test_component_typing.py b/test/core/test_component_typing.py
index a20de283..19ca2d23 100644
--- a/test/core/test_component_typing.py
+++ b/test/core/test_component_typing.py
@@ -6,7 +6,7 @@
 
 import mellea.stdlib.functional as mfuncs
 from mellea import MelleaSession, start_session
-from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.backends.ollama import OllamaModelBackend
 from mellea.core import (
     CBlock,
@@ -64,10 +64,10 @@ def backend(gh_run: int):
     """Shared backend."""
     if gh_run == 1:
         return OllamaModelBackend(
-            model_id=IBM_GRANITE_4_MICRO_3B.ollama_name  # type: ignore
+            model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name  # type: ignore
         )
     else:
-        return OllamaModelBackend(model_id="granite3.3:8b")
+        return OllamaModelBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name)  # type: ignore
 
 
 @pytest.fixture(scope="module")
@@ -117,11 +117,10 @@ def test_incorrect_type_override():
 
 
 # Marking as qualitative for now since there's so much generation required for this.
-# Uses granite3.3:8b (8B, heavy) in local mode
+# Uses granite4:micro-h (3B hybrid, lightweight) in local mode
 @pytest.mark.qualitative
 @pytest.mark.ollama
 @pytest.mark.requires_gpu
-@pytest.mark.requires_heavy_ram
 @pytest.mark.llm
 async def test_generating(session):
     m = session
diff --git a/test/formatters/test_template_formatter.py b/test/formatters/test_template_formatter.py
index 5d851053..5dd995b8 100644
--- a/test/formatters/test_template_formatter.py
+++ b/test/formatters/test_template_formatter.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from mellea.backends.model_ids import IBM_GRANITE_3_2_8B, ModelIdentifier
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO, ModelIdentifier
 from mellea.core import CBlock, Component, ModelOutputThunk, TemplateRepresentation
 from mellea.formatters import TemplateFormatter
 from mellea.stdlib.components import Instruction, Message, MObject
@@ -173,7 +173,7 @@ def _parse(self, computed: ModelOutputThunk) -> str:
 
 
 def test_load_with_model_id(instr: Instruction):
-    tf = TemplateFormatter(IBM_GRANITE_3_2_8B)
+    tf = TemplateFormatter(IBM_GRANITE_4_HYBRID_MICRO)
     tmpl = tf._load_template(instr.format_for_llm())
     assert tmpl.name is not None
     assert "granite" in tmpl.name, (
diff --git a/test/stdlib/components/test_genslot.py b/test/stdlib/components/test_genslot.py
index d3814ae4..9f63d4d7 100644
--- a/test/stdlib/components/test_genslot.py
+++ b/test/stdlib/components/test_genslot.py
@@ -4,7 +4,7 @@
 import pytest
 
 from mellea import MelleaSession, generative, start_session
-from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.backends.ollama import OllamaModelBackend
 from mellea.core import Requirement
 from mellea.stdlib.components.genslot import (
@@ -17,13 +17,8 @@
 from mellea.stdlib.requirements import simple_validate
 from mellea.stdlib.sampling import RejectionSamplingStrategy
 
-# Module-level markers: Uses granite3.3:8b (8B, heavy) in local mode
-pytestmark = [
-    pytest.mark.ollama,
-    pytest.mark.requires_gpu,
-    pytest.mark.requires_heavy_ram,
-    pytest.mark.llm,
-]
+# Module-level markers: Uses granite4:micro-h (3B hybrid, lightweight) in local mode
+pytestmark = [pytest.mark.ollama, pytest.mark.requires_gpu, pytest.mark.llm]
 
 
 @pytest.fixture(scope="module")
@@ -31,10 +26,10 @@ def backend(gh_run: int):
     """Shared backend."""
     if gh_run == 1:
         return OllamaModelBackend(
-            model_id=IBM_GRANITE_4_MICRO_3B.ollama_name  # type: ignore
+            model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name  # type: ignore
         )
     else:
-        return OllamaModelBackend(model_id="granite3.3:8b")
+        return OllamaModelBackend(model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name)  # type: ignore
 
 
 @generative
diff --git a/test/stdlib/test_spans.py b/test/stdlib/test_spans.py
index 311ec3c1..06c9f8a2 100644
--- a/test/stdlib/test_spans.py
+++ b/test/stdlib/test_spans.py
@@ -2,18 +2,13 @@
 
 from mellea.backends import ModelOption
 from mellea.backends.huggingface import LocalHFBackend
-from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
 from mellea.core import CBlock
 from mellea.stdlib.components import SimpleComponent
 from mellea.stdlib.session import MelleaSession, start_session
 
-# Module-level markers for all tests using granite-3.3-8b (8B model)
-pytestmark = [
-    pytest.mark.huggingface,
-    pytest.mark.requires_gpu,
-    pytest.mark.requires_heavy_ram,
-    pytest.mark.llm,
-]
+# Module-level markers for all tests using Granite 4 hybrid micro (3B model)
+pytestmark = [pytest.mark.huggingface, pytest.mark.requires_gpu, pytest.mark.llm]
 
 
 # We edit the context type in the async tests below. Don't change the scope here.
@@ -21,7 +16,7 @@ def m_session(gh_run):
     m = start_session(
         "hf",
-        model_id=IBM_GRANITE_3_3_8B,
+        model_id=IBM_GRANITE_4_HYBRID_MICRO,
         model_options={ModelOption.MAX_NEW_TOKENS: 64},
     )
     yield m