Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crates/traverse-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1620,12 +1620,14 @@ fn render_app_registration_state(
let workflows = app_registration_workflows(manifest_path, manifest)?;
let digest_verification = app_registration_digest_verification(manifest);
let model_readiness = app_registration_model_readiness(manifest);
let model_dependencies = manifest.model_dependencies.clone();
let bundle_fingerprint = serde_json::json!({
"app_id": manifest.app_id.clone(),
"app_version": manifest.version.clone(),
"manifest_digest": manifest_digest.clone(),
"components": components.clone(),
"workflows": workflows.clone(),
"model_dependencies": model_dependencies.clone(),
"model_readiness": model_readiness.clone(),
"effective_config": {
"values": manifest.effective_config.values.clone(),
Expand All @@ -1648,6 +1650,7 @@ fn render_app_registration_state(
"components": components,
"workflows": workflows,
"digest_verification": digest_verification,
"model_dependencies": model_dependencies,
"model_readiness": model_readiness,
"effective_config": {
"values": manifest.effective_config.values.clone(),
Expand Down
8 changes: 4 additions & 4 deletions crates/traverse-registry/src/application_manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,13 +322,13 @@ pub struct ApplicationWorkflowRef {
pub path: String,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
/// Effective configuration carried by an application manifest.
///
/// NOTE(review): the field descriptions below are read off the field names;
/// confirm them against the manifest loader, which is outside this view.
pub struct ApplicationEffectiveConfig {
/// Configuration values held as an untyped `Value`.
pub values: Value,
/// Presumably the config keys whose secret values were redacted — TODO confirm.
pub redacted_secret_keys: Vec<String>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct ApplicationModelDependency {
pub interface_id: String,
pub version_range: String,
Expand All @@ -338,13 +338,13 @@ pub struct ApplicationModelDependency {
pub candidates: Vec<ModelCandidate>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
/// Selection policy attached to an application model dependency.
///
/// NOTE(review): how the resolver interprets these fields is not visible
/// here; confirm against the model resolution code.
pub struct ModelSelectionPolicy {
/// Name of the selection strategy; the workspace-state test fixture uses `"priority"`.
pub strategy: String,
/// Presumably whether resolution may fall back to another candidate — TODO confirm.
pub allow_fallback: bool,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct ModelCandidate {
pub candidate_id: String,
pub provider_capability_id: String,
Expand Down
40 changes: 36 additions & 4 deletions crates/traverse-registry/src/workspace_app_state.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::{
ArtifactDigests, BinaryFormat, BinaryReference, CapabilityArtifactRecord,
CapabilityRegistration, CapabilityRegistry, ComposabilityMetadata, CompositionKind,
CompositionPattern, ImplementationKind, RegistryProvenance, RegistryScope, SourceKind,
SourceReference, WorkflowDefinition, WorkflowRegistration, WorkflowRegistry,
ApplicationModelDependency, ArtifactDigests, BinaryFormat, BinaryReference,
CapabilityArtifactRecord, CapabilityRegistration, CapabilityRegistry, ComposabilityMetadata,
CompositionKind, CompositionPattern, ImplementationKind, RegistryProvenance, RegistryScope,
SourceKind, SourceReference, WorkflowDefinition, WorkflowRegistration, WorkflowRegistry,
};
use serde::Deserialize;
use serde_json::Value;
Expand Down Expand Up @@ -30,6 +30,7 @@ pub struct WorkspaceApplicationRegistration {
pub manifest_path: String,
pub manifest_digest: String,
pub bundle_digest: String,
pub model_dependencies: Vec<ApplicationModelDependency>,
pub state_path: PathBuf,
}

Expand Down Expand Up @@ -69,6 +70,8 @@ struct PersistedWorkspaceApplicationState {
state_scope: String,
components: Vec<PersistedWorkspaceComponent>,
workflows: Vec<PersistedWorkspaceWorkflow>,
#[serde(default)]
model_dependencies: Vec<ApplicationModelDependency>,
registration_fingerprint: Value,
}

Expand Down Expand Up @@ -168,6 +171,7 @@ pub fn load_workspace_application_registries(
manifest_path: state.manifest_path,
manifest_digest: state.manifest_digest,
bundle_digest: state.bundle_digest,
model_dependencies: state.model_dependencies,
state_path,
});
}
Expand Down Expand Up @@ -602,6 +606,10 @@ mod tests {
assert_eq!(loaded.workspace_id, "local");
assert_eq!(loaded.applications.len(), 1);
assert_eq!(loaded.applications[0].app_id, "expedition.readiness");
assert_eq!(
loaded.applications[0].model_dependencies[0].interface_id,
"traverse.inference.generate"
);
assert!(
loaded
.capability_registry
Expand Down Expand Up @@ -1159,6 +1167,30 @@ mod tests {
"workflow_digest": "sha256:test-workflow",
"path": repo.join("workflows/examples/expedition/plan-expedition/workflow.json").display().to_string()
}],
"model_dependencies": [{
"interface_id": "traverse.inference.generate",
"version_range": "^1.0",
"selection_policy": {
"strategy": "priority",
"allow_fallback": true
},
"required_capabilities": ["text_generation"],
"minimum_context_window": 8192,
"candidates": [{
"candidate_id": "ollama-llama-3-2-readiness",
"provider_capability_id": "traverse.inference.generate",
"provider_implementation_id": "ollama.local.generate",
"model_identifier": "llama3.2:3b",
"placement_target": "local",
"priority": 10,
"required_provider_config_keys": ["ollama_base_url"],
"metadata": {
"implementation_kind": "real_local_provider",
"provider": "ollama",
"model_context_window": 8192
}
}]
}],
"effective_config": {
"values": {
"workspace_id": "expedition-local",
Expand Down
143 changes: 141 additions & 2 deletions crates/traverse-runtime/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ use std::fmt;
use std::io::{Read, Write};
use std::net::{TcpStream, ToSocketAddrs};
use std::time::Duration;
use traverse_contracts::ExecutionTarget;
use traverse_registry::{
ApplicationModelDependency, ModelAvailabilityProbe, ModelCandidate, ModelCandidateAvailability,
ModelCandidateRejectionCode, ModelResolutionEvidence, ModelResolutionRequest,
resolve_model_dependency,
ModelCandidateRejectionCode, ModelResolutionEvidence, ModelResolutionPhase,
ModelResolutionRequest, resolve_model_dependency,
};

const OLLAMA_PROVIDER: &str = "ollama";
Expand Down Expand Up @@ -59,6 +60,76 @@ pub struct OllamaInferenceEvidence {
pub selected_model: String,
}

/// Caller-supplied request to run inference through an app-declared model
/// dependency.
///
/// Consumed by [`execute_governed_ollama_model_dependency`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GovernedModelExecutionRequest {
/// Inference interface being requested; execution is rejected unless it
/// equals the dependency's `interface_id`.
pub interface_id: String,
/// Prompt text forwarded to the provider.
pub prompt: String,
/// Optional system prompt forwarded to the provider; `None` when the field
/// is absent from serialized input.
#[serde(default)]
pub system_prompt: Option<String>,
/// Provider options forwarded unchanged; `Value::default()` when the field
/// is absent from serialized input.
#[serde(default)]
pub options: Value,
/// Placement target handed to model resolution.
pub requested_placement: ExecutionTarget,
/// Provider configuration keyed by provider implementation id; empty when
/// the field is absent from serialized input.
#[serde(default)]
pub provider_configs: BTreeMap<String, OllamaProviderConfig>,
}

/// Successful result of [`execute_governed_ollama_model_dependency`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GovernedModelExecutionOutcome {
/// Output returned by the provider's `generate` call.
pub output: OllamaInferenceOutput,
/// Resolution evidence produced while selecting the candidate that was executed.
pub model_resolution: ModelResolutionEvidence,
}

/// Failure categories reported by governed model execution.
///
/// The serde representation is the snake_case variant name, which is not the
/// same string as the identifier returned by `as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GovernedModelExecutionErrorCode {
/// The requested interface id differs from the dependency's `interface_id`.
InterfaceNotDeclared,
/// Execution-time resolution selected no candidate.
ModelDependencyUnsatisfied,
/// The provider's `generate` call returned an error.
ProviderExecutionFailed,
}

impl GovernedModelExecutionErrorCode {
    /// Returns the fixed string identifier for this error code.
    ///
    /// These identifiers are what the `Display` impl of
    /// `GovernedModelExecutionError` prints before the message; they are
    /// distinct from the serde (snake_case variant name) representation.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::ProviderExecutionFailed => "model_provider_failure",
            Self::ModelDependencyUnsatisfied => "model_dependency_unsatisfied",
            Self::InterfaceNotDeclared => "model_interface_not_declared",
        }
    }
}

/// Error returned by [`execute_governed_ollama_model_dependency`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GovernedModelExecutionError {
/// Category of the failure.
pub code: GovernedModelExecutionErrorCode,
/// Human-readable detail; printed after the code by the `Display` impl.
pub message: String,
/// Resolution evidence, when resolution ran before the failure; `None`
/// until `with_model_resolution` is called. Stored boxed.
pub model_resolution: Option<Box<ModelResolutionEvidence>>,
}

impl GovernedModelExecutionError {
    /// Creates an error with the given code and message and no resolution
    /// evidence attached.
    #[must_use]
    pub fn new(code: GovernedModelExecutionErrorCode, message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            code,
            message,
            model_resolution: None,
        }
    }

    /// Consumes the error and returns it with `evidence` stored (boxed) in
    /// `model_resolution`, replacing any evidence attached earlier.
    #[must_use]
    pub fn with_model_resolution(self, evidence: ModelResolutionEvidence) -> Self {
        Self {
            model_resolution: Some(Box::new(evidence)),
            ..self
        }
    }
}

impl fmt::Display for GovernedModelExecutionError {
    /// Writes the error as `<code identifier>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.code.as_str())?;
        f.write_str(": ")?;
        f.write_str(&self.message)
    }
}

// Marker impl: uses the trait's default methods, so no `source()` is reported.
impl std::error::Error for GovernedModelExecutionError {}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OllamaInferenceProvider {
config: OllamaProviderConfig,
Expand All @@ -75,6 +146,10 @@ impl OllamaInferenceProvider {
Ok(Self { config })
}

/// Wraps `config` in a provider without performing any checks of its own.
///
/// NOTE(review): the name implies callers must pass a config that has already
/// been validated elsewhere — confirm at each call site.
fn from_validated_config(config: OllamaProviderConfig) -> Self {
Self { config }
}

#[must_use]
pub fn provider_implementation_id(&self) -> &'static str {
"ollama.local.generate"
Expand Down Expand Up @@ -226,6 +301,70 @@ pub fn resolve_ollama_model_dependency(
resolve_model_dependency(dependency, request, probe)
}

/// Resolves and executes one app-declared model dependency through Traverse.
///
/// The caller supplies runtime-local provider configuration, while the selected
/// provider/model must come from the registered app dependency declaration.
///
/// # Errors
///
/// Returns [`GovernedModelExecutionError`] when the dependency does not match
/// the requested interface, no model candidate can be selected, selected
/// provider config is unavailable, or real provider execution fails.
pub fn execute_governed_ollama_model_dependency(
dependency: &ApplicationModelDependency,
request: &GovernedModelExecutionRequest,
) -> Result<GovernedModelExecutionOutcome, GovernedModelExecutionError> {
if dependency.interface_id != request.interface_id {
return Err(GovernedModelExecutionError::new(
GovernedModelExecutionErrorCode::InterfaceNotDeclared,
"requested inference interface is not declared by this app dependency",
));
}

let probe = request.provider_configs.iter().fold(
OllamaModelAvailabilityProbe::default(),
|probe, (implementation_id, config)| {
probe.with_provider_config(implementation_id.clone(), config.clone())
},
);
let resolution_request = ModelResolutionRequest {
phase: ModelResolutionPhase::Execution,
requested_interface_id: request.interface_id.clone(),
requested_placement: request.requested_placement.clone(),
};
let evidence = resolve_ollama_model_dependency(dependency, &resolution_request, &probe);
let Some(selected) = evidence.selected.as_ref() else {
return Err(GovernedModelExecutionError::new(
GovernedModelExecutionErrorCode::ModelDependencyUnsatisfied,
"no app-declared model candidate satisfied execution-time resolution",
)
.with_model_resolution(evidence));
};
let provider = OllamaInferenceProvider::from_validated_config(
request.provider_configs[&selected.provider_implementation_id].clone(),
);
let output = provider
.generate(&OllamaInferenceRequest {
model: selected.model_identifier.clone(),
prompt: request.prompt.clone(),
system_prompt: request.system_prompt.clone(),
options: request.options.clone(),
})
.map_err(|error| {
GovernedModelExecutionError::new(
GovernedModelExecutionErrorCode::ProviderExecutionFailed,
error.to_string(),
)
.with_model_resolution(evidence.clone())
})?;

Ok(GovernedModelExecutionOutcome {
output,
model_resolution: evidence,
})
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OllamaInferenceErrorCode {
Expand Down
Loading
Loading