Skip to content

sini/gen-schema

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

85 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

gen-schema

A typed record registry for Nix with extension points, strict validation, refinement contracts, identity hashing, cross-instance references, first-class mixins, introspection, and declarative methods. Built on the NixOS module system.

gen-schema gives you what lib.types.submodule doesn't: open kind definitions that any module can extend, strict-by-default validation that catches typos immediately, refinement contracts co-located with type declarations, stable identity comparison via id_hash, cross-registry references that resolve to instances, reusable mixins with structural compatibility, and auto-generated documentation from your schema.

Quick Start

As a flake-parts module

# flake.nix
{
  inputs.gen-schema.url = "github:sini/gen-schema";
  inputs.flake-parts.url = "github:hercules-ci/flake-parts";

  outputs = inputs: inputs.flake-parts.lib.mkFlake { inherit inputs; } {
    imports = [ inputs.gen-schema.flakeModules.default ];

    # Define a kind
    schema.host = {
      options.addr = lib.mkOption { type = lib.types.str; };
      options.role = lib.mkOption { type = lib.types.str; default = "worker"; };
    };

    # Create a registry and instances
    options.hosts = schemaLib.mkInstanceRegistry config.schema "host" {};
    hosts.igloo = { addr = "10.0.1.1"; role = "web"; };
    hosts.iceberg = { addr = "10.0.2.1"; };  # role defaults to "worker"

    # Use them
    flake.fleet = {
      iglooAddr = config.hosts.igloo.addr;     # → "10.0.1.1"
      iglooHash = config.hosts.igloo.id_hash;  # → deterministic SHA-256
    };
  };
}

The flake-parts module provides schema and schemaLib with default settings (strict = true, no baseModule). For custom strict, baseModule, sidecars, or computed settings, use the programmatic API instead.

Programmatic use

# Without flake-parts — call the library directly
let
  schemaLib = gen-schema.lib;
  # or: schemaLib = import ./path/to/gen-schema/nix/lib { inherit lib; };
in
lib.evalModules {
  modules = [{
    options.schema = schemaLib.mkSchemaOption {};
    config.schema.host.options.addr = lib.mkOption { type = lib.types.str; };
  }];
}

Without flakes

let
  schemaLib = import ./path/to/gen-schema/nix/lib { inherit lib; };
in
# use schemaLib.mkSchemaOption, schemaLib.mkInstanceRegistry, etc.

Use Cases

Plugin system — extensible application config

A base application defines its schema. Plugins extend it from external flake inputs without touching the base:

# Base app — defines the plugin kind
config.schema.plugin = {
  options.enabled = lib.mkOption { type = lib.types.bool; default = true; };
  options.priority = lib.mkOption { type = lib.types.int; default = 50; };
};

# Logging plugin (separate flake input) — extends the kind
config.schema.plugin.options.logLevel = lib.mkOption {
  type = lib.types.enum [ "debug" "info" "warn" "error" ];
  default = "info";
};

# Metrics plugin (another flake input) — extends the same kind
config.schema.plugin.options.metricsEndpoint = lib.mkOption {
  type = lib.types.nullOr lib.types.str;
  default = null;
};

# Instances — validated against the merged schema from all inputs
config.plugins.logging = { logLevel = "debug"; priority = 10; };
config.plugins.metrics = { metricsEndpoint = "/metrics"; };
config.plugins.logging.badOption = "x";  # → STRICT MODE error with fix guidance

Microservice registry — services referencing each other

config.schema.service = {
  options.port = lib.mkOption { type = lib.types.int; };
  options.protocol = lib.mkOption { type = lib.types.str; default = "http"; };
  options.healthPath = lib.mkOption { type = lib.types.str; default = "/health"; };
};

# Services can reference each other (direct ref — registry in scope)
options.services = schemaLib.mkInstanceRegistry config.schema "service" {
  extraModules = [({ ... }: {
    options.upstream = lib.mkOption {
      type = lib.types.nullOr (schemaLib.ref config.services);
      default = null;
      description = "Upstream service this proxies to";
    };
  })];
};

config.services.api = { port = 8080; };
config.services.gateway = { port = 443; upstream = "api"; };

# Ref resolves to the full instance — accepts string keys or instance values:
config.services.gateway.upstream.port  # → 8080

Kubernetes resources — typed manifests with cross-references

config.schema.namespace = {
  options.labels = lib.mkOption { type = lib.types.attrsOf lib.types.str; default = {}; };
};

config.schema.deployment = {
  options.replicas = lib.mkOption { type = lib.types.int; default = 1; };
  options.image = lib.mkOption { type = lib.types.str; };
  options.containerPort = lib.mkOption { type = lib.types.int; };
};

config.schema.service = {
  options.port = lib.mkOption { type = lib.types.int; };
  options.targetPort = lib.mkOption { type = lib.types.int; };
};

# Deployments reference their namespace (deferred ref on kind + binding)
config.schema.deployment.options.namespace = lib.mkOption {
  type = schemaLib.ref "namespace";
};
options.deployments = schemaLib.mkInstanceRegistry config.schema "deployment" {
  refs.namespace = config.namespaces;
};

config.namespaces.production = { labels.env = "prod"; };
config.deployments.api = {
  namespace = "production";  # → resolves to the namespace instance
  image = "myapp:v1.2.3";
  replicas = 3;
  containerPort = 8080;
};

config.deployments.api.namespace.labels.env  # → "prod"

Homelab config — hosts with environment inheritance

# Shared base for all entity types
options.schema = schemaLib.mkSchemaOption {
  baseModule.options.tags = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    default = [];
  };
};

config.schema.host = {
  options.ip = lib.mkOption { type = lib.types.str; };
  options.os = lib.mkOption { type = lib.types.str; default = "nixos"; };
  methods.sshCmd = schemaLib.schemaFn
    "SSH command for this host"
    lib.types.str
    ({ name, ip, ... }: "ssh root@${ip} # ${name}");
};

config.schema.network = {
  options.cidr = lib.mkOption { type = lib.types.str; };
  options.gateway = lib.mkOption { type = lib.types.str; };
};

# Hosts reference their network (deferred ref)
config.schema.host.options.network = lib.mkOption {
  type = schemaLib.ref "network";
};
options.hosts = schemaLib.mkInstanceRegistry config.schema "host" {
  refs.network = config.networks;
};
options.networks = schemaLib.mkInstanceRegistry config.schema "network" {};

config.networks.lan = { cidr = "10.0.1.0/24"; gateway = "10.0.1.1"; };
config.hosts.nas = {
  ip = "10.0.1.10";
  network = "lan";
  tags = [ "storage" "backup" ];
};

config.hosts.nas.sshCmd        # → "ssh root@10.0.1.10 # nas"
config.hosts.nas.network.cidr  # → "10.0.1.0/24"
config.hosts.nas.tags          # → [ "storage" "backup" ] (from baseModule)

Core Concepts

Kinds

A kind is a named record type. Declare one by setting config.schema.<name>:

config.schema.host = {
  options.addr = lib.mkOption { type = lib.types.str; };
  options.system = lib.mkOption { type = lib.types.str; };
  options.role = lib.mkOption { type = lib.types.str; };
  config.system = lib.mkDefault "x86_64-linux";
};

Kinds are deferred modules — they define options and config but aren't evaluated until imported by an instance.

Kind names starting with _ are reserved for internal use (_meta, _strict). They are excluded from _meta.kindNames and renderDocs.

Extension

Any module can extend any kind. Extensions merge through deferred module merge:

# Module A declares base host options
config.schema.host.options.addr = lib.mkOption { type = str; };

# Module B (maybe from another flake input) adds monitoring fields
config.schema.host.options.metricsPort = lib.mkOption { type = int; default = 9100; };
config.schema.host.options.monitored = lib.mkOption { type = bool; default = true; };

Both contributions merge cleanly. Neither module needs to know about the other.

Base Module

A module injected into every kind automatically. Use it for options shared across all kinds without manual imports:

options.schema = schemaLib.mkSchemaOption {
  baseModule = {
    options.description = lib.mkOption {
      type = lib.types.str;
      default = "";
      description = "Human-readable description.";
    };
  };
};

# Every kind gets `description` for free:
config.fleet.hosts.igloo.description  # → ""
config.fleet.users.tux.description    # → ""

baseModule is static — set at mkSchemaOption call time, not extensible by downstream modules. For extensible shared bases, use the kind mix-in pattern instead (a shared kind imported by others via imports).

Default Propagation

Kind modules can set default config values. These flow through to every instance via deferred module merge:

config.schema.host = {
  options.system = lib.mkOption { type = lib.types.str; };
  config.system = lib.mkDefault "x86_64-linux";
};

config.fleet.hosts.igloo = {};          # system → "x86_64-linux"
config.fleet.hosts.mac.system = "aarch64-darwin";  # override works

This is standard NixOS module system behavior — lib.mkDefault sets a low-priority value that any explicit setting overrides.

Strict Validation

Kinds are strict by default — undeclared keys error immediately with a fix suggestion:

STRICT MODE: "addrr" is not declared on host.
Fix: schema.host.options.addrr = lib.mkOption { ... };

Opt out per-kind:

config.schema.host._module.freeformType = lib.types.attrsOf lib.types.anything;

Or globally:

options.schema = schemaLib.mkSchemaOption { strict = false; };

Instances

Instances are concrete values of a kind. Create them with mkInstanceRegistry:

options.fleet.hosts = schemaLib.mkInstanceRegistry config.schema "host" {};

config.fleet.hosts.igloo = {
  addr = "10.0.1.1";
  role = "web";
  # system defaults to "x86_64-linux"
};

config.fleet.hosts.iceberg = {
  addr = "10.0.2.1";
  system = "aarch64-linux";
};

Each instance:

  • Gets a name option defaulting to the attrset key
  • Gets _module.args.<kind> = config for self-reference
  • Gets id_hash — a stable SHA-256 for safe comparison
  • Inherits the kind's strict/freeform setting

Nested Registries

Registries can nest inside instances via extraModules. This establishes parent-child relationships structurally:

# Capture the top-level schema before entering extraModules closures
let schema = config.schema;
in {
  options.fleet.hosts = schemaLib.mkInstanceRegistry schema "host" {
    extraModules = [({ config, ... }:
      let hostConfig = config;  # capture the host instance's config
      in {
        options.users = schemaLib.mkInstanceRegistry schema "user" {
          extraModules = [
            # Inject parent host into child user's module args
            ({ ... }: { config._module.args.host = hostConfig; })
          ];
        };
      }
    )];
  };
}

config.fleet.hosts.igloo = {
  addr = "10.0.1.1";
  users.tux.shell = "/bin/zsh";
  users.deploy.shell = "/bin/sh";
};

# Child instances access their parent:
config.fleet.hosts.igloo.users.tux._module.args.host.addr  # → "10.0.1.1"

Note the scoping: schema is captured at the top level (before the extraModules closure), and hostConfig captures the host instance's config (before the nested extraModules closure). Without these captures, config inside the closures would shadow the outer config.

Cross-entity bindings (_module.args.host = hostConfig) are the consumer's responsibility via extraModules. The schema library doesn't impose nesting semantics — different consumers wire cross-entity context differently.

Per-Kind Strict Override

Individual registries can override the schema-level strict setting:

# Schema is strict by default
options.schema = schemaLib.mkSchemaOption { strict = true; };

# But this specific registry allows freeform
options.fleet.configs = schemaLib.mkInstanceRegistry config.schema "config" {
  strict = false;
};

Identity Hashing

Nix's == on module system values can diverge or infinitely recurse. id_hash gives you a cheap, stable string comparison:

# Safe entity comparison
builtins.filter (h: h.id_hash != host.id_hash) allHosts

# Set membership
lib.elem target.id_hash (map (h: h.id_hash) candidates)

The hash is computed from all non-internal primitive options (str, int, bool), prefixed by the kind name. Two hosts with the same values hash identically. A host and a user with the same name hash differently (kind prefix).

id_hash is marked internal = true and readOnly = true — it won't appear in NixOS option documentation generators, but is always accessible via instance.id_hash.

Three-layer precedence for key selection:

  1. Explicit _identity.keys — list the exact keys. Multiple modules can contribute via mkMerge.
  2. identity = false — exclude individual options from reflection.
  3. Auto-reflection — all non-internal primitives included (default).
# Layer 1: explicit keys — composable across modules
# Module A:
config.schema.host.config._identity.keys = [ "name" "addr" ];
# Module B (extends the kind):
config.schema.host.config._identity.keys = [ "vpnAlias" ];
# Result: [ "name" "addr" "vpnAlias" ] — list merge via mkMerge

# Layer 2: exclude an option from reflection
options.description = lib.mkOption { type = str; } // { identity = false; };

# Layer 3: automatic (default) — all non-internal str/int/bool options

Explicit keys are validated — referencing a nonexistent option or a non-primitive type throws at eval time:

_identity.keys: 'nonexistent' is not declared on kind 'host'
_identity.keys: 'tags' on kind 'host' is not a primitive type (str/int/bool)

Cross-Instance References

schema.ref declares a reference to another kind's instances. Two modes:

Deferred ref — declare on the kind, bind at registry time:

# Kind declares referential intent
config.schema.service.options.host = lib.mkOption {
  type = schemaLib.ref "host";
};

# Registry binds the ref to a concrete registry
options.fleet.services = schemaLib.mkInstanceRegistry config.schema "service" {
  refs.host = config.fleet.hosts;
};

Direct ref — resolve immediately when the registry is in scope:

options.fleet.services = schemaLib.mkInstanceRegistry config.schema "service" {
  extraModules = [({ ... }: {
    options.upstream = lib.mkOption {
      type = lib.types.nullOr (schemaLib.ref config.fleet.services);
      default = null;
    };
  })];
};

Both modes accept string keys or instance values:

config.fleet.services.nginx.host = "igloo";                    # string key → lookup
config.fleet.services.gateway.upstream = config.fleet.services.nginx;  # instance → passthrough

config.fleet.services.nginx.host.addr  # → "10.0.1.1"
config.fleet.services.gateway.upstream.port  # → 80

Invalid references throw at eval time:

ref field 'host' on kind 'service': reference 'nonexistent' not found in instance registry

Refs in Collections

ref works inside listOf and nullOr wrappers at any nesting depth:

config.schema.service.options.replicas = lib.mkOption {
  type = lib.types.listOf (schemaLib.ref "host");
  default = [];
};

# String keys and instance values both work:
config.fleet.services.nginx.replicas = [ "igloo" "iceberg" ];
config.fleet.services.nginx.replicas = [ config.fleet.hosts.igloo "iceberg" ];

# Nullable and nested wrappers:
type = lib.types.nullOr (lib.types.listOf (schemaLib.ref "host"));

Custom Ref Coercion

For domain-specific resolution, pass an extended binding with a coerce function:

options.fleet.services = schemaLib.mkInstanceRegistry config.schema "service" {
  refs.host = {
    instances = config.fleet.hosts;
    coerce = default: val:
      if val == "primary" then config.fleet.hosts.igloo
      else default;
  };
};

default is a lazy thunk of the standard coercion result — only forced if you select it. In listOf context, default is a single-element list and the hook can return multiple instances (1→many expansion).

Deferred Coerce (Self-Referential Registries)

When a registry's ref field points back to itself (e.g., a trait's needs referencing other traits in the same registry), standard coerce hooks cause infinite recursion — the coerce chain accesses the registry, which triggers apply, which runs the coerce chain again.

Set deferred = true to defer coercion to the applyPipeline (after all instances are evaluated). The coerce hook receives the raw materialized instances as its first argument, breaking the cycle:

options.traits = schemaLib.mkInstanceRegistry config.schema "trait" {
  refs.needs = {
    instances = config.traits;
    deferred = true;
    # 3-arg signature: registry is the raw instances (not config.traits)
    coerce = registry: default: val:
      if isSelector val then resolveAgainst registry val
      else default;
  };
};

Signature difference: Non-deferred hooks take 2 args (default: val:). Deferred hooks take 3 args (registry: default: val:), where registry is the pre-apply instance attrset. gen-schema pre-applies the registry, so mkCoerceChain sees a standard 2-arg function internally.

Deferred coerce runs before validators in applyPipeline, so validators see resolved instances and can check properties like .name on referenced entries.

Deduplicated Sets

setOf deduplicates by id_hash, preserving first-seen order:

config.schema.group.options.members = lib.mkOption {
  type = schemaLib.setOf (schemaLib.ref "host");
  default = [];
};

config.fleet.groups.web.members = [ "igloo" "iceberg" "igloo" ];
# → [ igloo iceberg ] — duplicate removed by identity hash

Composes with custom coerce hooks — expansion produces duplicates, setOf removes them.

Parent-Child Topology

Kinds can declare their parent kind via the parent sidecar. This establishes a schema-level nesting relationship:

config.schema.host = {
  options.addr = lib.mkOption { type = lib.types.str; };
};

config.schema.user = {
  parent = "host";  # users nest inside hosts
  options.shell = lib.mkOption { type = lib.types.str; };
};

The parent sidecar is optional — kinds without it are root kinds. The schema derives both directions:

config.schema._meta.topology.host   # → { parent = null; children = [ "user" ]; }
config.schema._meta.topology.user   # → { parent = "host"; children = []; }

Declaring a parent that doesn't exist as a schema kind throws at eval time.

Schema Introspection

Every schema has _meta for programmatic access:

config.schema._meta.kindNames    # → [ "host" "service" "user" ]
config.schema._meta.roots        # → [ "host" ]  — kinds with no parent
config.schema._meta.leaves       # → [ "user" ]  — kinds with no children

# Per-kind metadata
meta = config.schema._meta.kindMeta "host";
meta.optionNames   # → [ "addr" "role" ... ]
meta.options       # → full option declarations
meta.refs          # → { }  (ref fields on this kind)

# Unified edge view (Neron scope graph P + I edges)
config.schema._meta.edges
# → [
#   { from = "user"; to = "host"; type = "parent"; field = null; }
#   { from = "service"; to = "host"; type = "ref"; field = "host"; }
# ]

# Ref edges only
config.schema._meta.refEdges
# → [ { from = "service"; field = "host"; to = "host"; } ]

_meta.edges combines parent edges (from topology) and ref edges (from schema.ref declarations) into a single typed list. Every edge has { from, to, type, field }field is null for parent edges and the option name for ref edges.

Scope Graph Bridge

buildKindGraph and buildInstanceGraph convert schema metadata into the { parentGraph, importGraph, decls, types } structure that scope-engine's buildNodes expects:

# Kind-level graph: kinds as nodes, topology as edges
kindGraph = schemaLib.buildKindGraph config.schema;
kindGraph.parentGraph.vertices   # → [ "host" "service" "user" ]
kindGraph.parentGraph.edges      # → [ { from = "user"; to = "host"; } ]

# Instance-level graph: instances as nodes, resolved refs as edges
fleet = { host = config.hosts; service = config.services; };
instanceGraph = schemaLib.buildInstanceGraph config.schema fleet;
instanceGraph.parentGraph.vertices  # → [ "host:igloo" "host:iceberg" ... ]
instanceGraph.importGraph.edges     # → [ { from = "service:nginx"; to = "host:igloo"; } ]

Kind Mix-ins

A kind can import another kind's schema, inheriting all options:

config.schema.user = {
  options.userName = lib.mkOption { type = str; };
  options.shell = lib.mkOption { type = str; default = "/bin/bash"; };
};

config.schema.admin-user = {
  imports = [ config.schema.user ];  # inherits userName, shell
  options.sudoPrivileges = lib.mkOption { type = bool; default = true; };
  options.sshKeys = lib.mkOption { type = listOf str; default = []; };
};

Each gets its own registry. Identity hashes include the kind prefix — a user "root" and an admin "root" hash differently.

Multiple mix-ins compose cleanly:

config.schema.deploy-user = {
  imports = [
    config.schema.user
    config.schema.ssh-access
    config.schema.sudo-access
  ];
};

Declarative Methods

schemaFn declares functions on entity instances. Named arguments are automatically resolved from the instance's config:

config.schema.host.methods.describe = schemaLib.schemaFn
  "Human-readable summary"
  lib.types.str
  ({ name, role, addr, ... }: "${name} (${role}) at ${addr}");

# On instances:
config.fleet.hosts.igloo.describe  # → "igloo (web) at 10.0.1.1"

Methods can close over values from the declaring module's scope:

# hasService captures config.fleet.services from the module;
# name comes from the host instance's config
config.schema.host.methods.hasService = schemaLib.schemaFn
  "Check if a service targets this host"
  (lib.types.functionTo lib.types.bool)
  ({ name, ... }:
    serviceName:
    let services = config.fleet.services;
    in services ? ${serviceName}
       && services.${serviceName}.host.name == name);

config.fleet.hosts.igloo.hasService "nginx"     # → true
config.fleet.hosts.igloo.hasService "postgres"   # → false

Methods compose across modules — multiple modules can each add methods to the same kind:

# Module A
config.schema.host.methods.ping = schemaLib.schemaFn
  "Ping command" lib.types.str
  ({ addr, ... }: "ping ${addr}");

# Module B (separate file, separate flake input — doesn't matter)
config.schema.host.methods.ssh = schemaLib.schemaFn
  "SSH command" lib.types.str
  ({ name, ... }: "ssh ${name}");

# Both methods available on every host instance:
config.fleet.hosts.igloo.ping  # → "ping 10.0.1.1"
config.fleet.hosts.igloo.ssh   # → "ssh igloo"

If two modules declare the same method name, the later definition wins (attrset // semantics).

Methods with arguments that don't match any config key produce a clear error:

method 'bad' on host: references config keys 'nonexistent' which are not declared on this kind

Methods must be declared via inline attrsets, not path modules. This is a constraint shared with all sidecar fields.

Sidecar Fields

Declare custom sidecar fields on kinds — data extracted from definitions before module merge and exposed on the merged result:

options.schema = schemaLib.mkSchemaOption {
  sidecars = {
    includes = { default = []; };           # list → merged via ++
    excludes = { default = []; };           # list → merged via ++
    metadata = { default = {}; };           # attrset → merged via //
    priority = { default = 0; merge = _acc: val: val; };  # explicit: last-wins
  };
};

config.schema.host = {
  includes = [ policy-a policy-b ];
  options.addr = lib.mkOption { type = str; };
};

# Read sidecar values directly:
config.schema.host.includes  # → [ policy-a policy-b ]

Merge strategy inference:

Default type Inferred merge Example
List ([]) acc ++ val includes, excludes
Attrset ({}) acc // val metadata, methods (built-in)
Other Explicit merge required priority = { default = 0; merge = _acc: val: val; }

Providing a non-list, non-attrset default without an explicit merge function throws at evaluation time.

Sidecar keys are stripped before the deferred module merge — they never leak into the module system. Sidecars must be declared via inline attrsets, not path modules (path defs get the sidecar's default value).

methods is a built-in sidecar with { default = {}; }. User-declared sidecars are additional. __functor is reserved and cannot be used as a sidecar key.

Multiple modules contributing to the same sidecar merge according to the sidecar's strategy:

# Module A
config.schema.host.includes = [ policy-a ];

# Module B
config.schema.host.includes = [ policy-b policy-c ];

# Result: [ policy-a policy-b policy-c ]

Computed Fields

Derived values computed from sidecar content and raw definitions:

options.schema = schemaLib.mkSchemaOption {
  sidecars = {
    includes = { default = []; };
    excludes = { default = []; };
  };
  computed = sidecars: defs: {
    isEntity =
      let
        sidecarKeys = lib.attrNames sidecars;
        hasStructuralContent = lib.any (d:
          let v = d.value;
              stripped = if builtins.isAttrs v
                then builtins.removeAttrs v sidecarKeys else v;
          in !builtins.isAttrs stripped || stripped != {}
        ) defs;
      in
      sidecars.includes != []
      || sidecars.excludes != []
      || hasStructuralContent;
  };
};

config.schema.host.isEntity   # → true (has includes)
config.schema.conf.isEntity   # → false (empty — shared base only)

Introspection

Every schema has _meta for programmatic access:

config.schema._meta.kindNames                # → [ "host" "service" "user" ]

# Per-kind metadata — evaluates a throwaway instance to reflect on options
meta = config.schema._meta.kindMeta "host";
meta.optionNames   # → [ "addr" "describe" "hasService" "metricsPort" ... ]
meta.options       # → full option declarations (type, description, default, ...)

kindMeta is lazy — the lib.evalModules call only fires when accessed. Querying an undeclared kind throws:

kindMeta: 'nonexistent' is not a declared schema kind

Schema Validators

Declare cross-field validation constraints on kinds. Validators are a built-in sidecar — they travel with the kind and run automatically on every registry of that kind.

config.schema.host.validators = [
  (gen.mkValidator "has-addr"
    ({ addr, ... }: addr != "")
    "host must have a non-empty addr")
  (gen.mkValidator "valid-role"
    ({ role, ... }: lib.elem role [ "web" "db" "worker" ])
    "role must be one of: web, db, worker")
];

Validators compose across modules — multiple modules can contribute validators to the same kind via the sidecar ++ merge:

# Module A
config.schema.host.validators = [ (gen.mkValidator "a" ...) ];
# Module B
config.schema.host.validators = [ (gen.mkValidator "b" ...) ];
# Both fire on every host registry

When validation fails, errors accumulate (not short-circuit) and include the instance name, validator name, and message:

schema validation failed:
  host 'igloo': has-addr — host must have a non-empty addr
  host 'iceberg': valid-role — role must be one of: web, db, worker

For standalone validation without throwing, use validateInstances:

result = schemaLib.validateInstances config.schema "host" config.fleet.hosts;
# → { right = instances; } or { left = [ { name; validator; message; } ]; }

Derive Hooks

derive and deriveEither on mkInstanceRegistry compute values from the full evaluated registry and merge them back at high priority. The pipeline is: validate → derive → apply.

Plain derive — attrset in, attrset out:

options.fleet.users = schemaLib.mkInstanceRegistry config.schema "user" {
  derive = users:
    let uids = assignIds { min = 1000; max = 60000; } users;
    in lib.mapAttrs (name: _: { uid = uids.${name}; }) users;
  extraModules = [({ ... }: {
    options.uid = lib.mkOption { type = lib.types.int; readOnly = true; internal = true; };
  })];
};

config.fleet.users.tux.uid  # → 34213 (deterministic from id_hash)

Derive can read id_hash and all instance config — it runs after full module system evaluation. Derived fields must be internal = true (excluded from id_hash to avoid cycles) and readOnly = true (the derive hook is the only writer).

deriveEither — returns Either with configurable error handling:

options.fleet.services = schemaLib.mkInstanceRegistry config.schema "service" {
  deriveEither = {
    derive = services: someEitherPipeline services;
    onError = left: lib.warn "enrichment failed" {};  # optional, default throws
  };
};

derive and deriveEither are mutually exclusive. onError receives the left value — throw, warn, or return a fallback attrset. The default onError throws with a formatted message.

Validator errors flow through the same onError handler — a custom onError on deriveEither handles both validator failures and derive failures.

Documentation Generation

renderDocs produces markdown reference from schema metadata:

schemaLib.renderDocs config.schema

Outputs a table per kind with option name, type, default, and description — including extensions from composition and methods.

API Reference

mkSchemaOption

mkSchemaOption {
  strict ? true,        # strict-by-default validation on instances
  baseModule ? null,     # module imported into every kind
  sidecars ? {},         # { name = { default; merge? }; } — user-defined sidecar fields
  computed ? null,       # (sidecars -> defs -> attrset) — derived fields on merged result
}

Returns lib.mkOption — use as options.schema = mkSchemaOption { ... }.

mkSchemaEntryType is also exported for advanced use — it returns the raw deferredModule type used for schema kind values, without wrapping in mkOption or adding _meta/_strict. Most consumers should use mkSchemaOption.

mkInstanceType

mkInstanceType schema kind {
  extraModules ? [],     # additional modules (cross-entity bindings, den-specific options)
  strict ? schema._strict or true,
}

Returns lib.types.submodule — the type for a single instance of a kind.

mkInstanceRegistry

mkInstanceRegistry schema kind {
  extraModules ? [],
  refs ? {},             # bindings for deferred refs (see below)
  strict ? schema._strict or true,
  description ? "${kind} instances",
  derive ? null,         # { name → instance } → { name → attrset } — plain enrichment
  deriveEither ? null,   # { derive; onError? } — Either-based enrichment
}

Returns lib.mkOption with type = attrsOf (mkInstanceType ...) and an apply pipeline that runs validators then derive.

derive and deriveEither are mutually exclusive.

refs binds deferred ref fields to concrete registries. Three forms:

# Simple — registry directly:
refs.host = config.fleet.hosts;

# Extended — with custom coercion (2-arg):
refs.host = {
  instances = config.fleet.hosts;
  coerce = default: val: ...;  # default is lazy thunk of standard result
};

# Deferred — for self-referential registries (3-arg):
refs.needs = {
  instances = config.traits;
  deferred = true;
  coerce = registry: default: val: ...;  # registry = raw pre-apply instances
};

deferred = true runs coercion inside applyPipeline (after instances are materialized) instead of at option-apply time. The custom hook receives the raw instances as registry — use this instead of capturing the config value in the closure. Required when the ref field points back to the same registry being defined.

ref

ref target

target is a string → deferred ref (kind name, bound via refs on mkInstanceRegistry). target is an attrset → direct ref (resolved immediately). Both modes accept string keys or instance values.

setOf

setOf elemType

A list type that deduplicates by id_hash, preserving first-seen order. Only meaningful with ref element types — setOf requires instance refs. Composes with custom coerce hooks: expansion produces duplicates, setOf removes them. Uses nestedTypes.elemType so getRefKind traverses through it like listOf.

toSet

toSet instances

Converts a list of instances to a set with O(1) membership lookup via attrset backing. Deduplicates by id_hash (first-seen wins), so safe to call on any instance list. Returns:

{
  member = x: ...;  # O(1) membership test (à la Data.Set.member)
  toList = [ ... ];  # deduplicated list, first-seen order
  length = n;        # number of unique instances
}

schemaFn

schemaFn description type fn

Declares a method on a kind. fn receives an attrset of config values matching its named arguments. Declare via schema.<kind>.methods.<name> = schemaFn ....

mkValidator (from gen)

gen.mkValidator name pred message

Creates a validator record. pred receives the instance config and returns bool. Declare via schema.<kind>.validators = [ (gen.mkValidator ...) ]. Provided by gen, not gen-schema.

validateInstances

validateInstances schema kind instances

Runs the kind's validators against instances. Returns { right = instances; } on success or { left = [ { name; validator; message; } ]; } on failure. Does not throw — returns Either for consumer-controlled handling.

renderDocs

renderDocs schema

Returns a markdown string with a table per kind.

buildKindGraph

buildKindGraph schema

Returns { parentGraph, importGraph, decls, types } — the kind-level scope graph. Kinds are nodes, topology provides parent edges, refs provide import edges. Compatible with scope-engine's buildNodes.

buildInstanceGraph

buildInstanceGraph schema fleet

Returns { parentGraph, importGraph, decls, types } — the instance-level scope graph. fleet is { kindName = { instanceName = instanceConfig; }; }. Instance refs are resolved to import edges.

_internal

schemaLib._internal.mkMethodsModule   # methods option/config wiring

Not part of the public API contract. Available for testing and advanced use.

Identity, strict, validation, and ref primitives are in gen — import gen directly if you need them outside of gen-schema.

schema.types.refined

Refinement contracts co-located with type declarations. Predicates validate during applyPipeline (strict by default).

# Single refinement
port = mkOption {
  type = schema.types.refined lib.types.int {
    check = self: self > 0 && self < 65536;
    message = "must be valid TCP port";
  };
};

# Composed refinements (all must pass)
port = mkOption {
  type = schema.types.refined lib.types.int [
    { check = self: self > 0; message = "must be positive"; }
    { check = self: self < 65536; message = "must be < 65536"; }
  ];
};

# Reusable
port = mkOption { type = schema.types.refined lib.types.int schema.refinements.tcpPort; };

Set lazy = true on a refinement to defer validation to access time via builtins.addErrorContext:

{ check = self: self > 0; message = "must be positive"; lazy = true; }

schema.refinements.*

Built-in reusable refinements: tcpPort, nonEmpty, positive. Use with schema.types.refined to avoid repeating common predicates.

schema.blame

Field-level error attribution for structured contract violations.

schema.blame "fieldName" "error message"
# → { __blame = true; field = "fieldName"; message = "error message"; }

schema.mkFieldValidator

Row-polymorphic validators with automatic field filtering. Validators with fields are automatically skipped for kinds that don't have all required fields. Validators without fields run unconditionally (backwards compatible).

schema.mkFieldValidator {
  name = "https-port";
  fields = [ "port" "protocol" ];
  check = inst: !(inst.protocol == "https" && inst.port == 80);
  message = "HTTPS should not use port 80";
}

schema.mkMixin

First-class reusable schema fragments with structural compatibility. define receives a record-algebra record and returns a plain attrset.

monitorable = schema.mkMixin {
  requires = [ "port" "hostname" ];
  provides = [ "metrics_port" ];
  # kinds = [ "service" ];  # optional kind constraint
  define = parent: {
    metrics_port = (record.select parent "port") + 1000;
  };
};

schema.composeMixins

Compose multiple mixins into one. Requires propagation: earlier mixins' provides satisfy later mixins' requires.

enhanced = schema.composeMixins [ monitorable loggable healthcheck ];

# Mixed direction: beta mixin is overridden by what came before
combined = schema.composeMixins [
  monitorable
  (schema.beta tlsBase)  # Beta: existing fields win over tlsBase's
  loggable
];

schema.beta

Annotates a mixin for Beta direction — parent controls, meaning existing fields take precedence over the mixin's contributions.

schema.emitModule

Bridges record-algebra records to NixOS modules. Strips refinement metadata from types. Extracts sidecars with full shadow stacks.

emitted = schema.emitModule [ "validators" "methods" ] recordAlgebraRecord;
# → { module = <NixOS module>; sidecars = { ... }; refinements = { ... }; }

mkSchemaEntryType mixins parameter

Mixins are auto-applied when baseModule is an inline attrset:

mkSchemaEntryType {
  mixins = [ monitorable loggable ];
  baseModule = {
    port = mkOption { type = types.int; };
    hostname = mkOption { type = types.str; };
  };
}

Academic Foundations

Feature Paper
Refinement contracts Findler & Felleisen — Contracts for Higher-Order Functions (ICFP 2002), co-location from Rondon et al. — Liquid Types (PLDI 2008)
Lazy contracts Chitil — Practical Typed Lazy Contracts (ICFP 2012)
Record algebra (gen) Leijen — Extensible Records with Scoped Labels (TFP 2005)
Mixin composition Bracha & Cook — Mixin-Based Inheritance (OOPSLA 1990)
NixOS module bridge Cardelli — Program Fragments, Linking, and Modularization (POPL 1997)
Scope graphs Néron et al. — A Theory of Name Resolution (ESOP 2015)

Architecture

Schema kinds (deferred modules, parent sidecar, ref types)
  ↓ imported by
Instance types (submodules with strict + identity injected)
  ↓ collected into
Instance registries (attrsOf instance type, ref binding via apply)
  ↓ referenced by             ↓ introspected by
Cross-instance refs        _meta (topology, edges, roots, leaves)
  (schema.ref)                ↓ bridged to
                        Scope graph (buildKindGraph, buildInstanceGraph)

Kinds are pure schema — options, config, defaults, methods, sidecars. No strict validation or identity hashing at the kind level.

Instances add infrastructuremkInstanceType injects mkStrictModule and mkIdentityModule. This separation means kind-level composition via imports works without duplicate module conflicts.

Sidecars are extracted before merge — sidecar keys on kind definitions are folded, merged, and exposed on the result. They never enter the deferred module merge.

File Layout

nix/lib/
  default.nix       — public API surface, wiring (imports gen for primitives)
  entry-type.nix     — mkSchemaEntryType, mkSchemaOption (sidecar extraction, _meta, topology)
  instance.nix       — mkInstanceType, mkInstanceRegistry (strict + identity injection, refs)
  ref.nix            — schema.ref (dual-mode cross-instance references, getRefKind)
  scope-graph.nix    — buildKindGraph, buildInstanceGraph (scope-engine bridge)
  methods.nix        — schemaFn, mkMethodsModule (method option/config generation)
  validate.nix       — validateInstances (schema-specific wrapper around gen.runValidators)
  docs.nix           — renderDocs (markdown generation)
nix/flakeModule.nix  — flake-parts integration (provides schema option + schemaLib)

Identity hashing (mkIdentityModule), strict validation (mkStrictModule), validators (mkValidator, runValidators), and cross-instance references (mkRefType) are provided by gen and consumed internally.

Demo

See templates/demo/ for a complete fleet management example using flake-parts + import-tree. The demo exercises all features: kinds, instances, strict validation, identity hashing, cross-instance references, schema composition, kind mix-ins, declarative methods, and documentation generation.

cd templates/demo
nix eval --override-input gen-schema ../.. .#fleet
nix eval --override-input gen-schema ../.. .#docs --raw

Testing

Tests use nix-unit in templates/ci/:

cd templates/ci
nix develop --override-input gen-schema ../.. -c nix-unit \
  --override-input gen-schema ../.. --flake .#.tests

License

MIT

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors