From 1da41ad12cbf03f6fe49aca578e4cb34d9065298 Mon Sep 17 00:00:00 2001 From: Melissa Du <61556662+melissaxdu@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:17:56 -0700 Subject: [PATCH 1/3] Add files via upload --- training.json | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 training.json diff --git a/training.json b/training.json new file mode 100644 index 000000000..a1038c4f9 --- /dev/null +++ b/training.json @@ -0,0 +1,110 @@ +{ + "$schema": "https://github.com/mosaicml/llm-foundry/blob/main/mcli/mcli-1b-max-seq-len-8k.yaml", + "$id": "https://docs.mosaicml.com/projects/mcli/en/latest/training/yaml_schema.html", + "title": "JSON Schema for MosaicML YAML", + "description": "YAML schema for MosaicML training configuration", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "image": { + "type": "string" + }, + "command": { + "type": "string" + }, + "compute": { + "type": "object", + "properties": { + "gpus": { + "type": "integer", + "description": "number of gpus" + }, + "cluster": { + "type": "string", + "description": "name of cluster" + }, + "gpu_type": { + "type": "string" + }, + "instance": { + "type": "string" + }, + "nodes": { + "type": "string" + }, + "cpus": { + "type": "integer" + } + }, + "required": [ "cluster" ] + }, + "scheduling": { + "type": "object", + "properties": { + "priority": { + "type": "string" + } + } + }, + "integrations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "integration_type": { + "type": "string", + "enum": [ + "git_repo", + "apt_packages", + "pip_packages", + "wandb" + ] + }, + "git_repo": { + "type": "string" + }, + "git_branch": { + "type": "string" + }, + "pip_install": { + "type": "string" + }, + "packages": { + "type": "array", + "items": { + "type": "string" + } + }, + "project": { + "type": "string" + }, + "entity": { + "type": "string" + } + }, + "required": [ "integration_type" ] + } + }, + 
"env_variables": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "required": [ "key", "value" ] + }, + "parameters": { + "type": "object" + }, + "metadata": { + "type": "string" + } + }, + "required": ["name", "image", "command"] + } From 029e07b06f14d27f7a3f57c56a037d6ceb39b788 Mon Sep 17 00:00:00 2001 From: Melissa Du <61556662+melissaxdu@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:38:14 -0700 Subject: [PATCH 2/3] Update training.json --- training.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training.json b/training.json index a1038c4f9..2b0276f38 100644 --- a/training.json +++ b/training.json @@ -1,7 +1,7 @@ { "$schema": "https://github.com/mosaicml/llm-foundry/blob/main/mcli/mcli-1b-max-seq-len-8k.yaml", "$id": "https://docs.mosaicml.com/projects/mcli/en/latest/training/yaml_schema.html", - "title": "JSON Schema for MosaicML YAML", + "title": "JSON Schema for MosaicML Training YAML", "description": "YAML schema for MosaicML training configuration", "type": "object", "properties": { From f533da39c9c968de40c511b26ec398e5d23309f0 Mon Sep 17 00:00:00 2001 From: Melissa Du <61556662+melissaxdu@users.noreply.github.com> Date: Tue, 6 Jun 2023 20:55:32 -0700 Subject: [PATCH 3/3] Update training.json --- training.json | 240 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 177 insertions(+), 63 deletions(-) diff --git a/training.json b/training.json index 2b0276f38..93e6172c6 100644 --- a/training.json +++ b/training.json @@ -1,110 +1,224 @@ { - "$schema": "https://github.com/mosaicml/llm-foundry/blob/main/mcli/mcli-1b-max-seq-len-8k.yaml", "$id": "https://docs.mosaicml.com/projects/mcli/en/latest/training/yaml_schema.html", "title": "JSON Schema for MosaicML Training YAML", - "description": "YAML schema for MosaicML training configuration", + "description": "YAML schema for MosaicML training configuration. 
See documentation for more details.", "type": "object", "properties": { "name": { - "type": "string" + "type": "string", + "description": "Name of run." }, "image": { - "type": "string" + "type": "string", + "description": "Path to model Docker image. Images on DockerHub can be configured as 'organization/image_name'." }, "command": { - "type": "string" + "type": "string", + "description": "Command executed at start of run, typically to launch your training jobs and scripts. Composer launch commands are included here. Command can incorporate environment variables defined in the 'env_variables' field." }, "compute": { + "$ref": "#/$defs/ComputingConfig" + }, + "scheduling": { + "$ref": "#/$defs/SchedulingConfig" + }, + "integrations": { + "type": "array", + "description": "List of integrations to customize aspects of both the run setup and environment. Some integrations may require adding secrets. See documentation for available integrations and how to implement them.", + "items": { + "oneOf": [ + { "$ref":"#/$defs/apt_packages" }, + { "$ref":"#/$defs/git_repo" }, + { "$ref":"#/$defs/wandb" }, + { "$ref":"#/$defs/pip_packages" }, + { "$ref":"#/$defs/comet_ml" } + ], + "description": "Type of integration. Can be 'git_repo', 'apt_packages', 'pip_packages', 'comet_ml', or 'wandb'." + } + }, + "env_variables": { + "type": "array", + "description": "List of variables that can be accessed by the 'command' field.", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "Name used to access the value of environment variable." + }, + "value": { + "type": "string", + "description": "Value of environment variable." + } + }, + "required": [ "key", "value" ] + } + }, + "parameters": { "type": "object", + "description": "Provided parameters are mounted as a YAML file of your run at '/mnt/config/parameters.yaml'." 
+ + }, + "metadata": { + "type": "object", + "description": "Multi-purpose, unstructured place to put information about a run. Can be updated while run is running to export metrics and other information from the run." + } + }, + "required": ["name", "image", "command"], + + "$defs": { + "SchedulingConfig": { + "type": "object", + "description": "How the MosaicML platform’s scheduler will manage your run.", + "properties": { + "priority": { + "type": "string", + "description": "Priority level of run. Can be 'low', 'medium', or 'high'." + } + } + }, + "ComputingConfig": { + "type": "object", + "description": "Compute resources to request for your run. The MosaicML platform will try to infer which compute resources to use automatically, but some fields may be required depending on which and what types of clusters are available to you.", "properties": { "gpus": { "type": "integer", - "description": "number of gpus" + "description": "Number of gpus (required unless nodes is specified or run is cpu-only)." }, "cluster": { "type": "string", - "description": "name of cluster" + "description": "Name of cluster (required if you have multiple clusters)." }, "gpu_type": { - "type": "string" + "type": "string", + "description": "Type of gpus (optional)." }, "instance": { - "type": "string" + "type": "string", + "description": "Explicit instance name within the cluster (optional)." }, "nodes": { - "type": "string" + "type": "string", + "description": "Number of nodes (optional unless gpus is not specified or run is cpu-only)." }, "cpus": { - "type": "integer" + "type": "integer", + "description": "Number of cpus (optional)." 
} - }, - "required": [ "cluster" ] + } }, - "scheduling": { + "apt_packages": { "type": "object", "properties": { - "priority": { - "type": "string" + "integration_type": { + "type": "string", + "enum": [ "apt_packages" ] + }, + "packages": { + "type": "array", + "items": { + "type": "string" + } + }, + "upgrade": { + "type": "boolean" } - } + }, + "required": [ "integration_type", "packages" ] }, - "integrations": { - "type": "array", - "items": { - "type": "object", - "properties": { - "integration_type": { - "type": "string", - "enum": [ - "git_repo", - "apt_packages", - "pip_packages", - "wandb" - ] - }, - "git_repo": { - "type": "string" - }, - "git_branch": { - "type": "string" - }, - "pip_install": { - "type": "string" - }, - "packages": { - "type": "array", - "items": { - "type": "string" - } - }, - "project": { - "type": "string" - }, - "entity": { + "pip_packages": { + "type": "object", + "properties": { + "integration_type": { + "type": "string", + "enum": [ "pip_packages" ] + }, + "packages": { + "type": "array", + "items": { "type": "string" } - }, - "required": [ "integration_type" ] - } + } + }, + "required": [ "integration_type", "packages" ] }, - "env_variables": { + "git_repo": { "type": "object", "properties": { - "key": { + "integration_type": { + "type": "string", + "enum": [ "git_repo" ] + }, + "git_repo": { + "type": "string" + }, + "git_branch": { + "type": "string" + }, + "path": { + "type": "string" + }, + "ssh_clone": { + "type": "boolean" + }, + "pip_install": { + "type": "string" + }, + "host": { "type": "string" }, - "value": { + "git_commit": { "type": "string" + }, + "git_clone_recursive": { + "type": "boolean" } }, - "required": [ "key", "value" ] + "required": [ "integration_type", "git_repo" ] }, - "parameters": { - "type": "object" + "wandb": { + "type": "object", + "properties": { + "integration_type": { + "type": "string", + "enum": [ "wandb" ] + }, + "project": { + "type": "string" + }, + "entity": { + "type": "string" + }, + 
"group": { + "type": "string" + }, + "jobType": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ "integration_type" ] }, - "metadata": { - "type": "string" + "comet_ml": { + "type": "object", + "properties": { + "integration_type": { + "type": "string", + "enum": [ "comet_ml" ] + }, + "project": { + "type": "string" + }, + "workspace": { + "type": "string" + } + }, + "required": [ "integration_type" ] } - }, - "required": ["name", "image", "command"] + } }