From 6f24434781a9b4f36eb6138d312f50add6e8a518 Mon Sep 17 00:00:00 2001 From: Pulin Agrawal Date: Sun, 12 Sep 2021 02:37:49 -0700 Subject: [PATCH 1/2] add Google Colab notebook for training models --- wbap_hackathon.ipynb | 327 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 wbap_hackathon.ipynb diff --git a/wbap_hackathon.ipynb b/wbap_hackathon.ipynb new file mode 100644 index 0000000..47880e0 --- /dev/null +++ b/wbap_hackathon.ipynb @@ -0,0 +1,327 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "wbap-hackathon.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "sSZjbw6kWch4" + }, + "source": [ + "# WBAP Hackathon 2021\n", + "A Google Colab-friendly notebook. It lets you use a Colab GPU to train your hackathon model from the code in your GitHub repo. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tm9NDeAhUrCY" + }, + "source": [ + "## Specify the GitHub repository and branch that you want to run from" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XwPnR2ibNlh2" + }, + "source": [ + "branch = 'master'\n", + "github_url = 'https://github.com/pulinagrawal/WM_Hackathon'" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "crY-fJF8UzWl" + }, + "source": [ + "## Run the cells in this section to set up the environment \n", + "It can take several minutes to run." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BbsZn_xidsIc", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "be970238-98b9-4db5-ba27-9ee976465627" + }, + "source": [ + "!git clone $github_url\n", + "import os\n", + "from pathlib import Path\n", + "os.chdir(str('WM_Hackathon'))\n", + "!git checkout $branch\n", + "\n", + "!pip install ray[debug]===0.8.7\n", + "!pip install -r requirements.txt\n", + "\n", + "os.chdir(str(Path('..')))\n", + "!git clone https://github.com/Cerenaut/cerenaut-pt-core \n", + "!pwd\n", + "os.chdir(str(Path('.')/'cerenaut-pt-core'))\n", + "!pwd\n", + "!python setup.py develop\n", + "os.chdir(str(Path('..')/'WM_Hackathon'))\n", + "!pwd\n", + "!pip install --ignore-installed --no-deps -e .\n", + "\n", + "# A hack to force the runtime to restart, needed to include the above dependencies.\n", + "import os\n", + "os._exit(0)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'WM_Hackathon'...\n", + "remote: Enumerating objects: 1631, done.\u001b[K\n", + "remote: Counting objects: 100% (158/158), done.\u001b[K\n", + "remote: Compressing objects: 100% (112/112), done.\u001b[K\n", + "remote: Total 1631 (delta 82), reused 103 (delta 46), pack-reused 1473\u001b[K\n", + "Receiving objects: 100% (1631/1631), 63.58 MiB | 37.92 MiB/s, done.\n", + "Resolving deltas: 100% (1031/1031), done.\n", + "Already on 'master'\n", + "Your branch is up to date with 'origin/master'.\n", + "Collecting ray[debug]===0.8.7\n", + " Downloading ray-0.8.7-cp37-cp37m-manylinux1_x86_64.whl (22.0 MB)\n", + "\u001b[K |████████████████████████████████| 22.0 MB 1.4 MB/s \n", + "\u001b[?25hCollecting colorful\n", + " Downloading colorful-0.5.4-py2.py3-none-any.whl (201 kB)\n", + "\u001b[K |████████████████████████████████| 201 kB 42.1 MB/s \n", + "\u001b[?25hCollecting redis<3.5.0,>=3.3.2\n", + " Downloading redis-3.4.1-py2.py3-none-any.whl (71 kB)\n", + "\u001b[K 
|████████████████████████████████| 71 kB 8.2 MB/s \n", + "\u001b[?25hRequirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.17.3)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.13)\n", + "Collecting gpustat\n", + " Downloading gpustat-0.6.0.tar.gz (78 kB)\n", + "\u001b[K |████████████████████████████████| 78 kB 6.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.39.0)\n", + "Collecting colorama\n", + " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.23.0)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (0.11.0)\n", + "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.19.5)\n", + "Collecting py-spy>=0.2.0\n", + " Downloading py_spy-0.3.9-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (3.2 MB)\n", + "\u001b[K |████████████████████████████████| 3.2 MB 43.8 MB/s \n", + "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.6.0)\n", + "Requirement already satisfied: google in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.0.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.0.12)\n", + "Collecting opencensus\n", + " Downloading opencensus-0.7.13-py2.py3-none-any.whl (127 kB)\n", + "\u001b[K |████████████████████████████████| 127 kB 29.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.0.2)\n", + "Requirement already 
satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (7.1.2)\n", + "Collecting aioredis\n", + " Downloading aioredis-2.0.0-py3-none-any.whl (69 kB)\n", + "\u001b[K |████████████████████████████████| 69 kB 6.4 MB/s \n", + "\u001b[?25hCollecting aiohttp\n", + " Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[K |████████████████████████████████| 1.3 MB 43.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: six>=1.5.2 in /usr/local/lib/python3.7/dist-packages (from grpcio>=1.28.1->ray[debug]===0.8.7) (1.15.0)\n", + "Collecting async-timeout<4.0,>=3.0\n", + " Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (21.2.0)\n", + "Collecting multidict<7.0,>=4.5\n", + " Downloading multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142 kB)\n", + "\u001b[K |████████████████████████████████| 142 kB 47.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (3.0.4)\n", + "Collecting yarl<2.0,>=1.0\n", + " Downloading yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl (294 kB)\n", + "\u001b[K |████████████████████████████████| 294 kB 34.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: typing-extensions>=3.6.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (3.7.4.3)\n", + "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl<2.0,>=1.0->aiohttp->ray[debug]===0.8.7) (2.10)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from google->ray[debug]===0.8.7) (4.6.3)\n", + "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray[debug]===0.8.7) (7.352.0)\n", + "Requirement already satisfied: psutil 
in /usr/local/lib/python3.7/dist-packages (from gpustat->ray[debug]===0.8.7) (5.4.8)\n", + "Collecting blessings>=1.6\n", + " Downloading blessings-1.7-py3-none-any.whl (18 kB)\n", + "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray[debug]===0.8.7) (1.26.3)\n", + "Collecting opencensus-context==0.1.2\n", + " Downloading opencensus_context-0.1.2-py2.py3-none-any.whl (4.4 kB)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (2018.9)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (21.0)\n", + "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (57.4.0)\n", + "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (1.53.0)\n", + "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (1.34.0)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (4.2.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (4.7.2)\n", + "Requirement already 
satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (2.4.7)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (0.4.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray[debug]===0.8.7) (2021.5.30)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray[debug]===0.8.7) (1.24.3)\n", + "Building wheels for collected packages: gpustat\n", + " Building wheel for gpustat (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for gpustat: filename=gpustat-0.6.0-py3-none-any.whl size=12617 sha256=dcff7e2bdb511431712b0755be501970cd5429c93a1d45a59552c043bc53387f\n", + " Stored in directory: /root/.cache/pip/wheels/e6/67/af/f1ad15974b8fd95f59a63dbf854483ebe5c7a46a93930798b8\n", + "Successfully built gpustat\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "crpLmFVqVPf3" + }, + "source": [ + "### Verify that the installed version of ray is '0.8.7' and your current directory is '/content/WM_Hackathon'" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "oqM6Ke2QfBSQ", + "outputId": "038b14fc-96e2-410a-ab61-ff213d9b7c8b" + }, + "source": [ + "import ray\n", + "ray.__version__" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'0.8.7'" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "Ob-HPAVpULmL", + "outputId": "50c61288-2e55-49ef-ccdc-829f66b1539e" + }, + "source": [ + "import os\n", + "from pathlib import Path\n", + "os.chdir('/content/WM_Hackathon')\n", + "!echo Your current directory is\n", + "!pwd" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Your current directory is\n", + "/content/WM_Hackathon\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5k3X62AfV-1s" + }, + "source": [ + "# Now you can run train_agent.py with your config" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "guWkf0s-gKlG" + }, + "source": [ + "!python train_agent.py m2s-v0 configs/m2s_env.json configs/stub_agent_env_full.json configs/stub_agent_full.json" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lE8hgC0KWGbY" + }, + "source": [ + "## Apologies. Haven't figured out tensorboard yet.\n", + "(This should have technically worked)" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 821 + }, + "id": "b_4Ck7cblmA9", + "outputId": "6b790b76-e40c-4520-ae27-dcb24fd7fef1" + }, + "source": [ + "%reload_ext tensorboard\n", + "%tensorboard --logdir /content/WM_Hackathon/runs" + ], + "execution_count": null, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " (async () => {\n", + " const url = await google.colab.kernel.proxyPort(6007, {\"cache\": true});\n", + " const iframe = document.createElement('iframe');\n", + " iframe.src = url;\n", + " iframe.setAttribute('width', '100%');\n", + " iframe.setAttribute('height', '800');\n", + " iframe.setAttribute('frameborder', 0);\n", + " document.body.appendChild(iframe);\n", + " })();\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + ] +} \ No newline at end of file From 19522ba74c5cf1d78b19d58b1881e77b5ef26c27 Mon Sep 
17 00:00:00 2001 From: Pulin Agrawal Date: Sun, 12 Sep 2021 02:53:09 -0700 Subject: [PATCH 2/2] QOL improvements --- wbap_hackathon.ipynb | 955 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 886 insertions(+), 69 deletions(-) diff --git a/wbap_hackathon.ipynb b/wbap_hackathon.ipynb index 47880e0..191187a 100644 --- a/wbap_hackathon.ipynb +++ b/wbap_hackathon.ipynb @@ -63,9 +63,10 @@ "metadata": { "id": "BbsZn_xidsIc", "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 1000 }, - "outputId": "be970238-98b9-4db5-ba27-9ee976465627" + "outputId": "1806875d-7611-48e5-918c-2a4b5b0db8e0" }, "source": [ "!git clone $github_url\n", @@ -85,102 +86,276 @@ "!python setup.py develop\n", "os.chdir(str(Path('..')/'WM_Hackathon'))\n", "!pwd\n", - "!pip install --ignore-installed --no-deps -e .\n", - "\n", - "# A hack to force the runtime to restart, needed to include the above dependencies.\n", - "import os\n", - "os._exit(0)" + "!pip install --ignore-installed --no-deps -e .\n" ], - "execution_count": null, + "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'WM_Hackathon'...\n", - "remote: Enumerating objects: 1631, done.\u001b[K\n", - "remote: Counting objects: 100% (158/158), done.\u001b[K\n", - "remote: Compressing objects: 100% (112/112), done.\u001b[K\n", - "remote: Total 1631 (delta 82), reused 103 (delta 46), pack-reused 1473\u001b[K\n", - "Receiving objects: 100% (1631/1631), 63.58 MiB | 37.92 MiB/s, done.\n", - "Resolving deltas: 100% (1031/1031), done.\n", + "remote: Enumerating objects: 1655, done.\u001b[K\n", + "remote: Counting objects: 100% (182/182), done.\u001b[K\n", + "remote: Compressing objects: 100% (127/127), done.\u001b[K\n", + "remote: Total 1655 (delta 96), reused 117 (delta 55), pack-reused 1473\u001b[K\n", + "Receiving objects: 100% (1655/1655), 63.59 MiB | 25.29 MiB/s, done.\n", + "Resolving deltas: 100% (1045/1045), done.\n", "Already 
on 'master'\n", "Your branch is up to date with 'origin/master'.\n", "Collecting ray[debug]===0.8.7\n", " Downloading ray-0.8.7-cp37-cp37m-manylinux1_x86_64.whl (22.0 MB)\n", - "\u001b[K |████████████████████████████████| 22.0 MB 1.4 MB/s \n", - "\u001b[?25hCollecting colorful\n", - " Downloading colorful-0.5.4-py2.py3-none-any.whl (201 kB)\n", - "\u001b[K |████████████████████████████████| 201 kB 42.1 MB/s \n", - "\u001b[?25hCollecting redis<3.5.0,>=3.3.2\n", - " Downloading redis-3.4.1-py2.py3-none-any.whl (71 kB)\n", - "\u001b[K |████████████████████████████████| 71 kB 8.2 MB/s \n", - "\u001b[?25hRequirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.17.3)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.13)\n", - "Collecting gpustat\n", - " Downloading gpustat-0.6.0.tar.gz (78 kB)\n", - "\u001b[K |████████████████████████████████| 78 kB 6.4 MB/s \n", - "\u001b[?25hRequirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.39.0)\n", - "Collecting colorama\n", + "\u001b[K |████████████████████████████████| 22.0 MB 1.6 MB/s \n", + "\u001b[?25hCollecting colorama\n", " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.23.0)\n", - "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (0.11.0)\n", - "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.19.5)\n", "Collecting py-spy>=0.2.0\n", " Downloading py_spy-0.3.9-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (3.2 MB)\n", - "\u001b[K |████████████████████████████████| 3.2 MB 43.8 MB/s \n", - "\u001b[?25hRequirement already satisfied: jsonschema in 
/usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.6.0)\n", - "Requirement already satisfied: google in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.0.3)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.0.12)\n", + "\u001b[K |████████████████████████████████| 3.2 MB 40.8 MB/s \n", + "\u001b[?25hCollecting redis<3.5.0,>=3.3.2\n", + " Downloading redis-3.4.1-py2.py3-none-any.whl (71 kB)\n", + "\u001b[K |████████████████████████████████| 71 kB 7.6 MB/s \n", + "\u001b[?25hCollecting aiohttp\n", + " Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[K |████████████████████████████████| 1.3 MB 53.0 MB/s \n", + "\u001b[?25hRequirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (7.1.2)\n", "Collecting opencensus\n", " Downloading opencensus-0.7.13-py2.py3-none-any.whl (127 kB)\n", - "\u001b[K |████████████████████████████████| 127 kB 29.6 MB/s \n", + "\u001b[K |████████████████████████████████| 127 kB 51.5 MB/s \n", "\u001b[?25hRequirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.0.2)\n", - "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (7.1.2)\n", + "Collecting colorful\n", + " Downloading colorful-0.5.4-py2.py3-none-any.whl (201 kB)\n", + "\u001b[K |████████████████████████████████| 201 kB 47.7 MB/s \n", + "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.23.0)\n", + "Collecting gpustat\n", + " Downloading gpustat-0.6.0.tar.gz (78 kB)\n", + "\u001b[K |████████████████████████████████| 78 kB 7.1 MB/s \n", + "\u001b[?25hRequirement already 
satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.17.3)\n", + "Requirement already satisfied: google in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (2.0.3)\n", "Collecting aioredis\n", " Downloading aioredis-2.0.0-py3-none-any.whl (69 kB)\n", - "\u001b[K |████████████████████████████████| 69 kB 6.4 MB/s \n", - "\u001b[?25hCollecting aiohttp\n", - " Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[K |████████████████████████████████| 1.3 MB 43.5 MB/s \n", - "\u001b[?25hRequirement already satisfied: six>=1.5.2 in /usr/local/lib/python3.7/dist-packages (from grpcio>=1.28.1->ray[debug]===0.8.7) (1.15.0)\n", - "Collecting async-timeout<4.0,>=3.0\n", - " Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)\n", + "\u001b[K |████████████████████████████████| 69 kB 7.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.0.12)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (0.11.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (3.13)\n", + "Requirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.39.0)\n", + "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from ray[debug]===0.8.7) (1.19.5)\n", + "Requirement already satisfied: six>=1.5.2 in /usr/local/lib/python3.7/dist-packages (from grpcio>=1.28.1->ray[debug]===0.8.7) (1.15.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (21.2.0)\n", - "Collecting multidict<7.0,>=4.5\n", - " Downloading multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142 kB)\n", - "\u001b[K |████████████████████████████████| 142 
kB 47.5 MB/s \n", - "\u001b[?25hRequirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (3.0.4)\n", + "Requirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (3.0.4)\n", "Collecting yarl<2.0,>=1.0\n", " Downloading yarl-1.6.3-cp37-cp37m-manylinux2014_x86_64.whl (294 kB)\n", - "\u001b[K |████████████████████████████████| 294 kB 34.6 MB/s \n", + "\u001b[K |████████████████████████████████| 294 kB 50.1 MB/s \n", + "\u001b[?25hCollecting multidict<7.0,>=4.5\n", + " Downloading multidict-5.1.0-cp37-cp37m-manylinux2014_x86_64.whl (142 kB)\n", + "\u001b[K |████████████████████████████████| 142 kB 46.9 MB/s \n", "\u001b[?25hRequirement already satisfied: typing-extensions>=3.6.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray[debug]===0.8.7) (3.7.4.3)\n", + "Collecting async-timeout<4.0,>=3.0\n", + " Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)\n", "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl<2.0,>=1.0->aiohttp->ray[debug]===0.8.7) (2.10)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from google->ray[debug]===0.8.7) (4.6.3)\n", "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray[debug]===0.8.7) (7.352.0)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from gpustat->ray[debug]===0.8.7) (5.4.8)\n", "Collecting blessings>=1.6\n", " Downloading blessings-1.7-py3-none-any.whl (18 kB)\n", - "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray[debug]===0.8.7) (1.26.3)\n", "Collecting opencensus-context==0.1.2\n", " Downloading opencensus_context-0.1.2-py2.py3-none-any.whl (4.4 kB)\n", + "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 
in /usr/local/lib/python3.7/dist-packages (from opencensus->ray[debug]===0.8.7) (1.26.3)\n", + "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (1.34.0)\n", "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (2018.9)\n", - "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (21.0)\n", "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (57.4.0)\n", "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (1.53.0)\n", - "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (1.34.0)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (4.2.2)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (21.0)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (0.2.8)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (4.7.2)\n", + "Requirement already satisfied: 
cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (4.2.2)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (2.4.7)\n", "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray[debug]===0.8.7) (0.4.8)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray[debug]===0.8.7) (2021.5.30)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray[debug]===0.8.7) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray[debug]===0.8.7) (2021.5.30)\n", "Building wheels for collected packages: gpustat\n", " Building wheel for gpustat (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for gpustat: filename=gpustat-0.6.0-py3-none-any.whl size=12617 sha256=dcff7e2bdb511431712b0755be501970cd5429c93a1d45a59552c043bc53387f\n", + " Created wheel for gpustat: filename=gpustat-0.6.0-py3-none-any.whl size=12617 sha256=683c7a914a57a145666d0ff74f0812aaf214d2e9c5fe3c6af911c6deb4803fa7\n", " Stored in directory: /root/.cache/pip/wheels/e6/67/af/f1ad15974b8fd95f59a63dbf854483ebe5c7a46a93930798b8\n", - "Successfully built gpustat\n" + "Successfully built gpustat\n", + "Installing collected packages: multidict, yarl, opencensus-context, blessings, async-timeout, redis, py-spy, opencensus, gpustat, colorful, colorama, aioredis, aiohttp, ray\n", + "Successfully installed aiohttp-3.7.4.post0 aioredis-2.0.0 async-timeout-3.0.1 blessings-1.7 colorama-0.4.4 colorful-0.5.4 gpustat-0.6.0 multidict-5.1.0 opencensus-0.7.13 opencensus-context-0.1.2 py-spy-0.3.9 ray-0.8.7 redis-3.4.1 yarl-1.6.3\n", + "Collecting pygame==2.0.0.dev4\n", + " Downloading pygame-2.0.0.dev4-cp37-cp37m-manylinux1_x86_64.whl (15.2 MB)\n", + "\u001b[K |████████████████████████████████| 15.2 MB 132 kB/s \n", + "\u001b[?25hCollecting gym==0.17.2\n", + " Downloading gym-0.17.2.tar.gz (1.6 MB)\n", + "\u001b[K |████████████████████████████████| 1.6 MB 41.7 MB/s \n", + "\u001b[?25hCollecting numpy==1.19.1\n", + " Downloading numpy-1.19.1-cp37-cp37m-manylinux2010_x86_64.whl (14.5 MB)\n", + "\u001b[K |████████████████████████████████| 14.5 MB 8.2 kB/s \n", + "\u001b[?25hCollecting setuptools==50.3.0\n", + " Downloading setuptools-50.3.0-py3-none-any.whl (785 kB)\n", + "\u001b[K |████████████████████████████████| 785 kB 24.8 MB/s \n", + "\u001b[?25hCollecting torch==1.6.0\n", + " Downloading torch-1.6.0-cp37-cp37m-manylinux1_x86_64.whl (748.8 MB)\n", + "\u001b[K |████████████████████████████████| 748.8 MB 19 kB/s \n", + "\u001b[?25hCollecting scipy==1.5.2\n", + " Downloading scipy-1.5.2-cp37-cp37m-manylinux1_x86_64.whl (25.9 MB)\n", + "\u001b[K 
|████████████████████████████████| 25.9 MB 8.0 kB/s \n", + "\u001b[?25hCollecting pillow==8.2.0\n", + " Downloading Pillow-8.2.0-cp37-cp37m-manylinux1_x86_64.whl (3.0 MB)\n", + "\u001b[K |████████████████████████████████| 3.0 MB 43.1 MB/s \n", + "\u001b[?25hCollecting torchvision==0.7.0\n", + " Downloading torchvision-0.7.0-cp37-cp37m-manylinux1_x86_64.whl (5.9 MB)\n", + "\u001b[K |████████████████████████████████| 5.9 MB 27.3 MB/s \n", + "\u001b[?25hRequirement already satisfied: ray==0.8.7 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 9)) (0.8.7)\n", + "Collecting matplotlib==3.3.1\n", + " Downloading matplotlib-3.3.1-cp37-cp37m-manylinux1_x86_64.whl (11.6 MB)\n", + "\u001b[K |████████████████████████████████| 11.6 MB 215 kB/s \n", + "\u001b[?25hRequirement already satisfied: scikit-image==0.16.2 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 11)) (0.16.2)\n", + "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from gym==0.17.2->-r requirements.txt (line 2)) (1.5.0)\n", + "Requirement already satisfied: cloudpickle<1.4.0,>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from gym==0.17.2->-r requirements.txt (line 2)) (1.3.0)\n", + "Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from torch==1.6.0->-r requirements.txt (line 5)) (0.16.0)\n", + "Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (2.6.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (3.0.12)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (3.7.4.post0)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) 
(0.11.0)\n", + "Requirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (1.39.0)\n", + "Requirement already satisfied: py-spy>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (0.3.9)\n", + "Requirement already satisfied: colorama in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (0.4.4)\n", + "Requirement already satisfied: google in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (2.0.3)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (1.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (2.23.0)\n", + "Requirement already satisfied: gpustat in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (0.6.0)\n", + "Requirement already satisfied: opencensus in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (0.7.13)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (3.13)\n", + "Requirement already satisfied: colorful in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (0.5.4)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (7.1.2)\n", + "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (3.17.3)\n", + "Requirement already satisfied: redis<3.5.0,>=3.3.2 in /usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (3.4.1)\n", + "Requirement already satisfied: aioredis in 
/usr/local/lib/python3.7/dist-packages (from ray==0.8.7->-r requirements.txt (line 9)) (2.0.0)\n", + "Requirement already satisfied: certifi>=2020.06.20 in /usr/local/lib/python3.7/dist-packages (from matplotlib==3.3.1->-r requirements.txt (line 10)) (2021.5.30)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib==3.3.1->-r requirements.txt (line 10)) (0.10.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /usr/local/lib/python3.7/dist-packages (from matplotlib==3.3.1->-r requirements.txt (line 10)) (2.4.7)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib==3.3.1->-r requirements.txt (line 10)) (2.8.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib==3.3.1->-r requirements.txt (line 10)) (1.3.1)\n", + "Requirement already satisfied: imageio>=2.3.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image==0.16.2->-r requirements.txt (line 11)) (2.4.1)\n", + "Requirement already satisfied: networkx>=2.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image==0.16.2->-r requirements.txt (line 11)) (2.6.2)\n", + "Requirement already satisfied: PyWavelets>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image==0.16.2->-r requirements.txt (line 11)) (1.1.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from cycler>=0.10->matplotlib==3.3.1->-r requirements.txt (line 10)) (1.15.0)\n", + "Requirement already satisfied: chardet<5.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (3.0.4)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (5.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.5 in 
/usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (3.7.4.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (1.6.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (21.2.0)\n", + "Requirement already satisfied: async-timeout<4.0,>=3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (3.0.1)\n", + "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.7/dist-packages (from yarl<2.0,>=1.0->aiohttp->ray==0.8.7->-r requirements.txt (line 9)) (2.10)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.7/dist-packages (from google->ray==0.8.7->-r requirements.txt (line 9)) (4.6.3)\n", + "Requirement already satisfied: blessings>=1.6 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray==0.8.7->-r requirements.txt (line 9)) (1.7)\n", + "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray==0.8.7->-r requirements.txt (line 9)) (7.352.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from gpustat->ray==0.8.7->-r requirements.txt (line 9)) (5.4.8)\n", + "Requirement already satisfied: opencensus-context==0.1.2 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray==0.8.7->-r requirements.txt (line 9)) (0.1.2)\n", + "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray==0.8.7->-r requirements.txt (line 9)) (1.26.3)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (21.0)\n", + "Requirement already satisfied: 
googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (1.53.0)\n", + "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (1.34.0)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (2018.9)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (0.2.8)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (4.2.2)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (4.7.2)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray==0.8.7->-r requirements.txt (line 9)) (0.4.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray==0.8.7->-r requirements.txt (line 9)) (1.24.3)\n", + "Building wheels for collected packages: gym\n", + " Building wheel for gym (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for gym: filename=gym-0.17.2-py3-none-any.whl size=1650889 sha256=060b16ed22f09daf9287a06570829358e7df04a266fdd09cd0c945eeee8f0bc4\n", + " Stored in directory: /root/.cache/pip/wheels/18/e1/58/89a2aa24e6c2cc800204fc02010612afdf200926c4d6bfe315\n", + "Successfully built gym\n", + "Installing collected packages: setuptools, pillow, numpy, torch, scipy, matplotlib, torchvision, pygame, gym\n", + " Attempting uninstall: setuptools\n", + " Found existing installation: setuptools 57.4.0\n", + " Uninstalling setuptools-57.4.0:\n", + " Successfully uninstalled setuptools-57.4.0\n", + " Attempting uninstall: pillow\n", + " Found existing installation: Pillow 7.1.2\n", + " Uninstalling Pillow-7.1.2:\n", + " Successfully uninstalled Pillow-7.1.2\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 1.19.5\n", + " Uninstalling numpy-1.19.5:\n", + " Successfully uninstalled numpy-1.19.5\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 1.9.0+cu102\n", + " Uninstalling torch-1.9.0+cu102:\n", + " Successfully uninstalled torch-1.9.0+cu102\n", + " Attempting uninstall: scipy\n", + " Found existing installation: scipy 1.4.1\n", + " Uninstalling scipy-1.4.1:\n", + " Successfully uninstalled scipy-1.4.1\n", + " Attempting uninstall: matplotlib\n", + " Found existing installation: matplotlib 3.2.2\n", + " Uninstalling matplotlib-3.2.2:\n", + " Successfully uninstalled matplotlib-3.2.2\n", + " Attempting uninstall: torchvision\n", + " Found existing installation: torchvision 0.10.0+cu102\n", + " Uninstalling torchvision-0.10.0+cu102:\n", + " Successfully uninstalled torchvision-0.10.0+cu102\n", + " Attempting uninstall: gym\n", + " Found existing installation: gym 0.17.3\n", + " Uninstalling gym-0.17.3:\n", + " Successfully uninstalled gym-0.17.3\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "torchtext 0.10.0 requires torch==1.9.0, but you have torch 1.6.0 which is incompatible.\n", + "tensorflow 2.6.0 requires numpy~=1.19.2, but you have numpy 1.19.1 which is incompatible.\n", + "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\n", + "albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n", + "Successfully installed gym-0.17.2 matplotlib-3.3.1 numpy-1.19.1 pillow-8.2.0 pygame-2.0.0.dev4 scipy-1.5.2 setuptools-50.3.0 torch-1.6.0 torchvision-0.7.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "PIL", + "matplotlib", + "mpl_toolkits", + "numpy", + "pkg_resources" + ] + } + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'cerenaut-pt-core'...\n", + "remote: Enumerating objects: 37, done.\u001b[K\n", + "remote: Counting objects: 100% (37/37), done.\u001b[K\n", + "remote: Compressing objects: 100% (30/30), done.\u001b[K\n", + "remote: Total 37 (delta 6), reused 36 (delta 5), pack-reused 0\u001b[K\n", + "Unpacking objects: 100% (37/37), done.\n", + "/content\n", + "/content/cerenaut-pt-core\n", + "running develop\n", + "running egg_info\n", + "creating cerenaut_pt_core.egg-info\n", + "writing cerenaut_pt_core.egg-info/PKG-INFO\n", + "writing dependency_links to cerenaut_pt_core.egg-info/dependency_links.txt\n", + "writing top-level names to cerenaut_pt_core.egg-info/top_level.txt\n", + "writing manifest file 'cerenaut_pt_core.egg-info/SOURCES.txt'\n", + "writing manifest file 'cerenaut_pt_core.egg-info/SOURCES.txt'\n", + "running build_ext\n", + "Creating /usr/local/lib/python3.7/dist-packages/cerenaut-pt-core.egg-link (link to .)\n", + "Adding cerenaut-pt-core 1.0 to easy-install.pth file\n", + "\n", + "Installed 
/content/cerenaut-pt-core\n", + "Processing dependencies for cerenaut-pt-core==1.0\n", + "Finished processing dependencies for cerenaut-pt-core==1.0\n", + "/content/WM_Hackathon\n", + "Obtaining file:///content/WM_Hackathon\n", + "Installing collected packages: gym-game\n", + " Running setup.py develop for gym-game\n", + "Successfully installed gym-game-0.0.1\n" ] } ] @@ -202,13 +377,13 @@ "height": 35 }, "id": "oqM6Ke2QfBSQ", - "outputId": "038b14fc-96e2-410a-ab61-ff213d9b7c8b" + "outputId": "b4adfb85-8a9f-47e3-8c96-86676a2211ff" }, "source": [ "import ray\n", "ray.__version__" ], - "execution_count": 2, + "execution_count": 3, "outputs": [ { "output_type": "execute_result", @@ -221,10 +396,30 @@ ] }, "metadata": {}, - "execution_count": 2 + "execution_count": 3 } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "44aV6gSgZjW_" + }, + "source": [ + "## Enabling Ray to use the GPU\n", + "In the agent config file that you pass to `train_agent.py` (for example `agent_av.json`), change the `agent` section to the following:\n", + "```\n", + " \"agent\":{\n", + " \"num_workers\": 1,\n", + " \"num_gpus\": 1,\n", + " \"num_gpus_per_worker\": 1,\n", + " \"rollout_fragment_length\": 50,\n", + " \"gamma\": 0.8\n", + " }\n", + "```\n", + "\n" + ] + }, { "cell_type": "code", "metadata": { @@ -232,7 +427,7 @@ "base_uri": "https://localhost:8080/" }, "id": "Ob-HPAVpULmL", - "outputId": "50c61288-2e55-49ef-ccdc-829f66b1539e" + "outputId": "2f82fe4c-4d30-4811-9eec-f8aac1f2a548" }, "source": [ "import os\n", @@ -241,7 +436,7 @@ "!echo Your current directory is\n", "!pwd" ], - "execution_count": 11, + "execution_count": 4, "outputs": [ { "output_type": "stream", @@ -265,13 +460,634 @@ { "cell_type": "code", "metadata": { - "id": "guWkf0s-gKlG" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "guWkf0s-gKlG", + "outputId": "7729a230-1361-4b98-9644-62387e549dc9" }, "source": [ "!python train_agent.py m2s-v0 configs/m2s_env.json configs/stub_agent_env_full.json configs/stub_agent_full.json" ], - "execution_count": null, - "outputs": [] + 
"execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:101: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "non-resource variables are not supported in the long term\n", + "lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n", + "pygame 2.0.0.dev4 (SDL 2.0.10, python 3.7.11)\n", + "Hello from the pygame community. https://www.pygame.org/contribute.html\n", + "Task Gym[PyGame] environment: m2s-v0\n", + "Task Env config file: configs/m2s_env.json\n", + "Env config file: configs/stub_agent_env_full.json\n", + "Agent config file: configs/stub_agent_full.json\n", + "Env config file: configs/m2s_env.json\n", + "D2MS gParams: {'mainTaskRepeat': 6, 'observationRepeat': 2, 'frameRate': 3, 'imageDir': 'resources/dm2s', 'screen_scale': 0.1, 'summaries': True, 'enable_active_vision': 0, 'videoWidth': 800, 'videoHeight': 800, 'states': {'tutor-show': {'interval': 2000}, 'tutor-feedback': {'interval': 1000, 'flash_interval': 100}, 'inter': {'interval': 1200, 'flash_interval': 400}, 'play-show': {'interval': 5000}, 'play-feedback': {'interval': 1000, 'flash_interval': 100}}, 'gameTypes': ['shape'], 'txTypes': ['None'], 'colors': ['DB'], 'shapes': ['Barred_Ring', 'Triangle', 'Cross', 'Heart']}\n", + "ALSA lib confmisc.c:767:(parse_card) cannot find card '0'\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory\n", + "ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory\n", + "ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating 
name\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory\n", + "ALSA lib conf.c:5007:(snd_config_expand) Evaluate error: No such file or directory\n", + "ALSA lib pcm.c:2495:(snd_pcm_open_noupdate) Unknown PCM default\n", + "=======================> CONFIG IS: {'mtl_max_length': 6, 'obs_keys': {'visual': ['full']}, 'pfc': {}, 'gaze': {'dims': 1000}, 'pfc_output_delay_size': 1, 'mtl': {'mtl_max_length': 1}, 'full': {'load': 'checkpoints/full.pt', 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6, 'summaries': False}, 'cortex': {'filters': 100, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'fovea': {'load': None, 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'peripheral': {'load': None, 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'mtl_input_delay_size': 1, 'sc': {}}\n", + ">>>>>>>>>>>>>>>>>> full visual_cortex_input_shape: [-1, 3, 80, 80]\n", + ">>>>>>>>>>>>>>>>>> full retina_output_shape: [-1, 6, 74, 74]\n", + ">>>>>>>>>>>>>>>>>> full visual_cortex_output_shape: [-1, 100, 36, 36]\n", + "Loading parameters from checkpoint: checkpoints/full.pt\n", + "ACTIONS=3\n", + "2021-09-12 
09:48:57,658\tINFO resource_spec.py:231 -- Starting Ray with 7.32 GiB memory available for workers and up to 3.67 GiB for objects. You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2021-09-12 09:48:58,576\tINFO services.py:1193 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8265\u001b[39m\u001b[22m\n", + "Agent.model config: use_lstm --> False\n", + "Agent.model config: fcnet_hiddens --> [256, 256]\n", + "Agent config: num_workers --> 1\n", + "Agent config: num_gpus --> 0\n", + "Agent config: num_gpus_per_worker --> 0\n", + "Agent config: rollout_fragment_length --> 50\n", + "Agent config: gamma --> 0.8\n", + "Agent config:\n", + " {'log_level': 'DEBUG', 'framework': 'torch', 'num_workers': 1, 'model': {'custom_model': 'agent_model', 'fcnet_activation': 'tanh', 'fcnet_hiddens': [256, 256], 'max_seq_len': 50, 'framestack': False, 'custom_model_config': {}, 'use_lstm': False}, 'num_gpus': 0, 'num_gpus_per_worker': 0, 'rollout_fragment_length': 50, 'gamma': 0.8}\n", + "2021-09-12 09:49:01,138\tERROR logger.py:196 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2021-09-12 09:49:01,139\tWARNING logger.py:330 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n", + "2021-09-12 09:49:01,175\tWARNING a3c.py:59 -- `sample_async=True` is not supported for PyTorch! 
Multithreading can lead to crashes.\n", + "Env config file: configs/m2s_env.json\n", + "D2MS gParams: {'mainTaskRepeat': 6, 'observationRepeat': 2, 'frameRate': 3, 'imageDir': 'resources/dm2s', 'screen_scale': 0.1, 'summaries': True, 'enable_active_vision': 0, 'videoWidth': 800, 'videoHeight': 800, 'states': {'tutor-show': {'interval': 2000}, 'tutor-feedback': {'interval': 1000, 'flash_interval': 100}, 'inter': {'interval': 1200, 'flash_interval': 400}, 'play-show': {'interval': 5000}, 'play-feedback': {'interval': 1000, 'flash_interval': 100}}, 'gameTypes': ['shape'], 'txTypes': ['None'], 'colors': ['DB'], 'shapes': ['Barred_Ring', 'Triangle', 'Cross', 'Heart']}\n", + "ALSA lib confmisc.c:767:(parse_card) cannot find card '0'\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory\n", + "ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory\n", + "ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name\n", + "ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory\n", + "ALSA lib conf.c:5007:(snd_config_expand) Evaluate error: No such file or directory\n", + "ALSA lib pcm.c:2495:(snd_pcm_open_noupdate) Unknown PCM default\n", + "=======================> CONFIG IS: {'mtl_max_length': 6, 'obs_keys': {'visual': ['full']}, 'pfc': {}, 'gaze': {'dims': 1000}, 'pfc_output_delay_size': 1, 'mtl': {'mtl_max_length': 1}, 'full': {'load': 'checkpoints/full.pt', 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6, 'summaries': False}, 'cortex': {'filters': 100, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 
1.0}}, 'fovea': {'load': None, 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'peripheral': {'load': None, 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'mtl_input_delay_size': 1, 'sc': {}}\n", + ">>>>>>>>>>>>>>>>>> full visual_cortex_input_shape: [-1, 3, 80, 80]\n", + ">>>>>>>>>>>>>>>>>> full retina_output_shape: [-1, 6, 74, 74]\n", + ">>>>>>>>>>>>>>>>>> full visual_cortex_output_shape: [-1, 100, 36, 36]\n", + "Loading parameters from checkpoint: checkpoints/full.pt\n", + "2021-09-12 09:49:01,314\tDEBUG rollout_worker.py:957 -- Creating policy for default_policy\n", + "2021-09-12 09:49:01,314\tDEBUG preprocessors.py:225 -- Creating sub-preprocessor for Box(100, 36, 36)\n", + "2021-09-12 09:49:01,315\tDEBUG catalog.py:471 -- Created preprocessor : Dict(full:Box(100, 36, 36)) -> (129600,)\n", + "2021-09-12 09:49:01,320\tINFO catalog.py:314 -- Wrapping as None\n", + "2021-09-12 09:49:02,268\tINFO rollout_worker.py:990 -- Built policy map: {'default_policy': }\n", + "2021-09-12 09:49:02,268\tINFO rollout_worker.py:991 -- Built preprocessor map: {'default_policy': }\n", + "2021-09-12 09:49:02,268\tDEBUG rollout_worker.py:415 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)\n", + "2021-09-12 09:49:02,268\tINFO rollout_worker.py:446 -- Built filter map: {'default_policy': }\n", + "2021-09-12 09:49:02,271\tDEBUG rollout_worker.py:540 -- 
Created rollout worker with env (), policies {'default_policy': }\n", + "2021-09-12 09:49:02,446\tWARNING util.py:37 -- Install gputil for GPU system monitoring.\n", + "---------------> Created writer at logdir(): runs/Sep12_09-49-02_3c50868695a4\n", + "Training Epoch ~~~~~~~~~> 0\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:101: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Instructions for updating:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m non-resource variables are not supported in the long term\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:05,050\tWARNING compression.py:16 -- lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m pygame 2.0.0.dev4 (SDL 2.0.10, python 3.7.11)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Hello from the pygame community. 
https://www.pygame.org/contribute.html\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Env config file: configs/m2s_env.json\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m D2MS gParams: {'mainTaskRepeat': 6, 'observationRepeat': 2, 'frameRate': 3, 'imageDir': 'resources/dm2s', 'screen_scale': 0.1, 'summaries': True, 'enable_active_vision': 0, 'videoWidth': 800, 'videoHeight': 800, 'states': {'tutor-show': {'interval': 2000}, 'tutor-feedback': {'interval': 1000, 'flash_interval': 100}, 'inter': {'interval': 1200, 'flash_interval': 400}, 'play-show': {'interval': 5000}, 'play-feedback': {'interval': 1000, 'flash_interval': 100}}, 'gameTypes': ['shape'], 'txTypes': ['None'], 'colors': ['DB'], 'shapes': ['Barred_Ring', 'Triangle', 'Cross', 'Heart']}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib confmisc.c:767:(parse_card) cannot find card '0'\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib conf.c:4528:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib conf.c:5007:(snd_config_expand) Evaluate error: No such file or directory\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ALSA lib pcm.c:2495:(snd_pcm_open_noupdate) Unknown PCM default\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m =======================> CONFIG IS: {'gaze': {'dims': 1000}, 'mtl_max_length': 6, 'obs_keys': {'visual': ['full']}, 'peripheral': {'load': None, 'retina': {'f_size': 7, 
'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'pfc_output_delay_size': 1, 'full': {'load': 'checkpoints/full.pt', 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6, 'summaries': False}, 'cortex': {'filters': 100, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'sc': {}, 'mtl': {'mtl_max_length': 1}, 'fovea': {'load': None, 'retina': {'f_size': 7, 'f_sigma': 2.0, 'f_k': 1.6}, 'cortex': {'filters': 64, 'kernel_size': 4, 'stride': 2, 'use_bias': True, 'use_tied_weights': True, 'use_lifetime_sparsity': True, 'encoder_padding': 0, 'decoder_padding': 0, 'encoder_nonlinearity': 'leaky_relu', 'decoder_nonlinearity': 'sigmoid', 'sparsity': 5, 'sparsity_output_factor': 1.0}}, 'pfc': {}, 'mtl_input_delay_size': 1}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m >>>>>>>>>>>>>>>>>> full visual_cortex_input_shape: [-1, 3, 80, 80]\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m >>>>>>>>>>>>>>>>>> full retina_output_shape: [-1, 6, 74, 74]\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m >>>>>>>>>>>>>>>>>> full visual_cortex_output_shape: [-1, 100, 36, 36]\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Loading parameters from checkpoint: checkpoints/full.pt\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:06,395\tDEBUG rollout_worker.py:957 -- Creating policy for default_policy\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:06,395\tDEBUG preprocessors.py:225 -- Creating sub-preprocessor for Box(100, 36, 36)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 
09:49:06,396\tDEBUG catalog.py:471 -- Created preprocessor : Dict(full:Box(100, 36, 36)) -> (129600,)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:06,399\tINFO catalog.py:314 -- Wrapping as None\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,317\tDEBUG rollout_worker.py:415 -- Creating policy evaluation worker 1 on CPU (please ignore any CUDA init errors)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,319\tDEBUG rollout_worker.py:540 -- Created rollout worker with env (), policies {'default_policy': }\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m /usr/local/lib/python3.7/dist-packages/ray/rllib/utils/torch_ops.py:149: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. 
(Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:141.)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m tensor = torch.from_numpy(np.asarray(item))\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,375\tINFO rollout_worker.py:561 -- Generating sample batch of size 50\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,375\tDEBUG sampler.py:465 -- No episode horizon specified, assuming inf.\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m ---------------> Created writer at logdir(): runs/Sep12_09-49-07_3c50868695a4\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,549\tINFO sampler.py:510 -- Raw obs from env: { 0: { 'agent0': { 'full': np.ndarray((100, 36, 36), dtype=float32, min=-0.0, max=1.156, mean=0.029)}}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,549\tINFO sampler.py:511 -- Info return from env: {0: {'agent0': None}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,550\tINFO sampler.py:714 -- Preprocessed obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=1.156, mean=0.029)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,551\tINFO sampler.py:719 -- Filtered obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=1.156, mean=0.029)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,552\tINFO sampler.py:882 -- Inputs to compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'env_id': 0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'info': None,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((129600,), dtype=float64, min=-0.0, max=1.156, mean=0.029),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_action': np.ndarray((), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_reward': 0.0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rnn_state': []},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'PolicyEvalData'}]}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:07,572\tINFO sampler.py:926 -- Outputs of compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': ( np.ndarray((1,), dtype=int64, min=2.0, max=2.0, mean=2.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m [],\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'action_dist_inputs': np.ndarray((1, 3), dtype=float32, min=-0.0, max=0.002, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((1,), dtype=float32, min=-1.099, max=-1.099, mean=-1.099),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((1,), dtype=float32, min=0.333, max=0.333, mean=0.333),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((1,), dtype=float32, min=-0.129, max=-0.129, mean=-0.129)})}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:14,434\tINFO sample_batch_builder.py:204 -- Trajectory fragment after postprocess_trajectory():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'agent0': { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.005, max=0.002, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, 
min=-1.101, max=-1.097, mean=-1.098),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.333, max=0.334, mean=0.333),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-1.324, max=0.124, mean=-0.424),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=1826321691.0, max=1826321691.0, mean=1826321691.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'tutor-show', 'new_state': 'tutor-show', 'reward': 0.0, 'action': 2, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.08),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.1),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=49.0, mean=24.5),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-1.464, max=-0.004, mean=-0.507),\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=-0.153, max=0.051, mean=-0.082)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:14,470\tINFO rollout_worker.py:595 -- Completed sample batch:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.005, max=0.002, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.101, max=-1.097, mean=-1.098),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.333, max=0.334, mean=0.333),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-1.324, max=0.124, mean=-0.424),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=1826321691.0, max=1826321691.0, mean=1826321691.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'tutor-show', 'new_state': 'tutor-show', 'reward': 0.0, 'action': 2, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, 
mean=1.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.08),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.1),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=49.0, mean=24.5),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-1.464, max=-0.004, mean=-0.507),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=-0.153, max=0.051, mean=-0.082)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:14,488\tINFO rollout_worker.py:663 -- Compute gradients on:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.005, max=0.002, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.101, max=-1.097, mean=-1.098),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.333, max=0.334, mean=0.333),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-1.324, max=0.124, mean=-0.424),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), 
dtype=int64, min=1826321691.0, max=1826321691.0, mean=1826321691.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'tutor-show', 'new_state': 'tutor-show', 'reward': 0.0, 'action': 2, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.08),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.1),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=49.0, mean=24.5),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-1.464, max=-0.004, mean=-0.507),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=-0.153, max=0.051, mean=-0.082)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:49:14,776\tINFO rollout_worker.py:688 -- Compute grad info:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'batch_count': 50,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'learner_stats': { 'grad_gnorm': np.ndarray((), dtype=float32, min=21.191, max=21.191, mean=21.191),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'policy_entropy': 1.0986121892929077,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 
'policy_loss': -23.295452117919922,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_loss': 0.3713163733482361}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "2021-09-12 09:49:14,929\tINFO rollout_worker.py:701 -- Apply gradients:\n", + "\n", + "[ np.ndarray((3, 256), dtype=float32, min=-1.782, max=1.246, mean=0.0),\n", + " np.ndarray((3,), dtype=float32, min=-1.746, max=2.863, mean=0.0),\n", + " np.ndarray((256, 129600), dtype=float32, min=-0.062, max=0.056, mean=-0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.08, max=0.073, mean=-0.002),\n", + " np.ndarray((256, 256), dtype=float32, min=-0.028, max=0.024, mean=0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.068, max=0.081, mean=-0.0),\n", + " np.ndarray((1, 256), dtype=float32, min=-0.157, max=0.167, mean=0.003),\n", + " np.ndarray((1,), dtype=float32, min=0.424, max=0.424, mean=0.424)]\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/ray/rllib/policy/torch_policy.py:413: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. 
(Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:141.)\n", + " p.grad = torch.from_numpy(g).to(self.device)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + " 0: reward -6.00/ -6.00/ -6.00 len 1.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + " 1: reward -6.00/ -4.00/ -2.00 len 2.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 2: reward -6.00/ -3.56/ -0.67 len 3.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: 
shape\n", + " 3: reward -6.00/ -3.42/ 0.00 len 4.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 4: reward -6.00/ -3.13/ 0.40 len 5.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 5: reward -6.00/ -2.83/ 0.67 len 6.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + " 6: reward -6.00/ -2.62/ 0.86 len 7.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 
09:50:14,815\tINFO sampler.py:714 -- Preprocessed obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=0.829, mean=0.028)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:14,815\tINFO sampler.py:719 -- Filtered obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=0.829, mean=0.028)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:14,816\tINFO sampler.py:882 -- Inputs to compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'env_id': 0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'info': { 'action': 1,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'done': False,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'old_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'reward': 0.0},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((129600,), dtype=float64, min=-0.0, max=0.829, mean=0.028),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_action': 1,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_reward': 0.0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rnn_state': []},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'PolicyEvalData'}]}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:14,836\tINFO sampler.py:926 -- Outputs of compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': ( np.ndarray((1,), dtype=int64, min=1.0, max=1.0, mean=1.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m [],\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'action_dist_inputs': np.ndarray((1, 3), dtype=float32, min=-0.015, max=0.017, mean=0.003),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((1,), dtype=float32, min=-1.117, max=-1.117, 
mean=-1.117),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((1,), dtype=float32, min=0.327, max=0.327, mean=0.327),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((1,), dtype=float32, min=2.671, max=2.671, mean=2.671)})}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:14,961\tINFO sampler.py:510 -- Raw obs from env: { 0: { 'agent0': { 'full': np.ndarray((100, 36, 36), dtype=float32, min=-0.0, max=1.154, mean=0.029)}}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:14,961\tINFO sampler.py:511 -- Info return from env: { 0: { 'agent0': { 'action': 1,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'done': False,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_state': 'play-show',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'old_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'reward': 0.0}}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:15,499\tINFO sample_batch_builder.py:204 -- Trajectory fragment after postprocess_trajectory():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'agent0': { 'data': { 'action_dist_inputs': np.ndarray((41, 3), dtype=float32, min=-0.021, max=0.021, mean=0.003),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((41,), dtype=float32, min=-1.123, max=-1.079, mean=-1.102),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((41,), dtype=float32, min=0.325, max=0.34, mean=0.332),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((41,), dtype=int64, min=0.0, max=2.0, mean=0.951),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((41,), dtype=float32, min=-3.659, max=2.504, mean=-2.258),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((41,), dtype=int64, min=0.0, 
max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((41,), dtype=bool, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((41,), dtype=int64, min=1447594677.0, max=1447594677.0, mean=1447594677.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((41,), dtype=object, head={'old_state': 'tutor-show', 'new_state': 'tutor-show', 'reward': 0.0, 'action': 0, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((41, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((41, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((41,), dtype=int64, min=0.0, max=2.0, mean=0.927),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((41,), dtype=float32, min=-1.0, max=1.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((41,), dtype=float32, min=-1.0, max=1.0, mean=-0.024),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((41,), dtype=int64, min=0.0, max=40.0, mean=20.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((41,), dtype=int64, min=15.0, max=15.0, mean=15.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((41,), dtype=float32, min=-0.744, max=1.22, mean=0.128),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((41,), dtype=float32, min=-2.809, max=3.415, mean=2.386)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:15,538\tINFO rollout_worker.py:595 -- Completed sample batch:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.021, 
max=0.021, mean=0.003),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.123, max=-1.079, mean=-1.102),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.325, max=0.34, mean=0.332),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-3.659, max=2.504, mean=-2.256),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=1.0, mean=0.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=1447594677.0, max=1993923741.0, mean=1545933908.52),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'play-feedback', 'new_state': 'play-feedback', 'reward': 0.0, 'action': 1, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.98),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=1.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=1.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=59.0, mean=26.3),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=14.0, max=15.0, mean=14.82),\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-0.744, max=1.22, mean=0.173),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=-2.809, max=3.415, mean=2.429)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:15,559\tINFO rollout_worker.py:663 -- Compute gradients on:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.021, max=0.021, mean=0.003),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.123, max=-1.079, mean=-1.102),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.325, max=0.34, mean=0.332),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=1.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-3.659, max=2.504, mean=-2.256),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=1.0, mean=0.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=1447594677.0, max=1993923741.0, mean=1545933908.52),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'play-feedback', 'new_state': 'play-feedback', 'reward': 0.0, 'action': 1, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.156, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, 
max=1.156, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.98),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=1.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=1.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=59.0, mean=26.3),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=14.0, max=15.0, mean=14.82),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-0.744, max=1.22, mean=0.173),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=-2.809, max=3.415, mean=2.429)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:15,894\tINFO rollout_worker.py:688 -- Compute grad info:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'batch_count': 50,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'learner_stats': { 'grad_gnorm': np.ndarray((), dtype=float32, min=126.83, max=126.83, mean=126.83),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'policy_entropy': 1.0985008478164673,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'policy_loss': -124.19155883789062,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_loss': 7.331972599029541}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + " 7: reward -6.00/ -2.40/ 1.00 len 8.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:50:15,979\tINFO rollout_worker.py:561 -- Generating sample batch of size 50\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + " 8: reward -6.00/ -2.19/ 1.11 len 9.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "2021-09-12 09:50:23,692\tINFO rollout_worker.py:701 -- Apply gradients:\n", + "\n", + "[ np.ndarray((3, 256), dtype=float32, min=-3.27, max=3.203, mean=0.0),\n", + " np.ndarray((3,), dtype=float32, min=-3.27, max=2.821, mean=-0.0),\n", + " np.ndarray((256, 129600), dtype=float32, min=-0.022, max=0.021, mean=-0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.028, max=0.027, mean=-0.001),\n", + " np.ndarray((256, 256), dtype=float32, min=-0.075, max=0.075, mean=-0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.04, max=0.071, mean=0.001),\n", + " np.ndarray((1, 256), dtype=float32, min=-0.379, max=0.382, mean=-0.018),\n", + " np.ndarray((1,), dtype=float32, min=0.37, max=0.37, mean=0.37)]\n", + "\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 9: reward -6.00/ -2.06/ 1.20 len 10.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME 
TYPE: shape\n", + " 10: reward -6.00/ -1.96/ 1.27 len 11.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 11: reward -6.00/ -1.90/ 1.33 len 12.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + " 12: reward -6.00/ -1.86/ 1.38 len 13.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + " 13: reward -6.00/ -1.81/ 1.43 len 14.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 
Response: 1, Correct response: 2\n", + " 14: reward -6.00/ -1.76/ 1.47 len 15.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:15,993\tINFO sampler.py:714 -- Preprocessed obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=0.979, mean=0.029)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:15,993\tINFO sampler.py:719 -- Filtered obs: np.ndarray((129600,), dtype=float64, min=-0.0, max=0.979, mean=0.029)\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,077\tINFO sample_batch_builder.py:204 -- Trajectory fragment after postprocess_trajectory():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'agent0': { 'data': { 'action_dist_inputs': np.ndarray((32, 3), dtype=float32, min=-0.063, max=0.053, mean=0.004),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((32,), dtype=float32, min=-1.162, max=-1.051, mean=-1.081),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((32,), dtype=float32, min=0.313, max=0.349, mean=0.34),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((32,), dtype=int64, min=0.0, max=2.0, mean=0.875),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((32,), dtype=float32, min=-1.917, max=-0.802, mean=-1.133),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((32,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((32,), dtype=bool, min=0.0, max=0.0, mean=0.0),\n", 
+ "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((32,), dtype=int64, min=80939518.0, max=80939518.0, mean=80939518.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((32,), dtype=object, head={'old_state': 'tutor-show', 'new_state': 'tutor-show', 'reward': 0.0, 'action': 1, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((32, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.045),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((32, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.045),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((32,), dtype=int64, min=0.0, max=2.0, mean=0.812),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((32,), dtype=float32, min=-1.0, max=0.0, mean=-0.031),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((32,), dtype=float32, min=-1.0, max=0.0, mean=-0.062),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((32,), dtype=int64, min=0.0, max=31.0, mean=15.5),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((32,), dtype=int64, min=30.0, max=30.0, mean=30.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((32,), dtype=float32, min=-1.089, max=-0.003, mean=-0.198),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((32,), dtype=float32, min=0.356, max=1.087, mean=0.934)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,115\tINFO rollout_worker.py:595 -- Completed sample batch:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.063, max=0.053, mean=0.004),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.162, max=-1.051, 
mean=-1.091),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.313, max=0.349, mean=0.336),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.96),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-2.318, max=-0.802, mean=-1.293),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=1.0, mean=0.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=80939518.0, max=1095239142.0, mean=446087382.64),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'play-feedback', 'new_state': 'play-feedback', 'reward': 0.0, 'action': 1, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.94),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.08),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.1),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=55.0, mean=26.66),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=29.0, max=30.0, mean=29.64),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-1.435, max=0.0, mean=-0.368),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 
'vf_preds': np.ndarray((50,), dtype=float32, min=0.356, max=1.087, mean=0.925)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,134\tINFO rollout_worker.py:663 -- Compute gradients on:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'data': { 'action_dist_inputs': np.ndarray((50, 3), dtype=float32, min=-0.063, max=0.053, mean=0.004),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((50,), dtype=float32, min=-1.162, max=-1.051, mean=-1.091),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((50,), dtype=float32, min=0.313, max=0.349, mean=0.336),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.96),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'advantages': np.ndarray((50,), dtype=float32, min=-2.318, max=-0.802, mean=-1.293),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'agent_index': np.ndarray((50,), dtype=int64, min=0.0, max=0.0, mean=0.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'dones': np.ndarray((50,), dtype=bool, min=0.0, max=1.0, mean=0.02),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'eps_id': np.ndarray((50,), dtype=int64, min=80939518.0, max=1095239142.0, mean=446087382.64),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'infos': np.ndarray((50,), dtype=object, head={'old_state': 'play-feedback', 'new_state': 'play-feedback', 'reward': 0.0, 'action': 1, 'done': False}),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.041),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((50, 129600), dtype=float32, min=-0.0, max=1.167, mean=0.039),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_actions': np.ndarray((50,), dtype=int64, min=0.0, max=2.0, mean=0.94),\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.08),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rewards': np.ndarray((50,), dtype=float32, min=-1.0, max=0.0, mean=-0.1),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 't': np.ndarray((50,), dtype=int64, min=0.0, max=55.0, mean=26.66),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'unroll_id': np.ndarray((50,), dtype=int64, min=29.0, max=30.0, mean=29.64),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'value_targets': np.ndarray((50,), dtype=float32, min=-1.435, max=0.0, mean=-0.368),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((50,), dtype=float32, min=0.356, max=1.087, mean=0.925)},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'SampleBatch'}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,465\tINFO rollout_worker.py:688 -- Compute grad info:\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'batch_count': 50,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'learner_stats': { 'grad_gnorm': np.ndarray((), dtype=float32, min=50.009, max=50.009, mean=50.009),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'policy_entropy': 1.097775936126709,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'policy_loss': -70.79490661621094,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_loss': 1.809158205986023}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + " 15: reward -6.00/ -1.73/ 1.50 len 16.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,551\tINFO rollout_worker.py:561 -- Generating sample batch of size 50\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,554\tINFO sampler.py:882 -- Inputs to compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',\n", + 
"\u001b[2m\u001b[36m(pid=342)\u001b[0m 'env_id': 0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'info': { 'action': 2,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'done': False,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'old_state': 'play-show',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'reward': -1.0},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'obs': np.ndarray((129600,), dtype=float64, min=-0.0, max=0.979, mean=0.029),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_action': 2,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'prev_reward': -1.0,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'rnn_state': []},\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'type': 'PolicyEvalData'}]}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,587\tINFO sampler.py:926 -- Outputs of compute_actions():\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'default_policy': ( np.ndarray((1,), dtype=int64, min=2.0, max=2.0, mean=2.0),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m [],\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m { 'action_dist_inputs': np.ndarray((1, 3), dtype=float32, min=-0.051, max=0.052, mean=0.004),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_logp': np.ndarray((1,), dtype=float32, min=-1.092, max=-1.092, mean=-1.092),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'action_prob': np.ndarray((1,), dtype=float32, min=0.336, max=0.336, mean=0.336),\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'vf_preds': np.ndarray((1,), dtype=float32, min=0.515, max=0.515, mean=0.515)})}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m \n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 2021-09-12 09:51:16,826\tINFO sampler.py:510 -- Raw obs from env: { 0: { 'agent0': { 'full': np.ndarray((100, 36, 36), dtype=float32, min=-0.0, max=0.964, mean=0.028)}}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 
2021-09-12 09:51:16,827\tINFO sampler.py:511 -- Info return from env: { 0: { 'agent0': { 'action': 2,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'done': False,\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'new_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'old_state': 'play-feedback',\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m 'reward': 0.0}}}\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 16: reward -6.00/ -1.71/ 1.53 len 17.00\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 2, Correct response: 2\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m Response: 1, Correct response: 1\n", + "\u001b[2m\u001b[36m(pid=342)\u001b[0m GAME TYPE: shape\n", + " 17: reward -6.00/ -1.67/ 1.67 len 18.00\n", + "2021-09-12 09:51:31,983\tINFO rollout_worker.py:701 -- Apply gradients:\n", + "\n", + "[ np.ndarray((3, 256), dtype=float32, min=-3.243, max=3.198, mean=0.0),\n", + " np.ndarray((3,), dtype=float32, min=-1.818, max=3.256, mean=-0.0),\n", + " np.ndarray((256, 129600), dtype=float32, min=-0.013, max=0.012, mean=0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.015, max=0.014, mean=0.0),\n", + " np.ndarray((256, 256), dtype=float32, min=-0.037, max=0.037, mean=-0.0),\n", + " np.ndarray((256,), dtype=float32, min=-0.029, max=0.037, mean=-0.0),\n", + " np.ndarray((1, 256), dtype=float32, min=-0.273, max=0.277, 
mean=0.012),\n", + " np.ndarray((1,), dtype=float32, min=-0.278, max=-0.278, mean=-0.278)]\n", + "\n", + "Traceback (most recent call last):\n", + " File \"train_agent.py\", line 187, in \n", + " result = agent.train() # Runs a whole Episode, which includes several tasks and a tutoring phase\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/agents/trainer.py\", line 508, in train\n", + " result = Trainable.train(self)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/tune/trainable.py\", line 332, in train\n", + " result = self.step()\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/agents/trainer_template.py\", line 110, in step\n", + " res = next(self.train_exec_impl)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 758, in __next__\n", + " return next(self.built_iterator)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 785, in apply_foreach\n", + " for item in it:\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 845, in apply_filter\n", + " for item in it:\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 845, in apply_filter\n", + " for item in it:\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 793, in apply_foreach\n", + " result = fn(item)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/execution/train_ops.py\", line 301, in __call__\n", + " self.workers.local_worker().apply_gradients(gradients)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/rollout_worker.py\", line 717, in apply_gradients\n", + " return self.policy_map[DEFAULT_POLICY_ID].apply_gradients(grads)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/policy/torch_policy_template.py\", line 200, in apply_gradients\n", + " TorchPolicy.apply_gradients(self, gradients)\n", + " File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/policy/torch_policy.py\", 
line 415, in apply_gradients\n", + " self._optimizers[0].step()\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/autograd/grad_mode.py\", line 15, in decorate_context\n", + " return func(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/optim/adam.py\", line 100, in step\n", + " exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)\n", + "KeyboardInterrupt\n", + "^C\n" + ] + } + ] }, { "cell_type": "markdown", @@ -291,20 +1107,22 @@ "height": 821 }, "id": "b_4Ck7cblmA9", - "outputId": "6b790b76-e40c-4520-ae27-dcb24fd7fef1" + "outputId": "4340422a-a343-4744-db9d-347426b8942d" }, "source": [ "%reload_ext tensorboard\n", "%tensorboard --logdir /content/WM_Hackathon/runs" ], - "execution_count": null, + "execution_count": 7, "outputs": [ { + "output_type": "display_data", "data": { "application/javascript": [ "\n", " (async () => {\n", - " const url = await google.colab.kernel.proxyPort(6007, {\"cache\": true});\n", + " const url = new URL(await google.colab.kernel.proxyPort(6006, {'cache': true}));\n", + " url.searchParams.set('tensorboardColab', 'true');\n", " const iframe = document.createElement('iframe');\n", " iframe.src = url;\n", " iframe.setAttribute('width', '100%');\n", @@ -318,8 +1136,7 @@ "" ] }, - "metadata": {}, - "output_type": "display_data" + "metadata": {} } ] }