From 953e41a2211a93990e544d779d4a94b3ef1e3ec2 Mon Sep 17 00:00:00 2001 From: Gregory Horvath Date: Fri, 25 Oct 2024 19:00:38 -0400 Subject: [PATCH 1/3] add standard python .gitignore to avoid checking in venv --- .gitignore | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ce03cf6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ From 4b9567575f5de5b86f5730709f23386f3c107679 Mon Sep 17 00:00:00 2001 From: Gregory Horvath Date: Fri, 25 Oct 2024 19:49:09 -0400 Subject: [PATCH 2/3] add ignores for various directories used in this demo --- .gitignore | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.gitignore b/.gitignore index ce03cf6..8af9042 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,14 @@ +############################################################################### +# Repo specific .gitignore +############################################################################### +llama.cpp/ +original_model/ +quantized_model/ + + +############################################################################### +# Standard .gitignore file for Python projects +############################################################################### # Byte-compiled / optimized / DLL files __pycache__/ From 1dc6c90a044a7667ff6f695d800c984918148c61 Mon Sep 17 00:00:00 2001 From: Gregory Horvath Date: Fri, 25 Oct 2024 19:51:45 -0400 Subject: [PATCH 3/3] fix typos, pin llama.cpp, fix some drift in names --- QuantizeLLMs.ipynb | 429 ++++++++++++++++++++++----------------------- 1 file changed, 205 insertions(+), 224 deletions(-) diff --git a/QuantizeLLMs.ipynb b/QuantizeLLMs.ipynb index a5acf46..b7bd45f 100644 --- a/QuantizeLLMs.ipynb +++ b/QuantizeLLMs.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -18,7 +18,7 @@ }, "outputs": [], "source": [ - "!!git clone https://github.com/ggerganov/llama.cpp" + "!!git clone --branch b3978 https://github.com/ggerganov/llama.cpp.git" ] }, { @@ -29,188 +29,181 @@ }, "outputs": [], "source": [ - "!cd llama.cpp && LLAMA_CUBLAS=1 make && pip install -r requirements.txt" + "!cd llama.cpp && GGML_CUDA=1 make && pip install -r requirements.txt" ] }, { "cell_type": "code", - "source": [ - "from huggingface_hub import snapshot_download" - ], + "execution_count": null, "metadata": { "id": "HF6yYzNZtd19" }, - "execution_count": 3, - "outputs": [] + "outputs": [], + "source": [ + "from huggingface_hub import snapshot_download" + ] }, { "cell_type": "code", - "source": [ - "model_name = \"google/gemma-2b-it\"" - ], + "execution_count": null, "metadata": { "id": "vw5v0tF_t6qX" }, - "execution_count": 4, - "outputs": [] + "outputs": [], + "source": [ + "model_name = \"google/gemma-2b-it\"" + ] }, { "cell_type": "code", - "source": [ - "methods = [\"q4_k_m\"]" - ], + "execution_count": null, "metadata": { "id": "aq5DxDOiubm-" }, - "execution_count": 5, - "outputs": [] + "outputs": [], + "source": [ + "methods = [\"q4_k_m\"]" + ] }, { "cell_type": "code", - "source": [ - "base_model = \"./orignal_model/\"" - ], + "execution_count": null, "metadata": { "id": "yR27LvA_uwYm" }, - "execution_count": 6, - "outputs": [] + "outputs": [], + "source": [ + "base_model = \"./original_model/\"" + ] }, { "cell_type": "code", - "source": [ - "quantized_path = \"./quantized_model/\"" - ], + "execution_count": null, "metadata": { "id": "x4Ciwal7u5jx" }, - "execution_count": 7, - "outputs": [] + "outputs": [], + "source": [ + "quantized_path = \"./quantized_model/\"" + ] }, { "cell_type": "code", - "source": [ - "snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)" - ], + "execution_count": null, "metadata": { "id": "mUBryZ1wvSF6" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)" + ] }, { "cell_type": "code", - "source": [ - "orignal_model = quantized_path+\"/fp16.gguf\"" - ], + "execution_count": null, "metadata": { "id": "5oFgQ1Pgw7eD" }, - "execution_count": 9, - "outputs": [] + "outputs": [], + "source": [ + "original_model = quantized_path+\"/fp16.gguf\"" + ] }, { "cell_type": "code", - "source": [ - "!mkdir ./quantized_model/" - ], + "execution_count": null, "metadata": { "id": "K9ANvmOtxLEg" }, - "execution_count": 10, - "outputs": [] + "outputs": [], + "source": [ + "!mkdir ./quantized_model/" + ] }, { "cell_type": "code", - "source": [ - "!python llama.cpp/convert-hf-to-gguf.py ./orignal_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf" - ], + "execution_count": null, "metadata": { "id": "ue-353Tj2ZFv" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!python llama.cpp/convert_hf_to_gguf.py ./original_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf" + ] }, { "cell_type": "code", - "source": [ - "import os" - ], + "execution_count": null, "metadata": { "id": "c7XTrFZh25ww" }, - "execution_count": 14, - "outputs": [] + "outputs": [], + "source": [ + "import os" + ] }, { "cell_type": "code", - "source": [ - "for m in methods:\n", - " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n", - " os.system(\"./llama.cpp/quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m)" - ], + "execution_count": null, "metadata": { "id": "vGU2XSuH4W4I" }, - "execution_count": 15, - "outputs": [] + "outputs": [], + "source": [ + "for m in methods:\n", + " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n", + " os.system(\"./llama.cpp/llama-quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m)" + ] }, { "cell_type": "code", - "source": [ - "! ./llama.cpp/main -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt" - ], + "execution_count": null, "metadata": { "id": "oML4taGB6I6l" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "! ./llama.cpp/llama-cli -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt" + ] }, { "cell_type": "code", - "source": [ - "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file" - ], + "execution_count": null, "metadata": { "id": "C3gXkTXLAetI" }, - "execution_count": 18, - "outputs": [] + "outputs": [], + "source": [ + "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kiMSy2PuAwG2" + }, + "outputs": [], "source": [ "model_path = \"./quantized_model/Q4_K_M.gguf\"\n", "repo_name = \"gemma-2b-it-GGUF-quantized\"\n", "repo_url = create_repo(repo_name, private=False)" - ], - "metadata": { - "id": "kiMSy2PuAwG2" - }, - "execution_count": 19, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "api = HfApi()" - ], + "execution_count": null, "metadata": { "id": "gOfEfRgbB47f" }, - "execution_count": 20, - "outputs": [] + "outputs": [], + "source": [ + "api = HfApi()" + ] }, { "cell_type": "code", - "source": [ - "api.upload_file(\n", - " path_or_fileobj=model_path,\n", - " path_in_repo=\"Q4_K_M.gguf\",\n", - " repo_id= \"yourusername/gemma-2b-it-GGUF-quantized\",\n", - " repo_type=\"model\",\n", - ")" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -232,59 +225,47 @@ "id": "DVd1C5IxB_eG", "outputId": "e82bac0b-8ff8-4753-f451-787114ee5d94" }, - "execution_count": 21, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Q4_K_M.gguf: 0%| | 0.00/1.63G [00:00/gemma-2b-it-GGUF-quantized\",\n", + " repo_type=\"model\",\n", + ")" ] } ], "metadata": { "accelerator": "GPU", "colab": { - "gpuType": "T4", - "provenance": [], "authorship_tag": "ABX9TyM0nMUdZgPDDhXk2W8dYuK5", - "include_colab_link": true + "gpuType": "T4", + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "142c94d6f7b54cdca12596df1892e09d": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -303,76 +284,10 @@ "layout": "IPY_MODEL_b1e1ba1085734f3a9fbd242c8a6080a9" } }, - "34f4c1e6cadf49b0ba0dcfcdd47b43bb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b5c12306a3946148af361aaf9735645", - "placeholder": "​", - "style": "IPY_MODEL_ff4431bd2c2b4116832bb71354bb6d35", - "value": "Q4_K_M.gguf: 100%" - } - }, - "a978ccabd6234580a5a7af7bb6ae8555": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_65525d3928244f2c9c7a6119187094ba", - "max": 1630263008, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c61beea272824cd58303315783fe8da0", - "value": 1630263008 - } - }, - "99bdbf2d528048c1a2c6b4ce9e984603": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_aa9f16e7642e430589494c2c20c46359", - "placeholder": "​", - "style": "IPY_MODEL_4a94205967d94274bae21112ad03c8e0", - "value": " 1.63G/1.63G [00:34<00:00, 53.8MB/s]" - } - }, - "b1e1ba1085734f3a9fbd242c8a6080a9": { + "1b5c12306a3946148af361aaf9735645": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -421,10 +336,46 @@ "width": null } }, - "1b5c12306a3946148af361aaf9735645": { + "34f4c1e6cadf49b0ba0dcfcdd47b43bb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b5c12306a3946148af361aaf9735645", + "placeholder": "​", + "style": "IPY_MODEL_ff4431bd2c2b4116832bb71354bb6d35", + "value": "Q4_K_M.gguf: 100%" + } + }, + "4a94205967d94274bae21112ad03c8e0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "65525d3928244f2c9c7a6119187094ba": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -473,25 +424,55 @@ "width": null } }, - "ff4431bd2c2b4116832bb71354bb6d35": { + "99bdbf2d528048c1a2c6b4ce9e984603": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aa9f16e7642e430589494c2c20c46359", + "placeholder": "​", + "style": "IPY_MODEL_4a94205967d94274bae21112ad03c8e0", + "value": " 1.63G/1.63G [00:34<00:00, 53.8MB/s]" } }, - "65525d3928244f2c9c7a6119187094ba": { + "a978ccabd6234580a5a7af7bb6ae8555": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_65525d3928244f2c9c7a6119187094ba", + "max": 1630263008, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c61beea272824cd58303315783fe8da0", + "value": 1630263008 + } + }, + "aa9f16e7642e430589494c2c20c46359": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -540,26 +521,10 @@ "width": null } }, - "c61beea272824cd58303315783fe8da0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "aa9f16e7642e430589494c2c20c46359": { + "b1e1ba1085734f3a9fbd242c8a6080a9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -608,10 +573,26 @@ "width": null } }, - "4a94205967d94274bae21112ad03c8e0": { + "c61beea272824cd58303315783fe8da0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ff4431bd2c2b4116832bb71354bb6d35": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -628,4 +609,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +}