diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8af9042
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,174 @@
+###############################################################################
+# Repo specific .gitignore
+###############################################################################
+llama.cpp/
+original_model/
+quantized_model/
+
+
+###############################################################################
+# Standard .gitignore file for Python projects
+###############################################################################
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/QuantizeLLMs.ipynb b/QuantizeLLMs.ipynb
index a5acf46..b7bd45f 100644
--- a/QuantizeLLMs.ipynb
+++ b/QuantizeLLMs.ipynb
@@ -3,8 +3,8 @@
{
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
"
"
@@ -18,7 +18,7 @@
},
"outputs": [],
"source": [
- "!!git clone https://github.com/ggerganov/llama.cpp"
+ "!!git clone --branch b3978 https://github.com/ggerganov/llama.cpp.git"
]
},
{
@@ -29,188 +29,181 @@
},
"outputs": [],
"source": [
- "!cd llama.cpp && LLAMA_CUBLAS=1 make && pip install -r requirements.txt"
+ "!cd llama.cpp && GGML_CUDA=1 make && pip install -r requirements.txt"
]
},
{
"cell_type": "code",
- "source": [
- "from huggingface_hub import snapshot_download"
- ],
+ "execution_count": null,
"metadata": {
"id": "HF6yYzNZtd19"
},
- "execution_count": 3,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import snapshot_download"
+ ]
},
{
"cell_type": "code",
- "source": [
- "model_name = \"google/gemma-2b-it\""
- ],
+ "execution_count": null,
"metadata": {
"id": "vw5v0tF_t6qX"
},
- "execution_count": 4,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "model_name = \"google/gemma-2b-it\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "methods = [\"q4_k_m\"]"
- ],
+ "execution_count": null,
"metadata": {
"id": "aq5DxDOiubm-"
},
- "execution_count": 5,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "methods = [\"q4_k_m\"]"
+ ]
},
{
"cell_type": "code",
- "source": [
- "base_model = \"./orignal_model/\""
- ],
+ "execution_count": null,
"metadata": {
"id": "yR27LvA_uwYm"
},
- "execution_count": 6,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "base_model = \"./original_model/\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "quantized_path = \"./quantized_model/\""
- ],
+ "execution_count": null,
"metadata": {
"id": "x4Ciwal7u5jx"
},
- "execution_count": 7,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "quantized_path = \"./quantized_model/\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)"
- ],
+ "execution_count": null,
"metadata": {
"id": "mUBryZ1wvSF6"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)"
+ ]
},
{
"cell_type": "code",
- "source": [
- "orignal_model = quantized_path+\"/fp16.gguf\""
- ],
+ "execution_count": null,
"metadata": {
"id": "5oFgQ1Pgw7eD"
},
- "execution_count": 9,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "original_model = quantized_path+\"/FP16.gguf\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "!mkdir ./quantized_model/"
- ],
+ "execution_count": null,
"metadata": {
"id": "K9ANvmOtxLEg"
},
- "execution_count": 10,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!mkdir ./quantized_model/"
+ ]
},
{
"cell_type": "code",
- "source": [
- "!python llama.cpp/convert-hf-to-gguf.py ./orignal_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf"
- ],
+ "execution_count": null,
"metadata": {
"id": "ue-353Tj2ZFv"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!python llama.cpp/convert_hf_to_gguf.py ./original_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf"
+ ]
},
{
"cell_type": "code",
- "source": [
- "import os"
- ],
+ "execution_count": null,
"metadata": {
"id": "c7XTrFZh25ww"
},
- "execution_count": 14,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "import os"
+ ]
},
{
"cell_type": "code",
- "source": [
- "for m in methods:\n",
- " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n",
- " os.system(\"./llama.cpp/quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m)"
- ],
+ "execution_count": null,
"metadata": {
"id": "vGU2XSuH4W4I"
},
- "execution_count": 15,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "for m in methods:\n",
+ " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n",
+ " os.system(\"./llama.cpp/llama-quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m)"
+ ]
},
{
"cell_type": "code",
- "source": [
- "! ./llama.cpp/main -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt"
- ],
+ "execution_count": null,
"metadata": {
"id": "oML4taGB6I6l"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "! ./llama.cpp/llama-cli -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt"
+ ]
},
{
"cell_type": "code",
- "source": [
- "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file"
- ],
+ "execution_count": null,
"metadata": {
"id": "C3gXkTXLAetI"
},
- "execution_count": 18,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "kiMSy2PuAwG2"
+ },
+ "outputs": [],
"source": [
"model_path = \"./quantized_model/Q4_K_M.gguf\"\n",
"repo_name = \"gemma-2b-it-GGUF-quantized\"\n",
"repo_url = create_repo(repo_name, private=False)"
- ],
- "metadata": {
- "id": "kiMSy2PuAwG2"
- },
- "execution_count": 19,
- "outputs": []
+ ]
},
{
"cell_type": "code",
- "source": [
- "api = HfApi()"
- ],
+ "execution_count": null,
"metadata": {
"id": "gOfEfRgbB47f"
},
- "execution_count": 20,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "api = HfApi()"
+ ]
},
{
"cell_type": "code",
- "source": [
- "api.upload_file(\n",
- " path_or_fileobj=model_path,\n",
- " path_in_repo=\"Q4_K_M.gguf\",\n",
- " repo_id= \"yourusername/gemma-2b-it-GGUF-quantized\",\n",
- " repo_type=\"model\",\n",
- ")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -232,59 +225,47 @@
"id": "DVd1C5IxB_eG",
"outputId": "e82bac0b-8ff8-4753-f451-787114ee5d94"
},
- "execution_count": 21,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Q4_K_M.gguf: 0%| | 0.00/1.63G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "142c94d6f7b54cdca12596df1892e09d"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "CommitInfo(commit_url='https://huggingface.co/mickymultani/gemma-2b-it-GGUF-quantized/commit/92903f66bb4e303c20ff1e6262f77370172cd0ac', commit_message='Upload Q4_K_M.gguf with huggingface_hub', commit_description='', oid='92903f66bb4e303c20ff1e6262f77370172cd0ac', pr_url=None, pr_revision=None, pr_num=None)"
- ],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "string"
- }
- },
- "metadata": {},
- "execution_count": 21
- }
+ "outputs": [],
+ "source": [
+ "api.upload_file(\n",
+ " path_or_fileobj=model_path,\n",
+ " path_in_repo=\"Q4_K_M.gguf\",\n",
+ " repo_id= \"yourusername/gemma-2b-it-GGUF-quantized\",\n",
+ " repo_type=\"model\",\n",
+ ")"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
- "gpuType": "T4",
- "provenance": [],
"authorship_tag": "ABX9TyM0nMUdZgPDDhXk2W8dYuK5",
- "include_colab_link": true
+ "gpuType": "T4",
+ "include_colab_link": true,
+ "provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
- "name": "python"
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"142c94d6f7b54cdca12596df1892e09d": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -303,76 +284,10 @@
"layout": "IPY_MODEL_b1e1ba1085734f3a9fbd242c8a6080a9"
}
},
- "34f4c1e6cadf49b0ba0dcfcdd47b43bb": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1b5c12306a3946148af361aaf9735645",
- "placeholder": "",
- "style": "IPY_MODEL_ff4431bd2c2b4116832bb71354bb6d35",
- "value": "Q4_K_M.gguf: 100%"
- }
- },
- "a978ccabd6234580a5a7af7bb6ae8555": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_65525d3928244f2c9c7a6119187094ba",
- "max": 1630263008,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_c61beea272824cd58303315783fe8da0",
- "value": 1630263008
- }
- },
- "99bdbf2d528048c1a2c6b4ce9e984603": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_aa9f16e7642e430589494c2c20c46359",
- "placeholder": "",
- "style": "IPY_MODEL_4a94205967d94274bae21112ad03c8e0",
- "value": " 1.63G/1.63G [00:34<00:00, 53.8MB/s]"
- }
- },
- "b1e1ba1085734f3a9fbd242c8a6080a9": {
+ "1b5c12306a3946148af361aaf9735645": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -421,10 +336,46 @@
"width": null
}
},
- "1b5c12306a3946148af361aaf9735645": {
+ "34f4c1e6cadf49b0ba0dcfcdd47b43bb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1b5c12306a3946148af361aaf9735645",
+ "placeholder": "",
+ "style": "IPY_MODEL_ff4431bd2c2b4116832bb71354bb6d35",
+ "value": "Q4_K_M.gguf: 100%"
+ }
+ },
+ "4a94205967d94274bae21112ad03c8e0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "65525d3928244f2c9c7a6119187094ba": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -473,25 +424,55 @@
"width": null
}
},
- "ff4431bd2c2b4116832bb71354bb6d35": {
+ "99bdbf2d528048c1a2c6b4ce9e984603": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_aa9f16e7642e430589494c2c20c46359",
+ "placeholder": "",
+ "style": "IPY_MODEL_4a94205967d94274bae21112ad03c8e0",
+ "value": " 1.63G/1.63G [00:34<00:00, 53.8MB/s]"
}
},
- "65525d3928244f2c9c7a6119187094ba": {
+ "a978ccabd6234580a5a7af7bb6ae8555": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_65525d3928244f2c9c7a6119187094ba",
+ "max": 1630263008,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c61beea272824cd58303315783fe8da0",
+ "value": 1630263008
+ }
+ },
+ "aa9f16e7642e430589494c2c20c46359": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -540,26 +521,10 @@
"width": null
}
},
- "c61beea272824cd58303315783fe8da0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "aa9f16e7642e430589494c2c20c46359": {
+ "b1e1ba1085734f3a9fbd242c8a6080a9": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -608,10 +573,26 @@
"width": null
}
},
- "4a94205967d94274bae21112ad03c8e0": {
+ "c61beea272824cd58303315783fe8da0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "ff4431bd2c2b4116832bb71354bb6d35": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -628,4 +609,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}