diff --git a/QuantizeLLMs.ipynb b/QuantizeLLMs.ipynb
index a5acf46..1d531b8 100644
--- a/QuantizeLLMs.ipynb
+++ b/QuantizeLLMs.ipynb
@@ -3,11 +3,11 @@
{
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
- "
"
+ "
"
]
},
{
@@ -29,188 +29,190 @@
},
"outputs": [],
"source": [
- "!cd llama.cpp && LLAMA_CUBLAS=1 make && pip install -r requirements.txt"
+ "!cd llama.cpp && cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release"
]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
- "from huggingface_hub import snapshot_download"
- ],
+ "%pip install -r llama.cpp/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {
"id": "HF6yYzNZtd19"
},
- "execution_count": 3,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import snapshot_download"
+ ]
},
{
"cell_type": "code",
- "source": [
- "model_name = \"google/gemma-2b-it\""
- ],
+ "execution_count": null,
"metadata": {
"id": "vw5v0tF_t6qX"
},
- "execution_count": 4,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "model_name = \"google/gemma-2b-it\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "methods = [\"q4_k_m\"]"
- ],
+ "execution_count": null,
"metadata": {
"id": "aq5DxDOiubm-"
},
- "execution_count": 5,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "methods = [\"q4_k_m\"]"
+ ]
},
{
"cell_type": "code",
- "source": [
- "base_model = \"./orignal_model/\""
- ],
+ "execution_count": null,
"metadata": {
"id": "yR27LvA_uwYm"
},
- "execution_count": 6,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "base_model = \"./orignal_model/\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "quantized_path = \"./quantized_model/\""
- ],
+ "execution_count": null,
"metadata": {
"id": "x4Ciwal7u5jx"
},
- "execution_count": 7,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "quantized_path = \"./quantized_model/\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)"
- ],
+ "execution_count": null,
"metadata": {
"id": "mUBryZ1wvSF6"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "snapshot_download(repo_id=model_name, local_dir=base_model)"
+ ]
},
{
"cell_type": "code",
- "source": [
- "orignal_model = quantized_path+\"/fp16.gguf\""
- ],
+ "execution_count": null,
"metadata": {
"id": "5oFgQ1Pgw7eD"
},
- "execution_count": 9,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "orignal_model = quantized_path+\"/FP16.gguf\""
+ ]
},
{
"cell_type": "code",
- "source": [
- "!mkdir ./quantized_model/"
- ],
+ "execution_count": null,
"metadata": {
"id": "K9ANvmOtxLEg"
},
- "execution_count": 10,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!mkdir -p ./quantized_model/"
+ ]
},
{
"cell_type": "code",
- "source": [
- "!python llama.cpp/convert-hf-to-gguf.py ./orignal_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf"
- ],
+ "execution_count": null,
"metadata": {
"id": "ue-353Tj2ZFv"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "!python llama.cpp/convert_hf_to_gguf.py ./orignal_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf"
+ ]
},
{
"cell_type": "code",
- "source": [
- "import os"
- ],
+ "execution_count": null,
"metadata": {
"id": "c7XTrFZh25ww"
},
- "execution_count": 14,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "import os"
+ ]
},
{
"cell_type": "code",
- "source": [
- "for m in methods:\n",
- " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n",
- " os.system(\"./llama.cpp/quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m)"
- ],
+ "execution_count": null,
"metadata": {
"id": "vGU2XSuH4W4I"
},
- "execution_count": 15,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "for m in methods:\n",
+ " qtype = f\"{quantized_path}/{m.upper()}.gguf\"\n",
+ " if os.system(\"./llama.cpp/build/bin/llama-quantize \"+quantized_path+\"/FP16.gguf \"+qtype+\" \"+m) != 0: raise RuntimeError(f\"llama-quantize failed for method {m}\")"
+ ]
},
{
"cell_type": "code",
- "source": [
- "! ./llama.cpp/main -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt"
- ],
+ "execution_count": null,
"metadata": {
"id": "oML4taGB6I6l"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "! ./llama.cpp/build/bin/llama-cli -m ./quantized_model/Q4_K_M.gguf -n 90 --repeat_penalty 1.0 --color -i -r \"User: \" -f llama.cpp/prompts/chat-with-bob.txt"
+ ]
},
{
"cell_type": "code",
- "source": [
- "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file"
- ],
+ "execution_count": null,
"metadata": {
"id": "C3gXkTXLAetI"
},
- "execution_count": 18,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import HfApi, HfFolder, create_repo, upload_file"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "kiMSy2PuAwG2"
+ },
+ "outputs": [],
"source": [
"model_path = \"./quantized_model/Q4_K_M.gguf\"\n",
"repo_name = \"gemma-2b-it-GGUF-quantized\"\n",
"repo_url = create_repo(repo_name, private=False)"
- ],
- "metadata": {
- "id": "kiMSy2PuAwG2"
- },
- "execution_count": 19,
- "outputs": []
+ ]
},
{
"cell_type": "code",
- "source": [
- "api = HfApi()"
- ],
+ "execution_count": null,
"metadata": {
"id": "gOfEfRgbB47f"
},
- "execution_count": 20,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "api = HfApi()"
+ ]
},
{
"cell_type": "code",
- "source": [
- "api.upload_file(\n",
- " path_or_fileobj=model_path,\n",
- " path_in_repo=\"Q4_K_M.gguf\",\n",
- " repo_id= \"yourusername/gemma-2b-it-GGUF-quantized\",\n",
- " repo_type=\"model\",\n",
- ")"
- ],
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -232,400 +234,43 @@
"id": "DVd1C5IxB_eG",
"outputId": "e82bac0b-8ff8-4753-f451-787114ee5d94"
},
- "execution_count": 21,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Q4_K_M.gguf: 0%| | 0.00/1.63G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "142c94d6f7b54cdca12596df1892e09d"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "CommitInfo(commit_url='https://huggingface.co/mickymultani/gemma-2b-it-GGUF-quantized/commit/92903f66bb4e303c20ff1e6262f77370172cd0ac', commit_message='Upload Q4_K_M.gguf with huggingface_hub', commit_description='', oid='92903f66bb4e303c20ff1e6262f77370172cd0ac', pr_url=None, pr_revision=None, pr_num=None)"
- ],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "string"
- }
- },
- "metadata": {},
- "execution_count": 21
- }
+ "outputs": [],
+ "source": [
+ "api.upload_file(\n",
+ " path_or_fileobj=model_path,\n",
+ " path_in_repo=\"Q4_K_M.gguf\",\n",
+ " repo_id=repo_url.repo_id,\n",
+ " repo_type=\"model\",\n",
+ ")"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
- "gpuType": "T4",
- "provenance": [],
"authorship_tag": "ABX9TyM0nMUdZgPDDhXk2W8dYuK5",
- "include_colab_link": true
+ "gpuType": "T4",
+ "include_colab_link": true,
+ "provenance": []
},
"kernelspec": {
"display_name": "Python 3",
+ "language": "python",
"name": "python3"
},
"language_info": {
- "name": "python"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "142c94d6f7b54cdca12596df1892e09d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_34f4c1e6cadf49b0ba0dcfcdd47b43bb",
- "IPY_MODEL_a978ccabd6234580a5a7af7bb6ae8555",
- "IPY_MODEL_99bdbf2d528048c1a2c6b4ce9e984603"
- ],
- "layout": "IPY_MODEL_b1e1ba1085734f3a9fbd242c8a6080a9"
- }
- },
- "34f4c1e6cadf49b0ba0dcfcdd47b43bb": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1b5c12306a3946148af361aaf9735645",
- "placeholder": "",
- "style": "IPY_MODEL_ff4431bd2c2b4116832bb71354bb6d35",
- "value": "Q4_K_M.gguf: 100%"
- }
- },
- "a978ccabd6234580a5a7af7bb6ae8555": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_65525d3928244f2c9c7a6119187094ba",
- "max": 1630263008,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_c61beea272824cd58303315783fe8da0",
- "value": 1630263008
- }
- },
- "99bdbf2d528048c1a2c6b4ce9e984603": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_aa9f16e7642e430589494c2c20c46359",
- "placeholder": "",
- "style": "IPY_MODEL_4a94205967d94274bae21112ad03c8e0",
- "value": " 1.63G/1.63G [00:34<00:00, 53.8MB/s]"
- }
- },
- "b1e1ba1085734f3a9fbd242c8a6080a9": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1b5c12306a3946148af361aaf9735645": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ff4431bd2c2b4116832bb71354bb6d35": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "65525d3928244f2c9c7a6119187094ba": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c61beea272824cd58303315783fe8da0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "aa9f16e7642e430589494c2c20c46359": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4a94205967d94274bae21112ad03c8e0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.11"
}
},
"nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file
+ "nbformat_minor": 4
+}