Mirror of https://github.com/mlabonne/llm-course.git (synced 2026-02-12 14:32:46 +00:00)

Commit: remove local notebooks
Deleted: 4_bit_LLM_Quantization_with_GPTQ.ipynb (252 lines)
(Colab notebook: Python 3 kernel, GPU accelerator, T4.)

<a href="https://colab.research.google.com/github/mlabonne/llm-course/blob/main/4_bit_LLM_Quantization_with_GPTQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
# 4-bit LLM Quantization with GPTQ

> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).

Companion notebook to execute the code from the following article: https://mlabonne.github.io/blog/4bit_quantization/
```python
!BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers
```
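Note: `BUILD_CUDA_EXT=0` skips compiling auto-gptq's CUDA kernel extension at install time, which keeps the Colab install fast; the notebook relies on the Triton backend (`use_triton=True`) instead.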
```python
import random

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from datasets import load_dataset
import torch
from transformers import AutoTokenizer


# Define base model and output directory
model_id = "gpt2"
out_dir = model_id + "-GPTQ"
```
```python
# Load quantize config, model and tokenizer
quantize_config = BaseQuantizeConfig(
    bits=4,
    group_size=128,
    damp_percent=0.01,
    desc_act=False,
)
model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```
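With `bits=4` and `group_size=128`, each weight is stored in 4 bits and every group of 128 weights shares one quantization scale and zero-point. A back-of-the-envelope size estimate (a sketch; the ~124M parameter count for GPT-2 small and the per-group overhead are approximations, not from the notebook):

```python
# Rough size of the 4-bit quantized weights (approximate).
n_params = 124e6   # assumed parameter count of GPT-2 small
bits, group_size = 4, 128
weights_mb = n_params * bits / 8 / 1e6
# one fp16 scale plus one 4-bit zero-point per group of 128 weights
overhead_mb = n_params / group_size * (16 + 4) / 8 / 1e6
print(f"~{weights_mb + overhead_mb:.0f} MB")  # ~64 MB vs ~500 MB in fp32
```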
```python
# Load data and tokenize examples
n_samples = 1024
data = load_dataset("allenai/c4", data_files="en/c4-train.00001-of-01024.json.gz", split=f"train[:{n_samples*5}]")
tokenized_data = tokenizer("\n\n".join(data['text']), return_tensors='pt')

# Format tokenized examples
examples_ids = []
for _ in range(n_samples):
    i = random.randint(0, tokenized_data.input_ids.shape[1] - tokenizer.model_max_length - 1)
    j = i + tokenizer.model_max_length
    input_ids = tokenized_data.input_ids[:, i:j]
    attention_mask = torch.ones_like(input_ids)
    examples_ids.append({'input_ids': input_ids, 'attention_mask': attention_mask})
```

Output (stderr):

```
WARNING:datasets.builder:Found cached dataset json (/root/.cache/huggingface/datasets/allenai___json/allenai--c4-6e494e9c0ee1404e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
Token indices sequence length is longer than the specified maximum sequence length for this model (2441065 > 1024). Running this sequence through the model will result in indexing errors
```
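The "longer than the specified maximum sequence length" warning is expected here: the corpus is tokenized as one long sequence and then sliced into windows of `model_max_length` tokens. A quick sanity check one could run before quantizing (a sketch, not part of the original notebook):

```python
# Each calibration example should be a full context window of 1024 tokens.
for ex in examples_ids[:3]:
    assert ex['input_ids'].shape == (1, tokenizer.model_max_length)
    assert ex['attention_mask'].shape == ex['input_ids'].shape
print(f"{len(examples_ids)} samples of {tokenizer.model_max_length} tokens each")
```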
```python
%%time

# Quantize with GPTQ
model.quantize(
    examples_ids,
    batch_size=1,
    use_triton=True,
)

# Save model and tokenizer
model.save_quantized(out_dir, use_safetensors=True)
tokenizer.save_pretrained(out_dir)
```

Output:

```
CPU times: user 4min 35s, sys: 3.49 s, total: 4min 39s
Wall time: 5min 8s
('gpt2-GPTQ/tokenizer_config.json',
 'gpt2-GPTQ/special_tokens_map.json',
 'gpt2-GPTQ/vocab.json',
 'gpt2-GPTQ/merges.txt',
 'gpt2-GPTQ/added_tokens.json',
 'gpt2-GPTQ/tokenizer.json')
```
```python
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Reload model and tokenizer
model = AutoGPTQForCausalLM.from_quantized(
    out_dir,
    device=device,
    use_triton=True,
    use_safetensors=True,
)
tokenizer = AutoTokenizer.from_pretrained(out_dir)
```

Output (stderr):

```
WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.
WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.
WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.
```
```python
from transformers import pipeline

generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
result = generator("I have a dream", do_sample=True, max_length=50)[0]['generated_text']
print(result)
```

Output (stderr, abridged):

```
The model 'GPT2GPTQForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', …, 'XLNetLMHeadModel', 'XmodForCausalLM'].
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
```

Output (stdout):

```
I have a dream," she told CNN last week. "I have this dream of helping my mother find her own. But, to tell that for the first time, now that I'm seeing my mother now, just knowing how wonderful it is that
```
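The "not supported" warning is harmless here: the pipeline only checks the wrapper's class name, and generation still runs through the underlying GPT-2 head. To gauge how much quality the 4-bit weights cost, one could compare perplexity against the original model, as the companion article does. A minimal probe (a sketch, assuming the auto-gptq wrapper forwards `labels` to the wrapped model):

```python
# Illustrative perplexity check on a short prompt (not in the original notebook).
text = "I have a dream"
inputs = tokenizer(text, return_tensors='pt').to(device)
with torch.no_grad():
    out = model(**inputs, labels=inputs['input_ids'])
print(f"Perplexity: {torch.exp(out.loss).item():.2f}")
```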
Deleted: Fine_tune_a_Mistral_7b_model_with_DPO.ipynb (757 lines)
(Colab notebook: Python 3 kernel, A100 GPU, high-memory runtime; Jupyter widget state omitted.)

<a href="https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
# Fine-tune a Mistral-7b model with DPO

> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).
```python
!pip install -q datasets trl peft bitsandbytes sentencepiece wandb
```
```python
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
from google.colab import userdata
import wandb

# Defined in the secrets tab in Google Colab
hf_token = userdata.get('huggingface')
wb_token = userdata.get('wandb')
wandb.login(key=wb_token)

model_name = "teknium/OpenHermes-2.5-Mistral-7B"
new_model = "NeuralHermes-2.5-Mistral-7B"
```

Output (stderr):

```
/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.
  warnings.warn(
wandb: Currently logged in as: mlabonne. Use `wandb login --relogin` to force relogin
wandb: WARNING If you're specifying your api key in code, ensure this code is not shared publicly.
wandb: WARNING Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
```
## Format dataset
```python
def chatml_format(example):
    # Format system
    if len(example['system']) > 0:
        message = {"role": "system", "content": example['system']}
        system = tokenizer.apply_chat_template([message], tokenize=False)
    else:
        system = ""

    # Format instruction
    message = {"role": "user", "content": example['question']}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # Format chosen answer
    chosen = example['chosen'] + "<|im_end|>\n"

    # Format rejected answer
    rejected = example['rejected'] + "<|im_end|>\n"

    return {
        "prompt": system + prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

# Load dataset
dataset = load_dataset("Intel/orca_dpo_pairs")['train']

# Save columns
original_columns = dataset.column_names

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Format dataset
dataset = dataset.map(
    chatml_format,
    remove_columns=original_columns
)

# Print sample
dataset[1]
```

Output (stderr):

```
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
```

Output:

```
{'prompt': '<|im_start|>system\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|im_end|>\n<|im_start|>user\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|im_end|>\n<|im_start|>assistant\n',
 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\n',
 'rejected': ' Sure! Here\'s a sentence that describes all the data you provided:\n\n"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes."<|im_end|>\n'}
```
## Train model with DPO
```python
# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# Model to fine-tune
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True
)
model.config.use_cache = False

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=200,
    save_strategy="no",
    logging_steps=1,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=100,
    bf16=True,
    report_to="wandb",
)

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536,
)

# Fine-tune model with DPO
dpo_trainer.train()
```
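For intuition, the objective `DPOTrainer` optimizes with `beta=0.1` is the DPO loss, which pushes the policy to prefer chosen over rejected completions relative to a frozen reference model. A minimal sketch on dummy per-sequence log-probabilities (illustrative only; the trainer computes these quantities internally):

```python
import torch
import torch.nn.functional as F

beta = 0.1
# Dummy summed log-probs of chosen/rejected answers under policy and reference.
policy_chosen, policy_rejected = torch.tensor([-120.0]), torch.tensor([-150.0])
ref_chosen, ref_rejected = torch.tensor([-130.0]), torch.tensor([-135.0])

# DPO loss: -log sigmoid(beta * (policy log-ratio minus reference log-ratio)).
logits = beta * ((policy_chosen - ref_chosen) - (policy_rejected - ref_rejected))
loss = -F.logsigmoid(logits).mean()
print(loss)  # tensor(0.0789) — small, since chosen is already strongly preferred
```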
## Upload model
```python
# Save artifacts
dpo_trainer.model.save_pretrained("final_checkpoint")
tokenizer.save_pretrained("final_checkpoint")

# Flush memory
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload model in FP16 (instead of NF4)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Merge base model with the adapter
model = PeftModel.from_pretrained(base_model, "final_checkpoint")
model = model.merge_and_unload()

# Save model and tokenizer
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Push them to the HF Hub
model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)
tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)
```
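Once pushed, the merged model can be reloaded from the Hub like any other checkpoint (a sketch; `your-username` is a placeholder for whichever Hub namespace `push_to_hub` uploaded to):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hypothetical repo id: replace with your own Hub namespace.
repo_id = "your-username/NeuralHermes-2.5-Mistral-7B"
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
```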
## Inference
```python
# Format prompt
message = [
    {"role": "system", "content": "You are a helpful assistant chatbot."},
    {"role": "user", "content": "What is a Large Language Model?"}
]
tokenizer = AutoTokenizer.from_pretrained(new_model)
prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)

# Create pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=new_model,
    tokenizer=tokenizer
)

# Generate text
sequences = pipeline(
    prompt,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    num_return_sequences=1,
    max_length=200,
)
print(sequences[0]['generated_text'])
```

Output (stderr):

```
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100% 3/3 [00:11<00:00, 2.89s/it]
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )
  warnings.warn(
Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
```

Output (stdout):

```
<|im_start|>system
You are a helpful assistant chatbot.<|im_end|>
<|im_start|>user
What is a Large Language Model?<|im_end|>
<|im_start|>assistant
A large language model is a type of artificial intelligence (AI) system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks (RNNs) or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures.
```
(Diffs for the remaining files in this commit, including Mergekit.ipynb, are suppressed because they are too large.)